Yes, I have successfully converted my annotation file from .xml to MS COCO .json, but when I try using a single image in the pipeline it gives me black and white patches in the batch. How can I load the .scn image into the fastai pipeline properly?
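For context, .scn is a Leica whole-slide format that ordinary image loaders (PIL, OpenCV) cannot decode directly, which may be where the black and white patches come from. A minimal sketch, assuming openslide-python is installed and using a hypothetical file path, of pulling an RGB region out of the slide so that standard loaders can consume it:

import openslide

slide = openslide.OpenSlide("my_slide.scn")  # hypothetical path
print(slide.dimensions, slide.level_count)

# read_region returns an RGBA PIL image; convert to RGB before
# handing it to fastai's default image loaders
region = slide.read_region(location=(0, 0), level=0, size=(1024, 1024))
region.convert("RGB").save("patch.png")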
This is the output I get after loading the image to check whether it loads properly:

I have cropped the image, but the bottom part of the image contains the slide while the boxes are being drawn in the topmost region.
I might not have done the conversion properly; I used my own code to convert the .xml annotations to .json format (one likely culprit is flagged right after the to_coco method below).
Also, @VDM, the above tool uses QuPath in between. Does anyone know how to properly convert a .xml Aperio annotation to a COCO .json file?
This is the conversion code that I used:
def to_coco(self) -> dict:
    labels = sorted(self._labels())
    label_to_id = {l: i for i, l in enumerate(labels)}
    imageid_to_id = {n: i for i, n in enumerate(self.image_ids)}
    annotations = []
    for annotation in tqdm(self, desc="Saving"):
        for idx, box in enumerate(annotation.boxes):
            box_annotation = {
                # "iscrowd": 0, "ignore": 0,
                "image_id": imageid_to_id[annotation.image_id],
                "bbox": box.ltrb,
                "category_id": label_to_id[box.label],
                # NOTE: idx restarts at 0 for every image, so these ids
                # are not globally unique as COCO expects
                "id": idx}
            if box.is_detection:
                box_annotation["score"] = box.confidence
            annotations.append(box_annotation)
    images = [{
        "id": imageid_to_id[a.image_id],
        "file_name": a.image_id,
        "width": a.image_width,
        "height": a.image_height} for a in self]
    # categories = [{"supercategory": "none", "id": label_to_id[l], "name": l} for l in labels]
    categories = [{"id": label_to_id[l], "name": l} for l in labels]
    return {"images": images, "annotations": annotations, "categories": categories}
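One thing worth double-checking in to_coco above: the COCO format stores "bbox" as [x, y, width, height], while the name ltrb suggests [left, top, right, bottom] corners. If that is what the attribute returns (an assumption; only the name is visible here), a conversion along these lines would be needed, which could also contribute to boxes rendering in the wrong place:

def ltrb_to_xywh(ltrb):
    # COCO bboxes are [x, y, width, height], not corner pairs
    left, top, right, bottom = ltrb
    return [left, top, right - left, bottom - top]

# inside to_coco: "bbox": ltrb_to_xywh(box.ltrb),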
def save_coco(self, path: Path):
    if path.suffix == "":
        path = path.with_suffix(".json")
    assert path.suffix == ".json"
    content = json.dumps(self.to_coco(), allow_nan=False)
    path.write_text(content)
def from_aperio(
        file_path: Path,
        image_size: "tuple[int, int]",
        image_extension: str = ".scn",
) -> "Annotation":
    with file_path.open() as f:
        root = et.parse(f).getroot()
    image_id = file_path.with_suffix(image_extension).name
    annotation = root.find("Annotation")
    regions = annotation.findall("Regions/Region")
    boxes = [BoundingBox.from_aperio(r) for r in regions]
    return Annotation(image_id, image_size, boxes)
def from_aperio(node: et.Element) -> "BoundingBox":
    label = node.attrib["Text"]
    typean = node.attrib["Type"]
    if typean == "2":  # region with two corner vertices
        p1, p2 = node.findall("Vertices/Vertex")
        xs = [int(p.attrib["X"]) for p in (p1, p2)]
        ys = [int(p.attrib["Y"]) for p in (p1, p2)]
    elif typean == "1":  # region with four vertices
        p1, p2, p3, p4 = node.findall("Vertices/Vertex")
        xs = [int(p.attrib["X"]) for p in (p1, p2, p3, p4)]
        ys = [int(p.attrib["Y"]) for p in (p1, p2, p3, p4)]
    xmin, ymin = min(xs), min(ys)
    xmax, ymax = max(xs), max(ys)
    print(xmin, ymin, xmax, ymax)
    return BoundingBox(label, xmin, ymin, xmax, ymax)
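Note that both branches unpack an exact number of vertices, so an Aperio region with any other vertex count (a free-form polygon, for instance) would raise a ValueError. A more tolerant variant (a sketch, with a hypothetical helper name) just takes the bounding box of however many vertices are present:

def bbox_from_vertices(node: et.Element) -> "tuple[int, int, int, int]":
    # Works for any number of Vertex elements, not just 2 or 4
    xs = [int(v.attrib["X"]) for v in node.findall("Vertices/Vertex")]
    ys = [int(v.attrib["Y"]) for v in node.findall("Vertices/Vertex")]
    return min(xs), min(ys), max(xs), max(ys)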
Can anyone tell me how I can change the above code so that the bbox annotations are contained within the boundaries of the slide, not in the whitespace padded onto the image?
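One approach that might work: the whitespace around a .scn scan is usually the unscanned part of the slide plane, and OpenSlide exposes the scanned area through the openslide.bounds-x/-y/-width/-height properties. A sketch (assuming the XML coordinates are in the full-slide frame, which I have not verified) that shifts each box into the scanned region and clamps it to that region's extent:

import openslide

slide = openslide.OpenSlide("my_slide.scn")  # hypothetical path
props = slide.properties
bx = int(props.get("openslide.bounds-x", 0))
by = int(props.get("openslide.bounds-y", 0))
bw = int(props.get("openslide.bounds-width", slide.dimensions[0]))
bh = int(props.get("openslide.bounds-height", slide.dimensions[1]))

def clip_box(xmin, ymin, xmax, ymax):
    # Shift from full-slide coordinates into the scanned region,
    # then clamp so no box spills into the whitespace
    xmin, xmax = xmin - bx, xmax - bx
    ymin, ymax = ymin - by, ymax - by
    xmin, xmax = max(0, min(xmin, bw)), max(0, min(xmax, bw))
    ymin, ymax = max(0, min(ymin, bh)), max(0, min(ymax, bh))
    return xmin, ymin, xmax, ymax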