# 1. Detection: COCO bounding boxes -> YOLO detection labels
import json
from pathlib import Path

from tqdm import tqdm
def convert_coco_to_yolo(json_path, labels_dir):
    """Convert COCO bounding-box annotations into YOLO detection label files.

    Reads the COCO JSON at ``json_path`` and writes one ``<image stem>.txt``
    file per annotated image into ``labels_dir``. Each line has the form
    ``class cx cy w h`` where the box centre and size are divided by the
    image width/height recorded in the JSON.

    Args:
        json_path: Path (or str) of the COCO annotation JSON file.
        labels_dir: Directory (Path or str) that receives the label files;
            created if it does not exist.

    Raises:
        KeyError: If an annotation references an image id that is not listed
            under ``images``, or a required key is missing.

    Notes:
        * Label files are rewritten on every call, so running the conversion
          twice does not duplicate label lines.
        * Class ids are ``category_id - 1``; this assumes category ids in the
          JSON are 1-based and contiguous.
        * Images without any annotation get no label file.
    """
    json_path = Path(json_path)
    labels_dir = Path(labels_dir)

    with open(json_path, encoding="utf-8") as f:
        coco = json.load(f)

    # Map image_id -> file_name and image_id -> (width, height)
    id2file = {img['id']: img['file_name'] for img in coco['images']}
    id2size = {img['id']: (img['width'], img['height']) for img in coco['images']}

    labels_dir.mkdir(parents=True, exist_ok=True)

    # Collect all lines per label file first so each file is opened once and
    # overwritten, instead of being re-opened in append mode per annotation.
    lines_by_label = {}
    for ann in tqdm(coco['annotations'], desc=f"Converting {json_path.name}"):
        image_id = ann['image_id']
        cat_id = ann['category_id'] - 1  # zero-indexed class IDs
        x, y, w, h = ann['bbox']  # COCO box: top-left corner + size, in pixels
        img_w, img_h = id2size[image_id]

        # Convert to YOLO format: normalised centre and size
        cx = (x + w / 2) / img_w
        cy = (y + h / 2) / img_h
        nw = w / img_w
        nh = h / img_h

        # Keyed by label file name: images sharing a stem share a label file.
        label_name = f"{Path(id2file[image_id]).stem}.txt"
        lines_by_label.setdefault(label_name, []).append(
            f"{cat_id} {cx:.6f} {cy:.6f} {nw:.6f} {nh:.6f}\n"
        )

    for label_name, lines in lines_by_label.items():
        with open(labels_dir / label_name, "w", encoding="utf-8") as f:
            f.writelines(lines)
# Convert every split that has detection annotations (train and val)
root = Path("/home/ngx/Documents/projects/pocr/input/new_coco_dataset")
for split_name in ("train", "val"):
    convert_coco_to_yolo(
        root / "annotations" / f"annotations_{split_name}.json",
        root / "labels" / split_name,
    )
# 2. Segmentation: COCO polygons -> YOLO segmentation labels
from shutil import copy2
# === CONFIG ===
# Dataset that is read (COCO layout) and dataset that is produced (YOLO-seg layout)
source_root = Path("../input/new_coco_dataset")
target_root = Path("../input/new_coco_seg")
splits = ["train", "val", "test"]

# Dataset description; the 'test' entry is optional
yaml_dict = dict(
    train=source_root / 'images/train',
    val=source_root / 'images/val',
    test=source_root / 'images/test',
    nc=1,
    names=['text'],
)
# === Make folders ===
# Every split gets an images/ and a labels/ directory under target_root
for split in splits:
    for kind in ("images", "labels"):
        (target_root / kind / split).mkdir(parents=True, exist_ok=True)
# === Function to convert COCO to YOLO-seg ===
def coco_to_yolo_seg(coco_json_path: Path, image_dir: Path, label_dir: Path):
    """Convert one COCO split into YOLO segmentation labels and copy its images.

    For every image listed in the COCO JSON the image file is copied from
    ``image_dir`` to the mirrored location under the module-level
    ``target_root``, and a ``<image stem>.txt`` label file is written to
    ``label_dir``. Each label line is ``class x1 y1 x2 y2 ...`` with polygon
    coordinates divided by the image width/height from the JSON. Images
    without usable annotations get an empty label file.

    Args:
        coco_json_path: COCO annotation JSON for the split.
        image_dir: Directory holding the split's images; must be located
            under the module-level ``source_root``.
        label_dir: Directory that receives the label files; created if it
            does not exist.

    Raises:
        ValueError: If ``image_dir`` is not located under ``source_root``.
        FileNotFoundError: If an image listed in the JSON is missing.

    Notes:
        * Annotations flagged ``iscrowd`` are skipped.
        * Segmentations that are not polygon lists (e.g. RLE dicts) are
          skipped instead of crashing the conversion.
        * Polygons with fewer than 3 points are skipped; a trailing unpaired
          coordinate in an odd-length polygon is ignored.
        * Class ids are ``category_id - 1``; this assumes 1-based category ids.
    """
    with open(coco_json_path, 'r', encoding="utf-8") as f:
        coco = json.load(f)

    # Group the non-crowd annotations by the image they belong to
    ann_map = {}
    for ann in coco["annotations"]:
        if ann.get("iscrowd", 0):
            continue
        ann_map.setdefault(ann["image_id"], []).append(ann)

    label_dir.mkdir(parents=True, exist_ok=True)

    for img in tqdm(coco["images"], desc=f"Converting {coco_json_path.name}"):
        file_name = img["file_name"]
        width, height = img["width"], img["height"]

        # Copy the image to the same relative location under target_root
        out_img_path = target_root / image_dir.relative_to(source_root) / file_name
        out_img_path.parent.mkdir(parents=True, exist_ok=True)
        copy2(image_dir / file_name, out_img_path)

        lines = []
        for ann in ann_map.get(img["id"], []):
            cls_id = ann["category_id"] - 1  # COCO classes start from 1
            segmentation = ann.get("segmentation")
            if not isinstance(segmentation, list):
                # RLE masks are dicts and cannot be written as polygons
                continue
            for seg in segmentation:
                if len(seg) < 6:  # fewer than 3 points is not a polygon
                    continue
                # zip() pairs x/y values and drops an unpaired trailing value
                pts = [
                    f"{px / width:.6f} {py / height:.6f}"
                    for px, py in zip(seg[0::2], seg[1::2])
                ]
                lines.append(f"{cls_id} " + " ".join(pts) + "\n")

        label_path = label_dir / f"{Path(file_name).stem}.txt"
        with open(label_path, "w", encoding="utf-8") as f:
            f.writelines(lines)
# === Run conversion for all splits ===
for split in splits:
    coco_json = source_root / "annotations" / f"annotations_{split}.json"
    img_dir = source_root / "images" / split
    lbl_dir = target_root / "labels" / split
    # A split is converted only when both its annotation file and its
    # image folder are present; otherwise it is silently skipped.
    if not (coco_json.exists() and img_dir.exists()):
        continue
    coco_to_yolo_seg(coco_json, img_dir, lbl_dir)
# === Write YAML ===
# Split paths in data.yaml are relative to the absolute `path:` entry.
yaml_path = target_root / "data.yaml"
yaml_lines = [
    f"path: {target_root.resolve()}",
    "train: images/train",
    "val: images/val",
    "test: images/test",
    f"nc: {yaml_dict['nc']}",
    f"names: {yaml_dict['names']}",
]
with open(yaml_path, "w") as f:
    f.write("\n".join(yaml_lines) + "\n")

print("✅ COCO ➜ YOLOv8 segmentation format conversion complete.")
print(f"📄 data.yaml saved to: {yaml_path}")