Spaces:
Sleeping
Sleeping
| """ | |
prepare_data.py

Converts CarDD annotations from COCO format to YOLO segmentation format.

Usage:
    python prepare_data.py \
        --cardd_root data/CarDD_release/CarDD_COCO \
        --output_dir data/cardd_yolo

YOLO segmentation format:
    Each line: class_id x1 y1 x2 y2 ... xn yn
    All coordinates are polygon points normalized to the [0, 1] range.
| """ | |
| import argparse | |
| import json | |
| import shutil | |
| from collections import Counter | |
| from pathlib import Path | |
| from PIL import Image | |
| from tqdm import tqdm | |
# Official CarDD class order. COCO category ids may start at 1; the position
# of a name in this list is used as its 0-based YOLO class index.
CARDD_CLASSES = ["dent", "scratch", "crack", "glass_shatter", "lamp_broken", "tire_flat"]
def _polygon_area(coords):
    """Return the absolute shoelace area of a flat ``[x1, y1, x2, y2, ...]`` ring."""
    xs = coords[0::2]
    ys = coords[1::2]
    # Index i - 1 wraps to the last vertex for i == 0, closing the ring.
    twice_area = sum(
        xs[i - 1] * ys[i] - xs[i] * ys[i - 1] for i in range(len(xs))
    )
    return abs(twice_area) / 2.0


def coco_polygon_to_yolo(polygon, img_w, img_h):
    """Convert a COCO polygon segmentation to normalized YOLO coordinates.

    Args:
        polygon: Either a list of flat rings ``[[x1, y1, x2, y2, ...], ...]``
            or a single flat ring ``[x1, y1, x2, y2, ...]`` in pixel units.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        A flat list ``[x1/w, y1/h, x2/w, y2/h, ...]`` with every value clipped
        to ``[0, 1]``, or ``None`` when no usable polygon exists: empty input,
        a non-list segmentation such as an RLE dict, fewer than 3 points, or
        missing/non-positive image dimensions.

    When several rings are present, only the one enclosing the largest area
    is kept. A dangling trailing coordinate (odd-length ring) is ignored.
    """
    # RLE masks arrive as dicts; only polygon lists can be converted here.
    if not polygon or not isinstance(polygon, (list, tuple)):
        return None
    # Guard against division by zero / None dimensions.
    if not img_w or not img_h or img_w <= 0 or img_h <= 0:
        return None

    rings = polygon if isinstance(polygon[0], (list, tuple)) else [polygon]

    candidates = []
    for ring in rings:
        if not isinstance(ring, (list, tuple)):
            continue
        usable = len(ring) - len(ring) % 2  # drop an unpaired trailing value
        # A polygon needs at least 3 points = 6 coordinates.
        if usable >= 6:
            candidates.append(ring[:usable])
    if not candidates:
        return None

    # "Largest" means largest enclosed area, not the highest vertex count.
    poly = max(candidates, key=_polygon_area)

    normalized = []
    for px, py in zip(poly[0::2], poly[1::2]):
        # Clip to the image bounds after normalizing.
        normalized.append(max(0.0, min(1.0, px / img_w)))
        normalized.append(max(0.0, min(1.0, py / img_h)))
    return normalized
def convert_split(split_name, coco_json, img_src_dir, img_dst_dir, lbl_dst_dir,
                  category_id_to_idx):
    """Convert one split (train/val/test) from COCO to YOLO segmentation.

    Each image listed in the COCO JSON that exists under ``img_src_dir`` is
    copied to ``img_dst_dir``, and a label file with one line per usable
    polygon annotation is written under ``lbl_dst_dir``.

    Args:
        split_name: Split label used for the progress bar and log lines.
        coco_json: Path to the COCO ``instances_*.json`` file.
        img_src_dir: ``Path`` of the directory holding the source images.
        img_dst_dir: ``Path`` of the directory that receives the image copies.
        lbl_dst_dir: ``Path`` of the directory that receives the ``.txt`` labels.
        category_id_to_idx: Mapping of COCO category id to 0-based YOLO index.

    Returns:
        A ``Counter`` mapping class name to the number of labels written.
    """
    img_dst_dir.mkdir(parents=True, exist_ok=True)
    lbl_dst_dir.mkdir(parents=True, exist_ok=True)

    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(coco_json, "r", encoding="utf-8") as f:
        coco = json.load(f)

    # Image id -> image record.
    images = {img["id"]: img for img in coco["images"]}

    # Image id -> list of its annotations (tolerate a split with none).
    img_anns = {}
    for ann in coco.get("annotations", []):
        img_anns.setdefault(ann["image_id"], []).append(ann)

    class_counter = Counter()
    skipped = 0
    processed = 0

    for img_id, img_info in tqdm(images.items(), desc=f"{split_name}"):
        fname = img_info["file_name"]
        src_path = img_src_dir / fname
        if not src_path.exists():
            # Listed in the JSON but missing on disk: count it and move on.
            skipped += 1
            continue

        # file_name may contain sub-directories; make sure the target exists.
        dst_img_path = img_dst_dir / fname
        dst_img_path.parent.mkdir(parents=True, exist_ok=True)
        if not dst_img_path.exists():
            shutil.copy2(src_path, dst_img_path)

        # Prefer the size recorded in the JSON; fall back to reading the file.
        img_w = img_info.get("width")
        img_h = img_info.get("height")
        if not img_w or not img_h:
            with Image.open(src_path) as im:
                img_w, img_h = im.size

        # Mirror the image's relative path so the label sits at the same
        # relative location under the labels root.
        lbl_path = lbl_dst_dir / Path(fname).with_suffix(".txt")
        lbl_path.parent.mkdir(parents=True, exist_ok=True)

        lines = []
        for ann in img_anns.get(img_id, []):
            yolo_idx = category_id_to_idx.get(ann["category_id"])
            if yolo_idx is None:
                continue

            polygon = ann.get("segmentation")
            # Skip missing/empty segmentations and non-polygon (e.g. RLE dict)
            # masks, which cannot be written as a YOLO polygon line.
            if not polygon or not isinstance(polygon, list):
                continue

            norm = coco_polygon_to_yolo(polygon, img_w, img_h)
            if norm is None:
                continue

            coords_str = " ".join(f"{c:.6f}" for c in norm)
            lines.append(f"{yolo_idx} {coords_str}")
            class_counter[CARDD_CLASSES[yolo_idx]] += 1

        # Write the label file even when empty, so the image counts as background.
        with open(lbl_path, "w", encoding="utf-8") as f:
            f.write("\n".join(lines))
        processed += 1

    print(f"\n[{split_name}] Islenen: {processed}, Atlanan: {skipped}")
    print(f"[{split_name}] Sinif dagilimi: {dict(class_counter)}")
    return class_counter
def main(argv=None):
    """Command-line entry point: convert every available CarDD split.

    Args:
        argv: Optional list of command-line arguments. When ``None`` (the
            default) ``argparse`` reads ``sys.argv[1:]``, so calling
            ``main()`` behaves exactly as before.

    Reads the category list from ``annotations/instances_train2017.json``,
    maps each known category name to its index in ``CARDD_CLASSES``, converts
    the train/val/test splits that are present, and writes ``cardd.yaml`` to
    the current working directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--cardd_root", type=str, required=True,
                        help="CarDD_COCO klasoru (annotations/ ve train2017/ icerir)")
    parser.add_argument("--output_dir", type=str, required=True,
                        help="YOLO formatli ciktinin yazilacagi klasor")
    args = parser.parse_args(argv)

    cardd_root = Path(args.cardd_root)
    output_dir = Path(args.output_dir)

    # Learn the category ids from the train annotations first.
    train_json = cardd_root / "annotations" / "instances_train2017.json"
    with open(train_json, encoding="utf-8") as f:
        train_coco = json.load(f)

    categories = sorted(train_coco["categories"], key=lambda c: c["id"])
    print("CarDD kategorileri:")
    for c in categories:
        print(f" id={c['id']} name={c['name']}")

    # COCO category id -> 0-based YOLO index, following CARDD_CLASSES order.
    class_to_idx = {name: idx for idx, name in enumerate(CARDD_CLASSES)}
    category_id_to_idx = {}
    for c in categories:
        name_normalized = c["name"].lower().replace(" ", "_")
        if name_normalized in class_to_idx:
            category_id_to_idx[c["id"]] = class_to_idx[name_normalized]
        else:
            print(f"UYARI: bilinmeyen kategori: {c['name']}")
    print(f"\nCategory id -> YOLO index esleme: {category_id_to_idx}\n")

    # (split name, annotation file, image sub-directory)
    splits = [
        ("train", "instances_train2017.json", "train2017"),
        ("val", "instances_val2017.json", "val2017"),
        ("test", "instances_test2017.json", "test2017"),
    ]

    total_counter = Counter()
    for split_name, ann_file, img_subdir in splits:
        ann_path = cardd_root / "annotations" / ann_file
        img_src = cardd_root / img_subdir
        img_dst = output_dir / "images" / split_name
        lbl_dst = output_dir / "labels" / split_name

        # A missing split is reported and skipped rather than treated as fatal.
        if not ann_path.exists():
            print(f"UYARI: {ann_path} bulunamadi, atlandi.")
            continue
        if not img_src.exists():
            print(f"UYARI: {img_src} bulunamadi, atlandi.")
            continue

        counter = convert_split(split_name, ann_path, img_src, img_dst, lbl_dst,
                                category_id_to_idx)
        total_counter.update(counter)

    # Write (or overwrite) the dataset config in the current working directory.
    yaml_path = Path("cardd.yaml")
    yaml_lines = [
        "# YOLO segmentation veri konfigi - CarDD",
        "# Otomatik olarak prepare_data.py tarafindan uretildi",
        f"path: {output_dir.resolve()}",
        "train: images/train",
        "val: images/val",
        "test: images/test",
        "# Sinif sayisi",
        f"nc: {len(CARDD_CLASSES)}",
        "# Sinif isimleri (0-tabanli sira)",
        "names:",
    ]
    yaml_lines.extend(f" {idx}: {name}" for idx, name in enumerate(CARDD_CLASSES))
    with open(yaml_path, "w", encoding="utf-8") as f:
        f.write("\n".join(yaml_lines) + "\n")

    print("\n=== Donusum tamamlandi ===")
    print(f"Cikti: {output_dir.resolve()}")
    print(f"Veri konfigi: {yaml_path.resolve()}")
    print(f"Toplam etiket: {dict(total_counter)}")
    print(f"\nSonraki adim: python train.py --data {yaml_path} --model yolo26n-seg --epochs 50")
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()