File size: 7,091 Bytes
e327f0d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
"""
prepare_data.py
Converts CarDD annotations from COCO format to YOLO segmentation format.

Usage:
    python prepare_data.py \
        --cardd_root data/CarDD_release/CarDD_COCO \
        --output_dir data/cardd_yolo

YOLO segmentation format:
    Each line: class_id x1 y1 x2 y2 ... xn yn
    All coordinates are polygon points normalized to the [0, 1] range.
"""
import argparse
import json
import shutil
from collections import Counter
from pathlib import Path

from PIL import Image
from tqdm import tqdm


# Canonical CarDD class order. COCO category ids may start at 1; the position
# of a name in this list is the 0-based class index used in the YOLO output.
CARDD_CLASSES = [
    "dent",
    "scratch",
    "crack",
    "glass_shatter",
    "lamp_broken",
    "tire_flat",
]


def coco_polygon_to_yolo(polygon, img_w, img_h):
    """Convert a COCO polygon segmentation to normalized YOLO coordinates.

    COCO stores a segmentation as a list of flat polygons
    ``[[x1, y1, x2, y2, ...], ...]``; a single flat list
    ``[x1, y1, x2, y2, ...]`` is accepted as well. The YOLO form is one flat
    sequence ``x1/w y1/h x2/w y2/h ...`` with every value in [0, 1].

    Args:
        polygon: Nested or flat sequence of pixel coordinates.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        A flat list of floats clipped to [0, 1], or ``None`` when the input
        cannot be converted: empty input, a segmentation that is not a
        list/tuple (e.g. an RLE dict), a missing or zero image size, or
        fewer than three complete (x, y) points.
    """
    # RLE masks arrive as dicts; they carry no polygon to convert.
    if not polygon or not isinstance(polygon, (list, tuple)):
        return None
    # A zero/absent dimension would make normalization impossible.
    if not img_w or not img_h:
        return None

    # Only one polygon is emitted per annotation. When several parts exist,
    # keep the one with the most coordinates (used as a proxy for "largest").
    if isinstance(polygon[0], (list, tuple)):
        poly = max(polygon, key=len)
    else:
        poly = polygon

    # Drop a dangling coordinate so every x has a matching y, then require
    # at least 3 points (= 6 values) for a valid polygon.
    usable = len(poly) - len(poly) % 2
    if usable < 6:
        return None

    normalized = []
    for x_px, y_px in zip(poly[0:usable:2], poly[1:usable:2]):
        # Clip to the image bounds; annotations may slightly overshoot.
        x = max(0.0, min(1.0, x_px / img_w))
        y = max(0.0, min(1.0, y_px / img_h))
        normalized.extend([x, y])
    return normalized


def convert_split(split_name, coco_json, img_src_dir, img_dst_dir, lbl_dst_dir,
                  category_id_to_idx):
    """Convert one split (train/val/test) from COCO to YOLO segmentation.

    Copies every image listed in the COCO file from ``img_src_dir`` to
    ``img_dst_dir`` and writes one ``.txt`` label file per image into
    ``lbl_dst_dir``. Images missing on disk are counted as skipped.

    Args:
        split_name: Label used for the progress bar and the summary output.
        coco_json: Path of the COCO annotation JSON file.
        img_src_dir: ``Path`` of the directory holding the source images.
        img_dst_dir: ``Path`` of the directory receiving the image copies.
        lbl_dst_dir: ``Path`` of the directory receiving the label files.
        category_id_to_idx: Mapping of COCO category id to 0-based class
            index; annotations with an unmapped category are ignored.

    Returns:
        A ``Counter`` of written annotations keyed by class name.
    """
    img_dst_dir.mkdir(parents=True, exist_ok=True)
    lbl_dst_dir.mkdir(parents=True, exist_ok=True)

    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(coco_json, "r", encoding="utf-8") as f:
        coco = json.load(f)

    # image id -> image record
    images = {img["id"]: img for img in coco["images"]}

    # image id -> list of its annotations
    img_anns = {}
    for ann in coco["annotations"]:
        img_anns.setdefault(ann["image_id"], []).append(ann)

    class_counter = Counter()
    skipped = 0
    processed = 0

    for img_id, img_info in tqdm(images.items(), desc=f"{split_name}"):
        fname = img_info["file_name"]
        src_path = img_src_dir / fname
        if not src_path.exists():
            # The image listed in the JSON is not present in this folder.
            skipped += 1
            continue

        # Copy the image unless a previous run already did. file_name may
        # contain a sub-directory, so make sure its parent exists first.
        dst_img_path = img_dst_dir / fname
        dst_img_path.parent.mkdir(parents=True, exist_ok=True)
        if not dst_img_path.exists():
            shutil.copy2(src_path, dst_img_path)

        # Prefer the size recorded in the JSON; read the file only when the
        # record lacks a usable width/height.
        img_w = img_info.get("width")
        img_h = img_info.get("height")
        if not img_w or not img_h:
            with Image.open(src_path) as im:
                img_w, img_h = im.size

        # The label file mirrors the image's relative path with a .txt
        # extension, so images and labels stay paired by location.
        rel_path = Path(fname)
        lbl_path = lbl_dst_dir / rel_path.parent / (rel_path.stem + ".txt")
        lbl_path.parent.mkdir(parents=True, exist_ok=True)

        lines = []
        for ann in img_anns.get(img_id, []):
            cat_id = ann["category_id"]
            if cat_id not in category_id_to_idx:
                continue
            yolo_idx = category_id_to_idx[cat_id]

            polygon = ann.get("segmentation")
            # Skip missing/empty segmentations and non-polygon encodings
            # (RLE masks are dicts and cannot be written as a polygon).
            if not polygon or not isinstance(polygon, list):
                continue
            norm = coco_polygon_to_yolo(polygon, img_w, img_h)
            if norm is None:
                continue

            coords_str = " ".join(f"{c:.6f}" for c in norm)
            lines.append(f"{yolo_idx} {coords_str}")
            class_counter[CARDD_CLASSES[yolo_idx]] += 1

        # Write the label file even when it is empty, so an image without
        # annotations still has a (blank) label file.
        with open(lbl_path, "w") as f:
            f.write("\n".join(lines))
        processed += 1

    print(f"\n[{split_name}] Islenen: {processed}, Atlanan: {skipped}")
    print(f"[{split_name}] Sinif dagilimi: {dict(class_counter)}")
    return class_counter


def main():
    """Command-line entry point: convert CarDD (COCO) to a YOLO dataset.

    Reads the category list from ``annotations/instances_train2017.json``
    under ``--cardd_root``, maps each known category id to its index in
    ``CARDD_CLASSES``, converts the train/val/test splits that are present
    into ``--output_dir``, and writes ``cardd.yaml`` into the current
    working directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--cardd_root", type=str, required=True,
                        help="CarDD_COCO klasoru (annotations/ ve train2017/ icerir)")
    parser.add_argument("--output_dir", type=str, required=True,
                        help="YOLO formatli ciktinin yazilacagi klasor")
    args = parser.parse_args()

    cardd_root = Path(args.cardd_root)
    output_dir = Path(args.output_dir)

    # Learn the category ids from the train annotations first.
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    train_ann_path = cardd_root / "annotations" / "instances_train2017.json"
    with open(train_ann_path, encoding="utf-8") as f:
        train_coco = json.load(f)
    categories = sorted(train_coco["categories"], key=lambda c: c["id"])
    print("CarDD kategorileri:")
    for c in categories:
        print(f"  id={c['id']}  name={c['name']}")

    # COCO category id -> 0-based YOLO index, following CARDD_CLASSES order.
    # Names are compared lower-cased with spaces replaced by underscores.
    category_id_to_idx = {}
    for c in categories:
        name_normalized = c["name"].lower().replace(" ", "_")
        if name_normalized in CARDD_CLASSES:
            category_id_to_idx[c["id"]] = CARDD_CLASSES.index(name_normalized)
        else:
            print(f"UYARI: bilinmeyen kategori: {c['name']}")

    print(f"\nCategory id -> YOLO index esleme: {category_id_to_idx}\n")

    # (split name, annotation file, image sub-directory)
    splits = [
        ("train", "instances_train2017.json", "train2017"),
        ("val", "instances_val2017.json", "val2017"),
        ("test", "instances_test2017.json", "test2017"),
    ]

    total_counter = Counter()
    for split_name, ann_file, img_subdir in splits:
        ann_path = cardd_root / "annotations" / ann_file
        img_src = cardd_root / img_subdir
        img_dst = output_dir / "images" / split_name
        lbl_dst = output_dir / "labels" / split_name

        # A split whose annotations or images are absent is skipped with a
        # warning rather than aborting the whole run.
        if not ann_path.exists():
            print(f"UYARI: {ann_path} bulunamadi, atlandi.")
            continue
        if not img_src.exists():
            print(f"UYARI: {img_src} bulunamadi, atlandi.")
            continue

        counter = convert_split(split_name, ann_path, img_src, img_dst, lbl_dst,
                                category_id_to_idx)
        total_counter.update(counter)

    # Write (or overwrite) the dataset config in the working directory.
    yaml_path = Path("cardd.yaml")
    yaml_header = f"""# YOLO segmentation veri konfigi - CarDD
# Otomatik olarak prepare_data.py tarafindan uretildi
path: {output_dir.resolve()}
train: images/train
val: images/val
test: images/test

# Sinif sayisi
nc: {len(CARDD_CLASSES)}

# Sinif isimleri (0-tabanli sira)
names:
"""
    names_block = "".join(
        f"  {idx}: {name}\n" for idx, name in enumerate(CARDD_CLASSES)
    )
    yaml_content = yaml_header + names_block

    # Write as UTF-8 explicitly so a non-ASCII output path does not depend
    # on the platform's default encoding.
    with open(yaml_path, "w", encoding="utf-8") as f:
        f.write(yaml_content)

    print("\n=== Donusum tamamlandi ===")
    print(f"Cikti: {output_dir.resolve()}")
    print(f"Veri konfigi: {yaml_path.resolve()}")
    print(f"Toplam etiket: {dict(total_counter)}")
    print(f"\nSonraki adim: python train.py --data {yaml_path} --model yolo26n-seg --epochs 50")


# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()