| |
| |
| import os, shutil, glob |
| from pathlib import Path |
|
|
# Working locations, all resolved relative to the current user's home folder.
HOME = os.path.expanduser('~')
OUT = f'{HOME}/merged_v3'
BASE = f'{HOME}/clean_merged_data'

# Start from an empty output tree: remove whatever a previous run left behind,
# then recreate the images/ and labels/ folder of every split.
if os.path.exists(OUT):
    shutil.rmtree(OUT)
for split_name in ('train', 'valid', 'test'):
    for kind in ('images', 'labels'):
        os.makedirs(f'{OUT}/{split_name}/{kind}', exist_ok=True)

# Running totals filled in by copy_split():
#   stats[split][class_id] -> number of label rows written for that class
#   imgcount[split]        -> number of samples written
stats = {}
imgcount = {}
for split_name in ('train', 'valid', 'test'):
    stats[split_name] = dict.fromkeys((0, 1, 2), 0)
    imgcount[split_name] = 0
|
|
def copy_split(src_img_dir, src_lbl_dir, target_split, cmap, name_suffix):
    """Copy one labelled split into the merged dataset, remapping class ids.

    For every ``*.txt`` file in ``src_lbl_dir`` the image with the same stem
    is looked up in ``src_img_dir``. Label rows whose leading class id is a
    key of ``cmap`` are rewritten with the mapped id; every other row is
    dropped. A sample is skipped when it has no image, when no row survives
    the remap, or when its destination name is already taken. Kept samples
    are written to ``{OUT}/{target_split}/images`` and
    ``{OUT}/{target_split}/labels`` as ``{stem}{name_suffix}``, and the
    module-level ``stats`` and ``imgcount`` counters are updated.

    Args:
        src_img_dir: Directory containing the source images.
        src_lbl_dir: Directory containing the source label files; the first
            whitespace-separated token of each row is read as the class id.
        target_split: Destination split; used in the output paths and as the
            key into ``stats`` / ``imgcount``.
        cmap: Mapping from source class id to merged class id.
        name_suffix: Text appended to each file stem in the destination.

    Returns:
        Number of samples written by this call.

    Raises:
        ValueError: If a non-empty label row does not start with an integer.
    """
    image_exts = ('.jpg', '.jpeg', '.png', '.JPG', '.PNG')
    copied = 0
    # glob returns entries in arbitrary order; sorting keeps runs reproducible.
    for lbl_path in sorted(glob.glob(f'{src_lbl_dir}/*.txt')):
        stem = Path(lbl_path).stem
        candidates = (f'{src_img_dir}/{stem}{ext}' for ext in image_exts)
        img_path = next((p for p in candidates if os.path.exists(p)), None)
        if img_path is None:
            continue

        lines = []
        class_ids = []
        with open(lbl_path) as f:
            for line in f:
                parts = line.split()
                if not parts:
                    continue
                cid = int(parts[0])
                if cid not in cmap:
                    continue
                mapped = cmap[cid]
                class_ids.append(int(mapped))
                lines.append(' '.join([str(mapped)] + parts[1:]))
        if not lines:
            continue

        new_stem = f'{stem}{name_suffix}'
        ext = Path(img_path).suffix
        dst_img = f'{OUT}/{target_split}/images/{new_stem}{ext}'
        dst_lbl = f'{OUT}/{target_split}/labels/{new_stem}.txt'
        if os.path.exists(dst_img) or os.path.exists(dst_lbl):
            # Name collision with an earlier sample: keep the first pair
            # intact rather than pairing its image with a different label
            # file and counting the sample twice.
            print(f' skip (destination exists): {dst_lbl}')
            continue

        # Prefer a hard link (no extra disk space); fall back to a real copy
        # when linking is not possible, e.g. across file systems.
        try:
            os.link(img_path, dst_img)
        except OSError:
            shutil.copy(img_path, dst_img)
        with open(dst_lbl, 'w') as f:
            f.write('\n'.join(lines) + '\n')

        for class_id in class_ids:
            stats[target_split][class_id] += 1
        imgcount[target_split] += 1
        copied += 1
    return copied
|
|
| |
| |
# Bring over the existing base dataset: every split keeps its own name, class
# ids map to themselves and file names receive no suffix.
print('--- base v2 data ---')
identity_map = {0: 0, 1: 1, 2: 2}
for split in ('train', 'valid', 'test'):
    split_root = f'{BASE}/{split}'
    copied = copy_split(
        f'{split_root}/images',
        f'{split_root}/labels',
        split,
        identity_map,
        '',
    )
    print(f' base -> {split}: {copied}')
|
|
| |
# Additional datasets folded into the training split only.
# Each entry: (tag used in output file names, dataset root, class-id remap).
EXTRAS = [
    ('khadatkar', f'{HOME}/extra_khadatkar', {0: 1, 1: 0}),
    ('learning', f'{HOME}/extra_learning', {0: 1, 1: 0}),
]
print('--- extras -> train ---')
for tag, dataset_root, class_map in EXTRAS:
    for src_split in ('train', 'valid', 'test'):
        labels_dir = f'{dataset_root}/{src_split}/labels'
        # A dataset may not ship every split; only process those that exist.
        if os.path.isdir(labels_dir):
            images_dir = f'{dataset_root}/{src_split}/images'
            copied = copy_split(
                images_dir,
                labels_dir,
                'train',
                class_map,
                f'_{tag}_{src_split}',
            )
            print(f' {tag} {src_split} -> train: {copied}')
|
|
# Dataset descriptor written next to the split folders. The split paths are
# relative to `path`, and the id -> name entries are indented so they form the
# mapping stored under `names:` (unindented they would become top-level keys
# and `names` itself would be empty).
# NOTE(review): the layout looks like an Ultralytics-style data.yaml — confirm
# against the trainer that consumes this file.
yaml = f'''path: {OUT}
train: train/images
val: valid/images
test: test/images
nc: 3
names:
  0: no-helmet
  1: with-helmet
  2: triple-riding
'''
with open(f'{OUT}/data.yaml', 'w') as f:
    f.write(yaml)
|
|
# Final report: per split, the number of samples written and the number of
# label rows per class, plus the total across all classes.
print('\n=== V3 MERGE COMPLETE ===')
for split in ('train', 'valid', 'test'):
    per_class = stats[split]
    total = sum(per_class.values())
    no_helmet, with_helmet, triple = (per_class[c] for c in (0, 1, 2))
    print(
        f' {split:6s} images={imgcount[split]:5d} | '
        f'no-helmet={no_helmet:5d} with-helmet={with_helmet:5d} triple={triple:4d} | '
        f'instances={total}'
    )
|
|