#!/usr/bin/env python3
"""Build the v3 merged dataset.

v3: use clean_merged_data (v2) as base, add Khadatkar + Learning to train.

The output tree is recreated from scratch under ``OUT`` on every run:
``<OUT>/<split>/images`` and ``<OUT>/<split>/labels`` for the train, valid
and test splits, plus a ``data.yaml`` describing the three classes.
"""
import glob
import os
import shutil
import sys
from pathlib import Path

HOME = os.path.expanduser('~')
OUT = f'{HOME}/merged_v3'
BASE = f'{HOME}/clean_merged_data'

SPLITS = ('train', 'valid', 'test')
CLASS_IDS = (0, 1, 2)
# Probed in this order; the first extension that exists on disk wins.
IMAGE_EXTS = ('.jpg', '.jpeg', '.png', '.JPG', '.JPEG', '.PNG')

# Fresh output
if os.path.exists(OUT):
    shutil.rmtree(OUT)
for s in SPLITS:
    os.makedirs(f'{OUT}/{s}/images', exist_ok=True)
    os.makedirs(f'{OUT}/{s}/labels', exist_ok=True)

# Per-split instance counts per class id, and per-split image counts.
stats = {s: {c: 0 for c in CLASS_IDS} for s in SPLITS}
imgcount = {s: 0 for s in SPLITS}


def copy_split(src_img_dir, src_lbl_dir, target_split, cmap, name_suffix,
               out_root=None):
    """Copy one source split into ``target_split`` of the output dataset.

    For every ``*.txt`` label file in ``src_lbl_dir`` the matching image
    (same stem, one of ``IMAGE_EXTS``) is looked up in ``src_img_dir``.
    Label lines whose class id is a key of ``cmap`` are rewritten with the
    mapped id; all other lines are dropped. Items with no image, or with no
    surviving label line, are skipped.

    Args:
        src_img_dir: Directory holding the source images.
        src_lbl_dir: Directory holding the source label files.
        target_split: Output split name ('train', 'valid' or 'test').
        cmap: Dict mapping source class id -> output class id.
        name_suffix: Text appended to the file stem in the output.
        out_root: Output dataset root; defaults to the module-level ``OUT``.

    Returns:
        The number of image/label pairs written.

    Side effects:
        Updates the module-level ``stats`` and ``imgcount`` for
        ``target_split``.
    """
    root = OUT if out_root is None else out_root
    dst_img_dir = f'{root}/{target_split}/images'
    dst_lbl_dir = f'{root}/{target_split}/labels'
    os.makedirs(dst_img_dir, exist_ok=True)
    os.makedirs(dst_lbl_dir, exist_ok=True)

    n = 0
    # Sorted so the result does not depend on directory listing order.
    for lbl_path in sorted(glob.glob(f'{src_lbl_dir}/*.txt')):
        stem = Path(lbl_path).stem

        candidates = (f'{src_img_dir}/{stem}{ext}' for ext in IMAGE_EXTS)
        img_path = next((p for p in candidates if os.path.exists(p)), None)
        if img_path is None:
            continue

        lines = []
        mapped_ids = []
        with open(lbl_path) as f:
            for line in f:
                parts = line.strip().split()
                if not parts:
                    continue
                cid = int(parts[0])
                if cid not in cmap:
                    continue
                mapped_ids.append(cmap[cid])
                lines.append(' '.join([str(cmap[cid])] + parts[1:]))
        if not lines:
            continue

        new_stem = f'{stem}{name_suffix}'
        ext = Path(img_path).suffix
        dst_img = f'{dst_img_dir}/{new_stem}{ext}'
        dst_lbl = f'{dst_lbl_dir}/{new_stem}.txt'

        # First writer wins: never pair an already-copied image with another
        # source's labels, and never count the same output item twice.
        if os.path.exists(dst_img) or os.path.exists(dst_lbl):
            print(f'warning: output name {new_stem!r} already exists in '
                  f'{target_split}; skipping {lbl_path}', file=sys.stderr)
            continue

        # Hard-link when possible (cheap); fall back to a real copy when the
        # filesystem refuses (e.g. cross-device link).
        try:
            os.link(img_path, dst_img)
        except OSError:
            shutil.copy(img_path, dst_img)

        with open(dst_lbl, 'w') as f:
            f.write('\n'.join(lines) + '\n')

        split_stats = stats[target_split]
        for mapped in mapped_ids:
            split_stats[mapped] = split_stats.get(mapped, 0) + 1
        imgcount[target_split] += 1
        n += 1
    return n


# 1) Copy clean_merged_data AS-IS (identity mapping for 0,1,2), no extra suffix
# Images already have _cctv_dataset / _helmet_dataset / _yolo_project suffixes
print('--- base v2 data ---')
for s in SPLITS:
    n = copy_split(f'{BASE}/{s}/images', f'{BASE}/{s}/labels', s,
                   {0: 0, 1: 1, 2: 2}, '')
    print(f' base -> {s}: {n}')

# 2) Add Khadatkar + Learning ONLY to train split
EXTRAS = [
    # 0=With Helmet->1, 1=Without Helmet->0, drop 2=licence
    ('khadatkar', f'{HOME}/extra_khadatkar', {0: 1, 1: 0}),
    # 0=With Helmet->1, 1=Without Helmet->0
    ('learning', f'{HOME}/extra_learning', {0: 1, 1: 0}),
]
print('--- extras -> train ---')
for name, root, cmap in EXTRAS:
    for src_split in SPLITS:
        img_dir = f'{root}/{src_split}/images'
        lbl_dir = f'{root}/{src_split}/labels'
        if not os.path.isdir(lbl_dir):
            continue
        # The source name and split go into the suffix to keep output names
        # unique across the extra datasets.
        n = copy_split(img_dir, lbl_dir, 'train', cmap,
                       f'_{name}_{src_split}')
        print(f' {name} {src_split} -> train: {n}')

data_yaml = f'''path: {OUT}
train: train/images
val: valid/images
test: test/images
nc: 3
names:
  0: no-helmet
  1: with-helmet
  2: triple-riding
'''
with open(f'{OUT}/data.yaml', 'w') as f:
    f.write(data_yaml)

print('\n=== V3 MERGE COMPLETE ===')
for s in SPLITS:
    tot = sum(stats[s].values())
    print(f' {s:6s} images={imgcount[s]:5d} | '
          f'no-helmet={stats[s][0]:5d} with-helmet={stats[s][1]:5d} '
          f'triple={stats[s][2]:4d} | instances={tot}')