"""Balance a YOLO-format dataset by augmenting under-represented classes.

Label files are grouped by the class id on their first line.  For every class
holding fewer than ``TARGET_PER_CLASS`` samples, randomly chosen source images
are flipped, brightness-shifted or noised, and the results are written back
into the same images/labels folders as ``aug_<class>_<n>.jpg`` / ``.txt``.
"""
import os
import random
from glob import glob

import cv2
import numpy as np
from tqdm import tqdm

BASE_DIR = r"C:\Users\charu\Documents\goyam\roboflow"
IMAGES_DIR = os.path.join(BASE_DIR, "train", "images")
LABELS_DIR = os.path.join(BASE_DIR, "train", "labels")
TARGET_PER_CLASS = 100
CLASS_NAMES = {0: "Blast", 1: "Brown Spot", 2: "Sheath Blight"}


def load_dataset():
    """Group the labelled images found on disk by class id.

    Each ``*.txt`` file in ``LABELS_DIR`` is paired with the image of the same
    stem in ``IMAGES_DIR``.  The file is filed under the class id that starts
    its first line.

    Returns:
        dict mapping each class id in ``CLASS_NAMES`` to a list of
        ``(image_path, label_lines)`` tuples.  Every list is empty when either
        folder is missing.
    """
    dataset = {cid: [] for cid in CLASS_NAMES}

    if not os.path.exists(IMAGES_DIR) or not os.path.exists(LABELS_DIR):
        print(f" Error: Could not find folders!")
        print(f" Looking for: {IMAGES_DIR}")
        print(f" Looking for: {LABELS_DIR}")
        print(" -> Check if your Roboflow folder has a 'train' subfolder or not.")
        return dataset

    txt_files = glob(os.path.join(LABELS_DIR, "*.txt"))
    print(f"📂 Scanning Labels: {LABELS_DIR}")
    print(f" -> Found {len(txt_files)} text files.")

    for txt_path in txt_files:
        # splitext strips only the trailing extension; str.replace would also
        # eat a ".txt" that happens to appear in the middle of the name.
        stem = os.path.splitext(os.path.basename(txt_path))[0]

        candidates = (
            os.path.join(IMAGES_DIR, stem + ext)
            for ext in ['.jpg', '.jpeg', '.png', '.JPG']
        )
        img_path = next((p for p in candidates if os.path.exists(p)), None)
        if img_path is None:
            continue

        with open(txt_path, 'r') as f:
            lines = f.readlines()
        if not lines:
            continue

        try:
            class_id = int(lines[0].split()[0])
        except (ValueError, IndexError):
            # First line is blank or does not start with an integer class id:
            # skip the file (best effort) without hiding unrelated errors.
            continue

        if class_id in dataset:
            dataset[class_id].append((img_path, lines))

    return dataset


def _flip_label_lines(lines, flip_x):
    """Mirror normalised label coordinates along one axis.

    Each line is ``<class> v0 v1 v2 ...``.  Values at even positions are
    treated as x and values at odd positions as y.  A line with exactly four
    values is treated as a bounding box (``cx cy w h``), so only its centre is
    mirrored and the width/height are kept.  Blank lines are dropped.

    Args:
        lines: label lines as read from the label file.
        flip_x: ``True`` mirrors x (horizontal flip), ``False`` mirrors y.

    Returns:
        A new list of label lines, each terminated by a newline.

    Raises:
        ValueError: if a coordinate token is not a number.
    """
    axis = 0 if flip_x else 1
    flipped = []
    for line in lines:
        parts = line.strip().split()
        if not parts:
            continue  # blank line: nothing to transform
        cls = parts[0]
        coords = [float(tok) for tok in parts[1:]]
        if len(coords) == 4:
            # Box line: mirroring w/h as "1 - value" would corrupt the box.
            new_coords = [
                1.0 - val if i == axis else val for i, val in enumerate(coords)
            ]
        else:
            new_coords = [
                1.0 - val if i % 2 == axis else val
                for i, val in enumerate(coords)
            ]
        flipped.append(
            f"{cls} " + " ".join(f"{c:.6f}" for c in new_coords) + "\n"
        )
    return flipped


def augment_polygon(img_path, lines, new_filename):
    """Write one randomly augmented copy of an image and its labels.

    One of four actions is picked at random: horizontal flip, vertical flip,
    brightness shift (beta in [-30, 30]) or additive Gaussian noise (sigma 15).
    Flips rewrite the label coordinates; the other two reuse the labels as is.
    The outputs are ``IMAGES_DIR/<new_filename>.jpg`` and
    ``LABELS_DIR/<new_filename>.txt``.

    Args:
        img_path: path of the source image.
        lines: label lines belonging to the source image.
        new_filename: stem (no extension) for the generated files.

    Returns:
        ``True`` when both files were written, ``False`` when the source image
        could not be read or the augmented image could not be saved.
    """
    img = cv2.imread(img_path)
    if img is None:
        return False

    action = random.choice(["h_flip", "v_flip", "bright", "noise"])

    if action == "h_flip":
        new_img = cv2.flip(img, 1)
        new_lines = _flip_label_lines(lines, flip_x=True)
    elif action == "v_flip":
        new_img = cv2.flip(img, 0)
        new_lines = _flip_label_lines(lines, flip_x=False)
    elif action == "bright":
        beta = random.randint(-30, 30)
        new_img = cv2.convertScaleAbs(img, alpha=1.0, beta=beta)
        new_lines = lines
    else:  # "noise"
        noise = np.random.normal(0, 15, img.shape)
        img_float = img.astype(np.float32)
        new_img = np.clip(img_float + noise, 0, 255).astype(np.uint8)
        new_lines = lines

    # Save the image first: if that fails, do not leave an orphan label file.
    if not cv2.imwrite(os.path.join(IMAGES_DIR, new_filename + ".jpg"), new_img):
        return False

    with open(os.path.join(LABELS_DIR, new_filename + ".txt"), 'w') as f:
        f.writelines(new_lines)
    return True


def _unused_aug_name(cid, index):
    """Return ``(stem, next_index)`` for the first free ``aug_<cid>_<n>`` name.

    Scanning starts at ``index``.  A name counts as taken when either its
    ``.jpg`` in ``IMAGES_DIR`` or its ``.txt`` in ``LABELS_DIR`` exists.
    """
    while True:
        stem = f"aug_{cid}_{index}"
        taken = os.path.exists(
            os.path.join(IMAGES_DIR, stem + ".jpg")
        ) or os.path.exists(os.path.join(LABELS_DIR, stem + ".txt"))
        index += 1
        if not taken:
            return stem, index


def main():
    """Report the class counts, then augment each class up to the target."""
    print(" Loading Roboflow Dataset...")
    data_map = load_dataset()

    if sum(len(v) for v in data_map.values()) == 0:
        return

    print("\nCurrent Counts:")
    for cid in CLASS_NAMES:
        print(f" - {CLASS_NAMES[cid]}: {len(data_map[cid])} images")

    print(f"\n Augmenting to {TARGET_PER_CLASS} per class...")

    final_counts = {}
    for cid in CLASS_NAMES:
        items = data_map[cid]
        current_count = len(items)
        needed = TARGET_PER_CLASS - current_count
        created = 0

        if needed > 0 and items:
            print(f" -> Generating {needed} images for {CLASS_NAMES[cid]}...")
            next_index = 0
            for _ in tqdm(range(needed)):
                src_img, src_lines = random.choice(items)
                # Never reuse an existing aug_* name: on a re-run the earlier
                # augmented files are already counted and must not be
                # overwritten.
                stem, next_index = _unused_aug_name(cid, next_index)
                if augment_polygon(src_img, src_lines, stem):
                    created += 1
        elif needed > 0:
            print(f" -> No source images for {CLASS_NAMES[cid]}. Cannot augment.")
        else:
            print(f" -> {CLASS_NAMES[cid]} is already full ({current_count} images). Skipping.")

        final_counts[cid] = current_count + created

    total = sum(final_counts.values())
    if all(count == TARGET_PER_CLASS for count in final_counts.values()):
        print(f"\n dataset now has {total} balanced images.")
    else:
        print(
            f"\n dataset now has {total} images; "
            f"not every class is at the target of {TARGET_PER_CLASS}."
        )


if __name__ == "__main__":
    main()