# Spaces: Sleeping
# (Status banner of the hosting page, captured together with the script; not part of the program.)
import os
import random
from glob import glob

import cv2
import numpy as np
from tqdm import tqdm
# Root folder of the exported dataset. The default is the original author's
# local path; set the ROBOFLOW_BASE_DIR environment variable to run the script
# against another location without editing the source.
BASE_DIR = os.environ.get(
    "ROBOFLOW_BASE_DIR", r"C:\Users\charu\Documents\goyam\roboflow"
)
# Images and their same-named ".txt" label files live under the "train" split.
IMAGES_DIR = os.path.join(BASE_DIR, "train", "images")
LABELS_DIR = os.path.join(BASE_DIR, "train", "labels")
# Number of samples every class should have once augmentation has finished.
TARGET_PER_CLASS = 100
# Class id (the first token of a label line) -> human-readable class name.
CLASS_NAMES = {0: "Blast", 1: "Brown Spot", 2: "Sheath Blight"}
def load_dataset(images_dir=None, labels_dir=None):
    """Group labelled images by the class id found on their first label line.

    Every ``*.txt`` file in the labels folder is paired with an image of the
    same stem (``.jpg``, ``.jpeg``, ``.png`` or ``.JPG``) in the images
    folder. Label files without a matching image, empty label files, files
    whose first token is not an integer, and class ids other than 0, 1 and 2
    are left out.

    Args:
        images_dir: Folder holding the images. Defaults to ``IMAGES_DIR``.
        labels_dir: Folder holding the label files. Defaults to
            ``LABELS_DIR``.

    Returns:
        A dict mapping the class ids 0, 1 and 2 to a list of
        ``(image_path, label_lines)`` tuples, where ``label_lines`` is the
        label file's content as returned by ``readlines()``. All three lists
        are empty when either folder does not exist.
    """
    if images_dir is None:
        images_dir = IMAGES_DIR
    if labels_dir is None:
        labels_dir = LABELS_DIR

    dataset = {0: [], 1: [], 2: []}
    if not os.path.exists(images_dir) or not os.path.exists(labels_dir):
        print(" Error: Could not find folders!")
        print(f" Looking for: {images_dir}")
        print(f" Looking for: {labels_dir}")
        print(" -> Check if your Roboflow folder has a 'train' subfolder or not.")
        return dataset

    # Sorted so that the per-class lists come out in the same order each run.
    txt_files = sorted(glob(os.path.join(labels_dir, "*.txt")))
    # The garbled (mis-encoded) prefix character of this message was dropped:
    # it cannot be encoded on narrow consoles and would abort the scan.
    print(f" Scanning Labels: {labels_dir}")
    print(f" -> Found {len(txt_files)} text files.")

    unreadable = 0
    for txt_path in txt_files:
        # splitext removes only the trailing ".txt"; a stem that itself
        # contains ".txt" (e.g. "leaf.txt.rf.1") must stay intact to match
        # its image.
        stem = os.path.splitext(os.path.basename(txt_path))[0]

        img_path = None
        for ext in (".jpg", ".jpeg", ".png", ".JPG"):
            candidate = os.path.join(images_dir, stem + ext)
            if os.path.exists(candidate):
                img_path = candidate
                break
        if img_path is None:
            continue

        with open(txt_path, "r", encoding="utf-8") as f:
            lines = f.readlines()
        if not lines:
            continue

        # Only the first label line decides which class the image counts for.
        try:
            class_id = int(lines[0].split()[0])
        except (IndexError, ValueError):
            # Blank first line or non-integer class token: skip, but count it
            # so the omission is visible instead of silent.
            unreadable += 1
            continue
        if class_id in dataset:
            dataset[class_id].append((img_path, lines))

    if unreadable:
        print(f" -> Skipped {unreadable} label files with an unreadable class id.")
    return dataset
def _flip_label_lines(lines, horizontal):
    """Mirror normalised label coordinates to match a flipped image.

    Each non-blank line is ``<class> v0 v1 v2 ...`` with values in [0, 1].
    A line with exactly four values is treated as a bounding box
    (``cx cy w h``): only the centre is mirrored, width and height are kept.
    Any other line is treated as a polygon of ``x y`` pairs. Blank lines are
    dropped. Values are re-emitted with six decimals.
    """
    first = 0 if horizontal else 1  # x values sit at even indices, y at odd
    flipped = []
    for line in lines:
        parts = line.split()
        if not parts:
            continue
        coords = [float(value) for value in parts[1:]]
        # A polygon needs at least three points (six values), so four values
        # can only be a box; mirroring its width/height would corrupt it.
        stop = 2 if len(coords) == 4 else len(coords)
        for i in range(first, stop, 2):
            coords[i] = 1.0 - coords[i]
        flipped.append(
            f"{parts[0]} " + " ".join(f"{c:.6f}" for c in coords) + "\n"
        )
    return flipped


def augment_polygon(img_path, lines, new_filename, images_dir=None, labels_dir=None):
    """Write one randomly augmented copy of an image and its label file.

    One of four augmentations is picked at random: horizontal flip, vertical
    flip, brightness shift (-30..30) or additive Gaussian noise (sigma 15).
    Labels are mirrored for the flips and copied unchanged otherwise.

    Args:
        img_path: Path of the source image.
        lines: Label lines belonging to the source image.
        new_filename: Stem (no extension) for the ``.jpg`` / ``.txt`` pair.
        images_dir: Output folder for the image. Defaults to ``IMAGES_DIR``.
        labels_dir: Output folder for the labels. Defaults to ``LABELS_DIR``.

    Returns:
        True when both files were written; False when the source image could
        not be read or the augmented image could not be saved (in which case
        no label file is written either).
    """
    if images_dir is None:
        images_dir = IMAGES_DIR
    if labels_dir is None:
        labels_dir = LABELS_DIR

    img = cv2.imread(img_path)
    if img is None:
        return False

    action = random.choice(["h_flip", "v_flip", "bright", "noise"])
    if action == "h_flip":
        new_img = cv2.flip(img, 1)
        new_lines = _flip_label_lines(lines, horizontal=True)
    elif action == "v_flip":
        new_img = cv2.flip(img, 0)
        new_lines = _flip_label_lines(lines, horizontal=False)
    elif action == "bright":
        beta = random.randint(-30, 30)
        # Not cv2.convertScaleAbs: it takes the absolute value of the result,
        # so with a negative beta dark pixels would wrap back up instead of
        # clamping at 0. Widen, shift, then clip explicitly.
        new_img = np.clip(img.astype(np.int16) + beta, 0, 255).astype(np.uint8)
        new_lines = list(lines)
    else:  # "noise"
        noise = np.random.normal(0, 15, img.shape)
        new_img = np.clip(img.astype(np.float32) + noise, 0, 255).astype(np.uint8)
        new_lines = list(lines)

    # Write the label only after the image is safely on disk, so a failed
    # image write never leaves a label file without its image.
    if not cv2.imwrite(os.path.join(images_dir, new_filename + ".jpg"), new_img):
        return False
    with open(os.path.join(labels_dir, new_filename + ".txt"), "w", encoding="utf-8") as f:
        f.writelines(new_lines)
    return True
def main():
    """Top up every class to ``TARGET_PER_CLASS`` samples via augmentation.

    Loads the dataset, prints the per-class counts, generates augmented
    copies of randomly chosen source images for every class that is short,
    and finally reports the counts actually reached.

    Generated files are named ``aug_<class id>_<n>``; ``n`` skips names that
    already exist so an earlier run's output is never overwritten. Whether an
    augmentation succeeded is decided by checking that its label file now
    exists on disk.
    """
    print(" Loading Roboflow Dataset...")
    data_map = load_dataset()
    if not any(data_map.values()):
        print(" No labelled images found. Nothing to augment.")
        return

    print("\nCurrent Counts:")
    for cid, name in CLASS_NAMES.items():
        print(f" - {name}: {len(data_map.get(cid, []))} images")

    print(f"\n Augmenting to {TARGET_PER_CLASS} per class...")
    final_counts = {}
    for cid, name in CLASS_NAMES.items():
        items = data_map.get(cid, [])
        current_count = len(items)
        needed = TARGET_PER_CLASS - current_count
        created = 0

        if needed <= 0:
            print(f" -> {name} is already full ({current_count} images). Skipping.")
        elif not items:
            # Nothing to copy from: say so instead of skipping silently.
            print(f" -> {name} has no source images. Cannot augment. Skipping.")
        else:
            print(f" -> Generating {needed} images for {name}...")
            next_index = 0
            for _ in tqdm(range(needed)):
                # Advance past names already used on disk (label or image).
                while True:
                    new_name = f"aug_{cid}_{next_index}"
                    next_index += 1
                    label_path = os.path.join(LABELS_DIR, new_name + ".txt")
                    image_path = os.path.join(IMAGES_DIR, new_name + ".jpg")
                    if not (os.path.exists(label_path) or os.path.exists(image_path)):
                        break
                src_img, src_lines = random.choice(items)
                augment_polygon(src_img, src_lines, new_name)
                # The label file is written last, so its presence means the
                # whole image/label pair was produced.
                if os.path.exists(label_path):
                    created += 1
            if created < needed:
                print(f" -> Warning: only {created} of {needed} images could be generated for {name}.")

        final_counts[cid] = current_count + created

    # Report what was really achieved rather than a fixed claim.
    total = sum(final_counts.values())
    if all(count == TARGET_PER_CLASS for count in final_counts.values()):
        print(f"\n dataset now has {total} balanced images.")
    else:
        print(f"\n dataset now has {total} images:")
        for cid, name in CLASS_NAMES.items():
            print(f" - {name}: {final_counts[cid]} images")


if __name__ == "__main__":
    main()