# NOTE: web-page residue that preceded the code was folded into this comment
# (Space status: Sleeping; file size: 4,658 Bytes; commit a677b76; line-number gutter 1-143).
import os
import cv2
import random
import numpy as np
from glob import glob
from tqdm import tqdm
# Root of the exported dataset. The ROBOFLOW_BASE_DIR environment variable
# overrides the original hard-coded location so the script can run on other
# machines without editing the source.
BASE_DIR = os.environ.get(
    "ROBOFLOW_BASE_DIR", r"C:\Users\charu\Documents\goyam\roboflow"
)
# Images and their label files live side by side under the "train" split.
IMAGES_DIR = os.path.join(BASE_DIR, "train", "images")
LABELS_DIR = os.path.join(BASE_DIR, "train", "labels")
# Number of samples each class is topped up to by augmentation.
TARGET_PER_CLASS = 100
# Class id (first token of a label line) -> display name used in messages.
CLASS_NAMES = {0: "Blast", 1: "Brown Spot", 2: "Sheath Blight"}
def load_dataset(images_dir=None, labels_dir=None):
    """Group labelled images by the class id of their first label line.

    Args:
        images_dir: Folder containing the image files. Defaults to the
            module-level ``IMAGES_DIR``.
        labels_dir: Folder containing the ``*.txt`` label files. Defaults to
            the module-level ``LABELS_DIR``.

    Returns:
        A dict mapping class ids 0, 1 and 2 to lists of
        ``(image_path, label_lines)`` tuples. Every list is empty when either
        folder is missing. Label files without a matching image, with an
        empty or unparsable first line, or with another class id are skipped.
    """
    if images_dir is None:
        images_dir = IMAGES_DIR
    if labels_dir is None:
        labels_dir = LABELS_DIR

    dataset = {0: [], 1: [], 2: []}
    if not os.path.exists(images_dir) or not os.path.exists(labels_dir):
        print(f" Error: Could not find folders!")
        print(f" Looking for: {images_dir}")
        print(f" Looking for: {labels_dir}")
        print(" -> Check if your Roboflow folder has a 'train' subfolder or not.")
        return dataset

    txt_files = glob(os.path.join(labels_dir, "*.txt"))
    print(f"📂 Scanning Labels: {labels_dir}")
    print(f" -> Found {len(txt_files)} text files.")

    # Original extensions first so existing matches keep their priority; the
    # upper-case variants matter on case-sensitive file systems.
    extensions = (".jpg", ".jpeg", ".png", ".JPG", ".JPEG", ".PNG")

    for txt_path in txt_files:
        # splitext removes only the trailing ".txt"; str.replace would also
        # strip a ".txt" occurring in the middle of the name.
        stem = os.path.splitext(os.path.basename(txt_path))[0]

        img_path = None
        for ext in extensions:
            try_path = os.path.join(images_dir, stem + ext)
            if os.path.exists(try_path):
                img_path = try_path
                break
        if img_path is None:
            continue

        with open(txt_path, "r") as f:
            lines = f.readlines()
        if not lines:
            continue

        try:
            class_id = int(lines[0].split()[0])
        except (IndexError, ValueError):
            # Blank first line or a non-integer class token: skip this file.
            continue
        if class_id in dataset:
            dataset[class_id].append((img_path, lines))
    return dataset
def _flip_label_lines(lines, flip_x, flip_y):
    """Return label lines with normalised coordinates mirrored per axis."""
    flipped = []
    for line in lines:
        parts = line.split()
        if not parts:
            # Blank lines carry no annotation; indexing parts[0] would raise.
            continue
        cls = parts[0]
        coords = [float(x) for x in parts[1:]]
        # Exactly four values is a YOLO bounding box (cx cy w h): only the
        # centre is mirrored, width/height stay as they are. Longer lines are
        # polygons stored as alternating x, y pairs.
        is_box = len(coords) == 4
        new_coords = []
        for i, val in enumerate(coords):
            is_size = is_box and i >= 2
            mirror = flip_x if i % 2 == 0 else flip_y
            new_coords.append(1.0 - val if mirror and not is_size else val)
        flipped.append(f"{cls} " + " ".join(f"{c:.6f}" for c in new_coords) + "\n")
    return flipped


def augment_polygon(img_path, lines, new_filename):
    """Write one randomly augmented copy of an image and its label file.

    One of four augmentations is picked at random: horizontal flip, vertical
    flip, brightness shift, or additive Gaussian noise. Flips also mirror the
    normalised label coordinates; the other two reuse the labels unchanged.

    Args:
        img_path: Path of the source image.
        lines: Label lines belonging to the source image.
        new_filename: Output name without extension. The image is written to
            ``IMAGES_DIR`` as ``.jpg`` and the labels to ``LABELS_DIR`` as
            ``.txt``.

    Returns:
        None. Nothing is written when the source image cannot be read, and no
        label file is written when saving the image fails.
    """
    img = cv2.imread(img_path)
    if img is None:
        return

    action = random.choice(["h_flip", "v_flip", "bright", "noise"])
    if action == "h_flip":
        new_img = cv2.flip(img, 1)
        new_lines = _flip_label_lines(lines, flip_x=True, flip_y=False)
    elif action == "v_flip":
        new_img = cv2.flip(img, 0)
        new_lines = _flip_label_lines(lines, flip_x=False, flip_y=True)
    elif action == "bright":
        beta = random.randint(-30, 30)
        # convertScaleAbs takes the absolute value, so a negative shift made
        # dark pixels brighter; clip in a wider integer type instead.
        new_img = np.clip(img.astype(np.int16) + beta, 0, 255).astype(np.uint8)
        new_lines = lines
    else:  # "noise"
        noise = np.random.normal(0, 15, img.shape)
        img_float = img.astype(np.float32)
        new_img = np.clip(img_float + noise, 0, 255).astype(np.uint8)
        new_lines = lines

    # imwrite reports failure through its return value; skip the label file
    # then so no annotation is left without an image.
    if not cv2.imwrite(os.path.join(IMAGES_DIR, new_filename + ".jpg"), new_img):
        return
    with open(os.path.join(LABELS_DIR, new_filename + ".txt"), "w") as f:
        f.writelines(new_lines)
def _next_free_stem(cid, start):
    """Return the first unused ``aug_<cid>_<n>`` stem with n >= start, and n + 1."""
    index = start
    while True:
        stem = f"aug_{cid}_{index}"
        index += 1
        label_taken = os.path.exists(os.path.join(LABELS_DIR, stem + ".txt"))
        image_taken = os.path.exists(os.path.join(IMAGES_DIR, stem + ".jpg"))
        if not (label_taken or image_taken):
            return stem, index


def main():
    """Top every class up to ``TARGET_PER_CLASS`` samples by augmentation.

    Loads the dataset, prints the per-class counts, then generates augmented
    copies of randomly chosen samples for each class that is below target.
    Output names never reuse an existing ``aug_*`` file, so running the script
    again adds samples instead of overwriting earlier ones. The closing
    message reports the counts actually reached.
    """
    print(" Loading Roboflow Dataset...")
    data_map = load_dataset()
    if not any(data_map.values()):
        return

    print("\nCurrent Counts:")
    for cid, name in CLASS_NAMES.items():
        print(f" - {name}: {len(data_map.get(cid, []))} images")

    print(f"\n Augmenting to {TARGET_PER_CLASS} per class...")
    final_counts = {}
    for cid, name in CLASS_NAMES.items():
        items = data_map.get(cid, [])
        current_count = len(items)
        needed = TARGET_PER_CLASS - current_count
        created = 0

        if needed <= 0:
            print(f" -> {name} is already full ({current_count} images). Skipping.")
        elif not items:
            # Nothing to copy from; say so instead of skipping silently.
            print(f" -> No source images for {name}; cannot augment.")
        else:
            print(f" -> Generating {needed} images for {name}...")
            next_index = 0
            for _ in tqdm(range(needed)):
                stem, next_index = _next_free_stem(cid, next_index)
                src_img, src_lines = random.choice(items)
                augment_polygon(src_img, src_lines, stem)
                # The stem was unused before the call, so an existing label
                # file now means the augmentation was actually written.
                if os.path.exists(os.path.join(LABELS_DIR, stem + ".txt")):
                    created += 1

        final_counts[cid] = current_count + created

    total = sum(final_counts.values())
    if all(count == TARGET_PER_CLASS for count in final_counts.values()):
        print(f"\n dataset now has {total} balanced images.")
    else:
        print(f"\n dataset now has {total} images; classes are not evenly balanced:")
        for cid, name in CLASS_NAMES.items():
            print(f" - {name}: {final_counts[cid]} images")
# Run the augmentation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()