Spaces:

Goyamproject
/

React_native_app

Sleeping

File size: 4,658 Bytes

a677b76

import os
import cv2
import random
import numpy as np
from glob import glob
from tqdm import tqdm


# Root folder of the dataset export on the local machine.
BASE_DIR = r"C:\Users\charu\Documents\goyam\roboflow"

# Image and label folders of the "train" split, both located under BASE_DIR.
IMAGES_DIR, LABELS_DIR = (
    os.path.join(BASE_DIR, "train", leaf) for leaf in ("images", "labels")
)

# Number of images every class should have once augmentation is done.
TARGET_PER_CLASS = 100

# Numeric class id (first token of a label line) -> human-readable name.
CLASS_NAMES = dict(enumerate(("Blast", "Brown Spot", "Sheath Blight")))

def load_dataset(images_dir=None, labels_dir=None):
    """Group labelled images by the class id of their first annotation.

    Every ``*.txt`` file in the labels folder is paired with an image of the
    same stem in the images folder. The class id is read from the first
    non-blank line of the label file, and the sample is filed under that id.

    Args:
        images_dir: Folder containing the images. Defaults to ``IMAGES_DIR``.
        labels_dir: Folder containing the label files. Defaults to
            ``LABELS_DIR``.

    Returns:
        A dict with keys 0, 1 and 2. Each value is a list of
        ``(image_path, label_lines)`` tuples, where ``label_lines`` is the
        unmodified result of ``readlines()`` on the label file. Label files
        without a matching image, without any annotation, with a
        non-integer class token, or with a class id outside 0-2 are skipped.
        If either folder is missing, a diagnostic is printed and the dict is
        returned with empty lists.
    """
    if images_dir is None:
        images_dir = IMAGES_DIR
    if labels_dir is None:
        labels_dir = LABELS_DIR

    dataset = {0: [], 1: [], 2: []}

    if not os.path.exists(images_dir) or not os.path.exists(labels_dir):
        print(" Error: Could not find folders!")
        print(f"   Looking for: {images_dir}")
        print(f"   Looking for: {labels_dir}")
        print("   -> Check if your Roboflow folder has a 'train' subfolder or not.")
        return dataset

    # Sorted so the per-class lists come out in a reproducible order.
    txt_files = sorted(glob(os.path.join(labels_dir, "*.txt")))
    print(f"📂 Scanning Labels: {labels_dir}")
    print(f"   -> Found {len(txt_files)} text files.")

    image_exts = ('.jpg', '.jpeg', '.png', '.JPG', '.JPEG', '.PNG')

    for txt_path in txt_files:
        # splitext removes only the trailing extension; str.replace would also
        # strip a ".txt" that appears in the middle of the file name.
        stem = os.path.splitext(os.path.basename(txt_path))[0]

        candidates = (os.path.join(images_dir, stem + ext) for ext in image_exts)
        img_path = next((p for p in candidates if os.path.exists(p)), None)
        if img_path is None:
            continue

        with open(txt_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        # A leading blank line must not hide the real first annotation.
        first_annotation = next((ln for ln in lines if ln.strip()), None)
        if first_annotation is None:
            continue

        try:
            class_id = int(first_annotation.split()[0])
        except ValueError:
            # First token is not an integer class id: not a usable label file.
            continue

        if class_id in dataset:
            dataset[class_id].append((img_path, lines))
    return dataset

def _flip_label_lines(lines, flip_x, flip_y):
    """Mirror the normalized coordinates of YOLO label lines.

    Blank lines are dropped. A line with exactly four values after the class
    id is treated as a bounding box (x_center, y_center, width, height), so
    only its center is mirrored; any other line is treated as a flat list of
    alternating x, y polygon coordinates.
    """
    flipped = []
    for line in lines:
        parts = line.split()
        if not parts:
            continue
        cls = parts[0]
        coords = [float(x) for x in parts[1:]]
        # Width/height of a bounding box are sizes, not positions: leave them.
        positional = 2 if len(coords) == 4 else len(coords)
        new_coords = []
        for i, val in enumerate(coords):
            is_x = i % 2 == 0
            mirror = flip_x if is_x else flip_y
            new_coords.append(1.0 - val if (mirror and i < positional) else val)
        flipped.append(f"{cls} " + " ".join(f"{c:.6f}" for c in new_coords) + "\n")
    return flipped


def augment_polygon(img_path, lines, new_filename):
    """Write one randomly augmented copy of an image together with its labels.

    One of four augmentations is picked at random: horizontal flip, vertical
    flip, brightness shift (beta in [-30, 30]) or additive Gaussian noise
    (sigma 15). Flips also mirror the label coordinates; the other two leave
    the labels unchanged. The result is saved as ``<new_filename>.jpg`` in
    ``IMAGES_DIR`` and ``<new_filename>.txt`` in ``LABELS_DIR``.

    Args:
        img_path: Path of the source image.
        lines: Label lines of the source image ("class v1 v2 ..." with
            coordinates normalized to 0-1).
        new_filename: Stem (no extension) for the generated image/label pair.

    Returns:
        None. Nothing is written if the source image cannot be read or the
        augmented image cannot be saved.

    Raises:
        ValueError: If a flip is chosen and a label line contains a
            non-numeric coordinate.
    """
    img = cv2.imread(img_path)
    if img is None:
        return

    action = random.choice(["h_flip", "v_flip", "bright", "noise"])

    if action == "h_flip":
        new_img = cv2.flip(img, 1)
        new_lines = _flip_label_lines(lines, flip_x=True, flip_y=False)
    elif action == "v_flip":
        new_img = cv2.flip(img, 0)
        new_lines = _flip_label_lines(lines, flip_x=False, flip_y=True)
    elif action == "bright":
        beta = random.randint(-30, 30)
        new_img = cv2.convertScaleAbs(img, alpha=1.0, beta=beta)
        new_lines = lines
    else:  # "noise"
        noise = np.random.normal(0, 15, img.shape)
        # Add in floating point, then clip, so values cannot wrap around uint8.
        new_img = np.clip(img.astype(np.float32) + noise, 0, 255).astype(np.uint8)
        new_lines = lines

    # Only write the labels once the image is on disk, so a failed image
    # write never leaves a label file without its picture.
    if not cv2.imwrite(os.path.join(IMAGES_DIR, new_filename + ".jpg"), new_img):
        return
    with open(os.path.join(LABELS_DIR, new_filename + ".txt"), 'w') as f:
        f.writelines(new_lines)

def main():
    """Top up every class to ``TARGET_PER_CLASS`` images via augmentation.

    Loads the dataset, prints the current per-class counts, then generates
    augmented copies of randomly chosen source images for each class that is
    below the target. Generated files are named ``aug_<class id>_<n>``; names
    already present in ``LABELS_DIR`` are skipped so that re-running the
    script adds new samples instead of overwriting earlier ones. Returns
    early without output if no labelled image was found.
    """
    print(" Loading Roboflow Dataset...")
    data_map = load_dataset()

    if not any(data_map.values()):
        return

    class_ids = sorted(CLASS_NAMES)

    print("\nCurrent Counts:")
    for cid in class_ids:
        print(f"   - {CLASS_NAMES[cid]}: {len(data_map.get(cid, []))} images")

    print(f"\n Augmenting to {TARGET_PER_CLASS} per class...")
    final_counts = {}
    for cid in class_ids:
        items = data_map.get(cid, [])
        current_count = len(items)
        final_counts[cid] = current_count
        needed = TARGET_PER_CLASS - current_count

        if needed <= 0:
            print(f"   -> {CLASS_NAMES[cid]} is already full ({current_count} images). Skipping.")
            continue
        if not items:
            # Nothing to copy from: augmentation cannot create this class.
            print(f"   -> {CLASS_NAMES[cid]} has no source images. Cannot augment.")
            continue

        print(f"   -> Generating {needed} images for {CLASS_NAMES[cid]}...")
        next_index = 0
        for _ in tqdm(range(needed)):
            # Skip names left behind by a previous run.
            while os.path.exists(
                os.path.join(LABELS_DIR, f"aug_{cid}_{next_index}.txt")
            ):
                next_index += 1
            new_name = f"aug_{cid}_{next_index}"
            next_index += 1

            src_img, src_lines = random.choice(items)
            augment_polygon(src_img, src_lines, new_name)

            # augment_polygon reports nothing; the label file it writes is
            # the evidence that the sample was actually produced.
            if os.path.exists(os.path.join(LABELS_DIR, new_name + ".txt")):
                final_counts[cid] += 1

    total = sum(final_counts.values())
    if len(set(final_counts.values())) == 1:
        print(f"\n dataset now has {total} balanced images.")
    else:
        summary = ", ".join(
            f"{CLASS_NAMES[cid]}: {final_counts[cid]}" for cid in class_ids
        )
        print(f"\n dataset now has {total} images ({summary}).")

# Run the balancing pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()