import os
import shutil
import random
from ultralytics import YOLO
from tqdm import tqdm
# Path to the trained YOLO segmentation weights used to auto-label new images.
MODEL_PATH = r"C:\Users\charu\Documents\goyam\roboflow\runs\segment\yolo26_real_v1\weights\best.pt"
# Directory of raw, unlabeled input images.
INPUT_IMG_DIR = r"C:\Users\charu\Desktop\all new\40000\all_images"
# Root of the generated YOLO dataset (images/, labels/, data.yaml).
OUTPUT_DATASET_DIR = r"C:\Users\charu\Desktop\all new\40000\goyam_v2_dataset"
# Minimum confidence for a prediction to be kept as a label.
CONF_THRESHOLD = 0.30
# Fraction of images assigned to the training split; the rest go to val.
SPLIT_RATIO = 0.85
# Inference batch size passed to model.predict().
BATCH_SIZE = 16
def setup_directories():
    """Create the standard YOLO dataset folder structure.

    Builds images/{train,val} and labels/{train,val} under
    OUTPUT_DATASET_DIR. Existing directories are left untouched
    (exist_ok=True), so re-running the script is safe.
    """
    # Original message contained a mojibake character ("๐", a broken
    # emoji from a non-UTF-8 round-trip); replaced with plain text.
    print("Creating dataset directories...")
    for kind in ("images", "labels"):
        for split in ("train", "val"):
            os.makedirs(os.path.join(OUTPUT_DATASET_DIR, kind, split), exist_ok=True)
def generate_yaml(model):
    """Write the data.yaml file consumed by the next training run.

    Derives the class count and names from the loaded model so the new
    dataset inherits the exact class mapping of the labeling model.

    Args:
        model: A loaded ultralytics YOLO model; only ``model.names``
            (an id -> name dict) is read.
    """
    yaml_path = os.path.join(OUTPUT_DATASET_DIR, "data.yaml")
    names_dict = model.names
    # Explicit utf-8 avoids locale-dependent encoding errors on Windows
    # when class names contain non-ASCII characters.
    with open(yaml_path, "w", encoding="utf-8") as f:
        f.write(f"train: {os.path.join(OUTPUT_DATASET_DIR, 'images', 'train')}\n")
        f.write(f"val: {os.path.join(OUTPUT_DATASET_DIR, 'images', 'val')}\n\n")
        f.write(f"nc: {len(names_dict)}\n")
        # A Python list repr of strings is a valid YAML flow sequence.
        f.write(f"names: {list(names_dict.values())}\n")
    print(f"Created data.yaml at {yaml_path}")
def auto_label_and_split():
    """Auto-label every input image with the seed model and build a dataset.

    Runs batched segmentation inference over INPUT_IMG_DIR, writes one
    YOLO-seg label file per image (class id followed by normalized
    polygon coordinates), copies each image into a randomly shuffled
    train/val split, and emits the matching data.yaml.
    """
    setup_directories()

    print(f"Loading : {MODEL_PATH}")
    model = YOLO(MODEL_PATH)
    generate_yaml(model)

    valid_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.webp')
    all_images = [f for f in os.listdir(INPUT_IMG_DIR) if f.lower().endswith(valid_extensions)]
    total_images = len(all_images)
    print(f"Found {total_images} images. Shuffling and Splitting...")

    # Randomly assign SPLIT_RATIO of the images to train; the rest go to val.
    random.shuffle(all_images)
    split_idx = int(total_images * SPLIT_RATIO)
    train_images = set(all_images[:split_idx])

    print(f"Starting Auto-Labeling (Batch Size: {BATCH_SIZE})...")
    # stream=True yields results one image at a time, so memory use stays
    # flat regardless of dataset size.
    # NOTE(review): device="cuda:0" assumes a GPU is present — confirm,
    # or the run will fail on CPU-only machines.
    results = model.predict(
        source=INPUT_IMG_DIR,
        stream=True,
        batch=BATCH_SIZE,
        conf=CONF_THRESHOLD,
        verbose=False,
        device="cuda:0"
    )

    for result in tqdm(results, total=total_images, desc="Labeling"):
        filename = os.path.basename(result.path)
        folder_type = "train" if filename in train_images else "val"
        dest_img_path = os.path.join(OUTPUT_DATASET_DIR, "images", folder_type, filename)
        txt_filename = os.path.splitext(filename)[0] + ".txt"
        dest_txt_path = os.path.join(OUTPUT_DATASET_DIR, "labels", folder_type, txt_filename)

        lines = []
        if result.masks is not None and result.boxes is not None:
            for i, polygon in enumerate(result.masks.xyn):
                # YOLO segmentation labels require at least 3 points to
                # describe a polygon; skip degenerate masks that would
                # otherwise produce invalid label lines.
                if len(polygon) < 3:
                    continue
                cls_id = int(result.boxes.cls[i].item())
                coords = " ".join([f"{x:.6f} {y:.6f}" for x, y in polygon])
                lines.append(f"{cls_id} {coords}")

        # An empty .txt is intentional: it marks the image as background.
        with open(dest_txt_path, "w") as f:
            f.write("\n".join(lines))
        shutil.copy2(result.path, dest_img_path)

    # Original message contained a mojibake character ("๐"); replaced
    # with plain text.
    print("\nAuto-Labeling Complete!")
    print(f"Dataset ready at: {OUTPUT_DATASET_DIR}")
    print("You can now train your V2 model using the newly generated data.yaml!")
# Script entry point: build the auto-labeled dataset when run directly.
if __name__ == "__main__":
    auto_label_and_split()