File size: 3,665 Bytes
f89eb88
28d91cf
 
 
f89eb88
 
28d91cf
 
 
 
 
 
 
 
f89eb88
 
28d91cf
 
 
f89eb88
 
28d91cf
 
 
 
 
 
 
 
 
 
 
f89eb88
28d91cf
 
 
 
 
 
 
 
 
 
 
f89eb88
28d91cf
 
 
f89eb88
28d91cf
f89eb88
28d91cf
 
f89eb88
28d91cf
f89eb88
 
28d91cf
 
 
 
 
 
 
 
 
 
 
 
 
f89eb88
28d91cf
 
 
 
 
 
 
 
 
 
 
f89eb88
28d91cf
 
 
 
 
 
 
 
 
f89eb88
 
28d91cf
 
 
 
 
 
 
 
 
 
f89eb88
28d91cf
 
 
 
 
 
 
 
 
 
f89eb88
28d91cf
 
 
f89eb88
 
28d91cf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import os
import shutil
import random
from ultralytics import YOLO
from tqdm import tqdm


# Path to the previously trained segmentation checkpoint used for auto-labeling.
MODEL_PATH = r"C:\Users\charu\Documents\goyam\roboflow\runs\segment\yolo26_real_v1\weights\best.pt"


# Flat directory of raw, unlabeled images to run inference on.
INPUT_IMG_DIR = r"C:\Users\charu\Desktop\all new\40000\all_images"


# Root of the generated dataset (images/{train,val}, labels/{train,val}, data.yaml).
OUTPUT_DATASET_DIR = r"C:\Users\charu\Desktop\all new\40000\goyam_v2_dataset"


CONF_THRESHOLD = 0.30  # minimum detection confidence kept as a pseudo-label
SPLIT_RATIO = 0.85     # fraction of images assigned to the train split
BATCH_SIZE = 16        # inference batch size passed to model.predict


def setup_directories(base_dir=None):
    """Create the standard YOLO dataset folder layout.

    Builds images/{train,val} and labels/{train,val} under *base_dir*,
    tolerating pre-existing directories.

    Args:
        base_dir: Root directory for the dataset. Defaults to
            ``OUTPUT_DATASET_DIR`` when None (original behavior).
    """
    if base_dir is None:
        base_dir = OUTPUT_DATASET_DIR
    # Original print contained mojibake (a mis-encoded emoji); plain text now.
    print("Creating dataset directories...")
    for kind in ("images", "labels"):
        for split in ("train", "val"):
            os.makedirs(os.path.join(base_dir, kind, split), exist_ok=True)

def generate_yaml(model, output_dir=None):
    """Write the data.yaml file required for the next YOLO training run.

    Args:
        model: Loaded YOLO model; its ``names`` mapping (class-id -> name)
            supplies ``nc`` and the class-name list.
        output_dir: Dataset root to write data.yaml into. Defaults to
            ``OUTPUT_DATASET_DIR`` when None (original behavior).

    Returns:
        The path of the written data.yaml file.
    """
    if output_dir is None:
        output_dir = OUTPUT_DATASET_DIR
    yaml_path = os.path.join(output_dir, "data.yaml")

    names_dict = model.names

    # Absolute paths are written so the yaml works regardless of cwd.
    content = (
        f"train: {os.path.join(output_dir, 'images', 'train')}\n"
        f"val: {os.path.join(output_dir, 'images', 'val')}\n\n"
        f"nc: {len(names_dict)}\n"
        f"names: {list(names_dict.values())}\n"
    )
    with open(yaml_path, "w") as f:
        f.write(content)

    print(f"Created data.yaml at {yaml_path}")
    return yaml_path

def auto_label_and_split(device="cuda:0"):
    """Pseudo-label every image in INPUT_IMG_DIR and build a YOLO-seg dataset.

    Runs the trained model over the input directory, copies each image into
    images/{train,val} and writes a matching polygon label .txt into
    labels/{train,val}. Images with no detections still get an (empty)
    label file, which YOLO treats as a background sample.

    Args:
        device: Inference device passed to Ultralytics predict. Defaults to
            "cuda:0" (original behavior); pass "cpu" on CUDA-less machines.
    """
    setup_directories()

    print(f"Loading : {MODEL_PATH}")
    model = YOLO(MODEL_PATH)

    generate_yaml(model)

    # Case-insensitive extension filter; mirrors formats Ultralytics can read.
    valid_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.webp')
    all_images = [f for f in os.listdir(INPUT_IMG_DIR) if f.lower().endswith(valid_extensions)]

    total_images = len(all_images)
    print(f"Found {total_images} images. Shuffling and Splitting...")

    # Shuffle first so the train/val split is a random partition.
    random.shuffle(all_images)

    split_idx = int(total_images * SPLIT_RATIO)
    # Set for O(1) membership checks inside the labeling loop; anything not
    # in this set goes to val.
    train_images = set(all_images[:split_idx])

    print(f"Starting Auto-Labeling (Batch Size: {BATCH_SIZE})...")

    # stream=True yields results lazily so the full dataset never sits in RAM.
    results = model.predict(
        source=INPUT_IMG_DIR,
        stream=True,
        batch=BATCH_SIZE,
        conf=CONF_THRESHOLD,
        verbose=False,
        device=device
    )

    for result in tqdm(results, total=total_images, desc="Labeling"):
        img_path = result.path
        filename = os.path.basename(img_path)

        folder_type = "train" if filename in train_images else "val"

        dest_img_path = os.path.join(OUTPUT_DATASET_DIR, "images", folder_type, filename)

        # Label file shares the image's base name with a .txt extension.
        txt_filename = os.path.splitext(filename)[0] + ".txt"
        dest_txt_path = os.path.join(OUTPUT_DATASET_DIR, "labels", folder_type, txt_filename)

        lines = []

        if result.masks is not None and result.boxes is not None:
            # masks.xyn holds normalized (0-1) polygon vertices; boxes.cls
            # holds the class id aligned with each mask by index.
            for i, polygon in enumerate(result.masks.xyn):
                cls_id = int(result.boxes.cls[i].item())

                # YOLO-seg label format: "<cls> x1 y1 x2 y2 ..." (normalized).
                coords = " ".join([f"{x:.6f} {y:.6f}" for x, y in polygon])
                lines.append(f"{cls_id} {coords}")

        # Written even when empty so every image has a label file.
        with open(dest_txt_path, "w") as f:
            f.write("\n".join(lines))

        # copy2 preserves file metadata (timestamps) alongside content.
        shutil.copy2(img_path, dest_img_path)

    # Original print contained mojibake (a mis-encoded emoji); plain text now.
    print("\nAuto-Labeling Complete!")
    print(f"Dataset ready at: {OUTPUT_DATASET_DIR}")
    print("You can now train your V2 model using the newly generated data.yaml!")

# Run the full auto-labeling pipeline only when executed as a script.
if __name__ == "__main__":
    auto_label_and_split()