Spaces:

Goyamproject
/

React_native_app

Running

App Files Files Community

Charuka66 committed on Mar 5

Commit

28d91cf

verified ·

1 Parent(s): 5ae39ff

Upload auto_label.py

Browse files

Label 45000 images using the newly trained brain

Files changed (1) hide show

auto_label.py +100 -49

auto_label.py CHANGED Viewed

@@ -1,71 +1,122 @@
-from ultralytics import YOLO
 import os
 from tqdm import tqdm
-MODEL_PATH = r"C:\Users\charu\Desktop\all new\dataset_new\runs\detect\Teacher_v2_Detect\weights\best.pt"
-IMAGES_DIR = r"C:\Users\charu\Desktop\all new\40000\all_images"
-# 3. Output Folder for Labels
-LABELS_DIR = r"C:\Users\charu\Desktop\all new\40000\all_labels"
-CONF_THRESHOLD = 0.50
-BATCH_SIZE = 128
-# =================================================
-def auto_label_detect():
-    print(f" Loading Detection Teacher: {MODEL_PATH}")
-    try:
-        model = YOLO(MODEL_PATH)
-    except:
-        print(" Model not found! Please check the MODEL_PATH.")
-        return
-    print(f"🧠 Model Classes: {model.names}")
-    os.makedirs(LABELS_DIR, exist_ok=True)
-    image_files = [f for f in os.listdir(IMAGES_DIR) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
-    print(f"📂 Found {len(image_files)} images to label.")
-    for img_name in tqdm(image_files):
-        img_path = os.path.join(IMAGES_DIR, img_name)
-        txt_name = os.path.splitext(img_name)[0] + ".txt"
-        txt_path = os.path.join(LABELS_DIR, txt_name)
-        if os.path.exists(txt_path):
-            continue
-        results = model.predict(img_path, conf=CONF_THRESHOLD, verbose=False)
-        result = results[0]
-        label_lines = []
-        if len(result.boxes) > 0:
-            for box in result.boxes:
-                cls_id = int(box.cls[0])
-                x, y, w, h = box.xywhn[0].tolist()
-                line = f"{cls_id} {x:.6f} {y:.6f} {w:.6f} {h:.6f}"
-                label_lines.append(line)
-        with open(txt_path, "w") as f:
-            if label_lines:
-                f.write("\n".join(label_lines))
-            else:
-                pass
-    print("\n✅ Auto-Labeling Complete! You now have labels for your massive dataset.")
 if __name__ == "__main__":
-    auto_label_detect()

 import os
+import shutil
+import random
+from ultralytics import YOLO
 from tqdm import tqdm
+# Weights file loaded with YOLO() to produce the labels.
+MODEL_PATH = r"C:\Users\charu\Documents\goyam\roboflow\runs\segment\yolo26_real_v1\weights\best.pt"
+# Folder of source images: listed to build the train/val split and passed to predict() as the source.
+INPUT_IMG_DIR = r"C:\Users\charu\Desktop\all new\40000\all_images"
+# Root of the generated dataset (images/ and labels/ train/val folders plus data.yaml).
+OUTPUT_DATASET_DIR = r"C:\Users\charu\Desktop\all new\40000\goyam_v2_dataset"
+# Passed as conf= to model.predict().
+CONF_THRESHOLD = 0.30
+# Fraction of the shuffled image list assigned to the train split; the remainder goes to val.
+SPLIT_RATIO = 0.85
+# Passed as batch= to model.predict().
+BATCH_SIZE = 16
+def setup_directories():
+    """Ensure the images/ and labels/ train and val folders exist under OUTPUT_DATASET_DIR."""
+    print("📁 Creating dataset directories...")
+    # Same four folders, same creation order: images/train, images/val, labels/train, labels/val.
+    for kind in ("images", "labels"):
+        for split in ("train", "val"):
+            os.makedirs(os.path.join(OUTPUT_DATASET_DIR, kind, split), exist_ok=True)
+def generate_yaml(model):
+    """Write the data.yaml describing the generated dataset into OUTPUT_DATASET_DIR.
+
+    Args:
+        model: Loaded model whose ``names`` attribute maps class id -> class name.
+
+    The file lists the train/val image folders, the class count (``nc``) and an
+    explicit ``id: name`` mapping, so the ids written into the label files keep
+    pointing at the right names regardless of dict ordering.
+    """
+    import json  # local import: a JSON string is also a valid YAML double-quoted scalar
+
+    yaml_path = os.path.join(OUTPUT_DATASET_DIR, "data.yaml")
+    names_dict = model.names
+    train_dir = os.path.join(OUTPUT_DATASET_DIR, "images", "train")
+    val_dir = os.path.join(OUTPUT_DATASET_DIR, "images", "val")
+    # Explicit UTF-8: the platform default (e.g. cp1252 on Windows) can fail on or
+    # garble non-ASCII class names and paths.
+    with open(yaml_path, "w", encoding="utf-8") as f:
+        # Quote every string so backslashes, '#', ':' or quotes cannot change the YAML meaning.
+        f.write(f"train: {json.dumps(train_dir, ensure_ascii=False)}\n")
+        f.write(f"val: {json.dumps(val_dir, ensure_ascii=False)}\n\n")
+        f.write(f"nc: {len(names_dict)}\n")
+        f.write("names:\n")
+        for cls_id in sorted(names_dict):
+            f.write(f"  {cls_id}: {json.dumps(str(names_dict[cls_id]), ensure_ascii=False)}\n")
+    print(f"Created data.yaml at {yaml_path}")
+def auto_label_and_split():
+    """Auto-label the images in INPUT_IMG_DIR and write them out as a train/val dataset.
+
+    Steps:
+      1. Create the output folders and load the model from MODEL_PATH.
+      2. Write data.yaml from the model's class names.
+      3. Shuffle the image list and assign the first SPLIT_RATIO share to train.
+      4. Run streamed, batched prediction over the folder and, for every result,
+         write one label file (``<class_id> x1 y1 x2 y2 ...`` per mask polygon,
+         normalized coordinates) and copy the image next to it.
+
+    Images without any usable mask still get an (empty) label file and are copied.
+    Returns None; all output goes to OUTPUT_DATASET_DIR.
+    """
+    setup_directories()
+    print(f"Loading : {MODEL_PATH}")
+    model = YOLO(MODEL_PATH)
+    generate_yaml(model)
+    valid_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.webp')
+    all_images = [f for f in os.listdir(INPUT_IMG_DIR) if f.lower().endswith(valid_extensions)]
+    total_images = len(all_images)
+    if not all_images:
+        # Nothing to label: stop with a clear message instead of starting
+        # prediction on a folder that has no images to load.
+        print(f"No images found in {INPUT_IMG_DIR}. Nothing to label.")
+        return
+    print(f"Found {total_images} images. Shuffling and Splitting...")
+    random.shuffle(all_images)
+    split_idx = int(total_images * SPLIT_RATIO)
+    # Set membership keeps the per-image train/val lookup O(1).
+    train_images = set(all_images[:split_idx])
+    print(f"Starting Auto-Labeling (Batch Size: {BATCH_SIZE})...")
+    # stream=True yields results one by one instead of holding them all in memory.
+    results = model.predict(
+        source=INPUT_IMG_DIR,
+        stream=True,
+        batch=BATCH_SIZE,
+        conf=CONF_THRESHOLD,
+        verbose=False,
+        device="cuda:0"
+    )
+    for result in tqdm(results, total=total_images, desc="Labeling"):
+        img_path = result.path
+        filename = os.path.basename(img_path)
+        # Anything not selected for train (the remaining share of the shuffle) goes to val.
+        folder_type = "train" if filename in train_images else "val"
+        dest_img_path = os.path.join(OUTPUT_DATASET_DIR, "images", folder_type, filename)
+        txt_filename = os.path.splitext(filename)[0] + ".txt"
+        dest_txt_path = os.path.join(OUTPUT_DATASET_DIR, "labels", folder_type, txt_filename)
+        lines = []
+        if result.masks is not None and result.boxes is not None:
+            for cls_value, polygon in zip(result.boxes.cls, result.masks.xyn):
+                # A polygon needs at least 3 vertices; a degenerate mask would
+                # otherwise emit a label line with a class id and no usable outline.
+                if len(polygon) < 3:
+                    continue
+                cls_id = int(cls_value.item())
+                coords = " ".join(f"{x:.6f} {y:.6f}" for x, y in polygon)
+                lines.append(f"{cls_id} {coords}")
+        with open(dest_txt_path, "w") as f:
+            f.write("\n".join(lines))
+        shutil.copy2(img_path, dest_img_path)
+    print("\n🎉 Auto-Labeling Complete!")
+    print(f"Dataset ready at: {OUTPUT_DATASET_DIR}")
+    print("You can now train your V2 model using the newly generated data.yaml!")
+# Entry point: run the labeling pipeline only when executed as a script, not on import.
 if __name__ == "__main__":
+    auto_label_and_split()