import os
import shutil
import random
from ultralytics import YOLO
from tqdm import tqdm
# Path to the trained YOLO segmentation weights used to auto-label new images.
MODEL_PATH = r"C:\Users\charu\Documents\goyam\roboflow\runs\segment\yolo26_real_v1\weights\best.pt"
# Directory of raw, unlabeled input images.
INPUT_IMG_DIR = r"C:\Users\charu\Desktop\all new\40000\all_images"
# Root of the generated YOLO dataset (images/, labels/, data.yaml).
OUTPUT_DATASET_DIR = r"C:\Users\charu\Desktop\all new\40000\goyam_v2_dataset"
# Minimum confidence for a prediction to be kept as a label.
CONF_THRESHOLD = 0.30
# Fraction of images assigned to the training split; the rest go to val.
SPLIT_RATIO = 0.85
# Inference batch size passed to model.predict().
BATCH_SIZE = 16
def setup_directories():
    """Create the standard YOLO dataset folder structure.

    Builds images/{train,val} and labels/{train,val} under
    OUTPUT_DATASET_DIR. Existing directories are left untouched
    (exist_ok=True), so re-running the script is safe.
    """
    # Original message contained a mojibake character ("๐", a broken
    # emoji from a non-UTF-8 round-trip); replaced with plain text.
    print("Creating dataset directories...")
    for kind in ("images", "labels"):
        for split in ("train", "val"):
            os.makedirs(os.path.join(OUTPUT_DATASET_DIR, kind, split), exist_ok=True)
def generate_yaml(model):
    """Write the data.yaml file consumed by the next training run.

    Derives the class count and names from the loaded model so the new
    dataset inherits the exact class mapping of the labeling model.

    Args:
        model: A loaded ultralytics YOLO model; only ``model.names``
            (an id -> name dict) is read.
    """
    yaml_path = os.path.join(OUTPUT_DATASET_DIR, "data.yaml")
    names_dict = model.names
    # Explicit utf-8 avoids locale-dependent encoding errors on Windows
    # when class names contain non-ASCII characters.
    with open(yaml_path, "w", encoding="utf-8") as f:
        f.write(f"train: {os.path.join(OUTPUT_DATASET_DIR, 'images', 'train')}\n")
        f.write(f"val: {os.path.join(OUTPUT_DATASET_DIR, 'images', 'val')}\n\n")
        f.write(f"nc: {len(names_dict)}\n")
        # A Python list repr of strings is a valid YAML flow sequence.
        f.write(f"names: {list(names_dict.values())}\n")
    print(f"Created data.yaml at {yaml_path}")
def auto_label_and_split():
    """Auto-label every input image with the seed model and build a dataset.

    Runs batched segmentation inference over INPUT_IMG_DIR, writes one
    YOLO-seg label file per image (class id followed by normalized
    polygon coordinates), copies each image into a randomly shuffled
    train/val split, and emits the matching data.yaml.
    """
    setup_directories()

    print(f"Loading : {MODEL_PATH}")
    model = YOLO(MODEL_PATH)
    generate_yaml(model)

    valid_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.webp')
    all_images = [f for f in os.listdir(INPUT_IMG_DIR) if f.lower().endswith(valid_extensions)]
    total_images = len(all_images)
    print(f"Found {total_images} images. Shuffling and Splitting...")

    # Randomly assign SPLIT_RATIO of the images to train; the rest go to val.
    random.shuffle(all_images)
    split_idx = int(total_images * SPLIT_RATIO)
    train_images = set(all_images[:split_idx])

    print(f"Starting Auto-Labeling (Batch Size: {BATCH_SIZE})...")
    # stream=True yields results one image at a time, so memory use stays
    # flat regardless of dataset size.
    # NOTE(review): device="cuda:0" assumes a GPU is present — confirm,
    # or the run will fail on CPU-only machines.
    results = model.predict(
        source=INPUT_IMG_DIR,
        stream=True,
        batch=BATCH_SIZE,
        conf=CONF_THRESHOLD,
        verbose=False,
        device="cuda:0"
    )

    for result in tqdm(results, total=total_images, desc="Labeling"):
        filename = os.path.basename(result.path)
        folder_type = "train" if filename in train_images else "val"
        dest_img_path = os.path.join(OUTPUT_DATASET_DIR, "images", folder_type, filename)
        txt_filename = os.path.splitext(filename)[0] + ".txt"
        dest_txt_path = os.path.join(OUTPUT_DATASET_DIR, "labels", folder_type, txt_filename)

        lines = []
        if result.masks is not None and result.boxes is not None:
            for i, polygon in enumerate(result.masks.xyn):
                # YOLO segmentation labels require at least 3 points to
                # describe a polygon; skip degenerate masks that would
                # otherwise produce invalid label lines.
                if len(polygon) < 3:
                    continue
                cls_id = int(result.boxes.cls[i].item())
                coords = " ".join([f"{x:.6f} {y:.6f}" for x, y in polygon])
                lines.append(f"{cls_id} {coords}")

        # An empty .txt is intentional: it marks the image as background.
        with open(dest_txt_path, "w") as f:
            f.write("\n".join(lines))
        shutil.copy2(result.path, dest_img_path)

    # Original message contained a mojibake character ("๐"); replaced
    # with plain text.
    print("\nAuto-Labeling Complete!")
    print(f"Dataset ready at: {OUTPUT_DATASET_DIR}")
    print("You can now train your V2 model using the newly generated data.yaml!")
# Script entry point: build the auto-labeled dataset when run directly.
if __name__ == "__main__":
    auto_label_and_split()