Spaces:
Sleeping
Sleeping
| import os | |
| import shutil | |
| import random | |
| from ultralytics import YOLO | |
| from tqdm import tqdm | |
# --- Pipeline configuration -------------------------------------------------
# Weights file loaded with YOLO(); its predictions become the new labels.
MODEL_PATH = r"C:\Users\charu\Documents\goyam\roboflow\runs\segment\yolo26_real_v1\weights\best.pt"
# Folder that is listed (non-recursively) for source images and passed to predict().
INPUT_IMG_DIR = r"C:\Users\charu\Desktop\all new\40000\all_images"
# Root of the generated dataset: images/, labels/ and data.yaml are written here.
OUTPUT_DATASET_DIR = r"C:\Users\charu\Desktop\all new\40000\goyam_v2_dataset"
# Confidence threshold forwarded to model.predict(conf=...).
CONF_THRESHOLD = 0.30
# Fraction of the shuffled image list assigned to "train"; the rest goes to "val".
SPLIT_RATIO = 0.85
# Batch size forwarded to model.predict(batch=...).
BATCH_SIZE = 16
def setup_directories(base_dir=None):
    """Create the images/labels x train/val folder tree for the dataset.

    Existing folders are left untouched, so the call is safe to repeat.

    Args:
        base_dir: Dataset root under which the four folders are created.
            When omitted, the module-level ``OUTPUT_DATASET_DIR`` is used.
    """
    if base_dir is None:
        base_dir = OUTPUT_DATASET_DIR
    # NOTE(review): the leading "π" looks like a mis-encoded emoji; kept as-is.
    print("π Creating dataset directories...")
    for kind in ("images", "labels"):
        for split in ("train", "val"):
            os.makedirs(os.path.join(base_dir, kind, split), exist_ok=True)
def generate_yaml(model, output_dir=None):
    """Write the ``data.yaml`` file that describes the generated dataset.

    The file lists the train/val image folders, the class count and the
    class names taken from ``model.names``.

    Args:
        model: Object exposing ``names``, a mapping of class id -> class name.
        output_dir: Dataset root; ``data.yaml`` is written directly inside it
            and the train/val paths point below it. When omitted, the
            module-level ``OUTPUT_DATASET_DIR`` is used.
    """
    if output_dir is None:
        output_dir = OUTPUT_DATASET_DIR
    yaml_path = os.path.join(output_dir, "data.yaml")
    # Order names by class id so each list position matches the id written
    # into the label files, regardless of the mapping's insertion order.
    class_names = [name for _, name in sorted(model.names.items())]
    # Explicit UTF-8: the platform default (e.g. cp1252 on Windows) cannot
    # represent every class name.
    with open(yaml_path, "w", encoding="utf-8") as f:
        f.write(f"train: {os.path.join(output_dir, 'images', 'train')}\n")
        f.write(f"val: {os.path.join(output_dir, 'images', 'val')}\n\n")
        f.write(f"nc: {len(class_names)}\n")
        f.write(f"names: {class_names}\n")
    print(f"Created data.yaml at {yaml_path}")
def _polygon_label_lines(result):
    """Return one ``"<class_id> x1 y1 x2 y2 ..."`` line per usable mask polygon."""
    if result.masks is None or result.boxes is None:
        return []
    lines = []
    for i, polygon in enumerate(result.masks.xyn):
        # Fewer than 3 points is not a polygon; writing it would leave a
        # malformed line (e.g. just the class id) in the label file.
        if len(polygon) < 3:
            continue
        cls_id = int(result.boxes.cls[i].item())
        coords = " ".join(f"{x:.6f} {y:.6f}" for x, y in polygon)
        lines.append(f"{cls_id} {coords}")
    return lines


def auto_label_and_split():
    """Pseudo-label every image in ``INPUT_IMG_DIR`` and build a train/val dataset.

    Steps:
      1. List the images (by extension) and shuffle them into train/val
         according to ``SPLIT_RATIO``. The shuffle is unseeded, so the split
         differs between runs.
      2. Create the output folders, load the model and write ``data.yaml``.
      3. Run streamed prediction over the input folder, write one polygon
         label file per image and copy the image next to it.

    An image without any usable polygon still gets an (empty) label file.
    Returns ``None``; returns early without touching the output folder when
    no matching image is found.
    """
    valid_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.webp')
    all_images = [f for f in os.listdir(INPUT_IMG_DIR) if f.lower().endswith(valid_extensions)]
    total_images = len(all_images)
    if not all_images:
        # Nothing to predict on; skip the setup work entirely.
        print(f"No images found in {INPUT_IMG_DIR}. Nothing to label.")
        return

    setup_directories()
    print(f"Loading : {MODEL_PATH}")
    model = YOLO(MODEL_PATH)
    generate_yaml(model)

    print(f"Found {total_images} images. Shuffling and Splitting...")
    random.shuffle(all_images)
    split_idx = int(total_images * SPLIT_RATIO)
    train_images = set(all_images[:split_idx])
    val_images = set(all_images[split_idx:])

    print(f"Starting Auto-Labeling (Batch Size: {BATCH_SIZE})...")
    # stream=True yields results lazily instead of holding all of them in memory.
    results = model.predict(
        source=INPUT_IMG_DIR,
        stream=True,
        batch=BATCH_SIZE,
        conf=CONF_THRESHOLD,
        verbose=False,
        device="cuda:0",
    )
    for result in tqdm(results, total=total_images, desc="Labeling"):
        img_path = result.path
        filename = os.path.basename(img_path)
        if filename in train_images:
            folder_type = "train"
        elif filename in val_images:
            folder_type = "val"
        else:
            # The predictor picked up a file that was not part of the listing
            # above (different extension); it was never assigned a split, so
            # leave it out rather than silently inflating "val".
            continue

        dest_img_path = os.path.join(OUTPUT_DATASET_DIR, "images", folder_type, filename)
        txt_filename = os.path.splitext(filename)[0] + ".txt"
        dest_txt_path = os.path.join(OUTPUT_DATASET_DIR, "labels", folder_type, txt_filename)

        with open(dest_txt_path, "w") as f:
            f.write("\n".join(_polygon_label_lines(result)))
        shutil.copy2(img_path, dest_img_path)

    # NOTE(review): the leading "π" looks like a mis-encoded emoji; kept as-is.
    print("\nπ Auto-Labeling Complete!")
    print(f"Dataset ready at: {OUTPUT_DATASET_DIR}")
    print("You can now train your V2 model using the newly generated data.yaml!")
if __name__ == "__main__":
    # Run the pipeline only when executed as a script, not when imported.
    auto_label_and_split()