Spaces:

mametarow
/

booth-pic-api

Running

booth-pic-api / backend /scripts /auto_annotate.py

github-actions

Deploy to HF (clean history with LFS)

a06f06c 19 days ago

3.69 kB

	import os
	import random
	import glob
	import shutil
	from ultralytics import YOLO
	from pathlib import Path
	from tqdm import tqdm

	def find_best_model():
	    """Return the path of the first trained ``best.pt`` weights file found.

	    The search is anchored at ``base_dir``, the parent of the directory that
	    contains this script. Candidates are checked in this order:

	    1. ``<base_dir>/runs/detect/train2/weights/best.pt``
	    2. ``<base_dir>/../runs/detect/train2/weights/best.pt``
	    3. ``<base_dir>/runs/detect/train/weights/best.pt``
	    4. ``<base_dir>/../runs/detect/train/weights/best.pt``

	    Returns:
	        The first candidate that is an existing regular file, as a string
	        (``..`` components are left un-normalized), or ``None`` when none of
	        the candidates exists.
	    """
	    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

	    # Order matters: the "train2" run wins over "train", and for each run the
	    # copy under base_dir wins over the one under its parent directory.
	    candidates = [
	        os.path.join(root, "runs", "detect", run_name, "weights", "best.pt")
	        for run_name in ("train2", "train")
	        for root in (base_dir, os.path.join(base_dir, ".."))
	    ]

	    # isfile() rather than exists(): a directory that happens to be named
	    # "best.pt" is not loadable and must not shadow a real weights file that
	    # appears later in the list.
	    return next((path for path in candidates if os.path.isfile(path)), None)

	def _yolo_label_lines(results):
	    """Build YOLO-format label lines from the results of one model call.

	    Each line is ``<class_id> <center_x> <center_y> <width> <height>``, where
	    the four box values come from ``boxes.xywhn`` (normalized to 0-1).

	    Returns:
	        A list of label lines; empty when nothing was detected.
	    """
	    lines = []
	    for result in results:
	        boxes = result.boxes
	        # Convert each tensor once per result instead of calling int()/float()
	        # on individual elements; the resulting Python numbers are the same.
	        class_ids = boxes.cls.tolist()
	        coords = boxes.xywhn.tolist()
	        lines.extend(
	            f"{int(cls_id)} {x} {y} {w} {h}"
	            for cls_id, (x, y, w, h) in zip(class_ids, coords)
	        )
	    return lines


	def _save_sample(img_path, label_lines, images_out_dir, labels_out_dir):
	    """Write the label file for ``img_path`` and copy the image next to it.

	    The label is written before the image is copied, and a half-written pair
	    is removed on failure. In the Ultralytics dataset format an image with no
	    label file is read as "contains no objects", so an orphan image left
	    behind by a failed write would silently become a false background sample.

	    Raises:
	        OSError: if the label cannot be written or the image cannot be copied.
	    """
	    filename = os.path.basename(img_path)
	    img_name = os.path.splitext(filename)[0]
	    dest_img = os.path.join(images_out_dir, filename)
	    label_path = os.path.join(labels_out_dir, f"{img_name}.txt")

	    try:
	        with open(label_path, "w", encoding="utf-8") as f:
	            f.write("\n".join(label_lines) + "\n")
	        shutil.copy2(img_path, dest_img)
	    except shutil.SameFileError:
	        # Source and destination are the same file: nothing partial was
	        # written, and removing dest_img would delete the source image.
	        raise
	    except OSError:
	        for partial in (label_path, dest_img):
	            try:
	                os.remove(partial)
	            except OSError:
	                pass  # Already absent or not removable; nothing more to undo.
	        raise


	def auto_annotate(num_samples=1000, confidence_threshold=0.6):
	    """Auto-label a random sample of raw images with the trained YOLO model.

	    Loads the weights located by ``find_best_model()``, picks up to
	    ``num_samples`` random ``*.jpg`` files from
	    ``<backend>/scraper/data/raw_images`` and runs the model on each one.
	    For every image with at least one detection, the image is copied to
	    ``<backend>/yolo_dataset/auto_generated/images`` and a YOLO-format label
	    file with the same base name is written to
	    ``<backend>/yolo_dataset/auto_generated/labels``. Images without
	    detections are skipped (they are not saved as empty background samples).

	    Progress and errors are reported on stdout. A failure on one image is
	    reported and does not stop the run.

	    Args:
	        num_samples: Maximum number of raw images to process.
	        confidence_threshold: Passed to the model as ``conf``.

	    Returns:
	        None. Also returns early, after printing a message, when no model or
	        no raw images are found.
	    """
	    backend_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

	    model_path = find_best_model()
	    if not model_path:
	        print("Error: best.pt model not found! Please ensure it exists.")
	        return

	    print(f"Loading YOLO model from: {model_path}")
	    model = YOLO(model_path)

	    raw_images_dir = os.path.join(backend_dir, "scraper", "data", "raw_images")
	    output_dataset_dir = os.path.join(backend_dir, "yolo_dataset", "auto_generated")
	    images_out_dir = os.path.join(output_dataset_dir, "images")
	    labels_out_dir = os.path.join(output_dataset_dir, "labels")

	    os.makedirs(images_out_dir, exist_ok=True)
	    os.makedirs(labels_out_dir, exist_ok=True)

	    # Get all jpg images
	    all_images = glob.glob(os.path.join(raw_images_dir, "*.jpg"))
	    if not all_images:
	        print(f"No images found in {raw_images_dir}")
	        return

	    print(f"Found {len(all_images)} raw images.")

	    # Shuffle and pick num_samples
	    random.shuffle(all_images)
	    samples = all_images[:num_samples]

	    print(f"Starting auto-annotation for {len(samples)} images...")

	    successful_annotated = 0

	    for img_path in tqdm(samples):
	        filename = os.path.basename(img_path)

	        # Deliberately broad: this is a best-effort batch job, so any failure
	        # on one image (unreadable file, inference error, disk error) is
	        # reported and the loop moves on to the next image.
	        try:
	            results = model(img_path, conf=confidence_threshold, verbose=False)
	            label_lines = _yolo_label_lines(results)

	            # Nothing detected above the threshold: skip this image.
	            if not label_lines:
	                continue

	            _save_sample(img_path, label_lines, images_out_dir, labels_out_dir)
	            successful_annotated += 1
	        except Exception as e:
	            # tqdm.write keeps the message from breaking the progress bar.
	            tqdm.write(f"Error processing {filename}: {e}")

	    print(f"Auto-annotation complete! {successful_annotated} images successfully extracted to {output_dataset_dir}")

	if __name__ == "__main__":
	    # Script entry point: annotate a random sample of up to 1000 raw images,
	    # passing 0.6 as the detection confidence threshold.
	    auto_annotate(confidence_threshold=0.6, num_samples=1000)