Spaces:

nathbns
/

yolo1_from_scratch

Running

App Files Files Community

yolo1_from_scratch / app.py

nathbns

Update app.py

06178e6 verified 2 months ago

raw

history blame

10.5 kB

	import torch
	import gradio as gr
	import numpy as np
	from PIL import Image
	import torchvision.transforms as transforms
	from model import Yolo_V1
	from utils import cellboxes_to_boxes, non_max_suppression
	import cv2
	import os
	import glob
	import time
	from huggingface_hub import hf_hub_download

	# Classes PASCAL VOC
	CLASSES = [
	"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
	"chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
	"pottedplant", "sheep", "sofa", "train", "tvmonitor"
	]

	np.random.seed(42)
	COLORS = np.random.randint(50, 255, size=(len(CLASSES), 3), dtype=np.uint8)

	DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
	MODEL_REPO_ID = "nathbns/yolov1_from_scratch"
	MODEL_FILENAME = "checkpoint_epoch_50.pth.tar"

	# Charger le modèle depuis Hugging Face Hub
	print(f"Chargement du modèle depuis {MODEL_REPO_ID}...")
	try:
	model_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename=MODEL_FILENAME)
	print(f"Modèle téléchargé depuis Hugging Face Hub: {model_path}")
	except Exception as e:
	print(f"Erreur lors du téléchargement: {e}")
	print("Tentative de chargement local...")
	model_path = MODEL_FILENAME

	model = Yolo_V1(split_size=7, num_boxes=2, num_classes=20).to(DEVICE)
	checkpoint = torch.load(model_path, map_location=DEVICE)
	model.load_state_dict(checkpoint["state_dict"])
	model.eval()
	print(f"Modèle chargé avec succès!")

	# Info sur le modèle
	MODEL_INFO = {
	"mAP": checkpoint.get("mAP", "N/A"),
	"epoch": checkpoint.get("epoch", "N/A"),
	"device": DEVICE,
	"classes": len(CLASSES)
	}
	print(f"entraînement: {MODEL_INFO['mAP']}")
	print(f"Device: {DEVICE}")

	# Charger des images d'exemple depuis le dossier data
	EXAMPLE_IMAGES = []
	if os.path.exists("data/images"):
	image_files = glob.glob("data/images/*.jpg")[:20] # Prendre 20 images
	EXAMPLE_IMAGES = sorted(image_files)
	print(f"{len(EXAMPLE_IMAGES)} images d'exemple chargées")

	def draw_boxes(image, boxes):
	"""Dessine les bounding boxes sur l'image"""
	img_array = np.array(image)
	height, width = img_array.shape[:2]

	for box in boxes:
	# box format: [class_pred, prob_score, x, y, width, height]
	class_pred = int(box[0])
	confidence = float(box[1])
	x_center, y_center, box_width, box_height = box[2:6]

	# Convertir de coordonnées normalisées à pixels
	x1 = int((x_center - box_width / 2) * width)
	y1 = int((y_center - box_height / 2) * height)
	x2 = int((x_center + box_width / 2) * width)
	y2 = int((y_center + box_height / 2) * height)

	# Couleur de la classe
	color = tuple(int(c) for c in COLORS[class_pred])

	# Dessiner le rectangle
	cv2.rectangle(img_array, (x1, y1), (x2, y2), color, 2)

	# Texte
	label = f"{CLASSES[class_pred]}: {confidence:.2f}"

	# Fond du texte
	(text_width, text_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
	cv2.rectangle(img_array, (x1, y1 - text_height - 5), (x1 + text_width, y1), color, -1)

	# Texte blanc
	cv2.putText(img_array, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

	return Image.fromarray(img_array)

	def detect_objects(image, confidence_threshold, iou_threshold, show_confidence=True):
	"""Détecte les objets dans une image avec statistiques détaillées"""
	if image is None:
	return None, None, "Veuillez uploader ou sélectionner une image"

	start_time = time.time()

	# Prétraiter l'image
	transform = transforms.Compose([
	transforms.Resize((448, 448)),
	transforms.ToTensor(),
	])

	# Garder l'image originale pour l'affichage
	original_image = image.copy()
	original_size = image.size # (width, height)

	# Transformer l'image
	img_tensor = transform(image).unsqueeze(0).to(DEVICE)

	# Prédiction
	with torch.no_grad():
	predictions = model(img_tensor)

	# Convertir les prédictions en bounding boxes
	bboxes = cellboxes_to_boxes(predictions)

	# Non-maximum suppression
	nms_boxes = non_max_suppression(
	bboxes[0],
	iou_threshold=iou_threshold,
	threshold=confidence_threshold,
	box_format="midpoint"
	)

	inference_time = time.time() - start_time

	# Dessiner les boxes
	result_image = draw_boxes(original_image, nms_boxes)

	# Statistiques détaillées
	num_detections = len(nms_boxes)
	detected_classes = [CLASSES[int(box[0])] for box in nms_boxes]
	class_counts = {}
	confidence_scores = []

	for box in nms_boxes:
	cls = CLASSES[int(box[0])]
	conf = float(box[1])
	class_counts[cls] = class_counts.get(cls, 0) + 1
	confidence_scores.append(conf)

	# Créer un rapport détaillé
	stats = f"##Résultats de détection\n\n"
	stats += f"{num_detections} objet(s) détecté(s)\n\n"

	if num_detections > 0:
	stats += f"Temps d'inférence: {inference_time:.3f}s\n"
	stats += f"Taille image: {original_size[0]}x{original_size[1]}\n"
	stats += f"Confiance moyenne: {np.mean(confidence_scores):.2%}\n\n"

	stats += "### Objets détectés:\n\n"
	for cls, count in sorted(class_counts.items(), key=lambda x: x[1], reverse=True):
	stats += f"- {cls}: {count}\n"

	if show_confidence:
	stats += "\n### Confiances individuelles:\n\n"
	for i, box in enumerate(nms_boxes[:10], 1): # Top 10
	cls = CLASSES[int(box[0])]
	conf = float(box[1])
	stats += f"{i}. {cls}: {conf:.1%}\n"
	if len(nms_boxes) > 10:
	stats += f"\n...et {len(nms_boxes)-10} détection(s) de plus\n"
	else:
	stats += "Aucun objet détecté.\n\n"

	return original_image, result_image, stats

	# Interface Gradio améliorée
	with gr.Blocks(title="YOLO v1 - Détection d'objets", theme=gr.themes.Soft(), css="""
	.gradio-container {max-width: 1400px !important}
	.example-gallery {height: 400px; overflow-y: auto}
	""") as demo:

	# En-tête
	mAP_display = f"{MODEL_INFO['mAP']:.4f}" if isinstance(MODEL_INFO['mAP'], (int, float)) else MODEL_INFO['mAP']

	gr.Markdown(f"""
	# YOLO v1 - Détection d'objets en temps réel
	---
	""")

	with gr.Tabs():
	# Onglet principal - Détection
	with gr.Tab("Détection"):
	gr.Markdown("""
	### Uploadez votre image ou sélectionnez un exemple
	Classes PASCAL VOC : aeroplane, bicycle, bird, boat, bottle, bus, car, cat, chair, cow,
	diningtable, dog, horse, motorbike, person, pottedplant, sheep, sofa, train, tvmonitor
	""")

	with gr.Row():
	with gr.Column(scale=1):
	input_image = gr.Image(type="pil", label="Image d'entrée")

	with gr.Accordion("Paramètres avancés", open=True):
	confidence_slider = gr.Slider(
	minimum=0.05,
	maximum=0.95,
	value=0.4,
	step=0.05,
	label="Seuil de confiance",
	info="Plus bas = plus de détections"
	)
	iou_slider = gr.Slider(
	minimum=0.1,
	maximum=0.9,
	value=0.5,
	step=0.05,
	label="Seuil",
	info="Plus haut = garde plus de boxes qui se chevauchent"
	)
	show_conf_check = gr.Checkbox(
	value=True,
	label="Afficher les confiances détaillées"
	)

	detect_btn = gr.Button("Détecter les objets", variant="primary", size="lg")


	with gr.Column(scale=2):
	with gr.Row():
	original_display = gr.Image(type="pil", label="Image originale")
	output_image = gr.Image(type="pil", label="Résultat avec détections")

	output_stats = gr.Markdown("Uploadez une image et cliquez sur 'Détecter' pour commencer !")

	# Galerie d'exemples
	if EXAMPLE_IMAGES:
	gr.Markdown("### Exemples (cliquez pour tester)")
	examples_list = [[img, 0.4, 0.5, True] for img in EXAMPLE_IMAGES[:12]]
	gr.Examples(
	examples=examples_list,
	inputs=[input_image, confidence_slider, iou_slider, show_conf_check],
	outputs=[original_display, output_image, output_stats],
	fn=detect_objects,
	cache_examples=False,
	examples_per_page=6,
	)

	# Actions
	detect_btn.click(
	fn=detect_objects,
	inputs=[input_image, confidence_slider, iou_slider, show_conf_check],
	outputs=[original_display, output_image, output_stats]
	)

	input_image.change(
	fn=detect_objects,
	inputs=[input_image, confidence_slider, iou_slider, show_conf_check],
	outputs=[original_display, output_image, output_stats]
	)

	# Onglet Info
	with gr.Tab("À propos"):
	mAP_info = f"{MODEL_INFO['mAP']:.4f}" if isinstance(MODEL_INFO['mAP'], (int, float)) else 'N/A'
	epoch_info = MODEL_INFO['epoch'] if MODEL_INFO['epoch'] != 'N/A' else 'N/A'

	# Lancer l'app
	if __name__ == "__main__":
	print("\n" + "="*60)
	print("Lancement de l'application Gradio YOLO v1")
	print("="*60)
	print(f"Modèle: {MODEL_PATH}")
	print(f"Device: {DEVICE}")
	print(f"Exemples chargés: {len(EXAMPLE_IMAGES)}")
	print("="*60 + "\n")

	demo.launch(
	share=True,
	server_name="0.0.0.0", # Accessible depuis le réseau local
	server_port=7860,
	show_error=True
	)