Spaces:

Ronny56
/

yolo

Sleeping

App Files Files Community

yolo / app.py

Ronny56

Update app.py

958a84c verified about 2 months ago

raw

history blame contribute delete

8.35 kB

	import gradio as gr
	from ultralytics import YOLO
	import numpy as np
	import cv2

	# Load the YOLO weights from 'best.pt' once at import time; this module-level
	# model is the one detect_and_segment_balloons() calls for every request.
	detector = YOLO('best.pt')

	def get_inpaint_bboxes(xyxy, img):
	    """Locate the text areas inside one balloon box.

	    The balloon crop is converted to grayscale, contrast-enhanced with CLAHE,
	    binarised with an inverted adaptive threshold, cleaned with small
	    morphological closings plus a dilation, and the external contours that
	    pass the size / aspect-ratio filters are kept as text regions.
	    (The original author notes the approach is inspired by comic-translate.)

	    Args:
	        xyxy: Balloon box as four numbers (x1, y1, x2, y2) in image pixels.
	            Values are truncated to int and clamped to the image bounds.
	        img: Image array of shape (H, W) or (H, W, C). Colour input is
	            treated as RGB (3 channels) or RGBA (4 channels).
	            NOTE(review): presumably uint8, as delivered by the UI; the
	            dtype is not checked here.

	    Returns:
	        None when the clamped box is empty or no contour passes the filters.
	        Otherwise a tuple (full_mask, text_bboxes):
	          * full_mask: (H, W) uint8 array, 255 on the (slightly dilated)
	            text contours and 0 elsewhere.
	          * text_bboxes: list of dicts with keys 'x1', 'y1', 'x2', 'y2'
	            (absolute image coordinates) and 'area' (contour area).
	    """
	    x1, y1, x2, y2 = (int(coord) for coord in xyxy)

	    # Clamp the box to the image so the crop below is always valid.
	    img_h, img_w = img.shape[:2]
	    x1, y1 = max(0, x1), max(0, y1)
	    x2, y2 = min(img_w, x2), min(img_h, y2)

	    if x2 <= x1 or y2 <= y1:
	        return None

	    balloon_region = img[y1:y2, x1:x2].copy()
	    region_h, region_w = balloon_region.shape[:2]

	    # Convert to grayscale. COLOR_RGB2GRAY only accepts 3-channel input, so
	    # RGBA and single-channel 3-D crops need their own handling.
	    if balloon_region.ndim == 3:
	        channels = balloon_region.shape[2]
	        if channels == 4:
	            gray = cv2.cvtColor(balloon_region, cv2.COLOR_RGBA2GRAY)
	        elif channels == 1:
	            gray = balloon_region[:, :, 0]
	        else:
	            gray = cv2.cvtColor(balloon_region, cv2.COLOR_RGB2GRAY)
	    else:
	        gray = balloon_region

	    # Step 1: adaptive histogram equalisation to improve contrast.
	    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
	    enhanced = clahe.apply(gray)

	    # Step 2: adaptive threshold (copes with uneven backgrounds).
	    binary = cv2.adaptiveThreshold(
	        enhanced, 255,
	        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
	        cv2.THRESH_BINARY_INV,  # inverted: text white, background black
	        blockSize=15,           # size of the local neighbourhood
	        C=10                    # constant subtracted from the local mean
	    )

	    # Step 3: morphological clean-up.
	    # Vertical kernel joins the parts of a single character.
	    kernel_v = np.ones((2, 1), np.uint8)
	    binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel_v)

	    # Horizontal kernel joins neighbouring characters.
	    kernel_h = np.ones((1, 3), np.uint8)
	    binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel_h)

	    # Light dilation merges the characters of a word.
	    kernel_dilate = np.ones((3, 3), np.uint8)
	    dilated = cv2.dilate(binary, kernel_dilate, iterations=1)

	    # Step 4: contours of the candidate text areas.
	    contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

	    # Step 5: filter the contours and collect their bounding boxes.
	    text_bboxes = []
	    mask = np.zeros((region_h, region_w), dtype=np.uint8)

	    for contour in contours:
	        area = cv2.contourArea(contour)
	        if area <= 50:  # minimum area
	            continue

	        rx, ry, rw, rh = cv2.boundingRect(contour)
	        if rw <= 3 or rh <= 3:  # minimum dimensions
	            continue
	        # Blobs spanning (almost) the whole crop are the balloon itself.
	        if rw >= region_w * 0.95 or rh >= region_h * 0.95:
	            continue

	        aspect_ratio = rw / float(rh)  # rh > 3 here, so no division by zero
	        if not (0.1 < aspect_ratio < 15):
	            continue

	        # Draw the filled contour on the crop-sized mask.
	        cv2.drawContours(mask, [contour], -1, 255, -1)

	        # Store the bounding box in absolute image coordinates.
	        text_bboxes.append({
	            'x1': x1 + rx,
	            'y1': y1 + ry,
	            'x2': x1 + rx + rw,
	            'y2': y1 + ry + rh,
	            'area': area
	        })

	    if not text_bboxes:
	        return None

	    # Step 6: grow the mask slightly to capture the text's anti-aliasing.
	    kernel_expand = np.ones((3, 3), np.uint8)
	    mask = cv2.dilate(mask, kernel_expand, iterations=1)

	    # Paste the crop-sized mask into a full-size one.
	    full_mask = np.zeros((img_h, img_w), dtype=np.uint8)
	    full_mask[y1:y2, x1:x2] = mask

	    return full_mask, text_bboxes

	def detect_and_segment_balloons(image, confidence):
	    """Detect balloons with the YOLO model and segment the text in each one.

	    Args:
	        image: Input picture as a numpy array, or None when nothing was
	            supplied. Assumed to be RGB, which is what the Gradio image
	            input is expected to deliver -- confirm for other callers.
	        confidence: Minimum detection confidence forwarded to the detector.

	    Returns:
	        A tuple (annotated_image, report).
	          * With no image: (None, {"error": "Nessuna immagine"}).
	          * With no detections: the input image itself and a report with
	            'num_balloons' == 0 and an empty 'detections' list.
	          * Otherwise: a copy of the image with green text contours, orange
	            text boxes and a per-balloon label, plus a report whose
	            'detections' entries hold 'balloon_id', 'balloon_box',
	            'confidence', 'text_regions', 'has_segmentation' and, when
	            present, 'num_text_regions' or 'error'.
	    """
	    if image is None:
	        return None, {"error": "Nessuna immagine"}

	    print(f"Rilevamento con confidenza: {confidence}")

	    # Ultralytics interprets numpy input as BGR, while the UI hands over RGB;
	    # convert so the model does not see swapped red/blue channels.
	    if image.ndim == 3 and image.shape[2] == 3:
	        detector_input = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
	    else:
	        detector_input = image

	    # Detection
	    detection_results = detector(detector_input, conf=confidence, verbose=False)
	    boxes = detection_results[0].boxes

	    output = {'num_balloons': 0, 'detections': []}

	    if boxes is None or len(boxes) == 0:
	        print("Nessun balloon rilevato")
	        return image, output

	    output['num_balloons'] = len(boxes)
	    print(f"Trovati {output['num_balloons']} balloon")

	    annotated = image.copy()

	    # Move all boxes and scores to host memory once instead of per balloon.
	    all_xyxy = boxes.xyxy.cpu().numpy()
	    all_conf = boxes.conf.cpu().numpy()

	    for balloon_id, (box, score) in enumerate(zip(all_xyxy, all_conf), start=1):
	        conf = float(score)
	        bx1, by1, bx2, by2 = (int(v) for v in box)

	        print(f"Segmentazione balloon {balloon_id}...")

	        detection_data = {
	            'balloon_id': balloon_id,
	            'balloon_box': {
	                'x1': bx1,
	                'y1': by1,
	                'x2': bx2,
	                'y2': by2
	            },
	            'confidence': round(conf, 3)
	        }

	        # Best-effort per balloon: a failure on one balloon is recorded in
	        # its entry and must not abort the remaining ones.
	        try:
	            result = get_inpaint_bboxes(box, image)

	            if result is None:
	                detection_data['text_regions'] = []
	                detection_data['has_segmentation'] = False
	                print(f" ⚠️ Segmentazione fallita")
	            else:
	                mask, text_bboxes = result

	                if not text_bboxes:
	                    detection_data['text_regions'] = []
	                    detection_data['has_segmentation'] = False
	                    print(f" ⚠️ Nessun testo trovato")
	                else:
	                    # Contours of the text mask, for visualisation only.
	                    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

	                    # Green outline around the segmented text.
	                    cv2.drawContours(annotated, contours, -1, (0, 255, 0), 2)

	                    # Individual text bounding boxes in orange.
	                    for tb in text_bboxes:
	                        cv2.rectangle(annotated,
	                                      (tb['x1'], tb['y1']),
	                                      (tb['x2'], tb['y2']),
	                                      (255, 165, 0), 1)

	                    # Balloon label; clamp the anchor so it stays on the
	                    # canvas when the balloon touches the top/left edge.
	                    label_origin = (max(bx1, 0), max(by1 - 10, 15))
	                    cv2.putText(annotated, f"B{balloon_id} ({len(text_bboxes)} txt)",
	                                label_origin,
	                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

	                    # Record the regions only after drawing succeeded, so a
	                    # failure above cannot leave a stale 'num_text_regions'.
	                    detection_data['text_regions'] = text_bboxes
	                    detection_data['num_text_regions'] = len(text_bboxes)
	                    detection_data['has_segmentation'] = True
	                    print(f" ✅ {len(text_bboxes)} aree di testo segmentate")

	        except Exception as e:
	            print(f" ❌ Errore: {str(e)}")
	            detection_data['text_regions'] = []
	            detection_data['has_segmentation'] = False
	            detection_data['error'] = str(e)

	        output['detections'].append(detection_data)

	    print("Completato!")
	    return annotated, output

	# Gradio UI: image + confidence slider on the left, annotated result and
	# JSON report on the right, followed by a short legend.
	with gr.Blocks() as demo:
	    gr.Markdown("# 🎈 Segmentazione Testo Balloons (Algorithm Comic-Translate)")
	    gr.Markdown("Usa threshold adattivo e morphological operations per segmentare testo preciso")

	    with gr.Row():
	        with gr.Column():
	            input_image = gr.Image(type="numpy", label="📷 Fumetto")
	            # Slider settings are passed by keyword so the call does not
	            # depend on the positional order of the Slider constructor.
	            # NOTE(review): `step` appears to be keyword-only in newer
	            # Gradio releases -- confirm against the pinned version.
	            confidence = gr.Slider(
	                minimum=0.1,
	                maximum=1.0,
	                value=0.25,
	                step=0.05,
	                label="🎯 Confidenza",
	            )
	            segment_btn = gr.Button("✂️ Segmenta", variant="primary")

	        with gr.Column():
	            output_image = gr.Image(label="✅ Risultato")
	            output_json = gr.JSON(label="📊 Dati")

	    gr.Markdown("""
	### 🔧 Algoritmo:
	1. CLAHE: Equalizzazione istogramma adattiva
	2. Threshold Adattivo: Funziona su sfondi variabili
	3. Morphological Ops: Connette caratteri e rimuove rumore
	4. Filtraggio: Area, aspect ratio, dimensioni

	### 📖 Output:
	- Verde: Contorni mask del testo
	- Arancione: Bounding box individuali
	- text_regions: Coordinate per OCR/inpainting
	""")

	    # Run detection + segmentation when the button is pressed.
	    segment_btn.click(
	        fn=detect_and_segment_balloons,
	        inputs=[input_image, confidence],
	        outputs=[output_image, output_json]
	    )

	demo.launch()