import gradio as gr
from ultralytics import YOLO
import numpy as np
import cv2

# Detection model, built once at import time from the weights file 'best.pt'
# and reused by every call to detect_and_segment_balloons().
detector = YOLO('best.pt')

def get_inpaint_bboxes(xyxy, img):
    """
    Segment the text inside one balloon box (approach inspired by comic-translate).

    The box is truncated to integer pixels and clipped to the image, then the
    crop goes through CLAHE, an inverted adaptive threshold, two small
    morphological closings and a dilation. The external contours of the result
    are filtered by area, size and aspect ratio.

    Args:
        xyxy: Four box coordinates (x1, y1, x2, y2) in image pixels.
        img: Image array; a 3-D array is converted to grayscale as RGB, any
            other rank is used as-is.

    Returns:
        None when the clipped box is empty or no contour passes the filters.
        Otherwise a tuple ``(full_mask, text_bboxes)`` where ``full_mask`` is a
        uint8 array with the image's height and width (255 on the kept,
        slightly dilated text contours, 0 elsewhere) and ``text_bboxes`` is a
        list of dicts with keys 'x1', 'y1', 'x2', 'y2' (absolute image
        coordinates) and 'area' (contour area).
    """
    left, top, right, bottom = (int(value) for value in xyxy)

    # Clip the box to the image so the crop below is always in bounds.
    img_h, img_w = img.shape[:2]
    x1 = max(0, left)
    y1 = max(0, top)
    x2 = min(img_w, right)
    y2 = min(img_h, bottom)

    if not (x2 > x1 and y2 > y1):
        return None

    crop = img[y1:y2, x1:x2].copy()
    crop_h, crop_w = crop.shape[:2]

    gray = cv2.cvtColor(crop, cv2.COLOR_RGB2GRAY) if crop.ndim == 3 else crop

    # Local contrast equalisation before thresholding.
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    contrast = equalizer.apply(gray)

    # Inverted adaptive threshold: text becomes white on a black background.
    text_fg = cv2.adaptiveThreshold(
        contrast,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        blockSize=15,
        C=10,
    )

    # Close first with a vertical 2x1 kernel (joins parts of a character),
    # then with a horizontal 1x3 kernel (joins neighbouring characters).
    for kernel_shape in ((2, 1), (1, 3)):
        text_fg = cv2.morphologyEx(
            text_fg, cv2.MORPH_CLOSE, np.ones(kernel_shape, np.uint8)
        )

    # The same 3x3 kernel serves both the word-merging dilation here and the
    # final mask expansion below.
    square3 = np.ones((3, 3), np.uint8)
    merged = cv2.dilate(text_fg, square3, iterations=1)

    candidates, _ = cv2.findContours(
        merged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    max_w = crop_w * 0.95
    max_h = crop_h * 0.95
    region_mask = np.zeros((crop_h, crop_w), dtype=np.uint8)
    text_bboxes = []

    for candidate in candidates:
        rx, ry, rw, rh = cv2.boundingRect(candidate)
        area = cv2.contourArea(candidate)

        # Reject specks.
        if area <= 50:
            continue
        if rw <= 3 or rh <= 3:
            continue
        # Reject shapes spanning almost the whole crop (likely the balloon
        # outline rather than text).
        if rw >= max_w or rh >= max_h:
            continue
        # rh > 3 is guaranteed here, so the division is safe.
        ratio = rw / float(rh)
        if not 0.1 < ratio < 15:
            continue

        # Paint the filled contour into the crop-sized mask.
        cv2.drawContours(region_mask, [candidate], -1, 255, -1)

        # Report the bounding box in absolute image coordinates.
        text_bboxes.append({
            'x1': x1 + rx,
            'y1': y1 + ry,
            'x2': x1 + rx + rw,
            'y2': y1 + ry + rh,
            'area': area
        })

    if not text_bboxes:
        return None

    # Grow the mask slightly so it also covers anti-aliased text edges.
    region_mask = cv2.dilate(region_mask, square3, iterations=1)

    # Paste the crop-sized mask into a mask the size of the whole image.
    full_mask = np.zeros((img_h, img_w), dtype=np.uint8)
    full_mask[y1:y2, x1:x2] = region_mask

    return full_mask, text_bboxes

def detect_and_segment_balloons(image, confidence):
    """
    Detect balloons in an image and segment the text inside each one.

    Args:
        image: Image as a NumPy array, or None when no image was provided.
        confidence: Confidence threshold forwarded to the detector as ``conf``.

    Returns:
        A tuple ``(annotated_image, report)``:

        * ``image is None``: ``(None, {"error": "Nessuna immagine"})``.
        * No detections: the input image itself and a report with
          ``num_balloons == 0`` and an empty ``detections`` list.
        * Otherwise: a copy of the image with text-mask contours (green),
          per-region boxes (orange) and a label per balloon drawn on it, plus
          a report whose ``detections`` list holds one dict per balloon
          (``balloon_id``, ``balloon_box``, ``confidence``, ``text_regions``,
          ``has_segmentation`` and, on failure, ``error``).
    """
    if image is None:
        return None, {"error": "Nessuna immagine"}

    print(f"Rilevamento con confidenza: {confidence}")

    # Detection
    detection_results = detector(image, conf=confidence, verbose=False)
    boxes = detection_results[0].boxes

    output = {'num_balloons': 0, 'detections': []}

    if boxes is None or len(boxes) == 0:
        print("Nessun balloon rilevato")
        return image, output

    output['num_balloons'] = len(boxes)
    print(f"Trovati {output['num_balloons']} balloon")

    annotated = image.copy()

    # Convert the box tensors to NumPy once instead of once per balloon.
    all_xyxy = boxes.xyxy.cpu().numpy()
    all_conf = boxes.conf.cpu().numpy()

    # putText anchors the text at its bottom-left corner; keep the baseline
    # far enough below the top edge that the label stays visible for balloons
    # touching the top of the image.
    min_label_y = 15

    for balloon_id, (box, raw_conf) in enumerate(zip(all_xyxy, all_conf), start=1):
        conf = float(raw_conf)
        bx1, by1, bx2, by2 = (int(value) for value in box)

        print(f"Segmentazione balloon {balloon_id}...")

        detection_data = {
            'balloon_id': balloon_id,
            'balloon_box': {
                'x1': bx1,
                'y1': by1,
                'x2': bx2,
                'y2': by2
            },
            'confidence': round(conf, 3)
        }

        # Best-effort per balloon: a failure on one balloon is recorded in its
        # report entry and must not abort the remaining ones.
        try:
            result = get_inpaint_bboxes(box, image)

            if result is None:
                detection_data['text_regions'] = []
                detection_data['has_segmentation'] = False
                print("  ⚠️ Segmentazione fallita")
            else:
                mask, text_bboxes = result

                if not text_bboxes:
                    detection_data['text_regions'] = []
                    detection_data['has_segmentation'] = False
                    print("  ⚠️ Nessun testo trovato")
                else:
                    detection_data['text_regions'] = text_bboxes
                    detection_data['num_text_regions'] = len(text_bboxes)

                    # Outline the text mask in green.
                    contours, _ = cv2.findContours(
                        mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
                    )
                    cv2.drawContours(annotated, contours, -1, (0, 255, 0), 2)

                    # Draw each individual text bounding box in orange.
                    for tb in text_bboxes:
                        cv2.rectangle(
                            annotated,
                            (tb['x1'], tb['y1']),
                            (tb['x2'], tb['y2']),
                            (255, 165, 0),
                            1,
                        )

                    # Balloon label, placed just above the box when there is room.
                    cv2.putText(
                        annotated,
                        f"B{balloon_id} ({len(text_bboxes)} txt)",
                        (bx1, max(by1 - 10, min_label_y)),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.5,
                        (0, 255, 0),
                        2,
                    )

                    detection_data['has_segmentation'] = True
                    print(f"  ✅ {len(text_bboxes)} aree di testo segmentate")

        except Exception as e:
            print(f"  ❌ Errore: {str(e)}")
            detection_data['text_regions'] = []
            detection_data['has_segmentation'] = False
            detection_data['error'] = str(e)

        output['detections'].append(detection_data)

    print("Completato!")
    return annotated, output

# Gradio UI: image + confidence slider on the left, annotated result and JSON
# report on the right; the button runs detect_and_segment_balloons().
with gr.Blocks() as demo:
    gr.Markdown("# 🎈 Segmentazione Testo Balloons (Algorithm Comic-Translate)")
    gr.Markdown("**Usa threshold adattivo e morphological operations per segmentare testo preciso**")
    
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="numpy", label="📷 Fumetto")
            # Pass every Slider argument by keyword: `step` is believed to be
            # keyword-only in gr.Slider, so a fourth positional argument would
            # raise TypeError there.
            confidence = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.25,
                step=0.05,
                label="🎯 Confidenza",
            )
            segment_btn = gr.Button("✂️ Segmenta", variant="primary")
            
        with gr.Column():
            output_image = gr.Image(label="✅ Risultato")
            output_json = gr.JSON(label="📊 Dati")
    
    gr.Markdown("""
    ### 🔧 Algoritmo:
    1. **CLAHE**: Equalizzazione istogramma adattiva
    2. **Threshold Adattivo**: Funziona su sfondi variabili
    3. **Morphological Ops**: Connette caratteri e rimuove rumore
    4. **Filtraggio**: Area, aspect ratio, dimensioni
    
    ### 📖 Output:
    - **Verde**: Contorni mask del testo
    - **Arancione**: Bounding box individuali
    - **text_regions**: Coordinate per OCR/inpainting
    """)
    
    # Wire the button: (image, confidence) -> (annotated image, JSON report).
    segment_btn.click(
        fn=detect_and_segment_balloons,
        inputs=[input_image, confidence],
        outputs=[output_image, output_json]
    )

# Start the web server; kept at module level so the script's startup
# behaviour is unchanged.
demo.launch()