File size: 8,345 Bytes
2a569bb cddfd6c 2a569bb 4470ab3 958a84c cddfd6c 958a84c cddfd6c 958a84c cddfd6c 958a84c cddfd6c 958a84c cddfd6c 958a84c cddfd6c 958a84c cddfd6c 958a84c cddfd6c 958a84c cddfd6c 958a84c cddfd6c 958a84c a38cac9 958a84c cddfd6c 958a84c a38cac9 958a84c a38cac9 958a84c cddfd6c 958a84c 2a569bb a38cac9 2a569bb 958a84c 4470ab3 2a569bb 958a84c 2a569bb 4470ab3 a38cac9 4470ab3 958a84c 4470ab3 958a84c a38cac9 cddfd6c a38cac9 cddfd6c 4470ab3 a38cac9 958a84c 2a569bb 958a84c a38cac9 958a84c a38cac9 958a84c a38cac9 958a84c a38cac9 cddfd6c 2a569bb 958a84c 2a569bb 958a84c 2a569bb 958a84c a38cac9 958a84c 2a569bb cddfd6c 958a84c 2a569bb 958a84c 2a569bb 958a84c a38cac9 2a569bb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 |
import gradio as gr
from ultralytics import YOLO
import numpy as np
import cv2
# Module-level YOLO model built once at import time from 'best.pt'
# (presumably the trained balloon-detection weights -- confirm); it is the
# detector called by detect_and_segment_balloons for every request.
detector = YOLO('best.pt')
def get_inpaint_bboxes(
    xyxy,
    img,
    *,
    min_area=50,
    min_side=3,
    max_extent_ratio=0.95,
    aspect_ratio_range=(0.1, 15),
):
    """Segment the text areas inside one balloon box.

    Text-segmentation routine inspired by comic-translate: the balloon crop
    is contrast-enhanced (CLAHE), binarised with an adaptive threshold,
    cleaned with small morphological operations, and the resulting external
    contours are filtered by area, size and aspect ratio.

    Args:
        xyxy: Four values ``(x1, y1, x2, y2)`` describing the balloon box in
            pixel coordinates of ``img``. Each value is truncated with
            ``int`` and the box is clamped to the image bounds.
        img: Image array indexed as ``img[y, x]``. A 3-dimensional array is
            converted with ``cv2.COLOR_RGB2GRAY``; a 2-dimensional array is
            used as the grayscale image directly.
        min_area: A contour is kept only if its area is strictly greater
            than this value.
        min_side: A contour is kept only if both the width and the height of
            its bounding rectangle are strictly greater than this value.
        max_extent_ratio: A contour is kept only if its bounding rectangle is
            strictly smaller than this fraction of the crop in both width
            and height (rejects blobs that span the whole balloon).
        aspect_ratio_range: ``(low, high)``; the width/height ratio of the
            bounding rectangle must lie strictly between the two values.

    Returns:
        ``None`` when the clamped box is empty or when no contour passes the
        filters. Otherwise a tuple ``(full_mask, text_bboxes)`` where
        ``full_mask`` is a ``uint8`` array with the height and width of
        ``img`` (255 on the slightly dilated text contours, 0 elsewhere) and
        ``text_bboxes`` is a list of dicts with keys ``'x1'``, ``'y1'``,
        ``'x2'``, ``'y2'`` (coordinates relative to ``img``) and ``'area'``
        (contour area).
    """
    x1, y1, x2, y2 = (int(coord) for coord in xyxy)

    # Clamp the box to the image and bail out if nothing is left of it.
    h, w = img.shape[:2]
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(w, x2), min(h, y2)
    if x2 <= x1 or y2 <= y1:
        return None

    balloon_region = img[y1:y2, x1:x2].copy()
    region_h, region_w = balloon_region.shape[:2]

    # Work on a single-channel image.
    if balloon_region.ndim == 3:
        gray = cv2.cvtColor(balloon_region, cv2.COLOR_RGB2GRAY)
    else:
        gray = balloon_region

    # Step 1: adaptive histogram equalisation to improve contrast.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(gray)

    # Step 2: adaptive threshold (copes better with uneven backgrounds).
    binary = cv2.adaptiveThreshold(
        enhanced,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,  # inverted: text white, background black
        blockSize=15,  # size of the local neighbourhood
        C=10,  # constant subtracted from the local mean
    )

    # Step 3: morphological clean-up.
    # Vertical closing joins the parts of a single character.
    kernel_v = np.ones((2, 1), np.uint8)
    binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel_v)
    # Horizontal closing joins neighbouring characters.
    kernel_h = np.ones((1, 3), np.uint8)
    binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel_h)
    # A light dilation merges the characters of a word.
    kernel_dilate = np.ones((3, 3), np.uint8)
    dilated = cv2.dilate(binary, kernel_dilate, iterations=1)

    # Step 4: outer contours of the candidate text areas.
    contours, _ = cv2.findContours(
        dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    # Step 5: filter the contours and collect their bounding boxes.
    min_ratio, max_ratio = aspect_ratio_range
    text_bboxes = []
    mask = np.zeros((region_h, region_w), dtype=np.uint8)

    for contour in contours:
        area = cv2.contourArea(contour)
        rx, ry, rw, rh = cv2.boundingRect(contour)
        aspect_ratio = rw / float(rh) if rh > 0 else 0

        big_enough = area > min_area and rw > min_side and rh > min_side
        not_whole_balloon = (
            rw < region_w * max_extent_ratio
            and rh < region_h * max_extent_ratio
        )
        plausible_shape = min_ratio < aspect_ratio < max_ratio
        if not (big_enough and not_whole_balloon and plausible_shape):
            continue

        # Paint the filled contour onto the crop-sized mask.
        cv2.drawContours(mask, [contour], -1, 255, -1)

        # Store the bounding box in full-image coordinates.
        text_bboxes.append({
            'x1': x1 + rx,
            'y1': y1 + ry,
            'x2': x1 + rx + rw,
            'y2': y1 + ry + rh,
            'area': area,
        })

    if not text_bboxes:
        return None

    # Step 6: grow the mask slightly to also cover anti-aliased text edges.
    kernel_expand = np.ones((3, 3), np.uint8)
    mask = cv2.dilate(mask, kernel_expand, iterations=1)

    # Paste the crop-sized mask into a mask the size of the whole image.
    full_mask = np.zeros((h, w), dtype=np.uint8)
    full_mask[y1:y2, x1:x2] = mask

    return full_mask, text_bboxes
def detect_and_segment_balloons(image, confidence):
    """Detect balloons in an image and segment the text inside each one.

    The module-level ``detector`` is run on ``image``; every returned box is
    passed to ``get_inpaint_bboxes`` and the outcome is drawn on a copy of
    the image and recorded in a result dictionary.

    Args:
        image: Image array to analyse, or ``None`` when nothing was supplied.
        confidence: Value forwarded to the detector as its ``conf`` argument.

    Returns:
        A tuple ``(image_out, data)``:

        * ``(None, {"error": "Nessuna immagine"})`` when ``image`` is
          ``None``;
        * ``(image, {'num_balloons': 0, 'detections': []})`` when the
          detector returns no boxes (the input image is returned as-is);
        * otherwise ``(annotated, data)`` where ``annotated`` is a copy of
          ``image`` with the text contours, text bounding boxes and a label
          per balloon drawn on it, and ``data['detections']`` holds one dict
          per balloon with ``balloon_id``, ``balloon_box``, ``confidence``,
          ``text_regions``, ``has_segmentation`` and, when applicable,
          ``num_text_regions`` or ``error``.
    """
    if image is None:
        return None, {"error": "Nessuna immagine"}

    print(f"Rilevamento con confidenza: {confidence}")

    # Detection
    detection_results = detector(image, conf=confidence, verbose=False)
    boxes = detection_results[0].boxes

    output = {'num_balloons': 0, 'detections': []}

    if boxes is None or len(boxes) == 0:
        print("Nessun balloon rilevato")
        return image, output

    output['num_balloons'] = len(boxes)
    print(f"Trovati {output['num_balloons']} balloon")

    annotated = image.copy()

    # Convert all boxes and scores once instead of once per balloon.
    all_xyxy = boxes.xyxy.cpu().numpy()
    all_conf = boxes.conf.cpu().numpy()

    for balloon_id, (box, score) in enumerate(zip(all_xyxy, all_conf), start=1):
        print(f"Segmentazione balloon {balloon_id}...")

        left, top = int(box[0]), int(box[1])
        detection_data = {
            'balloon_id': balloon_id,
            'balloon_box': {
                'x1': left,
                'y1': top,
                'x2': int(box[2]),
                'y2': int(box[3]),
            },
            'confidence': round(float(score), 3),
        }

        # Best effort per balloon: a failure in one balloon is recorded in
        # its entry and must not abort the remaining balloons.
        try:
            result = get_inpaint_bboxes(box, image)

            if result is not None:
                mask, text_bboxes = result

                if text_bboxes:
                    detection_data['text_regions'] = text_bboxes
                    detection_data['num_text_regions'] = len(text_bboxes)

                    # Outline of the text mask, drawn in green.
                    contours, _ = cv2.findContours(
                        mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
                    )
                    cv2.drawContours(annotated, contours, -1, (0, 255, 0), 2)

                    # Individual text bounding boxes; (255, 165, 0) is
                    # orange when the image is in RGB channel order.
                    for tb in text_bboxes:
                        cv2.rectangle(
                            annotated,
                            (tb['x1'], tb['y1']),
                            (tb['x2'], tb['y2']),
                            (255, 165, 0),
                            1,
                        )

                    # Balloon label. Keep the baseline inside the image so
                    # the label of a balloon touching the top edge is not
                    # clipped away.
                    label_y = max(top - 10, 15)
                    cv2.putText(
                        annotated,
                        f"B{balloon_id} ({len(text_bboxes)} txt)",
                        (left, label_y),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.5,
                        (0, 255, 0),
                        2,
                    )

                    detection_data['has_segmentation'] = True
                    print(f" ✅ {len(text_bboxes)} aree di testo segmentate")
                else:
                    detection_data['text_regions'] = []
                    detection_data['has_segmentation'] = False
                    print(" ⚠️ Nessun testo trovato")
            else:
                detection_data['text_regions'] = []
                detection_data['has_segmentation'] = False
                print(" ⚠️ Segmentazione fallita")

        except Exception as e:
            print(f" ❌ Errore: {e}")
            detection_data['text_regions'] = []
            detection_data['has_segmentation'] = False
            detection_data['error'] = str(e)

        output['detections'].append(detection_data)

    print("Completato!")
    return annotated, output
# Gradio front end: an image input, a confidence slider and a button on the
# left; the annotated image and the JSON data on the right. Clicking the
# button runs detect_and_segment_balloons.
#
# NOTE(review): the emoji in this file were mis-decoded. The ones whose
# bytes are still recognisable have been restored below; the three lone
# "π" glyphs (page title, "Dati" label, "Output" heading) lost their
# trailing bytes and are kept verbatim -- restore them from the original
# file if it is available.
with gr.Blocks() as demo:
    gr.Markdown("# π Segmentazione Testo Balloons (Algorithm Comic-Translate)")
    gr.Markdown("**Usa threshold adattivo e morphological operations per segmentare testo preciso**")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="numpy", label="📷 Fumetto")
            # Keyword arguments make the meaning of each number explicit
            # and do not depend on the positional order of Slider's
            # parameters.
            confidence = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.25,
                step=0.05,
                label="🎯 Confidenza",
            )
            segment_btn = gr.Button("✂️ Segmenta", variant="primary")

        with gr.Column():
            output_image = gr.Image(label="✅ Risultato")
            output_json = gr.JSON(label="π Dati")

    gr.Markdown("""
### 🔧 Algoritmo:
1. **CLAHE**: Equalizzazione istogramma adattiva
2. **Threshold Adattivo**: Funziona su sfondi variabili
3. **Morphological Ops**: Connette caratteri e rimuove rumore
4. **Filtraggio**: Area, aspect ratio, dimensioni
### π Output:
- **Verde**: Contorni mask del testo
- **Arancione**: Bounding box individuali
- **text_regions**: Coordinate per OCR/inpainting
""")

    # The event binding has to be declared inside the Blocks context.
    segment_btn.click(
        fn=detect_and_segment_balloons,
        inputs=[input_image, confidence],
        outputs=[output_image, output_json],
    )

demo.launch()
|