|
|
import gradio as gr |
|
|
from ultralytics import YOLO |
|
|
import numpy as np |
|
|
import cv2 |
|
|
|
|
|
# Balloon detector shared by every request: built once, when the module is
# loaded, from the 'best.pt' file.
detector = YOLO("best.pt")
|
|
|
|
|
def _text_blobs(region, block_size, threshold_c):
    """Return a binary image whose white blobs are candidate text in ``region``."""
    # A 3-D crop is treated as RGB; a 2-D crop is used as the grey image as-is.
    gray = cv2.cvtColor(region, cv2.COLOR_RGB2GRAY) if region.ndim == 3 else region

    # Local contrast equalisation before thresholding.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(gray)

    # Inverted threshold: pixels darker than their neighbourhood become white.
    binary = cv2.adaptiveThreshold(
        enhanced, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        blockSize=block_size,
        C=threshold_c,
    )

    # Close small gaps first vertically (2x1 kernel), then horizontally (1x3),
    # in the same order as the two kernels are listed.
    for kernel_shape in ((2, 1), (1, 3)):
        binary = cv2.morphologyEx(
            binary, cv2.MORPH_CLOSE, np.ones(kernel_shape, np.uint8)
        )

    # One 3x3 dilation so neighbouring strokes merge into a single contour.
    return cv2.dilate(binary, np.ones((3, 3), np.uint8), iterations=1)


def get_inpaint_bboxes(
    xyxy,
    img,
    *,
    block_size=15,
    threshold_c=10,
    min_area=50,
    min_side=3,
    max_region_fraction=0.95,
    aspect_ratio_range=(0.1, 15),
):
    """Segment the text areas inside one balloon box.

    Text-segmentation heuristic inspired by comic-translate: the balloon crop
    is contrast-equalised (CLAHE), binarised with an inverted adaptive
    threshold, cleaned with morphological closing/dilation, and the external
    contours that pass the size/shape filters are kept as text regions.

    Args:
        xyxy: Four values ``(x1, y1, x2, y2)``; each is truncated with
            ``int`` and the box is clamped to the bounds of ``img``.
        img: Image array whose first two axes are height and width. A 3-D
            array is converted with ``cv2.COLOR_RGB2GRAY``; a 2-D array is
            used directly as the grey image.
        block_size: ``blockSize`` passed to ``cv2.adaptiveThreshold``; must
            be an odd integer >= 3.
        threshold_c: ``C`` passed to ``cv2.adaptiveThreshold``.
        min_area: A contour is kept only if its area is strictly greater.
        min_side: A contour is kept only if both sides of its bounding
            rectangle are strictly greater.
        max_region_fraction: A contour is kept only if each side of its
            bounding rectangle is strictly smaller than this fraction of the
            corresponding side of the clamped box.
        aspect_ratio_range: ``(low, high)``; the width/height ratio of the
            bounding rectangle must lie strictly between the two.

    Returns:
        ``None`` when the clamped box is empty or no contour passes the
        filters. Otherwise ``(full_mask, text_bboxes)`` where ``full_mask``
        is a ``uint8`` array of shape ``(h, w)`` that is 255 on the (slightly
        dilated) kept contours and 0 elsewhere, and ``text_bboxes`` is a list
        of dicts with keys ``x1``, ``y1``, ``x2``, ``y2`` (coordinates in
        ``img``) and ``area`` (contour area).

    Raises:
        ValueError: If ``block_size`` is not an odd integer >= 3.
    """
    if block_size < 3 or block_size % 2 == 0:
        raise ValueError("block_size must be an odd integer >= 3")

    x1, y1, x2, y2 = (int(coord) for coord in xyxy)

    # Clamp the box to the image; an empty intersection means nothing to do.
    h, w = img.shape[:2]
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(w, x2), min(h, y2)
    if x2 <= x1 or y2 <= y1:
        return None

    balloon_region = img[y1:y2, x1:x2].copy()
    region_h, region_w = balloon_region.shape[:2]

    blobs = _text_blobs(balloon_region, block_size, threshold_c)
    contours, _ = cv2.findContours(
        blobs, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    min_ratio, max_ratio = aspect_ratio_range
    text_bboxes = []
    mask = np.zeros((region_h, region_w), dtype=np.uint8)

    for contour in contours:
        area = cv2.contourArea(contour)
        rx, ry, rw, rh = cv2.boundingRect(contour)
        aspect_ratio = rw / float(rh) if rh > 0 else 0

        # Reject specks, blobs that span (almost) the whole crop, and
        # extremely thin or extremely wide shapes.
        is_text = (
            area > min_area
            and rw > min_side
            and rh > min_side
            and rw < region_w * max_region_fraction
            and rh < region_h * max_region_fraction
            and min_ratio < aspect_ratio < max_ratio
        )
        if not is_text:
            continue

        cv2.drawContours(mask, [contour], -1, 255, -1)

        # Bounding rectangles are relative to the crop; shift them back into
        # full-image coordinates.
        text_bboxes.append({
            'x1': x1 + rx,
            'y1': y1 + ry,
            'x2': x1 + rx + rw,
            'y2': y1 + ry + rh,
            'area': area,
        })

    if not text_bboxes:
        return None

    # Grow the mask by one 3x3 dilation so it extends slightly past the
    # filled contours.
    mask = cv2.dilate(mask, np.ones((3, 3), np.uint8), iterations=1)

    # Paste the crop-sized mask into an otherwise empty full-size mask.
    full_mask = np.zeros((h, w), dtype=np.uint8)
    full_mask[y1:y2, x1:x2] = mask

    return full_mask, text_bboxes
|
|
|
|
|
def _draw_balloon_overlay(canvas, mask, text_bboxes, box, balloon_id):
    """Draw the text-mask outline, the per-region boxes and the label on ``canvas``."""
    contours, _ = cv2.findContours(
        mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    cv2.drawContours(canvas, contours, -1, (0, 255, 0), 2)

    for tb in text_bboxes:
        cv2.rectangle(
            canvas,
            (tb['x1'], tb['y1']),
            (tb['x2'], tb['y2']),
            (255, 165, 0), 1,
        )

    # The label sits 10 px above the balloon; clamp it so it is still drawn
    # inside the canvas when the balloon touches the top or left edge.
    label_origin = (max(int(box[0]), 0), max(int(box[1]) - 10, 15))
    cv2.putText(
        canvas, f"B{balloon_id} ({len(text_bboxes)} txt)",
        label_origin,
        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2,
    )


def detect_and_segment_balloons(image, confidence):
    """Detect balloons in ``image`` and segment the text inside each one.

    Runs the module-level ``detector`` on the image, then calls
    ``get_inpaint_bboxes`` for every detected box and draws the result on a
    copy of the image.

    Args:
        image: Image array, or ``None`` when nothing was supplied.
        confidence: Value forwarded to the detector as ``conf``.

    Returns:
        A pair ``(annotated, output)``:

        * ``(None, {"error": "Nessuna immagine"})`` when ``image`` is ``None``;
        * ``(image, output)`` with ``num_balloons == 0`` when the detector
          returns no boxes (the input image is returned unmodified);
        * otherwise an annotated copy of the image and a dict with
          ``num_balloons`` and a ``detections`` list. Each detection holds
          ``balloon_id``, ``balloon_box``, ``confidence``, ``text_regions``
          and ``has_segmentation``, plus ``num_text_regions`` on success or
          ``error`` when segmentation raised.
    """
    if image is None:
        return None, {"error": "Nessuna immagine"}

    print(f"Rilevamento con confidenza: {confidence}")

    boxes = detector(image, conf=confidence, verbose=False)[0].boxes

    output = {'num_balloons': 0, 'detections': []}

    if boxes is None or len(boxes) == 0:
        print("Nessun balloon rilevato")
        return image, output

    output['num_balloons'] = len(boxes)
    print(f"Trovati {output['num_balloons']} balloon")

    annotated = image.copy()

    # Convert the detector output to NumPy once instead of once per box.
    all_xyxy = boxes.xyxy.cpu().numpy()
    all_conf = boxes.conf.cpu().numpy()

    for balloon_id, (box, conf) in enumerate(zip(all_xyxy, all_conf), start=1):
        print(f"Segmentazione balloon {balloon_id}...")

        detection_data = {
            'balloon_id': balloon_id,
            'balloon_box': {
                'x1': int(box[0]),
                'y1': int(box[1]),
                'x2': int(box[2]),
                'y2': int(box[3]),
            },
            'confidence': round(float(conf), 3),
        }

        # Segmentation fields are collected separately and applied in one go,
        # so a failure half-way through never leaves stale success fields.
        segmentation = {'text_regions': [], 'has_segmentation': False}

        # Best effort per balloon: one failing box must not abort the rest,
        # so the error is reported in the detection entry instead of raised.
        try:
            result = get_inpaint_bboxes(box, image)

            if result is None:
                print(" ⚠️ Segmentazione fallita")
            else:
                mask, text_bboxes = result
                if text_bboxes:
                    _draw_balloon_overlay(
                        annotated, mask, text_bboxes, box, balloon_id
                    )
                    segmentation = {
                        'text_regions': text_bboxes,
                        'num_text_regions': len(text_bboxes),
                        'has_segmentation': True,
                    }
                    print(f" ✅ {len(text_bboxes)} aree di testo segmentate")
                else:
                    print(" ⚠️ Nessun testo trovato")

        except Exception as e:
            print(f" ❌ Errore: {str(e)}")
            segmentation = {
                'text_regions': [],
                'has_segmentation': False,
                'error': str(e),
            }

        detection_data.update(segmentation)
        output['detections'].append(detection_data)

    print("Completato!")
    return annotated, output
|
|
|
|
|
# Gradio UI: one input column (image, confidence slider, button) and one
# output column (annotated image, JSON data), wired to
# detect_and_segment_balloons.
#
# NOTE(review): several emoji in the labels had been mis-decoded. Those whose
# surviving bytes identify them were restored; the glyphs in the page title,
# the "Dati" label and the "Output" heading could not be recovered and were
# dropped -- re-add the intended emoji if known.
with gr.Blocks() as demo:
    gr.Markdown("# Segmentazione Testo Balloons (Algorithm Comic-Translate)")
    gr.Markdown("**Usa threshold adattivo e morphological operations per segmentare testo preciso**")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="numpy", label="📷 Fumetto")
            # Every slider argument is passed by keyword; in particular `step`
            # must not be positional (keyword-only in current Gradio releases
            # -- TODO confirm against the installed version).
            confidence = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.25,
                step=0.05,
                label="🎯 Confidenza",
            )
            segment_btn = gr.Button("✂️ Segmenta", variant="primary")

        with gr.Column():
            output_image = gr.Image(label="✅ Risultato")
            output_json = gr.JSON(label="Dati")

    # Built from a list of lines so the rendered Markdown does not depend on
    # how this source file is indented.
    gr.Markdown("\n".join([
        "### 🔧 Algoritmo:",
        "1. **CLAHE**: Equalizzazione istogramma adattiva",
        "2. **Threshold Adattivo**: Funziona su sfondi variabili",
        "3. **Morphological Ops**: Connette caratteri e rimuove rumore",
        "4. **Filtraggio**: Area, aspect ratio, dimensioni",
        "",
        "### Output:",
        "- **Verde**: Contorni mask del testo",
        "- **Arancione**: Bounding box individuali",
        "- **text_regions**: Coordinate per OCR/inpainting",
    ]))

    # The click handler has to be registered while the Blocks context is open.
    segment_btn.click(
        fn=detect_and_segment_balloons,
        inputs=[input_image, confidence],
        outputs=[output_image, output_json],
    )

demo.launch()
|
|
|