iammraat committed on
Commit
e5d3222
·
verified ·
1 Parent(s): 8414f8b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -0
app.py CHANGED
@@ -566,5 +566,117 @@ demo = gr.Interface(
566
  allow_flagging="never"
567
  )
568
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
569
  if __name__ == "__main__":
570
  demo.launch()
 
566
  allow_flagging="never"
567
  )
568
 
569
# app.py (fixed version) — appended rewrite of the app.
# NOTE(review): the committed diff re-ran `if __name__ == "__main__": demo.launch()`
# HERE, before any of the definitions below executed. That launched the *old*
# interface (defined earlier in the file) and blocked, so the fixed app never
# started. The premature launch is removed; the single entry point lives at
# the bottom of the file.
import gradio as gr
from ultralytics import YOLO
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
import torch
import numpy as np

# Local YOLO weights: one detector for text regions, one for text lines.
region_model = YOLO("regions.pt")
line_model = YOLO("lines.pt")

# TrOCR handwritten-text recognizer; the processor converts PIL images to
# model pixel values and decodes generated token ids back to text.
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")

# Run recognition on GPU when available; the YOLO models manage their own device.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
590
def get_crop(image: Image.Image, result, idx: int, padding: int = 15):
    """Cut detection *idx* out of *image*, preferring the segmentation mask.

    When the YOLO result carries masks, everything outside the mask is
    painted white and the padded bounding box of the mask is returned.
    Otherwise the padded detection box is cropped directly.

    Returns a PIL image, or None when the mask contains no pixels.
    """
    arr = np.array(image)

    if result.masks is None:
        # Box-only fallback: pad the xyxy box, clamp to the image, crop via PIL.
        bx1, by1, bx2, by2 = result.boxes.xyxy[idx].cpu().numpy().astype(int)
        left = max(0, bx1 - padding)
        top = max(0, by1 - padding)
        right = min(image.width, bx2 + padding)
        bottom = min(image.height, by2 + padding)
        return image.crop((left, top, right, bottom))

    # NOTE(review): assumes the mask tensor is at full image resolution so its
    # pixel coordinates line up with `arr` — confirm against the YOLO config.
    keep = result.masks.data[idx].cpu().numpy() > 0.5
    rows, cols = np.where(keep)
    if rows.size == 0:
        return None

    top = max(0, rows.min() - padding)
    bottom = min(arr.shape[0], rows.max() + padding + 1)
    left = max(0, cols.min() - padding)
    right = min(arr.shape[1], cols.max() + padding + 1)

    window = arr[top:bottom, left:right]
    # White out everything the mask excludes; `arr` is a private copy of the
    # input image, so this in-place write has no external effect.
    window[~keep[top:bottom, left:right]] = 255
    return Image.fromarray(window)
625
def process_image(image: Image.Image) -> str:
    """Run full-page handwritten OCR on *image*.

    Pipeline: detect text regions (YOLO) -> sort regions top-to-bottom ->
    detect lines inside each region (YOLO) -> sort lines by (y, x) ->
    recognize each line with TrOCR.

    Returns the recognized text: lines joined with "\n" inside a region,
    regions separated by blank lines; or a short status string when
    nothing is detected/recognized.
    """
    region_result = region_model(image)[0]

    if region_result.boxes is None or len(region_result.boxes) == 0:
        return "No text regions detected."

    # Pair each region crop with its top y-coordinate to establish reading order.
    regions_with_pos = []
    for i in range(len(region_result.boxes)):
        y1 = region_result.boxes.xyxy[i][1].item()
        crop = get_crop(image, region_result, i, padding=20)
        # Fixed: explicit None check — get_crop returns Optional[Image];
        # truthiness testing a PIL Image is the wrong idiom here.
        if crop is not None:
            regions_with_pos.append((y1, crop))

    regions_with_pos.sort(key=lambda item: item[0])

    full_text_parts = []
    for _, region_crop in regions_with_pos:
        line_result = line_model(region_crop)[0]

        if line_result.boxes is None or len(line_result.boxes) == 0:
            continue

        lines_with_pos = []
        for j in range(len(line_result.boxes)):
            rel_y1 = line_result.boxes.xyxy[j][1].item()
            rel_x1 = line_result.boxes.xyxy[j][0].item()
            line_crop = get_crop(region_crop, line_result, j, padding=15)

            if line_crop is None:
                continue

            pixel_values = processor(line_crop, return_tensors="pt").pixel_values.to(device)
            # Fixed: wrap generation in inference_mode so decoding does not
            # build autograd state (lower memory per line).
            with torch.inference_mode():
                generated_ids = model.generate(pixel_values)
            text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

            lines_with_pos.append((rel_y1, rel_x1, text))

        # Top-to-bottom, then left-to-right within the region.
        lines_with_pos.sort(key=lambda item: (item[0], item[1]))
        full_text_parts.append("\n".join(item[2] for item in lines_with_pos))

    return "\n\n".join(full_text_parts) if full_text_parts else "No text recognized."
671
# Gradio interface. `flagging_mode` is the current name of the kwarg that
# replaced the removed `allow_flagging` (Gradio 5.x); the earlier copy of
# this Interface crashed on modern Gradio because of that rename.
demo = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil", label="Upload handwritten document"),
    outputs=gr.Textbox(label="Recognized Text"),
    title="Handwritten Text Recognition (YOLO regions/lines + TrOCR)",
    description="Uses your local regions.pt and lines.pt (same as Riksarkivet demo) with precise mask-based cropping.",
    flagging_mode="never",
)

if __name__ == "__main__":
    demo.launch()