Spaces:

minhvh
/

ocr

Sleeping

App Files Files Community

minhvh committed on Sep 5, 2025

Commit

6855d5b

verified ·

1 Parent(s): 841a076

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -54

app.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import gradio as gr
 from paddleocr import PaddleOCR
-from PIL import Image
-from typing import Optional, Dict, Tuple, List
 ocr = PaddleOCR(
     text_detection_model_name="PP-OCRv5_mobile_det",
     text_recognition_model_name="PP-OCRv5_mobile_rec",
@@ -14,78 +14,56 @@ ocr = PaddleOCR(
 def draw_ocr(image: Image.Image, text_regions: list, box_color="red", text_color="yellow") -> Image.Image:
     """Outline every recognized text region on ``image`` and label it.

     The image is drawn on in place and the same object is returned.

     Args:
         image: Image to annotate.
         text_regions: Dicts with optional ``"bbox"`` (``x1``/``y1``/``x2``/``y2``),
             ``"text"`` and ``"confidence"`` entries; regions without a
             bbox are skipped.
         box_color: Outline colour of the rectangles.
         text_color: Fill colour of the labels.

     Returns:
         The annotated ``image``.
     """
     canvas = ImageDraw.Draw(image)
     for entry in text_regions:
         rect = entry.get("bbox", {})
         if not rect:
             continue
         text = entry.get("text", "")
         confidence = entry.get("confidence", 0)
         left, top, right, bottom = rect["x1"], rect["y1"], rect["x2"], rect["y2"]
         canvas.rectangle([left, top, right, bottom], outline=box_color, width=2)
         # Put the label 12 px above the box, clamped to the top edge.
         label_pos = (left, max(top - 12, 0))
         canvas.text(label_pos, f"{text} ({confidence})", fill=text_color)
     return image
 def extract_json(result_json: Dict, include_bbox: bool = False) -> Dict:
     """Summarize a PaddleOCR result as plain text plus optional boxes.

     Accepted layouts for ``result_json``:

     * a mapping with ``rec_texts`` / ``rec_scores`` / ``rec_boxes`` keys,
       either at the top level or nested under a ``"res"`` key;
     * a list of ``[box, (text, score)]`` lines, where ``box`` is a list of
       ``(x, y)`` corner points;
     * a per-image wrapper list around either of the above (only the first
       image is read);
     * ``None`` or an empty list, which yields an empty summary.

     Args:
         result_json: OCR result in one of the layouts above.
         include_bbox: When true, add a ``"text_regions"`` list with one
             entry (text, bbox, confidence) per kept line.

     Returns:
         Dict with ``extracted_text`` (kept lines joined by spaces),
         ``text_count`` and ``avg_confidence`` (mean over all scores,
         including lines dropped by the confidence filter), plus
         ``text_regions`` when ``include_bbox`` is true.

     Raises:
         TypeError: If ``result_json`` is in none of the supported layouts.
     """
     rec_texts, rec_scores, rec_boxes = _unpack_ocr_result(result_json)

     clean_texts = []
     text_regions = []
     for i, (text, score) in enumerate(zip(rec_texts, rec_scores)):
         clean_text = text.strip() if text else ""
         # Basic quality filter: drop blank and low-confidence lines.
         if not (clean_text and score > _MIN_CONFIDENCE):
             continue
         clean_texts.append(clean_text)
         if include_bbox and i < len(rec_boxes):
             bbox = _to_bbox(rec_boxes[i])
             if bbox is not None:
                 text_regions.append({
                     "text": clean_text,
                     "bbox": bbox,
                     "confidence": round(float(score), 3),
                 })

     output = {
         "extracted_text": " ".join(clean_texts),
         "text_count": len(clean_texts),
         "avg_confidence": round(float(sum(rec_scores)) / len(rec_scores), 2) if rec_scores else 0,
     }
     if include_bbox:
         output["text_regions"] = text_regions
     return output


 # Lines scoring at or below this confidence are left out of the summary.
 _MIN_CONFIDENCE = 0.3


 def _as_list(value):
     """Return ``value`` as a list; ``None`` becomes an empty list."""
     # Copying into a list also keeps array-like inputs safe to truth-test.
     return [] if value is None else list(value)


 def _is_point(value):
     """Tell whether ``value`` looks like an ``(x, y)`` pair rather than a scalar."""
     try:
         return len(value) >= 2
     except TypeError:
         return False


 def _is_legacy_line(item):
     """Tell whether ``item`` looks like one ``[box, (text, score)]`` line."""
     if isinstance(item, str):
         return False
     try:
         return len(item) == 2 and isinstance(item[1][0], str)
     except (TypeError, IndexError, KeyError):
         return False


 def _unpack_ocr_result(result):
     """Return ``(texts, scores, boxes)`` lists for any supported layout."""
     if result is None:
         return [], [], []
     if hasattr(result, "get"):
         # Mapping layout: fields live under "res" or directly at the top level.
         data = result.get("res")
         if not hasattr(data, "get"):
             data = result
         return (
             _as_list(data.get("rec_texts")),
             _as_list(data.get("rec_scores")),
             _as_list(data.get("rec_boxes")),
         )
     if not isinstance(result, (list, tuple)):
         raise TypeError(f"Unsupported OCR result type: {type(result).__name__}")
     if not result:
         return [], [], []
     first = result[0]
     if _is_legacy_line(first):
         texts = [line[1][0] for line in result]
         scores = [line[1][1] for line in result]
         boxes = [line[0] for line in result]
         return texts, scores, boxes
     if first is None or hasattr(first, "get") or isinstance(first, (list, tuple)):
         # Per-image wrapper: descend into the first image's result.
         return _unpack_ocr_result(first)
     raise TypeError(f"Unsupported OCR result entry: {type(first).__name__}")


 def _to_bbox(box):
     """Convert a flat ``[x1, y1, x2, y2]`` box or a corner polygon to a bbox dict.

     Returns ``None`` when the box has too few coordinates.
     """
     coords = list(box)
     if coords and _is_point(coords[0]):
         # Polygon: use the enclosing rectangle so x1 <= x2 and y1 <= y2 always hold.
         xs = [int(point[0]) for point in coords]
         ys = [int(point[1]) for point in coords]
         x1, y1, x2, y2 = min(xs), min(ys), max(xs), max(ys)
     elif len(coords) >= 4:
         x1, y1, x2, y2 = (int(value) for value in coords[:4])
     else:
         return None
     return {"x1": x1, "y1": y1, "x2": x2, "y2": y2,
             "width": x2 - x1, "height": y2 - y1}
def inference(img, lang=None):
    """Run OCR on an uploaded image and return the annotated image and its text.

    Args:
        img: Path of the image file to read, or ``None`` when nothing was
            uploaded.
        lang: Unused. Optional so the function can be called with the single
            image input that the interface provides.

    Returns:
        Tuple ``(annotated PIL image, recognized text)``; ``(None, "")`` when
        ``img`` is ``None``.
    """
    if img is None:
        return None, ""
    result = ocr.predict(img)
    # Only the first entry of the result is used; fall back to an empty
    # mapping if the result is empty.
    res = result[0] if result else {}
    data = extract_json(res, True)
    image = Image.open(img).convert("RGB")
    im_show = draw_ocr(image, data.get("text_regions", []))
    return im_show, data.get('extracted_text')
 # ===== Giao diện Gradio =====
 title = "PaddleOCR Multi-language (v3.x)"
@@ -96,18 +74,12 @@ PaddleOCR 3.x hỗ trợ Chinese, Japanese, Korean.
 - Xuất ảnh kết quả và văn bản nhận diện.
 """
 # Gradio UI: one uploaded image in; annotated image and recognized text out.
 demo = gr.Interface(
     title=title,
     description=description,
     fn=inference,
     # The upload reaches `inference` as a file path.
     inputs=[gr.Image(label="Upload ảnh", type="filepath")],
     outputs=[
         gr.Image(label="Ảnh kết quả", type="pil"),
         gr.Textbox(label="Văn bản nhận diện"),
     ],
 )
 if __name__ == "__main__":

 import gradio as gr
 from paddleocr import PaddleOCR
+from PIL import Image, ImageDraw
+from typing import Dict
+# Khởi tạo OCR
 ocr = PaddleOCR(
     text_detection_model_name="PP-OCRv5_mobile_det",
     text_recognition_model_name="PP-OCRv5_mobile_rec",
 def draw_ocr(image: Image.Image, text_regions: list, box_color="red", text_color="yellow") -> Image.Image:
     """Outline every recognized text region on ``image`` and label it.

     The image is drawn on in place and the same object is returned.

     Args:
         image: Image to annotate.
         text_regions: Dicts with optional ``"bbox"`` (``x1``/``y1``/``x2``/``y2``),
             ``"text"`` and ``"confidence"`` entries; regions without a
             bbox are skipped.
         box_color: Outline colour of the rectangles.
         text_color: Fill colour of the labels.

     Returns:
         The annotated ``image``.
     """
     canvas = ImageDraw.Draw(image)
     for entry in text_regions:
         rect = entry.get("bbox", {})
         if not rect:
             continue
         text = entry.get("text", "")
         confidence = entry.get("confidence", 0)
         left, top, right, bottom = rect["x1"], rect["y1"], rect["x2"], rect["y2"]
         canvas.rectangle([left, top, right, bottom], outline=box_color, width=2)
         # Put the label 12 px above the box, clamped to the top edge.
         label_pos = (left, max(top - 12, 0))
         canvas.text(label_pos, f"{text} ({confidence})", fill=text_color)
     return image
 def extract_json(result_json: Dict, include_bbox: bool = False) -> Dict:
     """Summarize a PaddleOCR result as plain text plus optional boxes.

     Accepted layouts for ``result_json``:

     * a list of ``[box, (text, score)]`` lines, where ``box`` is a list of
       ``(x, y)`` corner points;
     * a mapping with ``rec_texts`` / ``rec_scores`` / ``rec_boxes`` keys,
       either at the top level or nested under a ``"res"`` key;
     * a per-image wrapper list around either of the above (only the first
       image is read);
     * ``None`` or an empty list, which yields an empty summary.

     Args:
         result_json: OCR result in one of the layouts above.
         include_bbox: When true, add a ``"text_regions"`` list with one
             entry (text, bbox, confidence) per kept line.

     Returns:
         Dict with ``extracted_text`` (kept lines joined by spaces),
         ``text_count`` and ``avg_confidence`` (mean over all scores,
         including lines dropped by the confidence filter), plus
         ``text_regions`` when ``include_bbox`` is true.

     Raises:
         TypeError: If ``result_json`` is in none of the supported layouts.
     """
     rec_texts, rec_scores, rec_boxes = _unpack_ocr_result(result_json)

     clean_texts = []
     text_regions = []
     for i, (text, score) in enumerate(zip(rec_texts, rec_scores)):
         clean_text = text.strip() if text else ""
         # Basic quality filter: drop blank and low-confidence lines.
         if not (clean_text and score > _MIN_CONFIDENCE):
             continue
         clean_texts.append(clean_text)
         if include_bbox and i < len(rec_boxes):
             bbox = _to_bbox(rec_boxes[i])
             if bbox is not None:
                 text_regions.append({
                     "text": clean_text,
                     "bbox": bbox,
                     "confidence": round(float(score), 3),
                 })

     output = {
         "extracted_text": " ".join(clean_texts),
         "text_count": len(clean_texts),
         "avg_confidence": round(float(sum(rec_scores)) / len(rec_scores), 2) if rec_scores else 0,
     }
     if include_bbox:
         output["text_regions"] = text_regions
     return output


 # Lines scoring at or below this confidence are left out of the summary.
 _MIN_CONFIDENCE = 0.3


 def _as_list(value):
     """Return ``value`` as a list; ``None`` becomes an empty list."""
     # Copying into a list also keeps array-like inputs safe to truth-test.
     return [] if value is None else list(value)


 def _is_point(value):
     """Tell whether ``value`` looks like an ``(x, y)`` pair rather than a scalar."""
     try:
         return len(value) >= 2
     except TypeError:
         return False


 def _is_legacy_line(item):
     """Tell whether ``item`` looks like one ``[box, (text, score)]`` line."""
     if isinstance(item, str):
         return False
     try:
         return len(item) == 2 and isinstance(item[1][0], str)
     except (TypeError, IndexError, KeyError):
         return False


 def _unpack_ocr_result(result):
     """Return ``(texts, scores, boxes)`` lists for any supported layout."""
     if result is None:
         return [], [], []
     if hasattr(result, "get"):
         # Mapping layout: fields live under "res" or directly at the top level.
         data = result.get("res")
         if not hasattr(data, "get"):
             data = result
         return (
             _as_list(data.get("rec_texts")),
             _as_list(data.get("rec_scores")),
             _as_list(data.get("rec_boxes")),
         )
     if not isinstance(result, (list, tuple)):
         raise TypeError(f"Unsupported OCR result type: {type(result).__name__}")
     if not result:
         return [], [], []
     first = result[0]
     if _is_legacy_line(first):
         texts = [line[1][0] for line in result]
         scores = [line[1][1] for line in result]
         boxes = [line[0] for line in result]
         return texts, scores, boxes
     if first is None or hasattr(first, "get") or isinstance(first, (list, tuple)):
         # Per-image wrapper: descend into the first image's result.
         return _unpack_ocr_result(first)
     raise TypeError(f"Unsupported OCR result entry: {type(first).__name__}")


 def _to_bbox(box):
     """Convert a flat ``[x1, y1, x2, y2]`` box or a corner polygon to a bbox dict.

     Returns ``None`` when the box has too few coordinates.
     """
     coords = list(box)
     if coords and _is_point(coords[0]):
         # Polygon: use the enclosing rectangle so x1 <= x2 and y1 <= y2 always hold.
         xs = [int(point[0]) for point in coords]
         ys = [int(point[1]) for point in coords]
         x1, y1, x2, y2 = min(xs), min(ys), max(xs), max(ys)
     elif len(coords) >= 4:
         x1, y1, x2, y2 = (int(value) for value in coords[:4])
     else:
         return None
     return {"x1": x1, "y1": y1, "x2": x2, "y2": y2,
             "width": x2 - x1, "height": y2 - y1}
def inference(img, lang=None):
    """Run OCR on an uploaded image and return the annotated image and its text.

    Args:
        img: Path of the image file to read, or ``None`` when nothing was
            uploaded.
        lang: Unused; kept so existing two-argument calls keep working.

    Returns:
        Tuple ``(annotated PIL image, recognized text)``; ``(None, "")`` when
        ``img`` is ``None``.
    """
    if img is None:
        return None, ""
    # Give the OCR engine the file path: it reads paths and arrays, not PIL
    # image objects.
    result = ocr.ocr(img)
    # The result holds one entry per input image; that entry may be missing
    # or None when nothing was detected.
    first = result[0] if result else None
    data = extract_json(first if first is not None else [], include_bbox=True)
    image = Image.open(img).convert("RGB")
    im_show = draw_ocr(image, data.get("text_regions", []))
    return im_show, data.get("extracted_text")
 # ===== Giao diện Gradio =====
 title = "PaddleOCR Multi-language (v3.x)"
 - Xuất ảnh kết quả và văn bản nhận diện.
 """
 # Gradio UI: one uploaded image in; annotated image and recognized text out.
 demo = gr.Interface(
     title=title,
     description=description,
     fn=inference,
     # The upload reaches `inference` as a file path.
     inputs=[gr.Image(label="Upload ảnh", type="filepath")],
     outputs=[
         gr.Image(label="Ảnh kết quả", type="pil"),
         gr.Textbox(label="Văn bản nhận diện"),
     ],
 )
 if __name__ == "__main__":