feat: upgrade OCR to PaddleOCR and LLM to Qwen2.5-1.5B-Instruct
Browse files- app.py +30 -17
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -2,22 +2,27 @@ import json
|
|
| 2 |
import re
|
| 3 |
from typing import Any, Dict, List, Optional, Sequence
|
| 4 |
|
| 5 |
-
import easyocr
|
| 6 |
import gradio as gr
|
| 7 |
import numpy as np
|
| 8 |
import torch
|
| 9 |
from PIL import Image, ImageDraw
|
|
|
|
| 10 |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
| 11 |
|
| 12 |
# --- OCR pipeline ---------------------------------------------------------
|
| 13 |
# Use a high-capacity OCR model for better accuracy on prescription labels.
|
| 14 |
-
OCR_LANGS = ["
|
| 15 |
-
LLM_MODEL_ID = "Qwen/Qwen2.5-
|
| 16 |
|
| 17 |
|
| 18 |
def _load_ocr():
|
| 19 |
use_gpu = torch.cuda.is_available()
|
| 20 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
ocr_reader = _load_ocr()
|
|
@@ -26,10 +31,15 @@ ocr_reader = _load_ocr()
|
|
| 26 |
def _load_llm():
|
| 27 |
device_map = "auto" if torch.cuda.is_available() else None
|
| 28 |
dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
| 29 |
-
model = AutoModelForCausalLM.from_pretrained(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
if device_map is None:
|
| 31 |
model = model.to(torch.device("cpu"))
|
| 32 |
-
tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_ID)
|
| 33 |
return model, tokenizer
|
| 34 |
|
| 35 |
|
|
@@ -156,21 +166,24 @@ def parse_fields(raw: str) -> Dict[str, Any]:
|
|
| 156 |
|
| 157 |
def ocr_and_parse(image: Image.Image) -> Dict[str, Any]:
|
| 158 |
np_img = np.array(image.convert("RGB"))
|
| 159 |
-
|
| 160 |
|
| 161 |
segments: List[Dict[str, Any]] = []
|
| 162 |
lines: List[str] = []
|
| 163 |
-
for
|
| 164 |
-
|
| 165 |
-
if not cleaned:
|
| 166 |
continue
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
|
| 175 |
raw_text = "\n".join(lines)
|
| 176 |
fields = parse_fields(raw_text)
|
|
|
|
| 2 |
import re
|
| 3 |
from typing import Any, Dict, List, Optional, Sequence
|
| 4 |
|
|
|
|
| 5 |
import gradio as gr
|
| 6 |
import numpy as np
|
| 7 |
import torch
|
| 8 |
from PIL import Image, ImageDraw
|
| 9 |
+
from paddleocr import PaddleOCR
|
| 10 |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
| 11 |
|
| 12 |
# --- OCR pipeline ---------------------------------------------------------
# Use a high-capacity OCR model for better accuracy on prescription labels.
# NOTE(review): _load_ocr passes only OCR_LANGS[0] ("korean") to PaddleOCR;
# the "en" entry is currently unused — confirm whether a second reader or a
# multilingual model was intended.
OCR_LANGS = ["korean", "en"]
# Chat-tuned LLM used downstream to structure the raw OCR text.
LLM_MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"
|
| 16 |
|
| 17 |
|
| 18 |
def _load_ocr():
    """Build the PaddleOCR reader used for prescription-label recognition.

    The reader is configured with angle classification enabled (handles
    rotated text) and logging suppressed. GPU inference is requested
    whenever CUDA is visible to torch. PaddleOCR takes a single primary
    language, so only the first entry of OCR_LANGS is used here.
    """
    reader_config = {
        "use_angle_cls": True,
        "lang": OCR_LANGS[0],
        "show_log": False,
        "use_gpu": torch.cuda.is_available(),
    }
    return PaddleOCR(**reader_config)
|
| 26 |
|
| 27 |
|
| 28 |
ocr_reader = _load_ocr()
|
|
|
|
| 31 |
def _load_llm():
    """Load the Qwen causal LM and its tokenizer.

    With a CUDA device available the model is loaded in float16 with
    automatic device placement ("auto"); otherwise it is loaded in
    float32 and explicitly moved to CPU.

    Returns:
        tuple: (model, tokenizer) ready for generation.
    """
    has_cuda = torch.cuda.is_available()
    model = AutoModelForCausalLM.from_pretrained(
        LLM_MODEL_ID,
        device_map="auto" if has_cuda else None,
        torch_dtype=torch.float16 if has_cuda else torch.float32,
        trust_remote_code=True,
    )
    if not has_cuda:
        # device_map was None in this case, so pin the weights to CPU.
        model = model.to(torch.device("cpu"))
    tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_ID, trust_remote_code=True)
    return model, tokenizer
|
| 44 |
|
| 45 |
|
|
|
|
| 166 |
|
| 167 |
def ocr_and_parse(image: Image.Image) -> Dict[str, Any]:
|
| 168 |
np_img = np.array(image.convert("RGB"))
|
| 169 |
+
ocr_results = ocr_reader.ocr(np_img, cls=True)
|
| 170 |
|
| 171 |
segments: List[Dict[str, Any]] = []
|
| 172 |
lines: List[str] = []
|
| 173 |
+
for result in ocr_results:
|
| 174 |
+
if not result:
|
|
|
|
| 175 |
continue
|
| 176 |
+
for bbox, (text, confidence) in result:
|
| 177 |
+
cleaned = (text or "").strip()
|
| 178 |
+
if not cleaned:
|
| 179 |
+
continue
|
| 180 |
+
lines.append(cleaned)
|
| 181 |
+
box_serializable = np.asarray(bbox, dtype=float).tolist()
|
| 182 |
+
segments.append({
|
| 183 |
+
"text": cleaned,
|
| 184 |
+
"confidence": float(confidence),
|
| 185 |
+
"bbox": box_serializable,
|
| 186 |
+
})
|
| 187 |
|
| 188 |
raw_text = "\n".join(lines)
|
| 189 |
fields = parse_fields(raw_text)
|
requirements.txt
CHANGED
|
@@ -3,6 +3,7 @@ torch
|
|
| 3 |
gradio
|
| 4 |
Pillow
|
| 5 |
sentencepiece
|
| 6 |
-
|
|
|
|
| 7 |
opencv-python-headless
|
| 8 |
numpy
|
|
|
|
| 3 |
gradio
|
| 4 |
Pillow
|
| 5 |
sentencepiece
|
| 6 |
+
paddleocr
|
| 7 |
+
paddlepaddle
|
| 8 |
opencv-python-headless
|
| 9 |
numpy
|