Spaces:

LLDDWW
/

MedCard

Runtime error

App Files Files Community

LLDDWW committed on Oct 13, 2025

Commit

a97e706

1 Parent(s): 2879fbc

Use PaddleOCR predict API and normalize inputs

Browse files

Files changed (1) hide show

app.py +85 -21

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import json
 import re
-from typing import List, Optional, Tuple
 import numpy as np
 import os
@@ -26,6 +26,55 @@ MED_MODEL = None
 MED_TOKENIZER = None
 OCR_MODEL_REPO_ID = "PaddlePaddle/korean_PP-OCRv5_mobile_rec"
 def load_models():
     """모델들을 한 번만 로드"""
     global OCR_READER, MED_MODEL, MED_TOKENIZER
@@ -90,7 +139,11 @@ def analyze_medication_image(image: Image.Image) -> Tuple[str, str]:
         # Step 1: OCR - PaddleOCR로 한글 텍스트 추출
         start_time = time.time()
         img_array = np.array(image)
-        ocr_results = OCR_READER.ocr(img_array)
         ocr_time = time.time() - start_time
         print(f"⏱️ OCR took {ocr_time:.2f}s")
@@ -98,20 +151,7 @@ def analyze_medication_image(image: Image.Image) -> Tuple[str, str]:
             return "텍스트를 찾을 수 없습니다.", ""
         # 텍스트 추출
-        texts: List[str] = []
-        first_entry = ocr_results[0]
-        if isinstance(first_entry, list):
-            texts = [line[1][0] for line in first_entry if len(line) > 1 and line[1]]
-        elif isinstance(first_entry, dict):
-            rec_results = first_entry.get("text_recognition") or first_entry.get("rec_results")
-            if isinstance(rec_results, list):
-                for rec in rec_results:
-                    if isinstance(rec, dict) and rec.get("text"):
-                        texts.append(rec["text"])
-            if not texts and isinstance(first_entry.get("text"), str):
-                texts.append(first_entry["text"])
         if not texts:
             return "텍스트를 찾을 수 없습니다.", ""
@@ -261,16 +301,40 @@ def format_results(extracted_text: str, medications: List[str]) -> Tuple[str, st
     return text_output, med_output
-def run_analysis(image: Optional[Image.Image], progress=gr.Progress()):
     """메인 분석 파이프라인: OCR + 약 정보 분석"""
-    if image is None:
         return "📷 약 봉투나 처방전 사진을 업로드해주세요.", ""
     progress(0.3, desc="📸 1단계: OCR 텍스트 추출 중...")
     progress(0.6, desc="🤖 2단계: 약 정보 분석 중...")
     try:
-        ocr_text, analysis = analyze_medication_image(image)
         progress(1.0, desc="✅ 완료!")
         ocr_output = f"### 📄 추출된 텍스트\n\n```\n{ocr_text}\n```"
@@ -375,7 +439,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=CUSTOM_CSS) as demo:
     with gr.Column(elem_classes=["upload-section"]):
         gr.Markdown("### 📸 사진 업로드")
-        image_input = gr.Image(type="pil", label="약봉투 또는 처방전 사진", height=350)
         analyze_button = gr.Button("🔍 약 정보 분석하기", elem_classes=["analyze-btn"], size="lg")
     with gr.Row():
@@ -406,7 +470,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=CUSTOM_CSS) as demo:
     - AI가 생성한 정보이므로 정확하지 않을 수 있습니다
     **🤖 기술 스택**
-    - EasyOCR (한글+영어, 초고속 OCR)
     - Google Gemma-2-2B-IT (8bit 양자화, 빠른 의료 정보 분석)
     **🔑 설정 방법**

 import json
 import re
+from typing import List, Optional, Tuple, Union
 import numpy as np
 import os
 MED_TOKENIZER = None
 OCR_MODEL_REPO_ID = "PaddlePaddle/korean_PP-OCRv5_mobile_rec"
+def _collect_ocr_texts(ocr_payload) -> List[str]:
+    """PaddleOCR 결과 구조에서 텍스트만 추출"""
+    texts: List[str] = []
+    seen = set()
+    def add_text(candidate: str):
+        if not isinstance(candidate, str):
+            return
+        normalized = candidate.strip()
+        if normalized and normalized not in seen:
+            seen.add(normalized)
+            texts.append(normalized)
+    def walk(node):
+        if isinstance(node, str):
+            add_text(node)
+            return
+        if isinstance(node, dict):
+            for key in ("text", "label", "transcription"):
+                add_text(node.get(key))
+            for key in ("texts", "labels"):
+                values = node.get(key)
+                if isinstance(values, (list, tuple)):
+                    for value in values:
+                        add_text(value)
+            for key in ("text_recognition", "rec_results", "data", "results"):
+                if key in node:
+                    walk(node[key])
+            return
+        if isinstance(node, (list, tuple)):
+            if len(node) >= 2:
+                second = node[1]
+                if isinstance(second, str):
+                    add_text(second)
+                elif isinstance(second, (list, tuple)) and second:
+                    maybe_text = second[0]
+                    add_text(maybe_text)
+            for item in node:
+                walk(item)
+    walk(ocr_payload)
+    return texts
 def load_models():
     """모델들을 한 번만 로드"""
     global OCR_READER, MED_MODEL, MED_TOKENIZER
         # Step 1: OCR - PaddleOCR로 한글 텍스트 추출
         start_time = time.time()
         img_array = np.array(image)
+        try:
+            ocr_results = OCR_READER.predict(img_array)
+        except (TypeError, AttributeError):
+            ocr_results = OCR_READER.ocr(img_array)
         ocr_time = time.time() - start_time
         print(f"⏱️ OCR took {ocr_time:.2f}s")
             return "텍스트를 찾을 수 없습니다.", ""
         # 텍스트 추출
+        texts = _collect_ocr_texts(ocr_results)
         if not texts:
             return "텍스트를 찾을 수 없습니다.", ""
     return text_output, med_output
def _ensure_pil(image_input: Optional[Union[Image.Image, np.ndarray, str]]) -> Optional[Image.Image]:
    """Normalize a Gradio image payload to an RGB PIL image.

    Args:
        image_input: A PIL image, a NumPy pixel array, a path to an image
            file, or ``None``.

    Returns:
        An RGB ``Image.Image``, or ``None`` when the input is missing, is of
        an unsupported type, or is a path that is not a readable image file.
    """
    if image_input is None:
        return None

    if isinstance(image_input, Image.Image):
        # Keep the output mode consistent with the array and path branches
        # (e.g. RGBA or palette uploads would otherwise pass through as-is).
        if image_input.mode == "RGB":
            return image_input
        return image_input.convert("RGB")

    if isinstance(image_input, np.ndarray):
        pixels = image_input
        if pixels.dtype != np.uint8:
            if (
                np.issubdtype(pixels.dtype, np.floating)
                and pixels.size
                and pixels.max() <= 1.0
            ):
                # Floats normalized to [0, 1]: rescale before casting, or the
                # uint8 cast collapses every pixel to 0 or 1 (a black image).
                pixels = pixels * 255.0
            pixels = np.clip(pixels, 0, 255).astype(np.uint8)
        return Image.fromarray(pixels).convert("RGB")

    if isinstance(image_input, str):
        # isfile (not exists) so a directory path is rejected, not opened.
        if not os.path.isfile(image_input):
            return None
        try:
            with Image.open(image_input) as img:
                # convert() loads the pixel data, so the returned copy stays
                # usable after the file handle is closed.
                return img.convert("RGB")
        except OSError:
            # Unreadable file or not an image; Pillow's
            # UnidentifiedImageError is an OSError subclass.
            return None

    return None
+def run_analysis(image: Optional[Union[Image.Image, np.ndarray, str]], progress=gr.Progress()):
     """메인 분석 파이프라인: OCR + 약 정보 분석"""
+    pil_image = _ensure_pil(image)
+    if pil_image is None:
         return "📷 약 봉투나 처방전 사진을 업로드해주세요.", ""
     progress(0.3, desc="📸 1단계: OCR 텍스트 추출 중...")
     progress(0.6, desc="🤖 2단계: 약 정보 분석 중...")
     try:
+        ocr_text, analysis = analyze_medication_image(pil_image)
         progress(1.0, desc="✅ 완료!")
         ocr_output = f"### 📄 추출된 텍스트\n\n```\n{ocr_text}\n```"
     with gr.Column(elem_classes=["upload-section"]):
         gr.Markdown("### 📸 사진 업로드")
+        image_input = gr.Image(type="numpy", image_mode="RGB", label="약봉투 또는 처방전 사진", height=350)
         analyze_button = gr.Button("🔍 약 정보 분석하기", elem_classes=["analyze-btn"], size="lg")
     with gr.Row():
     - AI가 생성한 정보이므로 정확하지 않을 수 있습니다
     **🤖 기술 스택**
+    - PaddleOCR PP-OCRv5 (한국어 최적화 OCR)
     - Google Gemma-2-2B-IT (8bit 양자화, 빠른 의료 정보 분석)
     **🔑 설정 방법**