LLDDWW committed on
Commit
e94e117
ยท
1 Parent(s): 92bb45b

Replace Granite Docling OCR (Stage 1) with Qwen2-VL-7B-Instruct

Browse files
Files changed (1) hide show
  1. app.py +42 -21
app.py CHANGED
@@ -6,34 +6,31 @@ import gradio as gr
6
  import spaces
7
  import torch
8
  from PIL import Image
9
- from transformers import AutoModel, AutoProcessor, AutoTokenizer, AutoModelForCausalLM
10
 
11
- # Stage 1: OCR ๋ชจ๋ธ (๋ฌธ์„œ์—์„œ ํ…์ŠคํŠธ ์ถ”์ถœ)
12
- OCR_MODEL_ID = "ibm-granite/granite-docling-258M"
13
 
14
  # Stage 2: LLM ๋ชจ๋ธ (ํ…์ŠคํŠธ์—์„œ ์•ฝ ์ด๋ฆ„ ์ถ”์ถœ)
15
  LLM_MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"
16
 
17
 
18
  def _load_ocr_model():
19
- """Granite Docling OCR ๋ชจ๋ธ ๋กœ๋“œ"""
20
- device = "cuda" if torch.cuda.is_available() else "cpu"
21
-
22
- model = AutoModel.from_pretrained(
23
- OCR_MODEL_ID,
24
- trust_remote_code=True
25
- ).to(device)
26
-
27
- processor = AutoProcessor.from_pretrained(
28
  OCR_MODEL_ID,
29
- trust_remote_code=True
 
 
 
30
  )
31
 
 
32
  return model, processor
33
 
34
 
35
  def _load_llm_model():
36
- """Llama 3.1 8B ๋ชจ๋ธ ๋กœ๋“œ (8bit ์–‘์žํ™”)"""
37
  model = AutoModelForCausalLM.from_pretrained(
38
  LLM_MODEL_ID,
39
  device_map="auto",
@@ -46,7 +43,7 @@ def _load_llm_model():
46
  return model, tokenizer
47
 
48
 
49
- print("๐Ÿ”„ Loading Granite Docling OCR model...")
50
  OCR_MODEL, OCR_PROCESSOR = _load_ocr_model()
51
  print("โœ… OCR model loaded!")
52
 
@@ -73,15 +70,39 @@ def _extract_json_block(text: str) -> Optional[str]:
73
 
74
 
75
  def extract_text_from_image(image: Image.Image) -> str:
76
- """Stage 1: Granite Docling์œผ๋กœ ์ด๋ฏธ์ง€์—์„œ ํ…์ŠคํŠธ ์ถ”์ถœ"""
77
  try:
78
- inputs = OCR_PROCESSOR(images=image, return_tensors="pt").to(OCR_MODEL.device)
 
 
 
 
 
 
 
 
 
 
 
79
 
80
  with torch.no_grad():
81
- outputs = OCR_MODEL(**inputs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
- extracted_text = OCR_PROCESSOR.batch_decode(outputs, skip_special_tokens=True)[0]
84
- return extracted_text.strip()
85
 
86
  except Exception as e:
87
  raise Exception(f"OCR ์˜ค๋ฅ˜: {str(e)}")
@@ -308,7 +329,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=CUSTOM_CSS) as demo:
308
  ---
309
 
310
  **โ„น๏ธ 2๋‹จ๊ณ„ ํŒŒ์ดํ”„๋ผ์ธ**
311
- - **Stage 1**: Granite Docling (OCR) - ์ด๋ฏธ์ง€์—์„œ ๋ชจ๋“  ํ…์ŠคํŠธ ์ถ”์ถœ
312
  - **Stage 2**: Qwen2.5 7B (LLM) - ์ถ”์ถœ๋œ ํ…์ŠคํŠธ์—์„œ ์•ฝ ์ด๋ฆ„๋งŒ ์‹๋ณ„
313
 
314
  ์‹ค์ œ ๋ณต์•ฝ์€ ์˜์‚ฌยท์•ฝ์‚ฌ์˜ ์ง€์‹œ๋ฅผ ๋”ฐ๋ฅด์„ธ์š”.
 
6
  import spaces
7
  import torch
8
  from PIL import Image
9
+ from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, AutoTokenizer, AutoModelForCausalLM
10
 
11
+ # Stage 1: OCR ๋ชจ๋ธ (Qwen2-VL๋กœ ๋ฌธ์„œ์—์„œ ํ…์ŠคํŠธ ์ถ”์ถœ)
12
+ OCR_MODEL_ID = "Qwen/Qwen2-VL-7B-Instruct"
13
 
14
  # Stage 2: LLM ๋ชจ๋ธ (ํ…์ŠคํŠธ์—์„œ ์•ฝ ์ด๋ฆ„ ์ถ”์ถœ)
15
  LLM_MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"
16
 
17
 
18
def _load_ocr_model():
    """Load the Qwen2-VL model and processor used for Stage-1 OCR.

    Returns:
        tuple: ``(model, processor)`` — the 8-bit quantized
        ``Qwen2VLForConditionalGeneration`` model (device placement handled
        by accelerate via ``device_map="auto"``) and its ``AutoProcessor``.
    """
    # NOTE: a bare `load_in_8bit=True` kwarg to from_pretrained is deprecated
    # in recent transformers releases — pass an explicit BitsAndBytesConfig.
    # Imported locally to keep the top-of-file import line untouched.
    from transformers import BitsAndBytesConfig

    model = Qwen2VLForConditionalGeneration.from_pretrained(
        OCR_MODEL_ID,
        device_map="auto",  # let accelerate shard/place layers automatically
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        torch_dtype=torch.float16,  # fp16 for the layers kept out of int8
        trust_remote_code=True,
    )
    processor = AutoProcessor.from_pretrained(OCR_MODEL_ID, trust_remote_code=True)
    return model, processor
30
 
31
 
32
  def _load_llm_model():
33
+ """Qwen2.5 7B ๋ชจ๋ธ ๋กœ๋“œ (8bit ์–‘์žํ™”)"""
34
  model = AutoModelForCausalLM.from_pretrained(
35
  LLM_MODEL_ID,
36
  device_map="auto",
 
43
  return model, tokenizer
44
 
45
 
46
+ print("๐Ÿ”„ Loading Qwen2-VL OCR model...")
47
  OCR_MODEL, OCR_PROCESSOR = _load_ocr_model()
48
  print("โœ… OCR model loaded!")
49
 
 
70
 
71
 
72
def extract_text_from_image(image: Image.Image) -> str:
    """Stage 1: extract all text from an image with Qwen2-VL (OCR).

    Args:
        image: The document image to transcribe.

    Returns:
        The text the model read off the image, whitespace-stripped.

    Raises:
        Exception: any underlying failure, re-raised with an "OCR ์˜ค๋ฅ˜"
            prefix (message kept in Korean — it is surfaced in the UI)
            and the original exception chained as the cause.
    """
    try:
        # Chat-style prompt asking the model to transcribe the image verbatim.
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "์ด ์ด๋ฏธ์ง€์˜ ๋ชจ๋“  ํ…์ŠคํŠธ๋ฅผ ์ •ํ™•ํžˆ ์ถ”์ถœํ•ด์„œ ๊ทธ๋Œ€๋กœ ์ถœ๋ ฅํ•ด์ฃผ์„ธ์š”. OCR ๊ฒฐ๊ณผ๋งŒ ์ถœ๋ ฅํ•˜์„ธ์š”."},
                    {"type": "image"},
                ],
            }
        ]

        chat_text = OCR_PROCESSOR.apply_chat_template(messages, add_generation_prompt=True)
        inputs = OCR_PROCESSOR(text=[chat_text], images=[image], return_tensors="pt").to(OCR_MODEL.device)

        with torch.no_grad():
            # Greedy decoding for deterministic OCR output. Do NOT pass
            # `temperature` here: with do_sample=False it is ignored and
            # transformers emits a warning about the unused sampling flag.
            output_ids = OCR_MODEL.generate(
                **inputs,
                max_new_tokens=1024,
                do_sample=False,
            )

        # Slice off the prompt tokens so only the newly generated answer is
        # decoded — more robust than string-splitting on chat-template
        # markers like "<|im_start|>assistant".
        generated_ids = output_ids[:, inputs["input_ids"].shape[1]:]
        extracted_text = OCR_PROCESSOR.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0]
        return extracted_text.strip()

    except Exception as e:
        # Chain the original exception so the real cause stays debuggable.
        raise Exception(f"OCR ์˜ค๋ฅ˜: {str(e)}") from e
 
329
  ---
330
 
331
  **โ„น๏ธ 2๋‹จ๊ณ„ ํŒŒ์ดํ”„๋ผ์ธ**
332
+ - **Stage 1**: Qwen2-VL 7B (OCR) - ์ด๋ฏธ์ง€์—์„œ ๋ชจ๋“  ํ…์ŠคํŠธ ์ถ”์ถœ
333
  - **Stage 2**: Qwen2.5 7B (LLM) - ์ถ”์ถœ๋œ ํ…์ŠคํŠธ์—์„œ ์•ฝ ์ด๋ฆ„๋งŒ ์‹๋ณ„
334
 
335
  ์‹ค์ œ ๋ณต์•ฝ์€ ์˜์‚ฌยท์•ฝ์‚ฌ์˜ ์ง€์‹œ๋ฅผ ๋”ฐ๋ฅด์„ธ์š”.