Spaces:

LLDDWW
/

MedCard

Sleeping

LLDDWW Claude committed on Oct 10

Commit

e96841e

1 Parent(s): ab48ca2

perf: switch to faster Qwen2-VL-2B for OCR

- Replace Qwen2.5-VL-7B with Qwen2-VL-2B for faster inference
- Reduce max_new_tokens: OCR 2048→1024, Medical 3072→1536
- Increase GPU duration to 300s to prevent timeout
- Significantly faster processing while maintaining quality

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show

app.py +10 -10

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ import gradio as gr
 import spaces
 import torch
 from PIL import Image
-from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, AutoTokenizer, AutoModelForCausalLM
 from qwen_vl_utils import process_vision_info
 from huggingface_hub import login
@@ -17,8 +17,8 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 if HF_TOKEN:
     login(token=HF_TOKEN.strip())
-# OCR 모델 ID
-OCR_MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
 # 약 정보 분석 모델 ID (의료 전문)
 MED_MODEL_ID = "google/medgemma-4b-it"
@@ -34,10 +34,10 @@ def load_models():
     global OCR_MODEL, OCR_PROCESSOR, MED_MODEL, MED_TOKENIZER
     if OCR_MODEL is None:
-        print("🔄 Loading Qwen2.5-VL-7B for OCR...")
-        OCR_MODEL = Qwen2_5_VLForConditionalGeneration.from_pretrained(
             OCR_MODEL_ID,
-            torch_dtype="auto",
             device_map="auto"
         )
         OCR_PROCESSOR = AutoProcessor.from_pretrained(OCR_MODEL_ID)
@@ -74,7 +74,7 @@ def _extract_json_block(text: str) -> Optional[str]:
     return match.group(0)
-@spaces.GPU(duration=180)
 def analyze_medication_image(image: Image.Image) -> Tuple[str, str]:
     """이미지에서 OCR 추출 후 약 정보 분석"""
     try:
@@ -101,7 +101,7 @@ def analyze_medication_image(image: Image.Image) -> Tuple[str, str]:
         inputs = inputs.to(OCR_MODEL.device)
         with torch.no_grad():
-            generated_ids = OCR_MODEL.generate(**inputs, max_new_tokens=2048)
         generated_ids_trimmed = [
             out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
@@ -149,7 +149,7 @@ def analyze_medication_image(image: Image.Image) -> Tuple[str, str]:
         with torch.no_grad():
             outputs = MED_MODEL.generate(
                 **inputs,
-                max_new_tokens=3072,
                 temperature=0.7,
                 top_p=0.9,
                 do_sample=True
@@ -396,7 +396,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=CUSTOM_CSS) as demo:
     - AI가 생성한 정보이므로 정확하지 않을 수 있습니다
     **🤖 기술 스택**
-    - Qwen2.5-VL-7B-Instruct (OCR 텍스트 추출)
     - Google MedGemma-4B-IT (의료 전문 모델 - 약 정보 분석 및 설명)
     **🔑 설정 방법**

 import spaces
 import torch
 from PIL import Image
+from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, AutoTokenizer, AutoModelForCausalLM
 from qwen_vl_utils import process_vision_info
 from huggingface_hub import login
 if HF_TOKEN:
     login(token=HF_TOKEN.strip())
+# OCR 모델 ID (더 빠른 추론을 위해 2B 모델 사용)
+OCR_MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct"
 # 약 정보 분석 모델 ID (의료 전문)
 MED_MODEL_ID = "google/medgemma-4b-it"
     global OCR_MODEL, OCR_PROCESSOR, MED_MODEL, MED_TOKENIZER
     if OCR_MODEL is None:
+        print("🔄 Loading Qwen2-VL-2B for OCR...")
+        OCR_MODEL = Qwen2VLForConditionalGeneration.from_pretrained(
             OCR_MODEL_ID,
+            torch_dtype=torch.bfloat16,
             device_map="auto"
         )
         OCR_PROCESSOR = AutoProcessor.from_pretrained(OCR_MODEL_ID)
     return match.group(0)
+@spaces.GPU(duration=300)
 def analyze_medication_image(image: Image.Image) -> Tuple[str, str]:
     """이미지에서 OCR 추출 후 약 정보 분석"""
     try:
         inputs = inputs.to(OCR_MODEL.device)
         with torch.no_grad():
+            generated_ids = OCR_MODEL.generate(**inputs, max_new_tokens=1024)
         generated_ids_trimmed = [
             out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
         with torch.no_grad():
             outputs = MED_MODEL.generate(
                 **inputs,
+                max_new_tokens=1536,
                 temperature=0.7,
                 top_p=0.9,
                 do_sample=True
     - AI가 생성한 정보이므로 정확하지 않을 수 있습니다
     **🤖 기술 스택**
+    - Qwen2-VL-2B-Instruct (빠른 OCR 텍스트 추출)
     - Google MedGemma-4B-IT (의료 전문 모델 - 약 정보 분석 및 설명)
     **🔑 설정 방법**