LLDDWW commited on
Commit
64c57fb
ยท
1 Parent(s): 52bda02

sdfdsfads2333

Browse files
Files changed (1) hide show
  1. app.py +148 -85
app.py CHANGED
@@ -1,39 +1,58 @@
1
  import json
2
  import re
3
- from typing import List, Optional
4
 
5
  import gradio as gr
6
  import spaces
7
  import torch
8
  from PIL import Image
9
- from transformers import (
10
- Qwen2VLForConditionalGeneration,
11
- AutoProcessor,
12
- )
13
 
14
- # ์ตœ๊ณ  ํ’ˆ์งˆ ๊ณต๊ฐœ ๋ชจ๋ธ + 8๋น„ํŠธ ์–‘์žํ™” (ZeroGPU ์ตœ์ ํ™”)
15
- VL_MODEL_ID = "Qwen/Qwen2-VL-7B-Instruct"
16
 
 
 
17
 
18
- def _load_vl_model():
19
- """VL ๋ชจ๋ธ ๋กœ๋“œ - 8๋น„ํŠธ ์–‘์žํ™” + FP16"""
20
- device_map = "auto" if torch.cuda.is_available() else None
21
 
22
- model = Qwen2VLForConditionalGeneration.from_pretrained(
23
- VL_MODEL_ID,
24
- device_map=device_map,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  load_in_8bit=True,
26
  torch_dtype=torch.float16,
27
  trust_remote_code=True,
28
  )
29
 
30
- processor = AutoProcessor.from_pretrained(VL_MODEL_ID, trust_remote_code=True)
31
- return model, processor
32
 
33
 
34
- print("๐Ÿ”„ Loading Qwen2-VL-7B model...")
35
- VL_MODEL, VL_PROCESSOR = _load_vl_model()
36
- print("โœ… Model loaded successfully!")
 
 
 
 
37
 
38
 
39
  def _extract_assistant_content(decoded: str) -> str:
@@ -53,84 +72,120 @@ def _extract_json_block(text: str) -> Optional[str]:
53
  return match.group(0)
54
 
55
 
56
- @spaces.GPU(duration=120)
57
- def extract_medication_names(image: Image.Image) -> List[str]:
58
- """์ด๋ฏธ์ง€์—์„œ ์•ฝ ์ด๋ฆ„๋งŒ ์ถ”์ถœ"""
59
  try:
60
- instructions = """์ด ์‚ฌ์ง„ ์† ์•ฝ๋ด‰ํˆฌ/์ฒ˜๋ฐฉ์ „์—์„œ ์•ฝ ์ด๋ฆ„๋งŒ ๋ชจ๋‘ ์ฐพ์•„์„œ JSON ํ˜•์‹์œผ๋กœ ๋‹ต๋ณ€ํ•˜์„ธ์š”."""
61
-
62
- schema = """{
63
- "medications": ["์•ฝ ์ด๋ฆ„ 1", "์•ฝ ์ด๋ฆ„ 2", "์•ฝ ์ด๋ฆ„ 3"]
64
- }"""
65
-
66
- messages = [
67
- {
68
- "role": "system",
69
- "content": "๋‹น์‹ ์€ ์•ฝ ์ด๋ฆ„์„ ์ •ํ™•ํžˆ ์ฝ๋Š” OCR ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค. ์•ฝ๋ด‰ํˆฌ๋‚˜ ์ฒ˜๋ฐฉ์ „์—์„œ ์•ฝ ์ด๋ฆ„๋งŒ ์ถ”์ถœํ•ฉ๋‹ˆ๋‹ค.",
70
- },
71
- {
72
- "role": "user",
73
- "content": [
74
- {"type": "text", "text": instructions},
75
- {"type": "text", "text": schema},
76
- {"type": "image"},
77
- ],
78
- },
79
- ]
80
-
81
- chat_text = VL_PROCESSOR.apply_chat_template(messages, add_generation_prompt=True)
82
- inputs = VL_PROCESSOR(text=[chat_text], images=[image], return_tensors="pt").to(VL_MODEL.device)
83
-
84
- output_ids = VL_MODEL.generate(
85
- **inputs,
86
- max_new_tokens=1024,
87
- temperature=0.2, # ๋งค์šฐ ์ •ํ™•ํ•˜๊ฒŒ
88
- top_p=0.85,
89
- do_sample=True,
90
- )
91
-
92
- decoded = VL_PROCESSOR.batch_decode(output_ids, skip_special_tokens=False)[0]
93
- assistant_text = _extract_assistant_content(decoded)
94
-
95
- # JSON ํŒŒ์‹ฑ
96
- json_block = _extract_json_block(assistant_text)
97
- if json_block:
98
- data = json.loads(json_block)
99
- meds = data.get("medications", [])
100
- if isinstance(meds, list):
101
- return [str(m).strip() for m in meds if str(m).strip()]
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
  return ["์•ฝ ์ด๋ฆ„์„ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค."]
104
 
105
  except Exception as e:
106
- return [f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
 
109
- def format_medication_list(medications: List[str]) -> str:
110
- """์•ฝ ์ด๋ฆ„ ๋ฆฌ์ŠคํŠธ๋ฅผ ๋งˆํฌ๋‹ค์šด์œผ๋กœ ํฌ๋งท"""
111
- if not medications or medications[0].startswith("์˜ค๋ฅ˜") or medications[0].startswith("์•ฝ ์ด๋ฆ„์„ ์ฐพ์ง€"):
112
- return f"### โš ๏ธ {medications[0] if medications else '์•ฝ ์ด๋ฆ„์„ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค.'}"
113
 
114
- output = f"### ๐Ÿ’Š ๊ฒ€์ถœ๋œ ์•ฝ๋ฌผ ({len(medications)}๊ฐœ)\n\n"
115
- for idx, med_name in enumerate(medications, 1):
116
- output += f"{idx}. **{med_name}**\n"
 
 
 
 
117
 
118
- return output
119
 
120
 
121
  def run_analysis(image: Optional[Image.Image], progress=gr.Progress()):
122
- """๋ฉ”์ธ ๋ถ„์„ ํŒŒ์ดํ”„๋ผ์ธ"""
123
  if image is None:
124
- return "๐Ÿ“ท ์•ฝ ๋ด‰ํˆฌ๋‚˜ ์ฒ˜๋ฐฉ์ „ ์‚ฌ์ง„์„ ์—…๋กœ๋“œํ•ด์ฃผ์„ธ์š”."
 
 
 
125
 
126
- progress(0.3, desc="๐Ÿ” ์ด๋ฏธ์ง€ ๋ถ„์„ ์ค‘...")
127
- medications = extract_medication_names(image)
128
 
129
  progress(0.9, desc="๐Ÿ“ ๊ฒฐ๊ณผ ์ •๋ฆฌ ์ค‘...")
130
- result_md = format_medication_list(medications)
131
 
132
  progress(1.0, desc="โœ… ์™„๋ฃŒ!")
133
- return result_md
134
 
135
 
136
  # ์‹ฌํ”Œํ•œ CSS
@@ -228,23 +283,31 @@ with gr.Blocks(theme=gr.themes.Soft(), css=CUSTOM_CSS) as demo:
228
  with gr.Column(elem_classes=["upload-section"]):
229
  gr.Markdown("### ๐Ÿ“ธ ์‚ฌ์ง„ ์—…๋กœ๋“œ")
230
  image_input = gr.Image(type="pil", label="์•ฝ๋ด‰ํˆฌ ๋˜๋Š” ์ฒ˜๋ฐฉ์ „ ์‚ฌ์ง„", height=350)
231
- analyze_button = gr.Button("๐Ÿ” ์•ฝ ์ด๋ฆ„ ์ถ”์ถœ", elem_classes=["analyze-btn"], size="lg")
232
 
233
- with gr.Column(elem_classes=["result-section"]):
234
- gr.Markdown("### ๐Ÿ“‹ ์ถ”์ถœ ๊ฒฐ๊ณผ")
235
- result_output = gr.Markdown("๋ถ„์„์„ ์‹œ์ž‘ํ•˜๋ฉด ์—ฌ๊ธฐ์— ์•ฝ ์ด๋ฆ„ ๋ฆฌ์ŠคํŠธ๊ฐ€ ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค.")
 
 
 
 
 
236
 
237
  analyze_button.click(
238
  run_analysis,
239
  inputs=image_input,
240
- outputs=result_output,
241
  )
242
 
243
  gr.Markdown("""
244
  ---
245
 
246
- **โ„น๏ธ ์ฐธ๊ณ ์‚ฌํ•ญ**
247
- ์ด ๋„๊ตฌ๋Š” OCR ๊ธฐ๋ฐ˜์œผ๋กœ ์•ฝ ์ด๋ฆ„๋งŒ ์ถ”์ถœํ•ฉ๋‹ˆ๋‹ค. ์‹ค์ œ ๋ณต์•ฝ์€ ์˜์‚ฌยท์•ฝ์‚ฌ์˜ ์ง€์‹œ๋ฅผ ๋”ฐ๋ฅด์„ธ์š”.
 
 
 
248
  """)
249
 
250
  if __name__ == "__main__":
 
1
import json
import re
from typing import List, Optional, Tuple

import gradio as gr
import spaces
import torch
from PIL import Image
from transformers import (
    AutoModel,
    AutoModelForCausalLM,
    AutoModelForVision2Seq,
    AutoProcessor,
    AutoTokenizer,
    BitsAndBytesConfig,
)
 
 
 
10
 
11
+ # Stage 1: OCR ๋ชจ๋ธ (๋ฌธ์„œ์—์„œ ํ…์ŠคํŠธ ์ถ”์ถœ)
12
+ OCR_MODEL_ID = "ibm-granite/granite-docling-258M"
13
 
14
+ # Stage 2: LLM ๋ชจ๋ธ (ํ…์ŠคํŠธ์—์„œ ์•ฝ ์ด๋ฆ„ ์ถ”์ถœ)
15
+ LLM_MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"
16
 
 
 
 
17
 
18
def _load_ocr_model():
    """Load the Granite Docling OCR model and its processor.

    Returns:
        (model, processor): a generate-capable vision-to-text model, moved to
        GPU when available, plus its matching processor.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # granite-docling is an image-text-to-text checkpoint. AutoModel loads a
    # bare backbone with no language-modeling head, so the OCR stage (which
    # decodes generated token ids) could never work with it.
    # AutoModelForVision2Seq exposes .generate().
    model = AutoModelForVision2Seq.from_pretrained(
        OCR_MODEL_ID,
        trust_remote_code=True,
    ).to(device)

    processor = AutoProcessor.from_pretrained(
        OCR_MODEL_ID,
        trust_remote_code=True,
    )

    return model, processor
33
+
34
+
35
def _load_llm_model():
    """Load Llama-3.1-8B-Instruct with 8-bit quantization.

    Returns:
        (model, tokenizer)
    """
    # Passing load_in_8bit=True straight to from_pretrained is deprecated and
    # removed in recent transformers releases; the supported path is a
    # BitsAndBytesConfig passed as quantization_config.
    quant_config = BitsAndBytesConfig(load_in_8bit=True)

    model = AutoModelForCausalLM.from_pretrained(
        LLM_MODEL_ID,
        device_map="auto",
        quantization_config=quant_config,
        torch_dtype=torch.float16,
        trust_remote_code=True,
    )

    tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_ID, trust_remote_code=True)
    return model, tokenizer
47
 
48
 
49
# Load both pipeline stages once at import time so every request served by the
# Gradio app reuses the same in-memory models instead of reloading them.
print("๐Ÿ”„ Loading Granite Docling OCR model...")
OCR_MODEL, OCR_PROCESSOR = _load_ocr_model()
print("โœ… OCR model loaded!")

print("๐Ÿ”„ Loading Llama-3.1-8B-Instruct...")
LLM_MODEL, LLM_TOKENIZER = _load_llm_model()
print("โœ… LLM model loaded!")
56
 
57
 
58
  def _extract_assistant_content(decoded: str) -> str:
 
72
  return match.group(0)
73
 
74
 
75
def extract_text_from_image(image: Image.Image) -> str:
    """Stage 1: extract raw text from the image with Granite Docling.

    Args:
        image: PIL image of a medication pouch or prescription.

    Returns:
        The OCR'd text, stripped of surrounding whitespace.

    Raises:
        RuntimeError: if OCR fails for any reason (original cause chained).
    """
    try:
        inputs = OCR_PROCESSOR(images=image, return_tensors="pt").to(OCR_MODEL.device)

        # BUG FIX: a plain forward pass returns a ModelOutput (logits/hidden
        # states), not token ids, so batch_decode on it is meaningless.
        # Autoregressive generation produces decodable token ids.
        with torch.no_grad():
            generated_ids = OCR_MODEL.generate(**inputs, max_new_tokens=1024)

        extracted_text = OCR_PROCESSOR.batch_decode(generated_ids, skip_special_tokens=True)[0]
        return extracted_text.strip()

    except Exception as e:
        # RuntimeError is an Exception subclass, so existing callers that
        # catch Exception are unaffected; chaining preserves the traceback.
        raise RuntimeError(f"OCR ์˜ค๋ฅ˜: {str(e)}") from e
88
+
89
+
90
def extract_medications_from_text(text: str) -> List[str]:
    """Stage 2: extract medication names from OCR text with Llama 3.1.

    Args:
        text: raw text produced by the OCR stage.

    Returns:
        A list of medication names, or a single Korean "not found" message
        when the model yields no parseable result.

    Raises:
        RuntimeError: if tokenization, generation, or JSON parsing crashes.
    """
    try:
        prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a medical text analyzer. Extract only medication names from the given text and return them as a JSON array.
Return ONLY valid JSON format: {{"medications": ["name1", "name2"]}}
<|eot_id|><|start_header_id|>user<|end_header_id|>

Extract all medication names from this text:

{text}

Return only the JSON array of medication names.<|eot_id|><|start_header_id|>assistant<|end_header_id|>

"""

        inputs = LLM_TOKENIZER(prompt, return_tensors="pt").to(LLM_MODEL.device)
        prompt_len = inputs["input_ids"].shape[-1]

        with torch.no_grad():
            outputs = LLM_MODEL.generate(
                **inputs,
                max_new_tokens=512,
                temperature=0.3,
                top_p=0.9,
                do_sample=True,
                pad_token_id=LLM_TOKENIZER.eos_token_id,
            )

        # BUG FIX: decoding the FULL sequence with skip_special_tokens=True
        # strips the <|start_header_id|> markers, so the old split on that
        # marker never fired and the JSON regex then matched the schema
        # example embedded in the prompt ({{"medications": ["name1",
        # "name2"]}}), returning bogus names. Decode only the newly generated
        # tokens instead.
        response = LLM_TOKENIZER.decode(outputs[0][prompt_len:], skip_special_tokens=True)

        # Parse the first JSON object in the model's reply.
        json_match = re.search(r'\{.*?\}', response, re.DOTALL)
        if json_match:
            data = json.loads(json_match.group(0))
            medications = data.get("medications", [])
            if isinstance(medications, list) and medications:
                return [str(m).strip() for m in medications if str(m).strip()]

        return ["์•ฝ ์ด๋ฆ„์„ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค."]

    except Exception as e:
        # RuntimeError is an Exception subclass; existing callers unaffected.
        raise RuntimeError(f"LLM ๋ถ„์„ ์˜ค๋ฅ˜: {str(e)}") from e
137
+
138
+
139
@spaces.GPU(duration=120)
def extract_medication_names(image: Image.Image) -> Tuple[str, List[str]]:
    """Run the two-stage pipeline: OCR the image, then pull out drug names.

    Args:
        image: uploaded PIL image.

    Returns:
        (extracted_text, medications). On failure extracted_text is "" and
        medications holds a single error message.
    """
    try:
        # Stage 1: OCR.
        ocr_text = extract_text_from_image(image)
        if not ocr_text:
            return "", ["ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœํ•˜์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค."]

        # Stage 2: LLM-based name extraction on the OCR output.
        return ocr_text, extract_medications_from_text(ocr_text)

    except Exception as e:
        return "", [f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"]
156
 
157
 
158
def format_results(extracted_text: str, medications: List[str]) -> Tuple[str, str]:
    """Render the pipeline output as two Markdown panels.

    Args:
        extracted_text: raw OCR text, shown verbatim in a fenced code block.
        medications: medication names, or a single sentinel error message.

    Returns:
        (text_panel, medication_panel) Markdown strings.
    """
    text_panel = f"### ๐Ÿ“„ ์ถ”์ถœ๋œ ํ…์ŠคํŠธ\n\n```\n{extracted_text}\n```"

    # Sentinel messages produced upstream all begin with one of these prefixes.
    error_prefixes = ("์˜ค๋ฅ˜", "์•ฝ ์ด๋ฆ„์„ ์ฐพ์ง€", "ํ…์ŠคํŠธ๋ฅผ")
    if not medications or medications[0].startswith(error_prefixes):
        message = medications[0] if medications else '์•ฝ ์ด๋ฆ„์„ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค.'
        med_panel = f"### โš ๏ธ {message}"
    else:
        numbered = "".join(
            f"{idx}. **{name}**\n" for idx, name in enumerate(medications, 1)
        )
        med_panel = f"### ๐Ÿ’Š ๊ฒ€์ถœ๋œ ์•ฝ๋ฌผ ({len(medications)}๊ฐœ)\n\n" + numbered

    return text_panel, med_panel
172
 
173
 
174
def run_analysis(image: Optional[Image.Image], progress=gr.Progress()):
    """Main analysis pipeline (OCR -> LLM) backing the Gradio button.

    Args:
        image: uploaded photo, or None when nothing was provided.
        progress: Gradio progress tracker (injected by the framework).

    Returns:
        (text_panel, medication_panel) Markdown strings for the two outputs.
    """
    # Guard clause: nothing to analyze.
    if image is None:
        return "๐Ÿ“ท ์•ฝ ๋ด‰ํˆฌ๋‚˜ ์ฒ˜๋ฐฉ์ „ ์‚ฌ์ง„์„ ์—…๋กœ๋“œํ•ด์ฃผ์„ธ์š”.", ""

    progress(0.2, desc="๐Ÿ“ธ Stage 1: OCR ํ…์ŠคํŠธ ์ถ”์ถœ ์ค‘...")
    progress(0.6, desc="๐Ÿค– Stage 2: LLM ์•ฝ๋ฌผ ๋ถ„์„ ์ค‘...")
    ocr_text, med_list = extract_medication_names(image)

    progress(0.9, desc="๐Ÿ“ ๊ฒฐ๊ณผ ์ •๋ฆฌ ์ค‘...")
    panels = format_results(ocr_text, med_list)

    progress(1.0, desc="โœ… ์™„๋ฃŒ!")
    return panels
189
 
190
 
191
  # ์‹ฌํ”Œํ•œ CSS
 
283
  with gr.Column(elem_classes=["upload-section"]):
284
  gr.Markdown("### ๐Ÿ“ธ ์‚ฌ์ง„ ์—…๋กœ๋“œ")
285
  image_input = gr.Image(type="pil", label="์•ฝ๋ด‰ํˆฌ ๋˜๋Š” ์ฒ˜๋ฐฉ์ „ ์‚ฌ์ง„", height=350)
286
+ analyze_button = gr.Button("๐Ÿ” 2๋‹จ๊ณ„ ๋ถ„์„ ์‹œ์ž‘ (OCR โ†’ LLM)", elem_classes=["analyze-btn"], size="lg")
287
 
288
+ with gr.Row():
289
+ with gr.Column(elem_classes=["result-section"]):
290
+ gr.Markdown("### ๐Ÿ“‹ Stage 1: OCR ๊ฒฐ๊ณผ")
291
+ text_output = gr.Markdown("OCR๋กœ ์ถ”์ถœ๋œ ์ „์ฒด ํ…์ŠคํŠธ๊ฐ€ ์—ฌ๊ธฐ ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค.")
292
+
293
+ with gr.Column(elem_classes=["result-section"]):
294
+ gr.Markdown("### ๐Ÿ“‹ Stage 2: LLM ๋ถ„์„ ๊ฒฐ๊ณผ")
295
+ med_output = gr.Markdown("LLM์ด ๋ถ„์„ํ•œ ์•ฝ๋ฌผ ๋ฆฌ์ŠคํŠธ๊ฐ€ ์—ฌ๊ธฐ ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค.")
296
 
297
  analyze_button.click(
298
  run_analysis,
299
  inputs=image_input,
300
+ outputs=[text_output, med_output],
301
  )
302
 
303
  gr.Markdown("""
304
  ---
305
 
306
+ **โ„น๏ธ 2๋‹จ๊ณ„ ํŒŒ์ดํ”„๋ผ์ธ**
307
+ - **Stage 1**: Granite Docling (OCR) - ์ด๋ฏธ์ง€์—์„œ ๋ชจ๋“  ํ…์ŠคํŠธ ์ถ”์ถœ
308
+ - **Stage 2**: Llama 3.1 8B (LLM) - ์ถ”์ถœ๋œ ํ…์ŠคํŠธ์—์„œ ์•ฝ ์ด๋ฆ„๋งŒ ์‹๋ณ„
309
+
310
+ ์‹ค์ œ ๋ณต์•ฝ์€ ์˜์‚ฌยท์•ฝ์‚ฌ์˜ ์ง€์‹œ๋ฅผ ๋”ฐ๋ฅด์„ธ์š”.
311
  """)
312
 
313
  if __name__ == "__main__":