Spaces:

LLDDWW
/

MedCard

Sleeping

LLDDWW Claude committed on Sep 30, 2025

Commit

8a13800

1 Parent(s): 31407b3

feat: upgrade models and improve quality

- Upgrade text model to Qwen2.5-7B-Instruct for better explanations
- Replace Stable Diffusion 2.1 with FLUX.1-schnell for higher quality images
- Improve generation parameters (temperature, tokens, sampling)
- Enhance prompts with detailed instructions and examples
- Update requirements.txt with version constraints and add qwen-vl-utils

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (2) hide show

app.py +32 -16
requirements.txt +7 -6

app.py CHANGED Viewed

@@ -15,8 +15,8 @@ from transformers import (
 )
 VL_MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
-TEXT_MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"
-IMAGE_MODEL_ID = "stabilityai/stable-diffusion-2-1"
 def _load_vl_model():
@@ -224,12 +224,23 @@ def generate_explanations(raw_text: str, medications: List[Dict[str, Any]]) -> D
         med_summary_lines.append(summary.strip())
     med_summary = "\n".join(med_summary_lines)
-    system_prompt = "약사 선생님처럼 어르신과 어린이에게 각각 쉽게 설명하세요."
     user_prompt = (
-        "다음은 약 봉투에서 읽은 원문과 약 목록입니다. \n"
-        "JSON으로 답변하세요. 형식은 {\"elderly\": {\"narrative\": ..., \"image_prompt\": ...}, \"child\": {\"narrative\": ..., \"image_prompt\": ...}} 입니다.\n"
-        "narrative는 한국어, image_prompt는 영어로 한 컷 만화 스타일을 묘사하세요.\n"
-        f"약 목록:\n{med_summary}\n\n원문:\n{raw_text}\n"
     )
     messages = [
@@ -246,9 +257,10 @@ def generate_explanations(raw_text: str, medications: List[Dict[str, Any]]) -> D
     with torch.no_grad():
         output_ids = TEXT_MODEL.generate(
             input_ids,
-            max_new_tokens=512,
-            temperature=0.3,
-            top_p=0.8,
         )
     generated_ids = output_ids[0][input_ids.shape[1]:]
@@ -284,13 +296,17 @@ def generate_explanations(raw_text: str, medications: List[Dict[str, Any]]) -> D
 @spaces.GPU(enable_queue=True)
 def generate_cartoon_image(prompt: str) -> Image.Image:
     if not prompt:
-        prompt = "single panel wholesome cartoon, pharmacist gently explaining medicine to family, warm pastel colors"
-    negative_prompt = "text, watermark, logo, blurry"
     image = IMAGE_PIPELINE(
-        prompt=prompt,
-        negative_prompt=negative_prompt,
-        num_inference_steps=30,
-        guidance_scale=7.5,
     ).images[0]
     return image

 )
 VL_MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
+TEXT_MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"
+IMAGE_MODEL_ID = "black-forest-labs/FLUX.1-schnell"
 def _load_vl_model():
         med_summary_lines.append(summary.strip())
     med_summary = "\n".join(med_summary_lines)
+    system_prompt = "당신은 환자 교육 전문 약사입니다. 어르신과 어린이에게 약을 쉽고 친절하게 설명하며, 복용 방법과 주의사항을 명확히 전달합니다."
     user_prompt = (
+        "다음 약 정보를 바탕으로 어르신과 어린이를 위한 복약 안내를 작성하세요.\n\n"
+        f"약 목록:\n{med_summary}\n\n원문:\n{raw_text}\n\n"
+        "JSON 형식으로 답변하세요:\n"
+        "{\n"
+        '  "elderly": {\n'
+        '    "narrative": "어르신께 드리는 설명 (존댓말, 구체적 복용 시간과 방법, 주의사항 포함, 3-5문장)",\n'
+        '    "image_prompt": "detailed cartoon illustration showing elderly person taking medicine with family support, warm pastel colors, professional medical setting, clear and caring atmosphere"\n'
+        "  },\n"
+        '  "child": {\n'
+        '    "narrative": "어린이를 위한 설명 (쉬운 말, 재미있게, 왜 먹어야 하는지 설명, 3-5문장)",\n'
+        '    "image_prompt": "cheerful illustrated cartoon of child taking medicine with parent helping, colorful and friendly, encouraging atmosphere, high quality digital art"\n'
+        "  }\n"
+        "}\n\n"
+        "narrative는 반드시 한국어로, image_prompt는 반드시 영어로 작성하세요. "
+        "image_prompt는 구체적이고 상세하게 장면을 묘사하세요."
     )
     messages = [
     with torch.no_grad():
         output_ids = TEXT_MODEL.generate(
             input_ids,
+            max_new_tokens=768,
+            temperature=0.7,
+            top_p=0.9,
+            do_sample=True,
         )
     generated_ids = output_ids[0][input_ids.shape[1]:]
 @spaces.GPU(enable_queue=True)
 def generate_cartoon_image(prompt: str) -> Image.Image:
     if not prompt:
+        prompt = "wholesome illustrated cartoon scene, friendly pharmacist explaining medicine to elderly and children, warm soft pastel colors, professional medical setting, gentle and caring atmosphere, high quality digital illustration"
+    enhanced_prompt = f"high quality illustration, {prompt}, soft lighting, detailed, professional artwork, clean composition"
     image = IMAGE_PIPELINE(
+        prompt=enhanced_prompt,
+        num_inference_steps=4,
+        guidance_scale=0.0,
+        height=768,
+        width=1024,
+        max_sequence_length=256,
     ).images[0]
     return image

requirements.txt CHANGED Viewed

@@ -1,10 +1,11 @@
-transformers
-torch
-accelerate
 einops
-diffusers
 safetensors
-gradio
 Pillow
 sentencepiece
-torchvision

+transformers>=4.46.0
+torch>=2.1.0
+accelerate>=0.25.0
 einops
+diffusers>=0.31.0
 safetensors
+gradio>=4.0.0
 Pillow
 sentencepiece
+torchvision
+qwen-vl-utils