feat: upgrade models and improve quality
Browse files
- Upgrade text model to Qwen2.5-7B-Instruct for better explanations
- Replace SD 2.1 with FLUX.1-schnell for higher quality images
- Improve generation parameters (temperature, tokens, sampling)
- Enhance prompts with detailed instructions and examples
- Update requirements.txt with version constraints
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
- app.py +32 -16
- requirements.txt +7 -6
app.py
CHANGED
|
@@ -15,8 +15,8 @@ from transformers import (
|
|
| 15 |
)
|
| 16 |
|
| 17 |
VL_MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
|
| 18 |
-
TEXT_MODEL_ID = "Qwen/Qwen2.5-
|
| 19 |
-
IMAGE_MODEL_ID = "
|
| 20 |
|
| 21 |
|
| 22 |
def _load_vl_model():
|
|
@@ -224,12 +224,23 @@ def generate_explanations(raw_text: str, medications: List[Dict[str, Any]]) -> D
|
|
| 224 |
med_summary_lines.append(summary.strip())
|
| 225 |
med_summary = "\n".join(med_summary_lines)
|
| 226 |
|
| 227 |
-
system_prompt = "
|
| 228 |
user_prompt = (
|
| 229 |
-
"
|
| 230 |
-
"
|
| 231 |
-
"
|
| 232 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
)
|
| 234 |
|
| 235 |
messages = [
|
|
@@ -246,9 +257,10 @@ def generate_explanations(raw_text: str, medications: List[Dict[str, Any]]) -> D
|
|
| 246 |
with torch.no_grad():
|
| 247 |
output_ids = TEXT_MODEL.generate(
|
| 248 |
input_ids,
|
| 249 |
-
max_new_tokens=
|
| 250 |
-
temperature=0.
|
| 251 |
-
top_p=0.
|
|
|
|
| 252 |
)
|
| 253 |
|
| 254 |
generated_ids = output_ids[0][input_ids.shape[1]:]
|
|
@@ -284,13 +296,17 @@ def generate_explanations(raw_text: str, medications: List[Dict[str, Any]]) -> D
|
|
| 284 |
@spaces.GPU(enable_queue=True)
|
| 285 |
def generate_cartoon_image(prompt: str) -> Image.Image:
|
| 286 |
if not prompt:
|
| 287 |
-
prompt = "
|
| 288 |
-
|
|
|
|
|
|
|
| 289 |
image = IMAGE_PIPELINE(
|
| 290 |
-
prompt=
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
|
|
|
|
|
|
| 294 |
).images[0]
|
| 295 |
return image
|
| 296 |
|
|
|
|
| 15 |
)
|
| 16 |
|
| 17 |
VL_MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
|
| 18 |
+
TEXT_MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"
|
| 19 |
+
IMAGE_MODEL_ID = "black-forest-labs/FLUX.1-schnell"
|
| 20 |
|
| 21 |
|
| 22 |
def _load_vl_model():
|
|
|
|
| 224 |
med_summary_lines.append(summary.strip())
|
| 225 |
med_summary = "\n".join(med_summary_lines)
|
| 226 |
|
| 227 |
+
system_prompt = "당신은 환자 교육 전문 약사입니다. 어르신과 어린이에게 약을 쉽고 친절하게 설명하며, 복용 방법과 주의사항을 명확히 전달합니다."
|
| 228 |
user_prompt = (
|
| 229 |
+
"다음 약 정보를 바탕으로 어르신과 어린이를 위한 복약 안내를 작성하세요.\n\n"
|
| 230 |
+
f"약 목록:\n{med_summary}\n\n원문:\n{raw_text}\n\n"
|
| 231 |
+
"JSON 형식으로 답변하세요:\n"
|
| 232 |
+
"{\n"
|
| 233 |
+
' "elderly": {\n'
|
| 234 |
+
' "narrative": "어르신께 드리는 설명(존댓말, 구체적 복용 시간과 방법, 주의사항 포함, 3-5문장)",\n'
|
| 235 |
+
' "image_prompt": "detailed cartoon illustration showing elderly person taking medicine with family support, warm pastel colors, professional medical setting, clear and caring atmosphere"\n'
|
| 236 |
+
" },\n"
|
| 237 |
+
' "child": {\n'
|
| 238 |
+
' "narrative": "어린이를 위한 설명(쉬운 말, 재미있게, 왜 먹어야 하는지 설명, 3-5문장)",\n'
|
| 239 |
+
' "image_prompt": "cheerful illustrated cartoon of child taking medicine with parent helping, colorful and friendly, encouraging atmosphere, high quality digital art"\n'
|
| 240 |
+
" }\n"
|
| 241 |
+
"}\n\n"
|
| 242 |
+
"narrative๋ ๋ฐ๋์ ํ๊ตญ์ด๋ก, image_prompt๋ ๋ฐ๋์ ์์ด๋ก ์์ฑํ์ธ์. "
|
| 243 |
+
"image_prompt๋ ๊ตฌ์ฒด์ ์ด๊ณ ์์ธํ๊ฒ ์ฅ๋ฉด์ ๋ฌ์ฌํ์ธ์."
|
| 244 |
)
|
| 245 |
|
| 246 |
messages = [
|
|
|
|
| 257 |
with torch.no_grad():
|
| 258 |
output_ids = TEXT_MODEL.generate(
|
| 259 |
input_ids,
|
| 260 |
+
max_new_tokens=768,
|
| 261 |
+
temperature=0.7,
|
| 262 |
+
top_p=0.9,
|
| 263 |
+
do_sample=True,
|
| 264 |
)
|
| 265 |
|
| 266 |
generated_ids = output_ids[0][input_ids.shape[1]:]
|
|
|
|
| 296 |
@spaces.GPU(enable_queue=True)
|
| 297 |
def generate_cartoon_image(prompt: str) -> Image.Image:
|
| 298 |
if not prompt:
|
| 299 |
+
prompt = "wholesome illustrated cartoon scene, friendly pharmacist explaining medicine to elderly and children, warm soft pastel colors, professional medical setting, gentle and caring atmosphere, high quality digital illustration"
|
| 300 |
+
|
| 301 |
+
enhanced_prompt = f"high quality illustration, {prompt}, soft lighting, detailed, professional artwork, clean composition"
|
| 302 |
+
|
| 303 |
image = IMAGE_PIPELINE(
|
| 304 |
+
prompt=enhanced_prompt,
|
| 305 |
+
num_inference_steps=4,
|
| 306 |
+
guidance_scale=0.0,
|
| 307 |
+
height=768,
|
| 308 |
+
width=1024,
|
| 309 |
+
max_sequence_length=256,
|
| 310 |
).images[0]
|
| 311 |
return image
|
| 312 |
|
requirements.txt
CHANGED
|
@@ -1,10 +1,11 @@
|
|
| 1 |
-
transformers
|
| 2 |
-
torch
|
| 3 |
-
accelerate
|
| 4 |
einops
|
| 5 |
-
diffusers
|
| 6 |
safetensors
|
| 7 |
-
gradio
|
| 8 |
Pillow
|
| 9 |
sentencepiece
|
| 10 |
-
torchvision
|
|
|
|
|
|
| 1 |
+
transformers>=4.46.0
|
| 2 |
+
torch>=2.1.0
|
| 3 |
+
accelerate>=0.25.0
|
| 4 |
einops
|
| 5 |
+
diffusers>=0.31.0
|
| 6 |
safetensors
|
| 7 |
+
gradio>=4.0.0
|
| 8 |
Pillow
|
| 9 |
sentencepiece
|
| 10 |
+
torchvision
|
| 11 |
+
qwen-vl-utils
|