LLDDWW Claude committed on
Commit
8a13800
·
1 Parent(s): 31407b3

feat: upgrade models and improve quality

Browse files

- Upgrade text model to Qwen2.5-7B-Instruct for better explanations
- Replace SD 2.1 with FLUX.1-schnell for higher quality images
- Improve generation parameters (temperature, tokens, sampling)
- Enhance prompts with detailed instructions and examples
- Update requirements.txt with version constraints

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (2) hide show
  1. app.py +32 -16
  2. requirements.txt +7 -6
app.py CHANGED
@@ -15,8 +15,8 @@ from transformers import (
15
  )
16
 
17
  VL_MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
18
- TEXT_MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"
19
- IMAGE_MODEL_ID = "stabilityai/stable-diffusion-2-1"
20
 
21
 
22
  def _load_vl_model():
@@ -224,12 +224,23 @@ def generate_explanations(raw_text: str, medications: List[Dict[str, Any]]) -> D
224
  med_summary_lines.append(summary.strip())
225
  med_summary = "\n".join(med_summary_lines)
226
 
227
- system_prompt = "์•ฝ์‚ฌ ์„ ์ƒ๋‹˜์ฒ˜๋Ÿผ ์–ด๋ฅด์‹ ๊ณผ ์–ด๋ฆฐ์ด์—๊ฒŒ ๊ฐ๊ฐ ์‰ฝ๊ฒŒ ์„ค๋ช…ํ•˜์„ธ์š”."
228
  user_prompt = (
229
- "๋‹ค์Œ์€ ์•ฝ ๋ด‰ํˆฌ์—์„œ ์ฝ์€ ์›๋ฌธ๊ณผ ์•ฝ ๋ชฉ๋ก์ž…๋‹ˆ๋‹ค. \n"
230
- "JSON์œผ๋กœ ๋‹ต๋ณ€ํ•˜์„ธ์š”. ํ˜•์‹์€ {\"elderly\": {\"narrative\": ..., \"image_prompt\": ...}, \"child\": {\"narrative\": ..., \"image_prompt\": ...}} ์ž…๋‹ˆ๋‹ค.\n"
231
- "narrative๋Š” ํ•œ๊ตญ์–ด, image_prompt๋Š” ์˜์–ด๋กœ ํ•œ ์ปท ๋งŒํ™” ์Šคํƒ€์ผ์„ ๋ฌ˜์‚ฌํ•˜์„ธ์š”.\n"
232
- f"์•ฝ ๋ชฉ๋ก:\n{med_summary}\n\n์›๋ฌธ:\n{raw_text}\n"
 
 
 
 
 
 
 
 
 
 
 
233
  )
234
 
235
  messages = [
@@ -246,9 +257,10 @@ def generate_explanations(raw_text: str, medications: List[Dict[str, Any]]) -> D
246
  with torch.no_grad():
247
  output_ids = TEXT_MODEL.generate(
248
  input_ids,
249
- max_new_tokens=512,
250
- temperature=0.3,
251
- top_p=0.8,
 
252
  )
253
 
254
  generated_ids = output_ids[0][input_ids.shape[1]:]
@@ -284,13 +296,17 @@ def generate_explanations(raw_text: str, medications: List[Dict[str, Any]]) -> D
284
  @spaces.GPU(enable_queue=True)
285
  def generate_cartoon_image(prompt: str) -> Image.Image:
286
  if not prompt:
287
- prompt = "single panel wholesome cartoon, pharmacist gently explaining medicine to family, warm pastel colors"
288
- negative_prompt = "text, watermark, logo, blurry"
 
 
289
  image = IMAGE_PIPELINE(
290
- prompt=prompt,
291
- negative_prompt=negative_prompt,
292
- num_inference_steps=30,
293
- guidance_scale=7.5,
 
 
294
  ).images[0]
295
  return image
296
 
 
15
  )
16
 
17
  VL_MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
18
+ TEXT_MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"
19
+ IMAGE_MODEL_ID = "black-forest-labs/FLUX.1-schnell"
20
 
21
 
22
  def _load_vl_model():
 
224
  med_summary_lines.append(summary.strip())
225
  med_summary = "\n".join(med_summary_lines)
226
 
227
+ system_prompt = "๋‹น์‹ ์€ ํ™˜์ž ๊ต์œก ์ „๋ฌธ ์•ฝ์‚ฌ์ž…๋‹ˆ๋‹ค. ์–ด๋ฅด์‹ ๊ณผ ์–ด๋ฆฐ์ด์—๊ฒŒ ์•ฝ์„ ์‰ฝ๊ณ  ์นœ์ ˆํ•˜๊ฒŒ ์„ค๋ช…ํ•˜๋ฉฐ, ๋ณต์šฉ ๋ฐฉ๋ฒ•๊ณผ ์ฃผ์˜์‚ฌํ•ญ์„ ๋ช…ํ™•ํžˆ ์ „๋‹ฌํ•ฉ๋‹ˆ๋‹ค."
228
  user_prompt = (
229
+ "๋‹ค์Œ ์•ฝ ์ •๋ณด๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ์–ด๋ฅด์‹ ๊ณผ ์–ด๋ฆฐ์ด๋ฅผ ์œ„ํ•œ ๋ณต์•ฝ ์•ˆ๋‚ด๋ฅผ ์ž‘์„ฑํ•˜์„ธ์š”.\n\n"
230
+ f"์•ฝ ๋ชฉ๋ก:\n{med_summary}\n\n์›๋ฌธ:\n{raw_text}\n\n"
231
+ "JSON ํ˜•์‹์œผ๋กœ ๋‹ต๋ณ€ํ•˜์„ธ์š”:\n"
232
+ "{\n"
233
+ ' "elderly": {\n'
234
+ ' "narrative": "์–ด๋ฅด์‹ ๊ป˜ ๋“œ๋ฆฌ๋Š” ์„ค๋ช… (์กด๋Œ“๋ง, ๊ตฌ์ฒด์  ๋ณต์šฉ ์‹œ๊ฐ„๊ณผ ๋ฐฉ๋ฒ•, ์ฃผ์˜์‚ฌํ•ญ ํฌํ•จ, 3-5๋ฌธ์žฅ)",\n'
235
+ ' "image_prompt": "detailed cartoon illustration showing elderly person taking medicine with family support, warm pastel colors, professional medical setting, clear and caring atmosphere"\n'
236
+ " },\n"
237
+ ' "child": {\n'
238
+ ' "narrative": "์–ด๋ฆฐ์ด๋ฅผ ์œ„ํ•œ ์„ค๋ช… (์‰ฌ์šด ๋ง, ์žฌ๋ฏธ์žˆ๊ฒŒ, ์™œ ๋จน์–ด์•ผ ํ•˜๋Š”์ง€ ์„ค๋ช…, 3-5๋ฌธ์žฅ)",\n'
239
+ ' "image_prompt": "cheerful illustrated cartoon of child taking medicine with parent helping, colorful and friendly, encouraging atmosphere, high quality digital art"\n'
240
+ " }\n"
241
+ "}\n\n"
242
+ "narrative๋Š” ๋ฐ˜๋“œ์‹œ ํ•œ๊ตญ์–ด๋กœ, image_prompt๋Š” ๋ฐ˜๋“œ์‹œ ์˜์–ด๋กœ ์ž‘์„ฑํ•˜์„ธ์š”. "
243
+ "image_prompt๋Š” ๊ตฌ์ฒด์ ์ด๊ณ  ์ƒ์„ธํ•˜๊ฒŒ ์žฅ๋ฉด์„ ๋ฌ˜์‚ฌํ•˜์„ธ์š”."
244
  )
245
 
246
  messages = [
 
257
  with torch.no_grad():
258
  output_ids = TEXT_MODEL.generate(
259
  input_ids,
260
+ max_new_tokens=768,
261
+ temperature=0.7,
262
+ top_p=0.9,
263
+ do_sample=True,
264
  )
265
 
266
  generated_ids = output_ids[0][input_ids.shape[1]:]
 
296
  @spaces.GPU(enable_queue=True)
297
  def generate_cartoon_image(prompt: str) -> Image.Image:
298
  if not prompt:
299
+ prompt = "wholesome illustrated cartoon scene, friendly pharmacist explaining medicine to elderly and children, warm soft pastel colors, professional medical setting, gentle and caring atmosphere, high quality digital illustration"
300
+
301
+ enhanced_prompt = f"high quality illustration, {prompt}, soft lighting, detailed, professional artwork, clean composition"
302
+
303
  image = IMAGE_PIPELINE(
304
+ prompt=enhanced_prompt,
305
+ num_inference_steps=4,
306
+ guidance_scale=0.0,
307
+ height=768,
308
+ width=1024,
309
+ max_sequence_length=256,
310
  ).images[0]
311
  return image
312
 
requirements.txt CHANGED
@@ -1,10 +1,11 @@
1
- transformers
2
- torch
3
- accelerate
4
  einops
5
- diffusers
6
  safetensors
7
- gradio
8
  Pillow
9
  sentencepiece
10
- torchvision
 
 
1
+ transformers>=4.46.0
2
+ torch>=2.1.0
3
+ accelerate>=0.25.0
4
  einops
5
+ diffusers>=0.31.0
6
  safetensors
7
+ gradio>=4.0.0
8
  Pillow
9
  sentencepiece
10
+ torchvision
11
+ qwen-vl-utils