ColdSlim committed on
Commit
25a237f
·
verified ·
1 Parent(s): fc0a615

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -46
app.py CHANGED
@@ -1,15 +1,9 @@
1
  # app.py
2
  # Dermatology-AI-Assistant — Hugging Face Space (ZeroGPU-ready)
3
- # - Logging is configured before use
4
- # - No runtime pip installs (use requirements.txt)
5
- # - ZeroGPU acquired only during inference via @spaces.GPU
6
- # - Uses qwen-vl-utils.process_vision_info (fixes missing attribute error)
7
- # - SSR disabled in Gradio launch to avoid Node 20 requirement in container
8
 
9
  import os
10
- import sys
11
  import logging
12
- from typing import Optional, Tuple
13
 
14
  import gradio as gr
15
  import spaces
@@ -18,21 +12,17 @@ from PIL import Image
18
  from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
19
  from qwen_vl_utils import process_vision_info
20
 
21
-
22
  # ---------------------------
23
  # Logging
24
  # ---------------------------
25
  logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(name)s:%(message)s")
26
  logger = logging.getLogger(__name__)
27
 
28
-
29
  # ---------------------------
30
  # Config
31
  # ---------------------------
32
- # Fine-tuned (or partially fine-tuned) Qwen VL checkpoint
33
  MODEL_ID = os.environ.get("MODEL_ID", "ColdSlim/Dermatology-Qwen2.5-VL-3B")
34
 
35
- # Generation params (tweak as needed)
36
  GEN_KW = dict(
37
  max_new_tokens=512,
38
  do_sample=True,
@@ -40,15 +30,12 @@ GEN_KW = dict(
40
  top_p=0.9,
41
  )
42
 
43
- # ZeroGPU time (seconds). Increase if your model is slow to generate.
44
  ZGPU_DURATION = int(os.environ.get("ZGPU_DURATION", "180"))
45
 
46
- # Preload only the processor on CPU; load the model inside GPU-decorated call.
47
  logger.info(f"Loading processor from: {MODEL_ID}")
48
  processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
49
  logger.info("Processor loaded.")
50
 
51
-
52
  # ---------------------------
53
  # Helpers
54
  # ---------------------------
@@ -67,13 +54,9 @@ def build_inputs(image: Image.Image, question: str):
67
  }
68
  ]
69
 
70
- # Chat template
71
  text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
72
-
73
- # Vision inputs
74
  image_inputs, video_inputs = process_vision_info(messages)
75
 
76
- # Pack tensors (CPU for now; we move to CUDA later)
77
  inputs = processor(
78
  text=[text],
79
  images=image_inputs,
@@ -83,9 +66,7 @@ def build_inputs(image: Image.Image, question: str):
83
  )
84
  return inputs
85
 
86
-
87
  def format_derm_disclaimer(ans: str) -> str:
88
- """Append a short medical disclaimer (non-blocking)."""
89
  tail = (
90
  "\n\n---\n"
91
  "_Disclaimer: This AI is not a medical device. The output is informational and may be inaccurate. "
@@ -93,37 +74,32 @@ def format_derm_disclaimer(ans: str) -> str:
93
  )
94
  return ans + tail
95
 
96
-
97
  # ---------------------------
98
  # Inference (ZeroGPU)
99
  # ---------------------------
100
  @spaces.GPU(duration=ZGPU_DURATION)
101
  def analyze_skin_condition(image: Optional[Image.Image], question: str) -> str:
102
  """
103
- Main inference function. Runs inside a ZeroGPU reservation window.
104
  Loads model on GPU, generates, frees VRAM.
105
  """
106
  if image is None:
107
  return "❌ Please upload an image first."
108
-
109
  try:
110
  logger.info(f"Loading model on GPU: {MODEL_ID}")
111
- # On ZeroGPU, load inside the GPU-decorated function
112
  model = Qwen2VLForConditionalGeneration.from_pretrained(
113
  MODEL_ID,
114
- torch_dtype=torch.float16, # fp16 is broadly compatible on ZeroGPU
115
- device_map="cuda", # place modules on available CUDA
116
  trust_remote_code=True,
117
  low_cpu_mem_usage=True,
118
- ignore_mismatched_sizes=True, # your logs indicated shape diffs; keep this to avoid crash
119
  )
120
  logger.info("Model loaded successfully!")
121
 
122
- # Build and move inputs to CUDA
123
  inputs = build_inputs(image, question)
124
  inputs = {k: v.to("cuda") if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
125
 
126
- # Generate
127
  with torch.no_grad():
128
  out_ids = model.generate(
129
  **inputs,
@@ -131,15 +107,12 @@ def analyze_skin_condition(image: Optional[Image.Image], question: str) -> str:
131
  pad_token_id=processor.tokenizer.eos_token_id,
132
  )
133
 
134
- # Strip prompt tokens before decoding for clean answer
135
- prompt_len_trimmed = [
136
- out[len(inp):] for inp, out in zip(inputs["input_ids"], out_ids)
137
- ]
138
  text = processor.batch_decode(
139
- prompt_len_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
140
  )[0]
141
 
142
- # Free VRAM early
143
  del model
144
  torch.cuda.empty_cache()
145
 
@@ -149,7 +122,6 @@ def analyze_skin_condition(image: Optional[Image.Image], question: str) -> str:
149
  logger.exception("Error during inference")
150
  return f"❌ Error analyzing image: {e}"
151
 
152
-
153
  # ---------------------------
154
  # UI
155
  # ---------------------------
@@ -174,20 +146,15 @@ def create_interface() -> gr.Blocks:
174
 
175
  output_box = gr.Textbox(label="Response", lines=16)
176
 
177
- # Wire events
178
  submit_btn.click(fn=analyze_skin_condition, inputs=[image_input, question_input], outputs=output_box, queue=True)
179
  clear_btn.click(fn=lambda: (None, ""), inputs=None, outputs=[image_input, question_input])
180
 
181
- # Queue for concurrency control (ZeroGPU friendly)
182
- demo.queue(concurrency_count=1, status_update_rate=1)
183
-
184
- gr.Markdown(
185
- "Tips: Ensure good lighting and focus. Avoid uploading personally identifying information."
186
- )
187
 
 
188
  return demo
189
 
190
-
191
  def main():
192
  demo = create_interface()
193
  demo.launch(
@@ -197,9 +164,8 @@ def main():
197
  show_error=True,
198
  inbrowser=False,
199
  quiet=False,
200
- ssr_mode=False, # disable SSR to avoid Node 20 requirement in Spaces container
201
  )
202
 
203
-
204
  if __name__ == "__main__":
205
  main()
 
1
  # app.py
2
  # Dermatology-AI-Assistant — Hugging Face Space (ZeroGPU-ready)
 
 
 
 
 
3
 
4
  import os
 
5
  import logging
6
+ from typing import Optional
7
 
8
  import gradio as gr
9
  import spaces
 
12
  from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
13
  from qwen_vl_utils import process_vision_info
14
 
 
15
  # ---------------------------
16
  # Logging
17
  # ---------------------------
18
  logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(name)s:%(message)s")
19
  logger = logging.getLogger(__name__)
20
 
 
21
  # ---------------------------
22
  # Config
23
  # ---------------------------
 
24
  MODEL_ID = os.environ.get("MODEL_ID", "ColdSlim/Dermatology-Qwen2.5-VL-3B")
25
 
 
26
  GEN_KW = dict(
27
  max_new_tokens=512,
28
  do_sample=True,
 
30
  top_p=0.9,
31
  )
32
 
 
33
  ZGPU_DURATION = int(os.environ.get("ZGPU_DURATION", "180"))
34
 
 
35
  logger.info(f"Loading processor from: {MODEL_ID}")
36
  processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
37
  logger.info("Processor loaded.")
38
 
 
39
  # ---------------------------
40
  # Helpers
41
  # ---------------------------
 
54
  }
55
  ]
56
 
 
57
  text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
 
58
  image_inputs, video_inputs = process_vision_info(messages)
59
 
 
60
  inputs = processor(
61
  text=[text],
62
  images=image_inputs,
 
66
  )
67
  return inputs
68
 
 
69
  def format_derm_disclaimer(ans: str) -> str:
 
70
  tail = (
71
  "\n\n---\n"
72
  "_Disclaimer: This AI is not a medical device. The output is informational and may be inaccurate. "
 
74
  )
75
  return ans + tail
76
 
 
77
  # ---------------------------
78
  # Inference (ZeroGPU)
79
  # ---------------------------
80
  @spaces.GPU(duration=ZGPU_DURATION)
81
  def analyze_skin_condition(image: Optional[Image.Image], question: str) -> str:
82
  """
83
+ Runs inside a ZeroGPU reservation window.
84
  Loads model on GPU, generates, frees VRAM.
85
  """
86
  if image is None:
87
  return "❌ Please upload an image first."
 
88
  try:
89
  logger.info(f"Loading model on GPU: {MODEL_ID}")
 
90
  model = Qwen2VLForConditionalGeneration.from_pretrained(
91
  MODEL_ID,
92
+ torch_dtype=torch.float16,
93
+ device_map="cuda",
94
  trust_remote_code=True,
95
  low_cpu_mem_usage=True,
96
+ ignore_mismatched_sizes=True, # keep until your weights match exactly
97
  )
98
  logger.info("Model loaded successfully!")
99
 
 
100
  inputs = build_inputs(image, question)
101
  inputs = {k: v.to("cuda") if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
102
 
 
103
  with torch.no_grad():
104
  out_ids = model.generate(
105
  **inputs,
 
107
  pad_token_id=processor.tokenizer.eos_token_id,
108
  )
109
 
110
+ # strip prompt tokens before decoding
111
+ trimmed = [o[len(i):] for i, o in zip(inputs["input_ids"], out_ids)]
 
 
112
  text = processor.batch_decode(
113
+ trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
114
  )[0]
115
 
 
116
  del model
117
  torch.cuda.empty_cache()
118
 
 
122
  logger.exception("Error during inference")
123
  return f"❌ Error analyzing image: {e}"
124
 
 
125
  # ---------------------------
126
  # UI
127
  # ---------------------------
 
146
 
147
  output_box = gr.Textbox(label="Response", lines=16)
148
 
 
149
  submit_btn.click(fn=analyze_skin_condition, inputs=[image_input, question_input], outputs=output_box, queue=True)
150
  clear_btn.click(fn=lambda: (None, ""), inputs=None, outputs=[image_input, question_input])
151
 
152
+ # Gradio 4.44.1: call queue() with no keyword args
153
+ demo.queue()
 
 
 
 
154
 
155
+ gr.Markdown("Tips: Ensure good lighting and focus. Avoid uploading personally identifying information.")
156
  return demo
157
 
 
158
  def main():
159
  demo = create_interface()
160
  demo.launch(
 
164
  show_error=True,
165
  inbrowser=False,
166
  quiet=False,
167
+ ssr_mode=False, # disable SSR to avoid Node 20 requirement in container
168
  )
169
 
 
170
  if __name__ == "__main__":
171
  main()