ColdSlim committed on
Commit
a79b20b
·
verified ·
1 Parent(s): 25a237f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -34
app.py CHANGED
@@ -1,5 +1,8 @@
1
  # app.py
2
  # Dermatology-AI-Assistant — Hugging Face Space (ZeroGPU-ready)
 
 
 
3
 
4
  import os
5
  import logging
@@ -22,29 +25,33 @@ logger = logging.getLogger(__name__)
22
  # Config
23
  # ---------------------------
24
  MODEL_ID = os.environ.get("MODEL_ID", "ColdSlim/Dermatology-Qwen2.5-VL-3B")
25
-
26
  GEN_KW = dict(
27
  max_new_tokens=512,
28
  do_sample=True,
29
  temperature=0.7,
30
  top_p=0.9,
31
  )
32
-
33
  ZGPU_DURATION = int(os.environ.get("ZGPU_DURATION", "180"))
34
 
35
  logger.info(f"Loading processor from: {MODEL_ID}")
36
  processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
 
 
 
 
 
 
 
 
 
 
37
  logger.info("Processor loaded.")
38
 
39
  # ---------------------------
40
  # Helpers
41
  # ---------------------------
42
- def build_inputs(image: Image.Image, question: str):
43
- """
44
- Build Qwen-style multimodal chat inputs using qwen-vl-utils.
45
- Returns a dict of tensors ready for model.generate.
46
- """
47
- messages = [
48
  {
49
  "role": "user",
50
  "content": [
@@ -54,16 +61,45 @@ def build_inputs(image: Image.Image, question: str):
54
  }
55
  ]
56
 
 
 
 
 
 
 
 
 
 
57
  text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
 
58
  image_inputs, video_inputs = process_vision_info(messages)
59
 
60
- inputs = processor(
61
- text=[text],
62
- images=image_inputs,
63
- videos=video_inputs,
64
- padding=True,
65
- return_tensors="pt",
66
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  return inputs
68
 
69
  def format_derm_disclaimer(ans: str) -> str:
@@ -82,9 +118,11 @@ def analyze_skin_condition(image: Optional[Image.Image], question: str) -> str:
82
  """
83
  Runs inside a ZeroGPU reservation window.
84
  Loads model on GPU, generates, frees VRAM.
 
85
  """
86
  if image is None:
87
  return "❌ Please upload an image first."
 
88
  try:
89
  logger.info(f"Loading model on GPU: {MODEL_ID}")
90
  model = Qwen2VLForConditionalGeneration.from_pretrained(
@@ -93,26 +131,40 @@ def analyze_skin_condition(image: Optional[Image.Image], question: str) -> str:
93
  device_map="cuda",
94
  trust_remote_code=True,
95
  low_cpu_mem_usage=True,
96
- ignore_mismatched_sizes=True, # keep until your weights match exactly
97
  )
98
  logger.info("Model loaded successfully!")
99
 
100
- inputs = build_inputs(image, question)
101
- inputs = {k: v.to("cuda") if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
102
-
103
- with torch.no_grad():
104
- out_ids = model.generate(
105
- **inputs,
106
- **GEN_KW,
107
- pad_token_id=processor.tokenizer.eos_token_id,
108
- )
109
-
110
- # strip prompt tokens before decoding
111
- trimmed = [o[len(i):] for i, o in zip(inputs["input_ids"], out_ids)]
112
- text = processor.batch_decode(
113
- trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
114
- )[0]
115
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  del model
117
  torch.cuda.empty_cache()
118
 
@@ -149,7 +201,7 @@ def create_interface() -> gr.Blocks:
149
  submit_btn.click(fn=analyze_skin_condition, inputs=[image_input, question_input], outputs=output_box, queue=True)
150
  clear_btn.click(fn=lambda: (None, ""), inputs=None, outputs=[image_input, question_input])
151
 
152
- # Gradio 4.44.1: call queue() with no keyword args
153
  demo.queue()
154
 
155
  gr.Markdown("Tips: Ensure good lighting and focus. Avoid uploading personally identifying information.")
@@ -164,7 +216,7 @@ def main():
164
  show_error=True,
165
  inbrowser=False,
166
  quiet=False,
167
- ssr_mode=False, # disable SSR to avoid Node 20 requirement in container
168
  )
169
 
170
  if __name__ == "__main__":
 
1
  # app.py
2
  # Dermatology-AI-Assistant — Hugging Face Space (ZeroGPU-ready)
3
+ # - Uses qwen-vl-utils for vision inputs
4
+ # - Acquires ZeroGPU only during inference
5
+ # - Handles Qwen2-VL token/feature mismatch with a safe fallback retry
6
 
7
  import os
8
  import logging
 
25
  # Config
26
  # ---------------------------
27
  MODEL_ID = os.environ.get("MODEL_ID", "ColdSlim/Dermatology-Qwen2.5-VL-3B")
 
28
  GEN_KW = dict(
29
  max_new_tokens=512,
30
  do_sample=True,
31
  temperature=0.7,
32
  top_p=0.9,
33
  )
 
34
  ZGPU_DURATION = int(os.environ.get("ZGPU_DURATION", "180"))
35
 
36
  logger.info(f"Loading processor from: {MODEL_ID}")
37
  processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
38
+
39
+ # (Optional) Tame resolution to reduce tiling variance; adjust if you like.
40
+ if hasattr(processor, "image_processor"):
41
+ try:
42
+ # Keep images within a predictable pixel band so placeholder count is stable.
43
+ processor.image_processor.max_pixels = int(os.environ.get("QWEN_MAX_PIXELS", "1500000")) # ~1.5MP
44
+ processor.image_processor.min_pixels = int(os.environ.get("QWEN_MIN_PIXELS", "262144")) # 512x512
45
+ except Exception:
46
+ pass
47
+
48
  logger.info("Processor loaded.")
49
 
50
  # ---------------------------
51
  # Helpers
52
  # ---------------------------
53
+ def _messages(image: Image.Image, question: str):
54
+ return [
 
 
 
 
55
  {
56
  "role": "user",
57
  "content": [
 
61
  }
62
  ]
63
 
64
+ def build_inputs(image: Image.Image, question: str, *, disable_splitting: bool = False):
65
+ """
66
+ Build Qwen-style multimodal chat inputs.
67
+ When disable_splitting is True, we hint the image processor to avoid tiling,
68
+ which can fix token/feature mismatches for some edge cases.
69
+ """
70
+ messages = _messages(image, question)
71
+
72
+ # Apply chat template (inserts <image> placeholders automatically)
73
  text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
74
+
75
+ # Prepare vision inputs
76
  image_inputs, video_inputs = process_vision_info(messages)
77
 
78
+ # Optionally force-disable splitting (fallback path)
79
+ if disable_splitting and hasattr(processor, "image_processor"):
80
+ ip = processor.image_processor
81
+ # Cache old setting to not mutate global defaults permanently
82
+ prev = getattr(ip, "do_image_splitting", None)
83
+ try:
84
+ if hasattr(ip, "do_image_splitting"):
85
+ ip.do_image_splitting = False
86
+ inputs = processor(
87
+ text=[text],
88
+ images=image_inputs,
89
+ videos=video_inputs,
90
+ return_tensors="pt", # <- no padding for single-sample path
91
+ )
92
+ finally:
93
+ if prev is not None:
94
+ ip.do_image_splitting = prev
95
+ else:
96
+ inputs = processor(
97
+ text=[text],
98
+ images=image_inputs,
99
+ videos=video_inputs,
100
+ return_tensors="pt", # <- no padding to avoid mask quirks
101
+ )
102
+
103
  return inputs
104
 
105
  def format_derm_disclaimer(ans: str) -> str:
 
118
  """
119
  Runs inside a ZeroGPU reservation window.
120
  Loads model on GPU, generates, frees VRAM.
121
+ Includes a fallback retry if Qwen raises a token/feature mismatch.
122
  """
123
  if image is None:
124
  return "❌ Please upload an image first."
125
+
126
  try:
127
  logger.info(f"Loading model on GPU: {MODEL_ID}")
128
  model = Qwen2VLForConditionalGeneration.from_pretrained(
 
131
  device_map="cuda",
132
  trust_remote_code=True,
133
  low_cpu_mem_usage=True,
134
+ ignore_mismatched_sizes=True, # keep until weights align perfectly
135
  )
136
  logger.info("Model loaded successfully!")
137
 
138
+ def _run_infer(disable_splitting: bool = False) -> str:
139
+ inputs = build_inputs(image, question, disable_splitting=disable_splitting)
140
+ # Move tensors to CUDA
141
+ inputs = {k: v.to("cuda") if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
142
+ with torch.no_grad():
143
+ out_ids = model.generate(
144
+ **inputs,
145
+ **GEN_KW,
146
+ pad_token_id=processor.tokenizer.eos_token_id,
147
+ )
148
+ # Strip prompt tokens before decoding
149
+ trimmed = [o[len(i):] for i, o in zip(inputs["input_ids"], out_ids)]
150
+ text = processor.batch_decode(
151
+ trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
152
+ )[0]
153
+ return text
154
+
155
+ # First attempt: normal path
156
+ try:
157
+ text = _run_infer(disable_splitting=False)
158
+ except ValueError as ve:
159
+ msg = str(ve)
160
+ # Known Qwen2-VL edge case: token/feature mismatch — retry with splitting disabled
161
+ if "Image features and image tokens do not match" in msg:
162
+ logger.warning("Token/feature mismatch detected — retrying with image splitting disabled.")
163
+ text = _run_infer(disable_splitting=True)
164
+ else:
165
+ raise
166
+
167
+ # Free VRAM
168
  del model
169
  torch.cuda.empty_cache()
170
 
 
201
  submit_btn.click(fn=analyze_skin_condition, inputs=[image_input, question_input], outputs=output_box, queue=True)
202
  clear_btn.click(fn=lambda: (None, ""), inputs=None, outputs=[image_input, question_input])
203
 
204
+ # Gradio 4.44.1: simple queue() call (no kwargs)
205
  demo.queue()
206
 
207
  gr.Markdown("Tips: Ensure good lighting and focus. Avoid uploading personally identifying information.")
 
216
  show_error=True,
217
  inbrowser=False,
218
  quiet=False,
219
+ ssr_mode=False, # avoid Node 20 requirement in container
220
  )
221
 
222
  if __name__ == "__main__":