ColdSlim committed
Commit e79ec61 · verified · 1 Parent(s): 3ad2951

Update app.py

Files changed (1): app.py +27 -22
app.py CHANGED
@@ -1,9 +1,10 @@
 # app.py
 # Dermatology-AI-Assistant — HF Spaces (ZeroGPU)
-# - Uses AutoModelForCausalLM + trust_remote_code to avoid class import issues
-# - Robust FT->Base fallback on load/mismatch
-# - qwen-vl-utils for vision preprocessing
+# - Processor is ALWAYS from the official base (FT repo lacks a processor)
+# - Tries FT model first, then falls back to base on any load/generation error
+# - Uses qwen-vl-utils for Qwen2.5-VL vision preprocessing
 # - ZeroGPU only during inference
+# - No runtime pip; pin versions in requirements.txt
 
 import os
 import logging
@@ -31,23 +32,26 @@ GEN_KW = dict(
     temperature=0.7,
     top_p=0.9,
 )
-
 ZGPU_DURATION = int(os.environ.get("ZGPU_DURATION", "180"))
 
-# Preload only FT processor on CPU (we may swap to base in fallback)
-logger.info(f"Loading processor from: {FT_MODEL_ID}")
-ft_processor = AutoProcessor.from_pretrained(FT_MODEL_ID, trust_remote_code=True)
-logger.info("Processor loaded.")
+# ---------------------------
+# Processor (ALWAYS base)
+# ---------------------------
+logger.info(f"Loading processor from base model: {BASE_MODEL_ID}")
+base_processor = AutoProcessor.from_pretrained(BASE_MODEL_ID, trust_remote_code=True)
+logger.info("Processor loaded from base.")
 
 def _tune_image_processor(proc):
     if hasattr(proc, "image_processor"):
         try:
+            # Keep image size in a predictable range to stabilize placeholders/tiles.
             proc.image_processor.max_pixels = int(os.environ.get("QWEN_MAX_PIXELS", "1500000"))  # ~1.5MP
             proc.image_processor.min_pixels = int(os.environ.get("QWEN_MIN_PIXELS", "262144"))   # 512x512
+            # If the class exposes a splitting flag, prefer default behavior (do not force-disable globally).
         except Exception:
             pass
 
-_tune_image_processor(ft_processor)
+_tune_image_processor(base_processor)
 
 # ---------------------------
 # Helpers
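
Note: a quick sanity check on what those pixel bounds imply. The sketch below is illustrative only (the real resizing lives inside Qwen's image processor, which additionally snaps side lengths to multiples of the vision patch size), but it shows the aspect-preserving clamp into [min_pixels, max_pixels] that these env vars configure:

    import math

    def approx_target_size(w: int, h: int, min_pixels: int, max_pixels: int):
        """Scale (w, h) so w*h lands in [min_pixels, max_pixels], keeping aspect ratio."""
        pixels = w * h
        if pixels > max_pixels:
            scale = math.sqrt(max_pixels / pixels)
        elif pixels < min_pixels:
            scale = math.sqrt(min_pixels / pixels)
        else:
            scale = 1.0
        return round(w * scale), round(h * scale)

    # A 12MP photo (4000x3000) shrinks to roughly 1414x1061 under the 1.5MP default.
    print(approx_target_size(4000, 3000, 262144, 1_500_000))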
@@ -67,7 +71,7 @@ def build_inputs(processor: AutoProcessor, image: Image.Image, question: str):
     messages = _messages(image, question)
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs, video_inputs = process_vision_info(messages)
-    # single-sample no padding to avoid mask quirks
+    # Single-sample: no padding to avoid mask quirks.
     inputs = processor(text=[text], images=image_inputs, videos=video_inputs, return_tensors="pt")
     return inputs
 
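Note: _messages is not shown in this diff; for apply_chat_template and process_vision_info to work as called above, it presumably returns the standard Qwen2.5-VL chat structure. A minimal sketch, assuming a PIL image and a plain-text question:

    def _messages(image, question):
        # qwen-vl-utils accepts a PIL.Image directly in the "image" slot
        return [
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": image},
                    {"type": "text", "text": question},
                ],
            }
        ]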
@@ -78,7 +82,7 @@ def _pad_token_id(processor, model):
     return getattr(getattr(model, "config", None), "eos_token_id", 0)
 
 def _generate_text(model, processor, inputs: dict) -> str:
-    # move tensors to CUDA
+    # Move tensors to CUDA
     inputs = {k: v.to("cuda") if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
     with torch.no_grad():
         out_ids = model.generate(
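
Note: hardcoding "cuda" is safe here because this path only runs inside the @spaces.GPU context. A slightly more portable variant (an alternative sketch, not what this commit does) derives the device from the model instead:

    device = next(model.parameters()).device
    inputs = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}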
@@ -98,9 +102,9 @@ def format_derm_disclaimer(ans: str) -> str:
     )
     return ans + tail
 
-def try_load_model(model_id: str, *, allow_mismatch: bool = True) -> Tuple[Optional[AutoModelForCausalLM], Optional[str]]:
+def try_load_model(model_id: str, *, allow_mismatch: bool) -> Tuple[Optional[AutoModelForCausalLM], Optional[str]]:
     """
-    Load Qwen2.5-VL via AutoModelForCausalLM with trust_remote_code (works across transformers versions).
+    Load Qwen2.5-VL via AutoModelForCausalLM with trust_remote_code.
     """
     try:
         logger.info(f"Loading model on GPU: {model_id}")
@@ -108,7 +112,7 @@ def try_load_model(model_id: str, *, allow_mismatch: bool = True) -> Tuple[Optio
         model_id,
         torch_dtype=torch.float16,
         device_map="cuda",
-        trust_remote_code=True,  # let repo code provide the class if needed
+        trust_remote_code=True,
         low_cpu_mem_usage=True,
         ignore_mismatched_sizes=allow_mismatch,
         offload_state_dict=True,
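
Note: with ignore_mismatched_sizes=True, from_pretrained re-initializes any size-mismatched weights instead of raising, so a problematic FT checkpoint can load cleanly and only fail later at generation time; that is why the caller also catches the placeholder ValueError. The (model, error) return convention keeps call sites exception-free. A sketch of the intended shape at the call site:

    model, err = try_load_model(FT_MODEL_ID, allow_mismatch=True)
    if model is None:
        logger.warning(f"FT load failed: {err}")  # fall through to the base model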
@@ -125,21 +129,25 @@ def try_load_model(model_id: str, *, allow_mismatch: bool = True) -> Tuple[Optio
 @spaces.GPU(duration=ZGPU_DURATION)
 def analyze_skin_condition(image: Optional[Image.Image], question: str) -> str:
     """
-    Try FT model first; on ANY load/generation error, fall back to base model+processor.
+    Use base processor for both models.
+    Try FT model first; on ANY load/generation error, fall back to base model.
     """
     if image is None:
         return "❌ Please upload an image first."
 
     model = None
     try:
+        # Build inputs once (with base processor) — valid for both models
+        inputs = build_inputs(base_processor, image, question)
+
         # Attempt 1: fine-tuned model
         model, ft_err = try_load_model(FT_MODEL_ID, allow_mismatch=True)
         if model is not None:
             try:
-                inputs = build_inputs(ft_processor, image, question)
-                text = _generate_text(model, ft_processor, inputs)
+                text = _generate_text(model, base_processor, inputs)
                 return format_derm_disclaimer(text)
             except ValueError as ve:
+                # Known Qwen placeholder mismatch path — just fall back.
                 if "Image features and image tokens do not match" in str(ve):
                     logger.warning("Token/feature mismatch on FT model — falling back to base.")
                 else:
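
Note: building inputs once and reusing them for both attempts is safe because _generate_text copies tensors into a new dict when moving them to CUDA; the caller's CPU-side dict is never mutated. A minimal demonstration of the pattern (needs a CUDA device):

    import torch

    cpu_inputs = {"input_ids": torch.tensor([[1, 2, 3]])}
    gpu_inputs = {k: v.to("cuda") if isinstance(v, torch.Tensor) else v
                  for k, v in cpu_inputs.items()}
    assert cpu_inputs["input_ids"].device.type == "cpu"  # original is untouched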
@@ -155,15 +163,12 @@ def analyze_skin_condition(image: Optional[Image.Image], question: str) -> str:
             model = None
             torch.cuda.empty_cache()
 
-        # Attempt 2: base model + its processor
-        base_processor = AutoProcessor.from_pretrained(BASE_MODEL_ID, trust_remote_code=True)
-        _tune_image_processor(base_processor)
+        # Attempt 2: base model
         model, base_err = try_load_model(BASE_MODEL_ID, allow_mismatch=False)
         if model is None:
             return f"❌ Error loading models.\n- FT: {ft_err}\n- BASE: {base_err}"
 
-        base_inputs = build_inputs(base_processor, image, question)
-        text = _generate_text(model, base_processor, base_inputs)
+        text = _generate_text(model, base_processor, inputs)
         return format_derm_disclaimer(text)
 
     except Exception as e:
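
For reference, a minimal smoke test of the entry point (the image path is hypothetical; the Gradio wiring is outside this diff):

    from PIL import Image

    img = Image.open("example_lesion.jpg")  # hypothetical local file
    print(analyze_skin_condition(img, "What skin condition might this be?"))
    print(analyze_skin_condition(None, "anything"))  # -> "❌ Please upload an image first."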
 