multimodalart HF Staff committed on
Commit
bbc2e65
·
verified ·
1 Parent(s): 999f78d

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +21 -1
  2. requirements.txt +2 -2
app.py CHANGED
@@ -212,13 +212,33 @@ def qwen3_vl_predict(model, processor, messages, max_new_tokens=4096):
212
  inputs = inputs.to(model.device)
213
  print(f"[DEBUG qwen3_vl_predict] Input keys: {inputs.keys() if hasattr(inputs, 'keys') else type(inputs)}")
214
  print(f"[DEBUG qwen3_vl_predict] Input IDs shape: {inputs.input_ids.shape if hasattr(inputs, 'input_ids') else 'N/A'}")
 
 
 
 
215
  with torch.inference_mode():
216
- generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
 
 
 
 
 
 
 
 
217
  print(f"[DEBUG qwen3_vl_predict] Generated IDs shape: {generated_ids.shape}")
 
 
 
218
  generated_ids_trimmed = [
219
  out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
220
  ]
221
  print(f"[DEBUG qwen3_vl_predict] Trimmed lengths: {[len(t) for t in generated_ids_trimmed]}")
 
 
 
 
 
222
  output_text = processor.batch_decode(
223
  generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
224
  )
 
212
  inputs = inputs.to(model.device)
213
  print(f"[DEBUG qwen3_vl_predict] Input keys: {inputs.keys() if hasattr(inputs, 'keys') else type(inputs)}")
214
  print(f"[DEBUG qwen3_vl_predict] Input IDs shape: {inputs.input_ids.shape if hasattr(inputs, 'input_ids') else 'N/A'}")
215
+ # Print last 20 tokens of input to see if generation prompt is correct
216
+ input_ids_list = inputs.input_ids[0].tolist()
217
+ print(f"[DEBUG qwen3_vl_predict] Last 20 input tokens: {input_ids_list[-20:]}")
218
+ print(f"[DEBUG qwen3_vl_predict] Decoded last 20: {processor.tokenizer.decode(input_ids_list[-20:])}")
219
  with torch.inference_mode():
220
+ generated_ids = model.generate(
221
+ **inputs,
222
+ max_new_tokens=max_new_tokens,
223
+ do_sample=True,
224
+ temperature=0.7,
225
+ top_p=0.8,
226
+ top_k=20,
227
+ use_cache=True,
228
+ )
229
  print(f"[DEBUG qwen3_vl_predict] Generated IDs shape: {generated_ids.shape}")
230
+ # Check what the generated token is
231
+ gen_tokens = generated_ids[0].tolist()
232
+ print(f"[DEBUG qwen3_vl_predict] Last 5 generated tokens: {gen_tokens[-5:]}")
233
  generated_ids_trimmed = [
234
  out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
235
  ]
236
  print(f"[DEBUG qwen3_vl_predict] Trimmed lengths: {[len(t) for t in generated_ids_trimmed]}")
237
+ # Try decoding without skip_special_tokens to see what's there
238
+ raw_decode = processor.batch_decode(
239
+ generated_ids_trimmed, skip_special_tokens=False, clean_up_tokenization_spaces=False
240
+ )
241
+ print(f"[DEBUG qwen3_vl_predict] Raw decode (no skip): {raw_decode[0][:200] if raw_decode else 'empty'}")
242
  output_text = processor.batch_decode(
243
  generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
244
  )
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
- transformers==4.57.1
2
- diffusers>=0.37.0
3
  accelerate
4
  bitsandbytes
5
  sentencepiece
 
1
+ transformers
2
+ diffusers
3
  accelerate
4
  bitsandbytes
5
  sentencepiece