Spaces:
Running on Zero
Upload folder using huggingface_hub
Browse files
- app.py +21 -1
- requirements.txt +2 -2
app.py
CHANGED
|
@@ -212,13 +212,33 @@ def qwen3_vl_predict(model, processor, messages, max_new_tokens=4096):
|
|
| 212 |
inputs = inputs.to(model.device)
|
| 213 |
print(f"[DEBUG qwen3_vl_predict] Input keys: {inputs.keys() if hasattr(inputs, 'keys') else type(inputs)}")
|
| 214 |
print(f"[DEBUG qwen3_vl_predict] Input IDs shape: {inputs.input_ids.shape if hasattr(inputs, 'input_ids') else 'N/A'}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
with torch.inference_mode():
|
| 216 |
-
generated_ids = model.generate(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
print(f"[DEBUG qwen3_vl_predict] Generated IDs shape: {generated_ids.shape}")
|
|
|
|
|
|
|
|
|
|
| 218 |
generated_ids_trimmed = [
|
| 219 |
out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
|
| 220 |
]
|
| 221 |
print(f"[DEBUG qwen3_vl_predict] Trimmed lengths: {[len(t) for t in generated_ids_trimmed]}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
output_text = processor.batch_decode(
|
| 223 |
generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
|
| 224 |
)
|
|
|
|
| 212 |
inputs = inputs.to(model.device)
|
| 213 |
print(f"[DEBUG qwen3_vl_predict] Input keys: {inputs.keys() if hasattr(inputs, 'keys') else type(inputs)}")
|
| 214 |
print(f"[DEBUG qwen3_vl_predict] Input IDs shape: {inputs.input_ids.shape if hasattr(inputs, 'input_ids') else 'N/A'}")
|
| 215 |
+
# Print last 20 tokens of input to see if generation prompt is correct
|
| 216 |
+
input_ids_list = inputs.input_ids[0].tolist()
|
| 217 |
+
print(f"[DEBUG qwen3_vl_predict] Last 20 input tokens: {input_ids_list[-20:]}")
|
| 218 |
+
print(f"[DEBUG qwen3_vl_predict] Decoded last 20: {processor.tokenizer.decode(input_ids_list[-20:])}")
|
| 219 |
with torch.inference_mode():
|
| 220 |
+
generated_ids = model.generate(
|
| 221 |
+
**inputs,
|
| 222 |
+
max_new_tokens=max_new_tokens,
|
| 223 |
+
do_sample=True,
|
| 224 |
+
temperature=0.7,
|
| 225 |
+
top_p=0.8,
|
| 226 |
+
top_k=20,
|
| 227 |
+
use_cache=True,
|
| 228 |
+
)
|
| 229 |
print(f"[DEBUG qwen3_vl_predict] Generated IDs shape: {generated_ids.shape}")
|
| 230 |
+
# Check what the generated token is
|
| 231 |
+
gen_tokens = generated_ids[0].tolist()
|
| 232 |
+
print(f"[DEBUG qwen3_vl_predict] Last 5 generated tokens: {gen_tokens[-5:]}")
|
| 233 |
generated_ids_trimmed = [
|
| 234 |
out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
|
| 235 |
]
|
| 236 |
print(f"[DEBUG qwen3_vl_predict] Trimmed lengths: {[len(t) for t in generated_ids_trimmed]}")
|
| 237 |
+
# Try decoding without skip_special_tokens to see what's there
|
| 238 |
+
raw_decode = processor.batch_decode(
|
| 239 |
+
generated_ids_trimmed, skip_special_tokens=False, clean_up_tokenization_spaces=False
|
| 240 |
+
)
|
| 241 |
+
print(f"[DEBUG qwen3_vl_predict] Raw decode (no skip): {raw_decode[0][:200] if raw_decode else 'empty'}")
|
| 242 |
output_text = processor.batch_decode(
|
| 243 |
generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
|
| 244 |
)
|
requirements.txt
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
-
transformers
|
| 2 |
-
diffusers
|
| 3 |
accelerate
|
| 4 |
bitsandbytes
|
| 5 |
sentencepiece
|
|
|
|
| 1 |
+
transformers
|
| 2 |
+
diffusers
|
| 3 |
accelerate
|
| 4 |
bitsandbytes
|
| 5 |
sentencepiece
|