Spaces:
Runtime error
Runtime error
ImageStudio Maintainer Claude Opus 4.8 (1M context) committed on
Commit ·
3a2ca6a
1
Parent(s): 6efa78a
feat: add Reasoning On/Off toggle to Prompt Assistant (Qwen enable_thinking)
Browse files
app.py
CHANGED
|
@@ -436,12 +436,18 @@ def _generate_image_inner(
|
|
| 436 |
# Prompt Assistant (Qwen3.5-4B) — single-turn chat, optional image
|
| 437 |
# =============================================================================
|
| 438 |
@spaces.GPU
|
| 439 |
-
def vlm_chat(message, image, max_new_tokens, progress=gr.Progress(track_tqdm=True)):
|
| 440 |
-
"""Answer a single user message, optionally grounded on an uploaded image.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
message = (message or "").strip()
|
| 442 |
if not message and image is None:
|
| 443 |
return "Please enter a question (and optionally attach an image)."
|
| 444 |
|
|
|
|
| 445 |
_gpu_start = time.time()
|
| 446 |
try:
|
| 447 |
content = []
|
|
@@ -456,6 +462,7 @@ def vlm_chat(message, image, max_new_tokens, progress=gr.Progress(track_tqdm=Tru
|
|
| 456 |
add_generation_prompt=True,
|
| 457 |
return_dict=True,
|
| 458 |
return_tensors="pt",
|
|
|
|
| 459 |
).to(vlm_model.device)
|
| 460 |
|
| 461 |
with torch.inference_mode():
|
|
@@ -466,12 +473,16 @@ def vlm_chat(message, image, max_new_tokens, progress=gr.Progress(track_tqdm=Tru
|
|
| 466 |
)
|
| 467 |
# Drop the prompt tokens so only the freshly generated answer is decoded.
|
| 468 |
trimmed = generated[0][inputs["input_ids"].shape[1]:]
|
| 469 |
-
text = vlm_processor.decode(trimmed, skip_special_tokens=True)
|
| 470 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 471 |
finally:
|
| 472 |
print(
|
| 473 |
f"[ImageStudio] Assistant GPU time: {time.time() - _gpu_start:.2f}s "
|
| 474 |
-
f"(has_image={image is not None}, max_new_tokens={int(max_new_tokens)})",
|
| 475 |
flush=True,
|
| 476 |
)
|
| 477 |
|
|
@@ -733,6 +744,12 @@ with gr.Blocks(fill_height=True) as demo:
|
|
| 733 |
lines=4,
|
| 734 |
max_lines=12,
|
| 735 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 736 |
with gr.Accordion("βοΈ Settings", open=False):
|
| 737 |
vlm_max_tokens = gr.Slider(
|
| 738 |
minimum=64, maximum=2048, value=512, step=64,
|
|
@@ -786,7 +803,7 @@ with gr.Blocks(fill_height=True) as demo:
|
|
| 786 |
)
|
| 787 |
|
| 788 |
# Prompt Assistant (Qwen3.5-4B) — single-turn, optional image
|
| 789 |
-
vlm_inputs = [vlm_prompt, vlm_image, vlm_max_tokens]
|
| 790 |
vlm_btn.click(
|
| 791 |
fn=vlm_chat, inputs=vlm_inputs, outputs=[vlm_output],
|
| 792 |
api_name="prompt_assistant",
|
|
|
|
| 436 |
# Prompt Assistant (Qwen3.5-4B) — single-turn chat, optional image
|
| 437 |
# =============================================================================
|
| 438 |
@spaces.GPU
|
| 439 |
+
def vlm_chat(message, image, reasoning, max_new_tokens, progress=gr.Progress(track_tqdm=True)):
|
| 440 |
+
"""Answer a single user message, optionally grounded on an uploaded image.
|
| 441 |
+
|
| 442 |
+
``reasoning`` ("On"/"Off") drives Qwen's ``enable_thinking`` switch: Off skips
|
| 443 |
+
the <think> trace for a direct answer (best for prompt rewriting); On lets the
|
| 444 |
+
model reason step-by-step first (slower, needs more max_new_tokens).
|
| 445 |
+
"""
|
| 446 |
message = (message or "").strip()
|
| 447 |
if not message and image is None:
|
| 448 |
return "Please enter a question (and optionally attach an image)."
|
| 449 |
|
| 450 |
+
enable_thinking = (reasoning == "On")
|
| 451 |
_gpu_start = time.time()
|
| 452 |
try:
|
| 453 |
content = []
|
|
|
|
| 462 |
add_generation_prompt=True,
|
| 463 |
return_dict=True,
|
| 464 |
return_tensors="pt",
|
| 465 |
+
enable_thinking=enable_thinking,
|
| 466 |
).to(vlm_model.device)
|
| 467 |
|
| 468 |
with torch.inference_mode():
|
|
|
|
| 473 |
)
|
| 474 |
# Drop the prompt tokens so only the freshly generated answer is decoded.
|
| 475 |
trimmed = generated[0][inputs["input_ids"].shape[1]:]
|
| 476 |
+
text = vlm_processor.decode(trimmed, skip_special_tokens=True).strip()
|
| 477 |
+
# With reasoning off, drop any stray <think>…</think> block so the answer
|
| 478 |
+
# stays clean; with it on, keep the trace so the user can see it.
|
| 479 |
+
if not enable_thinking and "</think>" in text:
|
| 480 |
+
text = text.split("</think>")[-1].strip()
|
| 481 |
+
return text
|
| 482 |
finally:
|
| 483 |
print(
|
| 484 |
f"[ImageStudio] Assistant GPU time: {time.time() - _gpu_start:.2f}s "
|
| 485 |
+
f"(has_image={image is not None}, reasoning={reasoning}, max_new_tokens={int(max_new_tokens)})",
|
| 486 |
flush=True,
|
| 487 |
)
|
| 488 |
|
|
|
|
| 744 |
lines=4,
|
| 745 |
max_lines=12,
|
| 746 |
)
|
| 747 |
+
vlm_reasoning = gr.Radio(
|
| 748 |
+
choices=["Off", "On"],
|
| 749 |
+
value="Off",
|
| 750 |
+
label="π§ Reasoning",
|
| 751 |
+
info="Off: direct answer, best for prompts β’ On: think step-by-step first (slower, raise max tokens)",
|
| 752 |
+
)
|
| 753 |
with gr.Accordion("βοΈ Settings", open=False):
|
| 754 |
vlm_max_tokens = gr.Slider(
|
| 755 |
minimum=64, maximum=2048, value=512, step=64,
|
|
|
|
| 803 |
)
|
| 804 |
|
| 805 |
# Prompt Assistant (Qwen3.5-4B) — single-turn, optional image
|
| 806 |
+
vlm_inputs = [vlm_prompt, vlm_image, vlm_reasoning, vlm_max_tokens]
|
| 807 |
vlm_btn.click(
|
| 808 |
fn=vlm_chat, inputs=vlm_inputs, outputs=[vlm_output],
|
| 809 |
api_name="prompt_assistant",
|