ImageStudio Maintainer Claude Opus 4.8 (1M context) committed on
Commit
3a2ca6a
·
1 Parent(s): 6efa78a

feat: add Reasoning On/Off toggle to Prompt Assistant (Qwen enable_thinking)

Browse files
Files changed (1) hide show
  1. app.py +23 -6
app.py CHANGED
@@ -436,12 +436,18 @@ def _generate_image_inner(
436
  # Prompt Assistant (Qwen3.5-4B) — single-turn chat, optional image
437
  # =============================================================================
438
  @spaces.GPU
439
- def vlm_chat(message, image, max_new_tokens, progress=gr.Progress(track_tqdm=True)):
440
- """Answer a single user message, optionally grounded on an uploaded image."""
 
 
 
 
 
441
  message = (message or "").strip()
442
  if not message and image is None:
443
  return "Please enter a question (and optionally attach an image)."
444
 
 
445
  _gpu_start = time.time()
446
  try:
447
  content = []
@@ -456,6 +462,7 @@ def vlm_chat(message, image, max_new_tokens, progress=gr.Progress(track_tqdm=Tru
456
  add_generation_prompt=True,
457
  return_dict=True,
458
  return_tensors="pt",
 
459
  ).to(vlm_model.device)
460
 
461
  with torch.inference_mode():
@@ -466,12 +473,16 @@ def vlm_chat(message, image, max_new_tokens, progress=gr.Progress(track_tqdm=Tru
466
  )
467
  # Drop the prompt tokens so only the freshly generated answer is decoded.
468
  trimmed = generated[0][inputs["input_ids"].shape[1]:]
469
- text = vlm_processor.decode(trimmed, skip_special_tokens=True)
470
- return text.strip()
 
 
 
 
471
  finally:
472
  print(
473
  f"[ImageStudio] Assistant GPU time: {time.time() - _gpu_start:.2f}s "
474
- f"(has_image={image is not None}, max_new_tokens={int(max_new_tokens)})",
475
  flush=True,
476
  )
477
 
@@ -733,6 +744,12 @@ with gr.Blocks(fill_height=True) as demo:
733
  lines=4,
734
  max_lines=12,
735
  )
 
 
 
 
 
 
736
  with gr.Accordion("⚙️ Settings", open=False):
737
  vlm_max_tokens = gr.Slider(
738
  minimum=64, maximum=2048, value=512, step=64,
@@ -786,7 +803,7 @@ with gr.Blocks(fill_height=True) as demo:
786
  )
787
 
788
  # Prompt Assistant (Qwen3.5-4B) — single-turn, optional image
789
- vlm_inputs = [vlm_prompt, vlm_image, vlm_max_tokens]
790
  vlm_btn.click(
791
  fn=vlm_chat, inputs=vlm_inputs, outputs=[vlm_output],
792
  api_name="prompt_assistant",
 
436
  # Prompt Assistant (Qwen3.5-4B) — single-turn chat, optional image
437
  # =============================================================================
438
  @spaces.GPU
439
+ def vlm_chat(message, image, reasoning, max_new_tokens, progress=gr.Progress(track_tqdm=True)):
440
+ """Answer a single user message, optionally grounded on an uploaded image.
441
+
442
+ ``reasoning`` ("On"/"Off") drives Qwen's ``enable_thinking`` switch: Off skips
443
+ the <think> trace for a direct answer (best for prompt rewriting); On lets the
444
+ model reason step-by-step first (slower, needs more max_new_tokens).
445
+ """
446
  message = (message or "").strip()
447
  if not message and image is None:
448
  return "Please enter a question (and optionally attach an image)."
449
 
450
+ enable_thinking = (reasoning == "On")
451
  _gpu_start = time.time()
452
  try:
453
  content = []
 
462
  add_generation_prompt=True,
463
  return_dict=True,
464
  return_tensors="pt",
465
+ enable_thinking=enable_thinking,
466
  ).to(vlm_model.device)
467
 
468
  with torch.inference_mode():
 
473
  )
474
  # Drop the prompt tokens so only the freshly generated answer is decoded.
475
  trimmed = generated[0][inputs["input_ids"].shape[1]:]
476
+ text = vlm_processor.decode(trimmed, skip_special_tokens=True).strip()
477
+ # With reasoning off, drop any stray <think>…</think> block so the answer
478
+ # stays clean; with it on, keep the trace so the user can see it.
479
+ if not enable_thinking and "</think>" in text:
480
+ text = text.split("</think>")[-1].strip()
481
+ return text
482
  finally:
483
  print(
484
  f"[ImageStudio] Assistant GPU time: {time.time() - _gpu_start:.2f}s "
485
+ f"(has_image={image is not None}, reasoning={reasoning}, max_new_tokens={int(max_new_tokens)})",
486
  flush=True,
487
  )
488
 
 
744
  lines=4,
745
  max_lines=12,
746
  )
747
+ vlm_reasoning = gr.Radio(
748
+ choices=["Off", "On"],
749
+ value="Off",
750
+ label="🧠 Reasoning",
751
+ info="Off: direct answer, best for prompts • On: think step-by-step first (slower, raise max tokens)",
752
+ )
753
  with gr.Accordion("⚙️ Settings", open=False):
754
  vlm_max_tokens = gr.Slider(
755
  minimum=64, maximum=2048, value=512, step=64,
 
803
  )
804
 
805
  # Prompt Assistant (Qwen3.5-4B) — single-turn, optional image
806
+ vlm_inputs = [vlm_prompt, vlm_image, vlm_reasoning, vlm_max_tokens]
807
  vlm_btn.click(
808
  fn=vlm_chat, inputs=vlm_inputs, outputs=[vlm_output],
809
  api_name="prompt_assistant",