ImageStudio

Runtime error

App Files Community

ImageStudio Maintainer Claude Opus 4.8 (1M context) committed 8 days ago

Commit

3a2ca6a

1 Parent(s): 6efa78a

feat: add Reasoning On/Off toggle to Prompt Assistant (Qwen enable_thinking)

Browse files

Files changed (1) hide show

app.py +23 -6

app.py CHANGED Viewed

@@ -436,12 +436,18 @@ def _generate_image_inner(
 # Prompt Assistant (Qwen3.5-4B) — single-turn chat, optional image
 # =============================================================================
 @spaces.GPU
-def vlm_chat(message, image, max_new_tokens, progress=gr.Progress(track_tqdm=True)):
-    """Answer a single user message, optionally grounded on an uploaded image."""
     message = (message or "").strip()
     if not message and image is None:
         return "Please enter a question (and optionally attach an image)."
     _gpu_start = time.time()
     try:
         content = []
@@ -456,6 +462,7 @@ def vlm_chat(message, image, max_new_tokens, progress=gr.Progress(track_tqdm=Tru
             add_generation_prompt=True,
             return_dict=True,
             return_tensors="pt",
         ).to(vlm_model.device)
         with torch.inference_mode():
@@ -466,12 +473,16 @@ def vlm_chat(message, image, max_new_tokens, progress=gr.Progress(track_tqdm=Tru
             )
         # Drop the prompt tokens so only the freshly generated answer is decoded.
         trimmed = generated[0][inputs["input_ids"].shape[1]:]
-        text = vlm_processor.decode(trimmed, skip_special_tokens=True)
-        return text.strip()
     finally:
         print(
             f"[ImageStudio] Assistant GPU time: {time.time() - _gpu_start:.2f}s "
-            f"(has_image={image is not None}, max_new_tokens={int(max_new_tokens)})",
             flush=True,
         )
@@ -733,6 +744,12 @@ with gr.Blocks(fill_height=True) as demo:
                         lines=4,
                         max_lines=12,
                     )
                     with gr.Accordion("⚙️ Settings", open=False):
                         vlm_max_tokens = gr.Slider(
                             minimum=64, maximum=2048, value=512, step=64,
@@ -786,7 +803,7 @@ with gr.Blocks(fill_height=True) as demo:
     )
     # Prompt Assistant (Qwen3.5-4B) — single-turn, optional image
-    vlm_inputs = [vlm_prompt, vlm_image, vlm_max_tokens]
     vlm_btn.click(
         fn=vlm_chat, inputs=vlm_inputs, outputs=[vlm_output],
         api_name="prompt_assistant",

 # Prompt Assistant (Qwen3.5-4B) — single-turn chat, optional image
 # =============================================================================
 @spaces.GPU
+def vlm_chat(message, image, reasoning, max_new_tokens, progress=gr.Progress(track_tqdm=True)):
+    """Answer a single user message, optionally grounded on an uploaded image.
+    ``reasoning`` ("On"/"Off") drives Qwen's ``enable_thinking`` switch: Off skips
+    the <think> trace for a direct answer (best for prompt rewriting); On lets the
+    model reason step-by-step first (slower, needs more max_new_tokens).
+    """
     message = (message or "").strip()
     if not message and image is None:
         return "Please enter a question (and optionally attach an image)."
+    enable_thinking = (reasoning == "On")
     _gpu_start = time.time()
     try:
         content = []
             add_generation_prompt=True,
             return_dict=True,
             return_tensors="pt",
+            enable_thinking=enable_thinking,
         ).to(vlm_model.device)
         with torch.inference_mode():
             )
         # Drop the prompt tokens so only the freshly generated answer is decoded.
         trimmed = generated[0][inputs["input_ids"].shape[1]:]
+        text = vlm_processor.decode(trimmed, skip_special_tokens=True).strip()
+        # With reasoning off, drop any stray <think>…</think> block so the answer
+        # stays clean; with it on, keep the trace so the user can see it.
+        if not enable_thinking and "</think>" in text:
+            text = text.split("</think>")[-1].strip()
+        return text
     finally:
         print(
             f"[ImageStudio] Assistant GPU time: {time.time() - _gpu_start:.2f}s "
+            f"(has_image={image is not None}, reasoning={reasoning}, max_new_tokens={int(max_new_tokens)})",
             flush=True,
         )
                         lines=4,
                         max_lines=12,
                     )
+                    vlm_reasoning = gr.Radio(
+                        choices=["Off", "On"],
+                        value="Off",
+                        label="🧠 Reasoning",
+                        info="Off: direct answer, best for prompts • On: think step-by-step first (slower, raise max tokens)",
+                    )
                     with gr.Accordion("⚙️ Settings", open=False):
                         vlm_max_tokens = gr.Slider(
                             minimum=64, maximum=2048, value=512, step=64,
     )
     # Prompt Assistant (Qwen3.5-4B) — single-turn, optional image
+    vlm_inputs = [vlm_prompt, vlm_image, vlm_reasoning, vlm_max_tokens]
     vlm_btn.click(
         fn=vlm_chat, inputs=vlm_inputs, outputs=[vlm_output],
         api_name="prompt_assistant",