Spaces: Running on Zero
Update app.py
app.py CHANGED

@@ -299,7 +299,7 @@ def normalize_task_history_item(item):
 def _launch_demo(args):
     """Launch the Gradio demo interface"""
 
-    @spaces.GPU(duration=
+    @spaces.GPU(duration=360)
     def call_local_model(messages, system_prompt, temperature, top_p, max_tokens,
                          do_pan_and_scan, pan_scan_max_crops, pan_scan_min_ratio):
         """Call the local model with streaming response - loads model lazily"""
@@ -351,7 +351,7 @@ def _launch_demo(args):
             if "</answer>" in display_text:
                 display_text = display_text.replace("</answer>", "")
             yield display_text, generated_text
-
+    @spaces.GPU(duration=360)
     def predict(_chatbot, task_history, system_prompt, temperature, top_p, max_tokens,
                 do_pan_and_scan, pan_scan_max_crops, pan_scan_min_ratio):
         if not _chatbot or not task_history:
@@ -414,7 +414,7 @@ def _launch_demo(args):
             _chatbot[-1] = (_parse_text(chat_query), error_msg)
             task_history[-1]['response'] = error_msg
             yield _chatbot
-
+    @spaces.GPU(duration=360)
     def regenerate(_chatbot, task_history, system_prompt, temperature, top_p, max_tokens,
                    do_pan_and_scan, pan_scan_max_crops, pan_scan_min_ratio):
         if not task_history or not _chatbot:
@@ -536,7 +536,7 @@ def _launch_demo(args):
                 info="Cumulative probability for token selection"
             )
             max_tokens = gr.Slider(
-                minimum=256, maximum=
+                minimum=256, maximum=32768, value=8192, step=256,
                 label="Max Tokens",
                 info="Maximum number of tokens to generate"
            )
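
For context: this Space runs on ZeroGPU ("Running on Zero"), where a GPU is attached only while a function decorated with @spaces.GPU is executing. The commit sets duration=360 on call_local_model and newly decorates predict and regenerate, so every callback path that drives generation runs inside a GPU window of up to 360 seconds per call, enough for long streaming generations. Below is a minimal sketch of the pattern, not this Space's actual code; the gpt2 checkpoint and the generate() helper are placeholders.

# Minimal sketch of the ZeroGPU pattern applied in the commit above.
# gpt2 and generate() are placeholders standing in for the Space's real model.
import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2", torch_dtype=torch.float16)
model.to("cuda")  # on ZeroGPU this move is intercepted; the device attaches lazily

@spaces.GPU(duration=360)  # request a GPU for up to 360 s per call
def generate(prompt: str) -> str:
    # CUDA is only guaranteed to be usable inside a @spaces.GPU-decorated call
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    out = model.generate(**inputs, max_new_tokens=64)
    return tokenizer.decode(out[0], skip_special_tokens=True)

demo = gr.Interface(fn=generate, inputs="text", outputs="text")
demo.launch()

Decorating all three entry points (the model call plus the predict and regenerate callbacks) ensures that every path capable of triggering generation executes inside its own GPU allocation.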
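
The last hunk completes the Max Tokens slider, giving it a 256–32768 range with a default of 8192 in steps of 256. A standalone sketch of how such a slider wires into a Gradio Blocks app follows; the echo callback is a stand-in for the Space's real generation function.

# Standalone sketch of the completed slider from the final hunk.
# echo() is a placeholder for the Space's actual generation callback.
import gradio as gr

def echo(prompt: str, max_tokens: int) -> str:
    return f"(would generate up to {int(max_tokens)} tokens for: {prompt})"

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    max_tokens = gr.Slider(
        minimum=256, maximum=32768, value=8192, step=256,
        label="Max Tokens",
        info="Maximum number of tokens to generate",
    )
    out = gr.Textbox(label="Output")
    prompt.submit(echo, inputs=[prompt, max_tokens], outputs=out)

demo.launch()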