Spaces: Running on Zero
Update app.py
app.py CHANGED

@@ -299,7 +299,7 @@ def normalize_task_history_item(item):
 def _launch_demo(args):
     """Launch the Gradio demo interface"""
 
-    @spaces.GPU(duration=
+    @spaces.GPU(duration=360)
     def call_local_model(messages, system_prompt, temperature, top_p, max_tokens,
                          do_pan_and_scan, pan_scan_max_crops, pan_scan_min_ratio):
         """Call the local model with streaming response - loads model lazily"""
@@ -351,7 +351,7 @@ def _launch_demo(args):
             if "</answer>" in display_text:
                 display_text = display_text.replace("</answer>", "")
             yield display_text, generated_text
-
+    @spaces.GPU(duration=360)
     def predict(_chatbot, task_history, system_prompt, temperature, top_p, max_tokens,
                 do_pan_and_scan, pan_scan_max_crops, pan_scan_min_ratio):
         if not _chatbot or not task_history:
@@ -414,7 +414,7 @@ def _launch_demo(args):
             _chatbot[-1] = (_parse_text(chat_query), error_msg)
             task_history[-1]['response'] = error_msg
             yield _chatbot
-
+    @spaces.GPU(duration=360)
     def regenerate(_chatbot, task_history, system_prompt, temperature, top_p, max_tokens,
                    do_pan_and_scan, pan_scan_max_crops, pan_scan_min_ratio):
         if not task_history or not _chatbot:
@@ -536,7 +536,7 @@ def _launch_demo(args):
                 info="Cumulative probability for token selection"
             )
             max_tokens = gr.Slider(
-                minimum=256, maximum=
+                minimum=256, maximum=32768, value=8192, step=256,
                 label="Max Tokens",
                 info="Maximum number of tokens to generate"
            )
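
For context: this Space runs on ZeroGPU ("Running on Zero"), where a GPU is attached only while a function decorated with @spaces.GPU is executing. The commit sets duration=360 on call_local_model and newly decorates predict and regenerate, so every callback path that drives generation runs inside a GPU window of up to 360 seconds per call, enough for long streaming generations. Below is a minimal sketch of the pattern, not this Space's actual code; the gpt2 checkpoint and the generate() helper are placeholders.

# Minimal sketch of the ZeroGPU pattern applied in the commit above.
# gpt2 and generate() are placeholders standing in for the Space's real model.
import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2", torch_dtype=torch.float16)
model.to("cuda")  # on ZeroGPU this move is intercepted; the device attaches lazily

@spaces.GPU(duration=360)  # request a GPU for up to 360 s per call
def generate(prompt: str) -> str:
    # CUDA is only guaranteed to be usable inside a @spaces.GPU-decorated call
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    out = model.generate(**inputs, max_new_tokens=64)
    return tokenizer.decode(out[0], skip_special_tokens=True)

demo = gr.Interface(fn=generate, inputs="text", outputs="text")
demo.launch()

Decorating all three entry points (the model call plus the predict and regenerate callbacks) ensures that every path capable of triggering generation executes inside its own GPU allocation.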
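
The last hunk completes the Max Tokens slider, giving it a 256–32768 range with a default of 8192 in steps of 256. A standalone sketch of how such a slider wires into a Gradio Blocks app follows; the echo callback is a stand-in for the Space's real generation function.

# Standalone sketch of the completed slider from the final hunk.
# echo() is a placeholder for the Space's actual generation callback.
import gradio as gr

def echo(prompt: str, max_tokens: int) -> str:
    return f"(would generate up to {int(max_tokens)} tokens for: {prompt})"

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    max_tokens = gr.Slider(
        minimum=256, maximum=32768, value=8192, step=256,
        label="Max Tokens",
        info="Maximum number of tokens to generate",
    )
    out = gr.Textbox(label="Output")
    prompt.submit(echo, inputs=[prompt, max_tokens], outputs=out)

demo.launch()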