Update app.py
Browse files
app.py
CHANGED
|
@@ -36,6 +36,9 @@ VLLM_MODEL = os.environ.get('MODEL_NAME', 'google/gemma-3-27b-it')
|
|
| 36 |
HYPERBOLIC_KEY = os.environ.get('HYPERBOLIC_XYZ_KEY')
|
| 37 |
FALLBACK_MODEL = 'Qwen/Qwen2.5-VL-72B-Instruct' # Fallback model at Hyperbolic
|
| 38 |
|
|
|
|
|
|
|
|
|
|
| 39 |
# API endpoints
|
| 40 |
VLLM_ENDPOINT = "http://localhost:" + str(LOCAL_PORT) + "/v1"
|
| 41 |
HYPERBOLIC_ENDPOINT = "https://api.hyperbolic.xyz/v1"
|
|
@@ -392,7 +395,7 @@ def get_tunnel_status_message():
|
|
| 392 |
"""
|
| 393 |
Return a formatted status message for display in the UI.
|
| 394 |
"""
|
| 395 |
-
global tunnel_status, use_fallback
|
| 396 |
|
| 397 |
api_mode = "Hyperbolic API" if use_fallback else "Local vLLM API"
|
| 398 |
model = get_model_name()
|
|
@@ -400,7 +403,7 @@ def get_tunnel_status_message():
|
|
| 400 |
status_color = "🟢" if (tunnel_status["is_running"] and not use_fallback) else "🔴"
|
| 401 |
status_text = tunnel_status["message"]
|
| 402 |
|
| 403 |
-
return f"{status_color} Tunnel Status: {status_text}\nCurrent API: {api_mode}\nCurrent Model: {model}"
|
| 404 |
|
| 405 |
def toggle_api():
|
| 406 |
"""
|
|
@@ -414,6 +417,30 @@ def toggle_api():
|
|
| 414 |
|
| 415 |
return f"Switched to {api_mode} using {model}"
|
| 416 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
# Start the SSH tunnel in a background thread
|
| 418 |
if __name__ == "__main__":
|
| 419 |
# Start the SSH tunnel
|
|
@@ -451,22 +478,24 @@ if __name__ == "__main__":
|
|
| 451 |
# Clear button
|
| 452 |
clear_btn = gr.Button("Clear Chat")
|
| 453 |
|
| 454 |
-
# Set up submit event chain
|
| 455 |
submit_event = textbox.submit(
|
| 456 |
fn=process_chat,
|
| 457 |
inputs=[textbox, chatbot],
|
| 458 |
-
outputs=chatbot
|
|
|
|
| 459 |
).then(
|
| 460 |
fn=lambda: {"text": "", "files": []},
|
| 461 |
inputs=None,
|
| 462 |
outputs=textbox
|
| 463 |
)
|
| 464 |
|
| 465 |
-
# Connect the submit button to the same functions
|
| 466 |
submit_btn.click(
|
| 467 |
fn=process_chat,
|
| 468 |
inputs=[textbox, chatbot],
|
| 469 |
-
outputs=chatbot
|
|
|
|
| 470 |
).then(
|
| 471 |
fn=lambda: {"text": "", "files": []},
|
| 472 |
inputs=None,
|
|
@@ -509,7 +538,6 @@ if __name__ == "__main__":
|
|
| 509 |
# Refresh status button and toggle API button
|
| 510 |
with gr.Row():
|
| 511 |
refresh_btn = gr.Button("Refresh Status")
|
| 512 |
-
toggle_api_btn = gr.Button("Toggle API (Local/Hyperbolic)")
|
| 513 |
|
| 514 |
# Set up refresh status button
|
| 515 |
refresh_btn.click(
|
|
@@ -518,13 +546,6 @@ if __name__ == "__main__":
|
|
| 518 |
outputs=status_text
|
| 519 |
)
|
| 520 |
|
| 521 |
-
# Set up toggle API button
|
| 522 |
-
toggle_api_btn.click(
|
| 523 |
-
fn=toggle_api,
|
| 524 |
-
inputs=None,
|
| 525 |
-
outputs=status_text
|
| 526 |
-
)
|
| 527 |
-
|
| 528 |
# Just load the initial status without auto-refresh
|
| 529 |
demo.load(
|
| 530 |
fn=get_tunnel_status_message,
|
|
@@ -532,5 +553,6 @@ if __name__ == "__main__":
|
|
| 532 |
outputs=status_text
|
| 533 |
)
|
| 534 |
|
| 535 |
-
# Launch the interface
|
|
|
|
| 536 |
demo.launch()
|
|
|
|
| 36 |
HYPERBOLIC_KEY = os.environ.get('HYPERBOLIC_XYZ_KEY')
|
| 37 |
FALLBACK_MODEL = 'Qwen/Qwen2.5-VL-72B-Instruct' # Fallback model at Hyperbolic
|
| 38 |
|
| 39 |
+
# Set the maximum number of concurrent API calls before queuing
|
| 40 |
+
MAX_CONCURRENT = int(os.environ.get('MAX_CONCURRENT', 3)) # Default to 3 concurrent calls
|
| 41 |
+
|
| 42 |
# API endpoints
|
| 43 |
VLLM_ENDPOINT = "http://localhost:" + str(LOCAL_PORT) + "/v1"
|
| 44 |
HYPERBOLIC_ENDPOINT = "https://api.hyperbolic.xyz/v1"
|
|
|
|
| 395 |
"""
|
| 396 |
Return a formatted status message for display in the UI.
|
| 397 |
"""
|
| 398 |
+
global tunnel_status, use_fallback, MAX_CONCURRENT
|
| 399 |
|
| 400 |
api_mode = "Hyperbolic API" if use_fallback else "Local vLLM API"
|
| 401 |
model = get_model_name()
|
|
|
|
| 403 |
status_color = "🟢" if (tunnel_status["is_running"] and not use_fallback) else "🔴"
|
| 404 |
status_text = tunnel_status["message"]
|
| 405 |
|
| 406 |
+
return f"{status_color} Tunnel Status: {status_text}\nCurrent API: {api_mode}\nCurrent Model: {model}\nConcurrent Requests: {MAX_CONCURRENT}"
|
| 407 |
|
| 408 |
def toggle_api():
|
| 409 |
"""
|
|
|
|
| 417 |
|
| 418 |
return f"Switched to {api_mode} using {model}"
|
| 419 |
|
| 420 |
+
def update_concurrency(new_value):
    """
    Set a new global MAX_CONCURRENT value from user-supplied input.

    Args:
        new_value (str): Desired concurrency, given as a string.

    Returns:
        str: Human-readable result message for display in the UI.
    """
    global MAX_CONCURRENT
    try:
        parsed = int(new_value)
    except ValueError:
        # Non-numeric input: leave the current setting untouched.
        return f"Error: Invalid number. Keeping current value: {MAX_CONCURRENT}"

    if parsed < 1:
        # Zero or negative concurrency makes no sense; reject it.
        return f"Error: Concurrency must be at least 1. Keeping current value: {MAX_CONCURRENT}"

    MAX_CONCURRENT = parsed
    # Only newly created event handlers pick this value up; handlers that
    # were wired with the old concurrency_limit keep it until the page is
    # reloaded, hence the refresh hint in the message.
    return f"Concurrency updated to {MAX_CONCURRENT}. You may need to refresh the page for all changes to take effect."
|
| 443 |
+
|
| 444 |
# Start the SSH tunnel in a background thread
|
| 445 |
if __name__ == "__main__":
|
| 446 |
# Start the SSH tunnel
|
|
|
|
| 478 |
# Clear button
|
| 479 |
clear_btn = gr.Button("Clear Chat")
|
| 480 |
|
| 481 |
+
# Set up submit event chain with concurrency limit
|
| 482 |
submit_event = textbox.submit(
|
| 483 |
fn=process_chat,
|
| 484 |
inputs=[textbox, chatbot],
|
| 485 |
+
outputs=chatbot,
|
| 486 |
+
concurrency_limit=MAX_CONCURRENT # Set concurrency limit for this event
|
| 487 |
).then(
|
| 488 |
fn=lambda: {"text": "", "files": []},
|
| 489 |
inputs=None,
|
| 490 |
outputs=textbox
|
| 491 |
)
|
| 492 |
|
| 493 |
+
# Connect the submit button to the same functions with same concurrency limit
|
| 494 |
submit_btn.click(
|
| 495 |
fn=process_chat,
|
| 496 |
inputs=[textbox, chatbot],
|
| 497 |
+
outputs=chatbot,
|
| 498 |
+
concurrency_limit=MAX_CONCURRENT # Set concurrency limit for this event
|
| 499 |
).then(
|
| 500 |
fn=lambda: {"text": "", "files": []},
|
| 501 |
inputs=None,
|
|
|
|
| 538 |
# Refresh status button and toggle API button
|
| 539 |
with gr.Row():
|
| 540 |
refresh_btn = gr.Button("Refresh Status")
|
|
|
|
| 541 |
|
| 542 |
# Set up refresh status button
|
| 543 |
refresh_btn.click(
|
|
|
|
| 546 |
outputs=status_text
|
| 547 |
)
|
| 548 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 549 |
# Just load the initial status without auto-refresh
|
| 550 |
demo.load(
|
| 551 |
fn=get_tunnel_status_message,
|
|
|
|
| 553 |
outputs=status_text
|
| 554 |
)
|
| 555 |
|
| 556 |
+
# Launch the interface with the specified concurrency setting
|
| 557 |
+
demo.queue(default_concurrency_limit=MAX_CONCURRENT)
|
| 558 |
demo.launch()
|