Spaces:
Sleeping
Sleeping
Rajan Sharma
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -45,15 +45,14 @@ from session_rag import SessionRAG
|
|
| 45 |
from mdsi_analysis import capacity_projection, cost_estimate, outcomes_summary
|
| 46 |
|
| 47 |
# ---------- Config ----------
|
| 48 |
-
|
| 49 |
-
MODEL_ID = os.getenv("MODEL_ID", "microsoft/Phi-3-mini-4k-instruct")
|
| 50 |
HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN")
|
| 51 |
|
| 52 |
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
|
| 53 |
USE_HOSTED_COHERE = bool(COHERE_API_KEY and _HAS_COHERE)
|
| 54 |
COHERE_TIMEOUT_SEC = float(os.getenv("COHERE_TIMEOUT_SEC", "30"))
|
| 55 |
|
| 56 |
-
MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "512"))
|
| 57 |
|
| 58 |
# ---------- Helpers ----------
|
| 59 |
def pick_dtype_and_map():
|
|
@@ -376,7 +375,7 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
|
|
| 376 |
|
| 377 |
uploads.change(fn=_store_uploads, inputs=[uploads, state_uploaded], outputs=state_uploaded)
|
| 378 |
|
| 379 |
-
# Send / Enter handlers
|
| 380 |
def _on_send(user_msg, history, tz, up_paths):
|
| 381 |
if not user_msg or not user_msg.strip():
|
| 382 |
return history, "", history
|
|
@@ -387,22 +386,22 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
|
|
| 387 |
fn=_on_send,
|
| 388 |
inputs=[msg, state_history, tz_box, state_uploaded],
|
| 389 |
outputs=[chat, msg, state_history],
|
|
|
|
| 390 |
queue=True,
|
| 391 |
)
|
| 392 |
msg.submit(
|
| 393 |
fn=_on_send,
|
| 394 |
inputs=[msg, state_history, tz_box, state_uploaded],
|
| 395 |
outputs=[chat, msg, state_history],
|
|
|
|
| 396 |
queue=True,
|
| 397 |
)
|
| 398 |
|
| 399 |
# Clear chat (keep uploads)
|
| 400 |
clear.click(lambda: ([], "", []), None, [chat, msg, state_history])
|
| 401 |
|
| 402 |
-
# Enable queue to avoid websocket timeouts on first call / heavy loads
|
| 403 |
-
demo = demo.queue(concurrency_count=2, max_size=32)
|
| 404 |
-
|
| 405 |
if __name__ == "__main__":
|
| 406 |
port = int(os.environ.get("PORT", "7860"))
|
| 407 |
demo.launch(server_name="0.0.0.0", server_port=port, show_api=False, max_threads=8)
|
| 408 |
|
|
|
|
|
|
| 45 |
from mdsi_analysis import capacity_projection, cost_estimate, outcomes_summary
|
| 46 |
|
| 47 |
# ---------- Config ----------
|
| 48 |
+
MODEL_ID = os.getenv("MODEL_ID", "microsoft/Phi-3-mini-4k-instruct") # local fallback
|
|
|
|
| 49 |
HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN")
|
| 50 |
|
| 51 |
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
|
| 52 |
USE_HOSTED_COHERE = bool(COHERE_API_KEY and _HAS_COHERE)
|
| 53 |
COHERE_TIMEOUT_SEC = float(os.getenv("COHERE_TIMEOUT_SEC", "30"))
|
| 54 |
|
| 55 |
+
MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "512"))
|
| 56 |
|
| 57 |
# ---------- Helpers ----------
|
| 58 |
def pick_dtype_and_map():
|
|
|
|
| 375 |
|
| 376 |
uploads.change(fn=_store_uploads, inputs=[uploads, state_uploaded], outputs=state_uploaded)
|
| 377 |
|
| 378 |
+
# Send / Enter handlers (set concurrency_limit here per Gradio deprecation notice)
|
| 379 |
def _on_send(user_msg, history, tz, up_paths):
|
| 380 |
if not user_msg or not user_msg.strip():
|
| 381 |
return history, "", history
|
|
|
|
| 386 |
fn=_on_send,
|
| 387 |
inputs=[msg, state_history, tz_box, state_uploaded],
|
| 388 |
outputs=[chat, msg, state_history],
|
| 389 |
+
concurrency_limit=2,
|
| 390 |
queue=True,
|
| 391 |
)
|
| 392 |
msg.submit(
|
| 393 |
fn=_on_send,
|
| 394 |
inputs=[msg, state_history, tz_box, state_uploaded],
|
| 395 |
outputs=[chat, msg, state_history],
|
| 396 |
+
concurrency_limit=2,
|
| 397 |
queue=True,
|
| 398 |
)
|
| 399 |
|
| 400 |
# Clear chat (keep uploads)
|
| 401 |
clear.click(lambda: ([], "", []), None, [chat, msg, state_history])
|
| 402 |
|
|
|
|
|
|
|
|
|
|
| 403 |
if __name__ == "__main__":
|
| 404 |
port = int(os.environ.get("PORT", "7860"))
|
| 405 |
demo.launch(server_name="0.0.0.0", server_port=port, show_api=False, max_threads=8)
|
| 406 |
|
| 407 |
+
|