Rajan Sharma committed on
Commit
3cc2974
·
verified ·
1 Parent(s): ae93cdb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -7
app.py CHANGED
@@ -45,15 +45,14 @@ from session_rag import SessionRAG
45
  from mdsi_analysis import capacity_projection, cost_estimate, outcomes_summary
46
 
47
  # ---------- Config ----------
48
- # Local fallback model (lightweight by default). You can override via env.
49
- MODEL_ID = os.getenv("MODEL_ID", "microsoft/Phi-3-mini-4k-instruct")
50
  HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN")
51
 
52
  COHERE_API_KEY = os.getenv("COHERE_API_KEY")
53
  USE_HOSTED_COHERE = bool(COHERE_API_KEY and _HAS_COHERE)
54
  COHERE_TIMEOUT_SEC = float(os.getenv("COHERE_TIMEOUT_SEC", "30"))
55
 
56
- MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "512")) # faster defaults; adjust as needed
57
 
58
  # ---------- Helpers ----------
59
  def pick_dtype_and_map():
@@ -376,7 +375,7 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
376
 
377
  uploads.change(fn=_store_uploads, inputs=[uploads, state_uploaded], outputs=state_uploaded)
378
 
379
- # Send / Enter handlers
380
  def _on_send(user_msg, history, tz, up_paths):
381
  if not user_msg or not user_msg.strip():
382
  return history, "", history
@@ -387,22 +386,22 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
387
  fn=_on_send,
388
  inputs=[msg, state_history, tz_box, state_uploaded],
389
  outputs=[chat, msg, state_history],
 
390
  queue=True,
391
  )
392
  msg.submit(
393
  fn=_on_send,
394
  inputs=[msg, state_history, tz_box, state_uploaded],
395
  outputs=[chat, msg, state_history],
 
396
  queue=True,
397
  )
398
 
399
  # Clear chat (keep uploads)
400
  clear.click(lambda: ([], "", []), None, [chat, msg, state_history])
401
 
402
- # Enable queue to avoid websocket timeouts on first call / heavy loads
403
- demo = demo.queue(concurrency_count=2, max_size=32)
404
-
405
  if __name__ == "__main__":
406
  port = int(os.environ.get("PORT", "7860"))
407
  demo.launch(server_name="0.0.0.0", server_port=port, show_api=False, max_threads=8)
408
 
 
 
45
  from mdsi_analysis import capacity_projection, cost_estimate, outcomes_summary
46
 
47
  # ---------- Config ----------
48
+ MODEL_ID = os.getenv("MODEL_ID", "microsoft/Phi-3-mini-4k-instruct") # local fallback
 
49
  HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN")
50
 
51
  COHERE_API_KEY = os.getenv("COHERE_API_KEY")
52
  USE_HOSTED_COHERE = bool(COHERE_API_KEY and _HAS_COHERE)
53
  COHERE_TIMEOUT_SEC = float(os.getenv("COHERE_TIMEOUT_SEC", "30"))
54
 
55
+ MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "512"))
56
 
57
  # ---------- Helpers ----------
58
  def pick_dtype_and_map():
 
375
 
376
  uploads.change(fn=_store_uploads, inputs=[uploads, state_uploaded], outputs=state_uploaded)
377
 
378
+ # Send / Enter handlers (set concurrency_limit here per Gradio deprecation notice)
379
  def _on_send(user_msg, history, tz, up_paths):
380
  if not user_msg or not user_msg.strip():
381
  return history, "", history
 
386
  fn=_on_send,
387
  inputs=[msg, state_history, tz_box, state_uploaded],
388
  outputs=[chat, msg, state_history],
389
+ concurrency_limit=2,
390
  queue=True,
391
  )
392
  msg.submit(
393
  fn=_on_send,
394
  inputs=[msg, state_history, tz_box, state_uploaded],
395
  outputs=[chat, msg, state_history],
396
+ concurrency_limit=2,
397
  queue=True,
398
  )
399
 
400
  # Clear chat (keep uploads)
401
  clear.click(lambda: ([], "", []), None, [chat, msg, state_history])
402
 
 
 
 
403
  if __name__ == "__main__":
404
  port = int(os.environ.get("PORT", "7860"))
405
  demo.launch(server_name="0.0.0.0", server_port=port, show_api=False, max_threads=8)
406
 
407
+