Spaces:
Sleeping
Sleeping
Rajan Sharma
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -58,7 +58,8 @@ HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN")
|
|
| 58 |
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
|
| 59 |
USE_HOSTED_COHERE = bool(COHERE_API_KEY and _HAS_COHERE)
|
| 60 |
|
| 61 |
-
|
|
|
|
| 62 |
|
| 63 |
# ---------- System Master (two-phase, LLM-only behavior) ----------
|
| 64 |
SYSTEM_MASTER = """
|
|
@@ -151,7 +152,7 @@ def cohere_chat(message, history):
|
|
| 151 |
model="command-r7b-12-2024",
|
| 152 |
message=prompt,
|
| 153 |
temperature=0.3,
|
| 154 |
-
max_tokens=MAX_NEW_TOKENS,
|
| 155 |
)
|
| 156 |
if hasattr(resp, "text") and resp.text: return resp.text.strip()
|
| 157 |
if hasattr(resp, "reply") and resp.reply: return resp.reply.strip()
|
|
@@ -203,7 +204,7 @@ def local_generate(model, tokenizer, input_ids, max_new_tokens=MAX_NEW_TOKENS):
|
|
| 203 |
input_ids = input_ids.to(model.device)
|
| 204 |
with torch.no_grad():
|
| 205 |
out = model.generate(
|
| 206 |
-
input_ids=input_ids, max_new_tokens=max_new_tokens,
|
| 207 |
do_sample=True, temperature=0.3, top_p=0.9,
|
| 208 |
repetition_penalty=1.15,
|
| 209 |
pad_token_id=tokenizer.eos_token_id,
|
|
@@ -419,9 +420,7 @@ with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
|
|
| 419 |
gr.Markdown("# ClarityOps Augmented Decision AI")
|
| 420 |
|
| 421 |
with gr.Column(elem_id="chat-container"):
|
| 422 |
-
# Taller chat: use viewport math so UI fills the screen
|
| 423 |
chat = gr.Chatbot(label="", show_label=False, height="62vh")
|
| 424 |
-
# Overlay rendered as a sibling inside the same container so absolute positioning covers chat
|
| 425 |
handshake = gr.HTML(
|
| 426 |
value=(
|
| 427 |
'<div id="handshake-overlay">'
|
|
@@ -504,3 +503,4 @@ if __name__ == "__main__":
|
|
| 504 |
|
| 505 |
|
| 506 |
|
|
|
|
|
|
| 58 |
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
|
| 59 |
USE_HOSTED_COHERE = bool(COHERE_API_KEY and _HAS_COHERE)
|
| 60 |
|
| 61 |
+
# BIGGER OUTPUT LIMIT
|
| 62 |
+
MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "2048"))
|
| 63 |
|
| 64 |
# ---------- System Master (two-phase, LLM-only behavior) ----------
|
| 65 |
SYSTEM_MASTER = """
|
|
|
|
| 152 |
model="command-r7b-12-2024",
|
| 153 |
message=prompt,
|
| 154 |
temperature=0.3,
|
| 155 |
+
max_tokens=MAX_NEW_TOKENS, # uses 2048
|
| 156 |
)
|
| 157 |
if hasattr(resp, "text") and resp.text: return resp.text.strip()
|
| 158 |
if hasattr(resp, "reply") and resp.reply: return resp.reply.strip()
|
|
|
|
| 204 |
input_ids = input_ids.to(model.device)
|
| 205 |
with torch.no_grad():
|
| 206 |
out = model.generate(
|
| 207 |
+
input_ids=input_ids, max_new_tokens=max_new_tokens, # uses 2048
|
| 208 |
do_sample=True, temperature=0.3, top_p=0.9,
|
| 209 |
repetition_penalty=1.15,
|
| 210 |
pad_token_id=tokenizer.eos_token_id,
|
|
|
|
| 420 |
gr.Markdown("# ClarityOps Augmented Decision AI")
|
| 421 |
|
| 422 |
with gr.Column(elem_id="chat-container"):
|
|
|
|
| 423 |
chat = gr.Chatbot(label="", show_label=False, height="62vh")
|
|
|
|
| 424 |
handshake = gr.HTML(
|
| 425 |
value=(
|
| 426 |
'<div id="handshake-overlay">'
|
|
|
|
| 503 |
|
| 504 |
|
| 505 |
|
| 506 |
+
|