Rajan Sharma committed on
Commit
10cd369
·
verified ·
1 Parent(s): 3e53e6c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -58,7 +58,8 @@ HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN")
58
  COHERE_API_KEY = os.getenv("COHERE_API_KEY")
59
  USE_HOSTED_COHERE = bool(COHERE_API_KEY and _HAS_COHERE)
60
 
61
- MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "512"))
 
62
 
63
  # ---------- System Master (two-phase, LLM-only behavior) ----------
64
  SYSTEM_MASTER = """
@@ -151,7 +152,7 @@ def cohere_chat(message, history):
151
  model="command-r7b-12-2024",
152
  message=prompt,
153
  temperature=0.3,
154
- max_tokens=MAX_NEW_TOKENS,
155
  )
156
  if hasattr(resp, "text") and resp.text: return resp.text.strip()
157
  if hasattr(resp, "reply") and resp.reply: return resp.reply.strip()
@@ -203,7 +204,7 @@ def local_generate(model, tokenizer, input_ids, max_new_tokens=MAX_NEW_TOKENS):
203
  input_ids = input_ids.to(model.device)
204
  with torch.no_grad():
205
  out = model.generate(
206
- input_ids=input_ids, max_new_tokens=max_new_tokens,
207
  do_sample=True, temperature=0.3, top_p=0.9,
208
  repetition_penalty=1.15,
209
  pad_token_id=tokenizer.eos_token_id,
@@ -419,9 +420,7 @@ with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
419
  gr.Markdown("# ClarityOps Augmented Decision AI")
420
 
421
  with gr.Column(elem_id="chat-container"):
422
- # Taller chat: use viewport math so UI fills the screen
423
  chat = gr.Chatbot(label="", show_label=False, height="62vh")
424
- # Overlay rendered as a sibling inside the same container so absolute positioning covers chat
425
  handshake = gr.HTML(
426
  value=(
427
  '<div id="handshake-overlay">'
@@ -504,3 +503,4 @@ if __name__ == "__main__":
504
 
505
 
506
 
 
 
58
  COHERE_API_KEY = os.getenv("COHERE_API_KEY")
59
  USE_HOSTED_COHERE = bool(COHERE_API_KEY and _HAS_COHERE)
60
 
61
+ # BIGGER OUTPUT LIMIT
62
+ MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "2048"))
63
 
64
  # ---------- System Master (two-phase, LLM-only behavior) ----------
65
  SYSTEM_MASTER = """
 
152
  model="command-r7b-12-2024",
153
  message=prompt,
154
  temperature=0.3,
155
+ max_tokens=MAX_NEW_TOKENS, # uses 2048
156
  )
157
  if hasattr(resp, "text") and resp.text: return resp.text.strip()
158
  if hasattr(resp, "reply") and resp.reply: return resp.reply.strip()
 
204
  input_ids = input_ids.to(model.device)
205
  with torch.no_grad():
206
  out = model.generate(
207
+ input_ids=input_ids, max_new_tokens=max_new_tokens, # uses 2048
208
  do_sample=True, temperature=0.3, top_p=0.9,
209
  repetition_penalty=1.15,
210
  pad_token_id=tokenizer.eos_token_id,
 
420
  gr.Markdown("# ClarityOps Augmented Decision AI")
421
 
422
  with gr.Column(elem_id="chat-container"):
 
423
  chat = gr.Chatbot(label="", show_label=False, height="62vh")
 
424
  handshake = gr.HTML(
425
  value=(
426
  '<div id="handshake-overlay">'
 
503
 
504
 
505
 
506
+