Spaces:
Sleeping
Sleeping
Rajan Sharma
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -58,7 +58,8 @@ HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN")
|
|
| 58 |
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
|
| 59 |
USE_HOSTED_COHERE = bool(COHERE_API_KEY and _HAS_COHERE)
|
| 60 |
|
| 61 |
-
|
|
|
|
| 62 |
|
| 63 |
# ---------- System Master (two-phase, LLM-only behavior) ----------
|
| 64 |
SYSTEM_MASTER = """
|
|
@@ -151,7 +152,7 @@ def cohere_chat(message, history):
|
|
| 151 |
model="command-r7b-12-2024",
|
| 152 |
message=prompt,
|
| 153 |
temperature=0.3,
|
| 154 |
-
max_tokens=MAX_NEW_TOKENS,
|
| 155 |
)
|
| 156 |
if hasattr(resp, "text") and resp.text: return resp.text.strip()
|
| 157 |
if hasattr(resp, "reply") and resp.reply: return resp.reply.strip()
|
|
@@ -203,7 +204,7 @@ def local_generate(model, tokenizer, input_ids, max_new_tokens=MAX_NEW_TOKENS):
|
|
| 203 |
input_ids = input_ids.to(model.device)
|
| 204 |
with torch.no_grad():
|
| 205 |
out = model.generate(
|
| 206 |
-
input_ids=input_ids, max_new_tokens=max_new_tokens,
|
| 207 |
do_sample=True, temperature=0.3, top_p=0.9,
|
| 208 |
repetition_penalty=1.15,
|
| 209 |
pad_token_id=tokenizer.eos_token_id,
|
|
@@ -419,9 +420,7 @@ with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
|
|
| 419 |
gr.Markdown("# ClarityOps Augmented Decision AI")
|
| 420 |
|
| 421 |
with gr.Column(elem_id="chat-container"):
|
| 422 |
-
# Taller chat: use viewport math so UI fills the screen
|
| 423 |
chat = gr.Chatbot(label="", show_label=False, height="62vh")
|
| 424 |
-
# Overlay rendered as a sibling inside the same container so absolute positioning covers chat
|
| 425 |
handshake = gr.HTML(
|
| 426 |
value=(
|
| 427 |
'<div id="handshake-overlay">'
|
|
@@ -504,3 +503,4 @@ if __name__ == "__main__":
|
|
| 504 |
|
| 505 |
|
| 506 |
|
|
|
|
|
|
| 58 |
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
|
| 59 |
USE_HOSTED_COHERE = bool(COHERE_API_KEY and _HAS_COHERE)
|
| 60 |
|
| 61 |
+
# BIGGER OUTPUT LIMIT
|
| 62 |
+
MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "2048"))
|
| 63 |
|
| 64 |
# ---------- System Master (two-phase, LLM-only behavior) ----------
|
| 65 |
SYSTEM_MASTER = """
|
|
|
|
| 152 |
model="command-r7b-12-2024",
|
| 153 |
message=prompt,
|
| 154 |
temperature=0.3,
|
| 155 |
+
max_tokens=MAX_NEW_TOKENS, # uses 2048
|
| 156 |
)
|
| 157 |
if hasattr(resp, "text") and resp.text: return resp.text.strip()
|
| 158 |
if hasattr(resp, "reply") and resp.reply: return resp.reply.strip()
|
|
|
|
| 204 |
input_ids = input_ids.to(model.device)
|
| 205 |
with torch.no_grad():
|
| 206 |
out = model.generate(
|
| 207 |
+
input_ids=input_ids, max_new_tokens=max_new_tokens, # uses 2048
|
| 208 |
do_sample=True, temperature=0.3, top_p=0.9,
|
| 209 |
repetition_penalty=1.15,
|
| 210 |
pad_token_id=tokenizer.eos_token_id,
|
|
|
|
| 420 |
gr.Markdown("# ClarityOps Augmented Decision AI")
|
| 421 |
|
| 422 |
with gr.Column(elem_id="chat-container"):
|
|
|
|
| 423 |
chat = gr.Chatbot(label="", show_label=False, height="62vh")
|
|
|
|
| 424 |
handshake = gr.HTML(
|
| 425 |
value=(
|
| 426 |
'<div id="handshake-overlay">'
|
|
|
|
| 503 |
|
| 504 |
|
| 505 |
|
| 506 |
+
|