# wqdad / app.py
# Reality123b's picture
# Update app.py
# f72ee94 verified
import gradio as gr
import requests
import json
import time
# Chat-completions endpoint that every request is POSTed to.
API_URL = "https://bonsai.locoremind.com/v1/chat/completions"

# System message placed at the start of every conversation sent to the model.
# NOTE(review): "lnagauge" is misspelled in the prompt text; it is kept
# verbatim here because the string is sent to the model as-is.
SYSTEM_PROMPT = (
    "You are Xylaria made by SK Mahammad Saad Amin. "
    "Be concise and helpful. "
    "You should attempt to figure out what lnagauge the user uses "
    "for maximum compatibility."
)
# Pieces of the throughput footer appended to every streamed update.
# Together they render as "\n\n`⚡ <rate> tok/s`".
_STATS_PREFIX = "\n\n`⚡ "
_STATS_SUFFIX = " tok/s`"

# Shown when the backend answered successfully but produced no text at all.
_EMPTY_REPLY = "Error: the model returned an empty response."


def _strip_stats_footer(text):
    """Return *text* without a trailing throughput footer added by respond()."""
    if not isinstance(text, str) or not text.endswith(_STATS_SUFFIX):
        return text
    body, marker, rate = text[: -len(_STATS_SUFFIX)].rpartition(_STATS_PREFIX)
    if not marker:
        return text
    try:
        # Only strip when the part between the markers really is a number,
        # so ordinary text that merely ends in " tok/s`" is left alone.
        float(rate)
    except ValueError:
        return text
    return body


def _build_messages(message, history):
    """Build the chat-completions message list from the prior turns.

    Each history item may be a ``(user, assistant)`` pair or a dict with
    ``"role"`` and ``"content"`` keys; empty entries are skipped.
    """
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for turn in history or []:
        if isinstance(turn, dict):
            entries = [(turn.get("role"), turn.get("content"))]
        else:
            user_msg, bot_msg = turn
            entries = [("user", user_msg), ("assistant", bot_msg)]
        for role, content in entries:
            if role == "assistant":
                # Earlier replies were displayed with the tok/s footer; that
                # is UI decoration and must not be fed back to the model.
                content = _strip_stats_footer(content)
            if role in ("user", "assistant") and content:
                messages.append({"role": role, "content": content})
    messages.append({"role": "user", "content": message})
    return messages


def _delta_text(payload):
    """Return the text fragment in one streamed ``data:`` payload, or ""."""
    try:
        chunk = json.loads(payload)
    except json.JSONDecodeError:
        return ""
    if not isinstance(chunk, dict):
        return ""
    # Chunks with an empty or missing "choices" list carry no text; skip them
    # instead of letting an IndexError/KeyError abort the whole reply.
    choices = chunk.get("choices") or []
    if not choices or not isinstance(choices[0], dict):
        return ""
    delta = choices[0].get("delta")
    if not isinstance(delta, dict):
        return ""
    return delta.get("content") or ""


def _full_reply_text(raw_body):
    """Return the assistant text of a non-streamed completion body, or ""."""
    try:
        result = json.loads(raw_body)
        return result["choices"][0]["message"]["content"] or ""
    except (ValueError, KeyError, IndexError, TypeError):
        return ""


def respond(message, history):
    """Stream the model's reply to *message* for the chat UI.

    Args:
        message: The text the user just submitted.
        history: Prior turns supplied by the chat UI, either as
            ``(user, assistant)`` pairs or as role/content dicts.

    Yields:
        The reply accumulated so far followed by a tokens-per-second footer,
        once per received fragment. If the backend answers with a plain JSON
        body instead of a stream, the full reply is yielded once. On failure
        a single human-readable error string is yielded instead of raising.
    """
    try:
        messages = _build_messages(message, history)
        # The context manager releases the connection even when the consumer
        # stops iterating early or an error interrupts the stream.
        with requests.post(
            API_URL,
            headers={"Content-Type": "application/json"},
            json={
                "messages": messages,
                "max_tokens": 96000,
                "stream": True,
                "repetition_penalty": 2,
            },
            stream=True,
            timeout=600,
        ) as response:
            response.raise_for_status()

            partial = ""
            token_count = 0
            start_time = None
            saw_event = False
            plain_lines = []

            for raw_line in response.iter_lines():
                if not raw_line:
                    continue
                line = raw_line.decode("utf-8")

                if not line.startswith("data:"):
                    # Until an event arrives this may be an ordinary JSON
                    # body; keep it, because the stream cannot be re-read
                    # once iter_lines() has consumed it.
                    if not saw_event:
                        plain_lines.append(line)
                    continue

                saw_event = True
                data = line[len("data:"):].strip()
                if data == "[DONE]":
                    break

                content = _delta_text(data)
                if not content:
                    continue

                # Monotonic clock: the rate is unaffected by wall-clock jumps.
                now = time.monotonic()
                if start_time is None:
                    start_time = now
                partial += content
                # Counts received fragments, used as the token estimate.
                token_count += 1
                elapsed = now - start_time
                tps = token_count / elapsed if elapsed > 0 else 0
                yield f"{partial}{_STATS_PREFIX}{tps:.1f}{_STATS_SUFFIX}"

            if not partial:
                # No streamed text: fall back to a non-streamed JSON reply
                # rebuilt from the lines already read.
                yield _full_reply_text("\n".join(plain_lines)) or _EMPTY_REPLY
    except requests.exceptions.ConnectionError:
        yield "The model backend is currently offline. Please try again later."
    except requests.exceptions.Timeout:
        yield "Request timed out. The model may be busy — please try again."
    except Exception as e:
        # Top-level boundary for the UI callback: show the failure in the
        # chat instead of crashing the handler.
        yield f"Error: {e}"
# Starter prompts offered in the chat UI.
_EXAMPLE_PROMPTS = [
    "Explain quantum computing in simple terms.",
    "Write a Python function to find prime numbers.",
    "What are the benefits of 1-bit quantization?",
]

# Chat front-end: every submitted message is handled by respond().
demo = gr.ChatInterface(
    fn=respond,
    theme=gr.themes.Soft(),
    description="**Xylaria**",
    examples=_EXAMPLE_PROMPTS,
)

# Launch the interface only when this file is executed directly, not on import.
if __name__ == "__main__":
    demo.launch()