"""Gradio chat front end that streams replies from a chat-completions HTTP endpoint."""

import json
import time

import gradio as gr
import requests

API_URL = "https://bonsai.locoremind.com/v1/chat/completions"

# NOTE(review): "lnagauge" is misspelled in the prompt text. It is left
# untouched here because the string is sent to the model verbatim.
SYSTEM_PROMPT = (
    "You are Xylaria made by SK Mahammad Saad Amin. "
    "Be concise and helpful. "
    "You should attempt to figure out what lnagauge the user uses "
    "for maximum compatibility."
)

# Server-sent-event field prefixes that carry no payload for this client.
_IGNORED_SSE_PREFIXES = (":", "event:", "id:", "retry:")


def _build_messages(message, history):
    """Return the request ``messages`` list for *message* and prior *history*.

    Each history entry may be a ``(user_msg, bot_msg)`` pair or a dict with
    ``"role"`` and ``"content"`` keys; which one arrives depends on how the
    chat interface is configured. Empty entries are skipped. The system
    prompt is always first and the new user message always last.
    """
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for turn in history or []:
        if isinstance(turn, dict):
            # Unpacking a dict as a pair would yield its *keys*, so dict
            # entries are handled explicitly.
            role = turn.get("role")
            content = turn.get("content")
            if role in ("user", "assistant") and isinstance(content, str) and content:
                messages.append({"role": role, "content": content})
            continue
        user_msg, bot_msg = turn
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    return messages


def _delta_text(chunk):
    """Return the text fragment in a streamed *chunk*, or ``""`` if it has none."""
    choices = chunk.get("choices") if isinstance(chunk, dict) else None
    if not choices:
        # Some chunks carry no choices at all; they are not an error.
        return ""
    delta = choices[0].get("delta") or {}
    return delta.get("content") or ""


def _unstreamed_text(lines):
    """Return the reply text from a non-streamed JSON body split into *lines*.

    Returns ``""`` when the lines are not valid JSON or do not have the
    ``choices[0].message.content`` shape.
    """
    try:
        payload = json.loads("\n".join(lines))
        return payload["choices"][0]["message"]["content"] or ""
    except (json.JSONDecodeError, KeyError, IndexError, TypeError):
        return ""


def respond(message, history):
    """Stream the model's reply to *message* as progressively longer strings.

    Args:
        message: The new user message.
        history: Prior conversation turns, as pairs or role/content dicts.

    Yields:
        The reply accumulated so far followed by a throughput footer. On
        failure a single human-readable error string is yielded instead;
        no exception escapes to the caller.
    """
    payload = {
        "messages": _build_messages(message, history),
        "max_tokens": 96000,
        "stream": True,
        "repetition_penalty": 2,
    }
    try:
        # The context manager releases the connection even when the loop
        # exits early or the consumer abandons the generator.
        with requests.post(
            API_URL,
            headers={"Content-Type": "application/json"},
            json=payload,
            stream=True,
            timeout=600,
        ) as response:
            response.raise_for_status()

            partial = ""
            token_count = 0
            start_time = None
            # Non-SSE lines are kept so a plain JSON reply can still be
            # parsed; the body cannot be re-read once iterated.
            plain_lines = []

            for raw in response.iter_lines():
                if not raw:
                    continue
                line = raw.decode("utf-8")
                if not line.startswith("data:"):
                    if not line.startswith(_IGNORED_SSE_PREFIXES):
                        plain_lines.append(line)
                    continue
                data = line[5:].strip()
                if data == "[DONE]":
                    break
                try:
                    chunk = json.loads(data)
                except json.JSONDecodeError:
                    continue
                content = _delta_text(chunk)
                if not content:
                    continue
                if start_time is None:
                    start_time = time.monotonic()
                partial += content
                # Each content-bearing chunk is counted as one token, so the
                # reported rate is an approximation.
                token_count += 1
                elapsed = time.monotonic() - start_time
                tps = token_count / elapsed if elapsed > 0 else 0
                yield partial + f"\n\n`⚡ {tps:.1f} tok/s`"

            if not partial:
                text = _unstreamed_text(plain_lines)
                yield text or "The model returned an empty response."
    except requests.exceptions.ConnectionError:
        yield "The model backend is currently offline. Please try again later."
    except requests.exceptions.Timeout:
        yield "Request timed out. The model may be busy — please try again."
    except Exception as e:
        # Top-level boundary for the UI: report the failure in the chat
        # window rather than letting it propagate.
        yield f"Error: {e}"


demo = gr.ChatInterface(
    fn=respond,
    description="**Xylaria**",
    examples=[
        "Explain quantum computing in simple terms.",
        "Write a Python function to find prime numbers.",
        "What are the benefits of 1-bit quantization?",
    ],
    theme=gr.themes.Soft(),
)

if __name__ == "__main__":
    demo.launch()