Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import requests | |
| import json | |
| import time | |
# OpenAI-style chat-completions endpoint that serves the model.
API_URL = "https://bonsai.locoremind.com/v1/chat/completions"

# System message placed at the start of every conversation sent to API_URL.
SYSTEM_PROMPT = (
    "You are Xylaria made by SK Mahammad Saad Amin. "
    "Be concise and helpful. "
    "You should attempt to figure out what language the user uses "
    "for maximum compatibility."
)
# Footer appended to every streamed frame. Kept as constants so the exact same
# text can be recognised and removed when it comes back inside ``history``.
_STATS_PREFIX = "\n\n`⚡ "
_STATS_SUFFIX = " tok/s`"


def _strip_stats_footer(text):
    """Return *text* without a trailing tokens-per-second footer, if present."""
    body, sep, tail = text.rpartition(_STATS_PREFIX)
    if not sep or not tail.endswith(_STATS_SUFFIX):
        return text
    try:
        # Only strip when the middle part really is the formatted number.
        float(tail[: -len(_STATS_SUFFIX)])
    except ValueError:
        return text
    return body


def _content_to_text(content):
    """Return the plain text carried by one history ``content`` value."""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # NOTE(review): some Gradio versions appear to deliver content as a
        # list of {"type": "text", "text": ...} parts; confirm against the
        # installed version. Non-text parts are ignored.
        return "".join(
            part.get("text") or ""
            for part in content
            if isinstance(part, dict) and part.get("type") == "text"
        )
    return ""


def _history_to_messages(history):
    """Convert chat history into a list of role/content message dicts.

    Accepts both ``(user, assistant)`` pairs and ``{"role", "content"}``
    dicts. Empty entries and roles other than user/assistant are dropped.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            entries = [(turn.get("role"), turn.get("content"))]
        else:
            user_msg, bot_msg = turn
            entries = [("user", user_msg), ("assistant", bot_msg)]
        for role, content in entries:
            if role not in ("user", "assistant"):
                continue
            text = _content_to_text(content)
            if role == "assistant":
                # Earlier replies were yielded with the stats footer attached;
                # do not feed that back to the model as if it had written it.
                text = _strip_stats_footer(text)
            if text:
                messages.append({"role": role, "content": text})
    return messages


def _delta_text(chunk):
    """Return the text fragment in one streamed chunk, or "" if it has none."""
    choices = chunk.get("choices") if isinstance(chunk, dict) else None
    if not choices:
        # Chunks with an empty or missing "choices" list carry no text.
        return ""
    delta = choices[0].get("delta") or {}
    return delta.get("content") or ""


def _non_stream_text(lines):
    """Parse *lines* as one non-streamed completion body and return its text."""
    if not lines:
        raise ValueError("The model returned no content.")
    result = json.loads("\n".join(lines))
    return result["choices"][0]["message"]["content"]


def respond(message, history):
    """Stream the model's reply to *message* given the prior *history*.

    Args:
        message: Text of the newest user message.
        history: Earlier turns, either as ``(user, assistant)`` pairs or as
            ``{"role": ..., "content": ...}`` dicts.

    Yields:
        The reply accumulated so far followed by a tokens-per-second footer,
        once per received fragment. If the server answers with a single
        non-streamed JSON body, that body's message content is yielded once.
        On failure a single human-readable error string is yielded instead
        of raising.
    """
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    payload = {
        "messages": messages,
        "max_tokens": 96000,
        "stream": True,
        "repetition_penalty": 2,
    }

    try:
        # The context manager releases the connection even when the consumer
        # stops iterating early or an exception interrupts the stream.
        with requests.post(
            API_URL,
            headers={"Content-Type": "application/json"},
            json=payload,
            stream=True,
            timeout=600,
        ) as response:
            response.raise_for_status()

            partial = ""
            chunk_count = 0
            first_chunk_at = None
            # Lines that are not SSE "data:" events. They are kept because the
            # body can only be read once; if the server ignored "stream" and
            # sent plain JSON, this is the only copy of it.
            plain_lines = []

            for raw_line in response.iter_lines():
                if not raw_line:
                    continue
                line = raw_line.decode("utf-8")
                if not line.startswith("data:"):
                    plain_lines.append(line)
                    continue

                data = line[len("data:"):].strip()
                if data == "[DONE]":
                    break
                try:
                    chunk = json.loads(data)
                except json.JSONDecodeError:
                    continue

                content = _delta_text(chunk)
                if not content:
                    continue

                now = time.monotonic()
                if first_chunk_at is None:
                    first_chunk_at = now
                partial += content
                chunk_count += 1
                # Rate is measured over the intervals since the first
                # fragment, so the very first frame reports 0 instead of a
                # huge value from dividing by a near-zero elapsed time.
                elapsed = now - first_chunk_at
                tps = (chunk_count - 1) / elapsed if elapsed > 0 else 0
                yield f"{partial}{_STATS_PREFIX}{tps:.1f}{_STATS_SUFFIX}"

            if not partial:
                yield _non_stream_text(plain_lines)
    except requests.exceptions.ConnectionError:
        yield "The model backend is currently offline. Please try again later."
    except requests.exceptions.Timeout:
        yield "Request timed out. The model may be busy — please try again."
    except Exception as e:
        yield f"Error: {e}"
# Starter prompts shown as clickable examples in the chat UI.
_EXAMPLE_PROMPTS = [
    "Explain quantum computing in simple terms.",
    "Write a Python function to find prime numbers.",
    "What are the benefits of 1-bit quantization?",
]

# Chat front end: every submitted message is answered by ``respond``.
demo = gr.ChatInterface(
    fn=respond,
    examples=_EXAMPLE_PROMPTS,
    description="**Xylaria**",
    theme=gr.themes.Soft(),
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()