Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from llama_cpp import Llama | |
| from huggingface_hub import hf_hub_download | |
| import traceback | |
# Download the quantized GGUF weights from the Hugging Face Hub
# (cached locally by hf_hub_download after the first run).
model_path = hf_hub_download(
    repo_id="AIencoder/Axon26-Coder-Q8_0-GGUF",
    filename="axon26-coder-q8_0.gguf"
)

# Load the model once at module import so every chat request reuses it.
llm = Llama(
    model_path=model_path,
    n_ctx=4096,             # context window size in tokens
    n_threads=2,            # NOTE(review): assumes a small CPU Space — tune to hardware
    chat_format="chatml",   # prompt template applied by create_chat_completion
    verbose=False
)
def chat(message, history):
    """Stream a chat completion for *message*, given the prior *history*.

    Gradio may hand history entries over in three shapes — dicts with
    "role"/"content" keys, objects with .role/.content attributes, or
    legacy (user, assistant) pairs — so each entry is normalized into a
    chatml-style message dict before calling the model.  Yields the
    accumulated response text after every streamed chunk; on any error,
    yields a formatted crash report instead of raising.
    """
    try:
        convo = [{"role": "system", "content": "You are a helpful assistant good at coding and general knowledge."}]

        for entry in history:
            if isinstance(entry, dict) and "role" in entry and "content" in entry:
                convo.append({"role": entry["role"], "content": str(entry["content"])})
            elif hasattr(entry, "role") and hasattr(entry, "content"):
                convo.append({"role": entry.role, "content": str(entry.content)})
            elif isinstance(entry, (list, tuple)) and len(entry) >= 2:
                user_turn, bot_turn = entry[0], entry[1]
                if user_turn:
                    convo.append({"role": "user", "content": str(user_turn)})
                if bot_turn:
                    convo.append({"role": "assistant", "content": str(bot_turn)})

        convo.append({"role": "user", "content": str(message)})

        stream = llm.create_chat_completion(
            messages=convo,
            max_tokens=512,
            temperature=0.7,
            stream=True,
        )

        # Accumulate deltas and re-yield the full text so the UI shows
        # the growing response (Gradio replaces, not appends, each yield).
        so_far = ""
        for part in stream:
            piece = part["choices"][0]["delta"].get("content", "")
            if piece:
                so_far += piece
                yield so_far
    except Exception:
        yield f"**CRASH REPORT:**\n```python\n{traceback.format_exc()}\n```"
# Wire the streaming chat generator into Gradio's ready-made chat UI.
demo = gr.ChatInterface(
    fn=chat,
    title="T.C.S AI Bot"
)

# Bind to all interfaces on port 7860 (the standard Hugging Face Spaces
# port); ssr_mode=False disables Gradio's server-side rendering.
demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)