"""Gradio chat UI for the Axon26-Coder GGUF model, served with llama-cpp-python.

Downloads the quantized model from the Hugging Face Hub, loads it once at
import time, and exposes a streaming chat interface on port 7860.
"""

import traceback

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Fetch the quantized weights from the Hub (cached locally after first run).
model_path = hf_hub_download(
    repo_id="AIencoder/Axon26-Coder-Q8_0-GGUF",
    filename="axon26-coder-q8_0.gguf",
)

# Load the model once so every request shares the same instance.
llm = Llama(
    model_path=model_path,
    n_ctx=4096,
    n_threads=2,
    chat_format="chatml",
    verbose=False,
)

SYSTEM_PROMPT = "You are a helpful assistant good at coding and general knowledge."


def _history_to_messages(history):
    """Normalize Gradio chat history into ChatML-style message dicts.

    Gradio has shipped several history shapes over time; accept all of them:
    ``{"role": ..., "content": ...}`` dicts, objects with ``role``/``content``
    attributes, and legacy ``(user, assistant)`` pairs. Unrecognized items are
    skipped.
    """
    messages = []
    for item in history:
        if isinstance(item, dict) and "role" in item and "content" in item:
            messages.append({"role": item["role"], "content": str(item["content"])})
        elif hasattr(item, "role") and hasattr(item, "content"):
            messages.append({"role": item.role, "content": str(item.content)})
        elif isinstance(item, (list, tuple)) and len(item) >= 2:
            user_msg, assistant_msg = item[0], item[1]
            if user_msg:
                messages.append({"role": "user", "content": str(user_msg)})
            if assistant_msg:
                messages.append({"role": "assistant", "content": str(assistant_msg)})
    return messages


def chat(message, history):
    """Stream a model reply to *message* given the prior chat *history*.

    Yields the accumulated response text after each streamed token so the
    Gradio UI can render it progressively. Any failure is caught at this
    top-level boundary and yielded as a formatted traceback instead of
    breaking the UI.
    """
    try:
        messages = [{"role": "system", "content": SYSTEM_PROMPT}]
        messages.extend(_history_to_messages(history))
        messages.append({"role": "user", "content": str(message)})

        response = llm.create_chat_completion(
            messages=messages,
            max_tokens=512,
            temperature=0.7,
            stream=True,
        )

        text = ""
        for chunk in response:
            content = chunk["choices"][0]["delta"].get("content", "")
            if content:
                text += content
                yield text
    except Exception:
        # Surface the traceback in the chat window so failures are visible
        # to the user instead of silently ending the stream.
        yield f"**CRASH REPORT:**\n```python\n{traceback.format_exc()}\n```"


demo = gr.ChatInterface(
    fn=chat,
    title="T.C.S AI Bot",
)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)