"""Gradio chat UI for the Axon26-Coder GGUF model, served with llama-cpp-python.

Downloads the quantized model from the Hugging Face Hub, loads it once at
import time, and exposes a streaming chat interface on port 7860.
"""

import traceback

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Fetch the quantized weights from the Hub (cached locally after first run).
model_path = hf_hub_download(
    repo_id="AIencoder/Axon26-Coder-Q8_0-GGUF",
    filename="axon26-coder-q8_0.gguf",
)

# Load the model once so every request shares the same instance.
llm = Llama(
    model_path=model_path,
    n_ctx=4096,
    n_threads=2,
    chat_format="chatml",
    verbose=False,
)

SYSTEM_PROMPT = "You are a helpful assistant good at coding and general knowledge."


def _history_to_messages(history):
    """Normalize Gradio chat history into ChatML-style message dicts.

    Gradio has shipped several history shapes over time; accept all of them:
    ``{"role": ..., "content": ...}`` dicts, objects with ``role``/``content``
    attributes, and legacy ``(user, assistant)`` pairs. Unrecognized items are
    skipped.
    """
    messages = []
    for item in history:
        if isinstance(item, dict) and "role" in item and "content" in item:
            messages.append({"role": item["role"], "content": str(item["content"])})
        elif hasattr(item, "role") and hasattr(item, "content"):
            messages.append({"role": item.role, "content": str(item.content)})
        elif isinstance(item, (list, tuple)) and len(item) >= 2:
            user_msg, assistant_msg = item[0], item[1]
            if user_msg:
                messages.append({"role": "user", "content": str(user_msg)})
            if assistant_msg:
                messages.append({"role": "assistant", "content": str(assistant_msg)})
    return messages


def chat(message, history):
    """Stream a model reply to *message* given the prior chat *history*.

    Yields the accumulated response text after each streamed token so the
    Gradio UI can render it progressively. Any failure is caught at this
    top-level boundary and yielded as a formatted traceback instead of
    breaking the UI.
    """
    try:
        messages = [{"role": "system", "content": SYSTEM_PROMPT}]
        messages.extend(_history_to_messages(history))
        messages.append({"role": "user", "content": str(message)})

        response = llm.create_chat_completion(
            messages=messages,
            max_tokens=512,
            temperature=0.7,
            stream=True,
        )

        text = ""
        for chunk in response:
            content = chunk["choices"][0]["delta"].get("content", "")
            if content:
                text += content
                yield text
    except Exception:
        # Surface the traceback in the chat window so failures are visible
        # to the user instead of silently ending the stream.
        yield f"**CRASH REPORT:**\n```python\n{traceback.format_exc()}\n```"


demo = gr.ChatInterface(
    fn=chat,
    title="T.C.S AI Bot",
)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)