import gradio as gr
from llama_cpp import Llama
import os

# The GGUF weights are read from a local path inside the container; per the
# original note, the Dockerfile downloads them, so nothing is fetched here.
model_path = "/app/coding-agent-qwen-sft-v3-GGUF.q4_k_m.gguf"

print(f"Checking for model at {model_path}...")
_model_missing = not os.path.exists(model_path)
if _model_missing:
    # Only a diagnostic is printed; loading is still attempted below.
    print("MODEL NOT FOUND!")

# Load the model once at import time with a 2048-token context window so
# every request reuses the same instance.
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
)

def generate(prompt, history=None):
    """Return the model's reply to a single user message.

    The message is wrapped in a ChatML-style user/assistant template and
    passed to the module-level ``llm``; generation stops at the
    ``<|im_end|>`` marker or after 1024 new tokens.

    Args:
        prompt: The user's message text.
        history: Previous conversation turns. ``gr.ChatInterface`` invokes
            its callback as ``fn(message, history)``, so this parameter must
            be accepted for the UI to work. It is currently not used: each
            message is answered on its own, without earlier turns.

    Returns:
        The generated completion text of the first choice.
    """
    # Single-turn ChatML prompt; the trailing assistant header cues the
    # model to begin its answer.
    chatml_prompt = (
        f"<|im_start|>user\n{prompt}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )
    output = llm(
        chatml_prompt,
        max_tokens=1024,
        stop=["<|im_end|>"],
        repeat_penalty=1.2,
        temperature=0.4,
    )
    return output['choices'][0]['text']

# Wrap `generate` in a Gradio chat UI and serve it on 0.0.0.0:7860.
_chat_ui = gr.ChatInterface(generate)
_chat_ui.launch(server_name="0.0.0.0", server_port=7860)