"""Gradio chat UI for the DeepHat-V1-7B model served locally via llama.cpp (CPU, GGUF)."""

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# GGUF quantization file that actually exists in the mirror repo.
FILENAME = "DeepHat-V1-7B-Q4_K.gguf"

# Download the weights from Hugging Face on first run (cached afterwards).
model_path = hf_hub_download(
    repo_id="mradermacher/DeepHat-V1-7B-GGUF",
    filename=FILENAME,
    local_dir=".",
)

# CPU-only inference: n_gpu_layers=0 keeps every layer on the CPU.
model = Llama(
    model_path=model_path,
    n_ctx=4096,
    n_threads=4,
    n_gpu_layers=0,
    verbose=False,
)


def respond(message, history):
    """Generate a reply for a Gradio ChatInterface turn.

    Args:
        message: The user's latest message (str).
        history: Prior conversation. Supports both Gradio history formats:
            a list of ``(user, assistant)`` pairs (tuples format) or a list
            of ``{"role": ..., "content": ...}`` dicts (messages format).

    Returns:
        The model's completion text, stripped of surrounding whitespace.
    """
    parts = []
    for turn in history:
        if isinstance(turn, dict):
            # Gradio "messages" format: one dict per role/content pair.
            role = turn.get("role")
            content = turn.get("content", "")
            if role == "user":
                parts.append(f"<|user|>{content}\n")
            elif role == "assistant":
                parts.append(f"<|assistant|>{content}\n")
        else:
            # Gradio "tuples" format: (user_message, assistant_message).
            user, assistant = turn
            parts.append(f"<|user|>{user}\n<|assistant|>{assistant}\n")
    parts.append(f"<|user|>{message}\n<|assistant|>")
    # join() avoids quadratic string concatenation as the history grows.
    prompt = "".join(parts)

    result = model(
        prompt,
        max_tokens=512,
        temperature=0.7,
        top_p=0.95,
        # Stop as soon as the model starts hallucinating the next user turn.
        stop=["<|user|>"],
    )
    return result["choices"][0]["text"].strip()


demo = gr.ChatInterface(
    respond,
    title="DeepHat 7B - CPU GGUF Chatbot",
)

if __name__ == "__main__":
    demo.launch()