"""Gradio chat UI for the DeepHat-V1-7B model served locally via llama.cpp (CPU, GGUF)."""

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# GGUF quantization file that actually exists in the mirror repo.
FILENAME = "DeepHat-V1-7B-Q4_K.gguf"

# Download the weights from Hugging Face on first run (cached afterwards).
model_path = hf_hub_download(
    repo_id="mradermacher/DeepHat-V1-7B-GGUF",
    filename=FILENAME,
    local_dir=".",
)

# CPU-only inference: n_gpu_layers=0 keeps every layer on the CPU.
model = Llama(
    model_path=model_path,
    n_ctx=4096,
    n_threads=4,
    n_gpu_layers=0,
    verbose=False,
)


def respond(message, history):
    """Generate a reply for a Gradio ChatInterface turn.

    Args:
        message: The user's latest message (str).
        history: Prior conversation. Supports both Gradio history formats:
            a list of ``(user, assistant)`` pairs (tuples format) or a list
            of ``{"role": ..., "content": ...}`` dicts (messages format).

    Returns:
        The model's completion text, stripped of surrounding whitespace.
    """
    parts = []
    for turn in history:
        if isinstance(turn, dict):
            # Gradio "messages" format: one dict per role/content pair.
            role = turn.get("role")
            content = turn.get("content", "")
            if role == "user":
                parts.append(f"<|user|>{content}\n")
            elif role == "assistant":
                parts.append(f"<|assistant|>{content}\n")
        else:
            # Gradio "tuples" format: (user_message, assistant_message).
            user, assistant = turn
            parts.append(f"<|user|>{user}\n<|assistant|>{assistant}\n")
    parts.append(f"<|user|>{message}\n<|assistant|>")
    # join() avoids quadratic string concatenation as the history grows.
    prompt = "".join(parts)

    result = model(
        prompt,
        max_tokens=512,
        temperature=0.7,
        top_p=0.95,
        # Stop as soon as the model starts hallucinating the next user turn.
        stop=["<|user|>"],
    )
    return result["choices"][0]["text"].strip()


demo = gr.ChatInterface(
    respond,
    title="DeepHat 7B - CPU GGUF Chatbot",
)

if __name__ == "__main__":
    demo.launch()