Spaces:
Runtime error
Runtime error
File size: 970 Bytes
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# GGUF weights file to fetch from the model repository.
# NOTE(review): the name must match a file in the repository exactly —
# confirm it against the repository's file listing.
FILENAME = "DeepHat-V1-7B-Q4_K.gguf"

# Fetch the weights from the Hugging Face Hub into the working directory
# and keep the path that hf_hub_download returns.
model_path = hf_hub_download(
    local_dir=".",
    filename=FILENAME,
    repo_id="mradermacher/DeepHat-V1-7B-GGUF",
)

# Load the model once at import time; respond() reuses this instance for
# every chat turn.
model = Llama(
    verbose=False,
    n_gpu_layers=0,  # no layers offloaded to a GPU (the UI title says CPU)
    n_threads=4,
    n_ctx=4096,
    model_path=model_path,
)
def _content_text(content):
    """Return the plain text carried by one history entry, or None.

    A string is returned unchanged. A list is treated as a sequence of
    content parts and the ``"text"`` values of its dict items are joined
    (assumes parts look like ``{"type": "text", "text": ...}`` — TODO
    confirm against the installed Gradio version). Anything else, including
    ``None``, has no usable text.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        texts = [
            part["text"]
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        ]
        return "".join(texts) or None
    return None


def _iter_turns(history):
    """Yield ``(role, content)`` for each turn in either history layout."""
    for entry in history or []:
        if isinstance(entry, dict):
            # Role/content layout: one dict per message.
            yield entry.get("role"), entry.get("content")
        else:
            # Pair layout: (user_text, assistant_text) per exchange.
            user_text, assistant_text = entry
            yield "user", user_text
            yield "assistant", assistant_text


def _build_prompt(message, history):
    """Render the past turns plus the new message as one prompt string."""
    segments = []
    for role, content in _iter_turns(history):
        text = _content_text(content)
        # Skip turns without text (e.g. a reply that is still None) instead
        # of feeding the literal string "None" to the model.
        if role in ("user", "assistant") and text is not None:
            segments.append(f"<|{role}|>{text}\n")
    # Leave the prompt open on the assistant marker so the model continues
    # from there.
    segments.append(f"<|user|>{message}\n<|assistant|>")
    return "".join(segments)


def respond(message, history):
    """Generate the assistant's reply to ``message`` given prior turns.

    Args:
        message: The text the user just submitted.
        history: Earlier turns, either as ``(user, assistant)`` pairs or as
            dicts with ``"role"`` and ``"content"`` keys. ``None`` or an
            empty sequence means there is no prior conversation.

    Returns:
        The generated text with surrounding whitespace removed.
    """
    prompt = _build_prompt(message, history)
    result = model(
        prompt,
        max_tokens=512,
        temperature=0.7,
        top_p=0.95,
        # Stop as soon as the model starts writing the next user turn.
        stop=["<|user|>"],
    )
    return result["choices"][0]["text"].strip()
# Chat UI that routes every submitted message through respond().
demo = gr.ChatInterface(
    fn=respond,
    title="DeepHat 7B - CPU GGUF Chatbot",
)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()
|