File size: 970 Bytes
25a6622
724aa4e
4b637a7
 
3609661
 
724aa4e
3609661
724aa4e
 
 
 
 
 
4b637a7
724aa4e
4b637a7
3609661
 
724aa4e
4b637a7
25a6622
4b637a7
 
 
724aa4e
 
25a6622
3609661
4b637a7
 
 
 
3609661
4b637a7
25a6622
3609661
25a6622
724aa4e
 
 
 
25a6622
 
4b637a7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Repository and GGUF file name (the file that actually exists in that repo).
REPO_ID = "mradermacher/DeepHat-V1-7B-GGUF"
FILENAME = "DeepHat-V1-7B-Q4_K.gguf"

# Download the weights from Hugging Face automatically, passing the current
# directory as the local target directory.
model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, local_dir=".")

# Options handed to llama.cpp when the model is loaded.
_LLAMA_OPTIONS = {
    "model_path": model_path,
    "n_ctx": 4096,        # context size requested for the model
    "n_threads": 4,       # thread count requested for inference
    "n_gpu_layers": 0,    # zero layers requested on GPU (the UI title says CPU)
    "verbose": False,
}
model = Llama(**_LLAMA_OPTIONS)

def _text_of(content):
    """Return the plain text carried by one history entry, or None if it has none.

    Strings are returned unchanged. A dict contributes its ``"text"`` value
    when that is a string (anything else, such as a file part, yields None).
    A list is treated as a sequence of parts whose texts are concatenated.
    Any other non-None value is converted with ``str`` so it renders the same
    way an f-string would.
    """
    if content is None:
        return None
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        text = content.get("text")
        return text if isinstance(text, str) else None
    if isinstance(content, list):
        pieces = [piece for piece in map(_text_of, content) if piece is not None]
        return "".join(pieces) if pieces else None
    return str(content)


def respond(message, history):
    """Generate the model's reply to *message*, given the previous chat turns.

    Args:
        message: The latest user message.
        history: Previous turns of the conversation. Two layouts are accepted:
            an iterable of ``(user, assistant)`` pairs, or an iterable of
            ``{"role": ..., "content": ...}`` dicts (which layout Gradio sends
            depends on its version/configuration). ``None`` or an empty
            history means a fresh conversation.

    Returns:
        The generated completion text with surrounding whitespace stripped.
    """
    # NOTE(review): the "<|user|>" / "<|assistant|>" markers are kept exactly as
    # they were; confirm they match the chat template this model expects.
    # NOTE(review): history is never truncated, so a long conversation may grow
    # past the context size the model was loaded with -- confirm desired handling.
    segments = []
    for turn in history or ():
        if isinstance(turn, dict):
            # Message-style entry: one role and one content per item.
            entries = [(turn.get("role"), turn.get("content"))]
        else:
            # Pair-style entry: (user text, assistant text).
            user, assistant = turn
            entries = [("user", user), ("assistant", assistant)]

        for role, content in entries:
            text = _text_of(content)
            # Skip roles the prompt format has no marker for, and sides with no
            # text (e.g. a None half of a pair), instead of rendering "None".
            if role in ("user", "assistant") and text is not None:
                segments.append(f"<|{role}|>{text}\n")

    # The new message goes last, followed by an open assistant marker for the
    # model to complete.
    segments.append(f"<|user|>{message}\n<|assistant|>")
    prompt = "".join(segments)

    result = model(
        prompt,
        max_tokens=512,
        temperature=0.7,
        top_p=0.95,
        # Stop as soon as the model starts writing the next user turn itself.
        stop=["<|user|>"],
    )

    return result["choices"][0]["text"].strip()

# Chat UI whose handler for each submitted message is respond().
demo = gr.ChatInterface(fn=respond, title="DeepHat 7B - CPU GGUF Chatbot")

# Launch the interface only when this file is run directly as a script.
if __name__ == "__main__":
    demo.launch()