Ryan-PC committed on
Commit
724aa4e
verified
1 Parent(s): b9c6a12

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -11
app.py CHANGED
@@ -1,34 +1,49 @@
1
  import gradio as gr
 
2
  from llama_cpp import Llama
3
 
4
- # carregue o GGUF local (use o nome correto no Space)
 
 
 
 
 
 
 
 
 
 
5
  model = Llama(
6
- model_path="DeepHat-V1-7B-Q4_K.gguf",
7
  n_ctx=4096,
8
- n_threads=4, # Ajuste conforme CPU do Space
9
- n_gpu_layers=0, # Spaces CPU gratuito não tem GPU
 
10
  )
11
 
12
  def respond(message, history):
 
13
  prompt = ""
14
-
15
- # Constrói prompt estilo chat
16
  for user, assistant in history:
17
- prompt += f"<|user|>{user}<|assistant|>{assistant}"
18
- prompt += f"<|user|>{message}<|assistant|>"
19
 
 
20
  output = model(
21
  prompt,
22
  max_tokens=512,
23
  temperature=0.7,
24
  top_p=0.95,
25
- stop=["<|user|>"]
26
  )
27
 
28
  text = output["choices"][0]["text"]
29
- return text
30
 
31
- demo = gr.ChatInterface(fn=respond)
 
 
 
32
 
33
  if __name__ == "__main__":
34
  demo.launch()
 
1
  import gradio as gr
2
+ from huggingface_hub import hf_hub_download
3
  from llama_cpp import Llama
4
 
5
+ # Nome do arquivo GGUF recomendado
6
+ FILENAME = "DeepHat-V1-7B-Q4_K_M.gguf"
7
+
8
+ # Baixa automaticamente o GGUF do Hugging Face
9
+ model_path = hf_hub_download(
10
+ repo_id="mradermacher/DeepHat-V1-7B-GGUF",
11
+ filename=FILENAME,
12
+ local_dir=".",
13
+ )
14
+
15
+ # Carrega o modelo com par芒metros ideais para CPU fraca do Spaces
16
  model = Llama(
17
+ model_path=model_path,
18
  n_ctx=4096,
19
+ n_threads=4, # Pode ajustar para 2 ou 3 se ficar lento
20
+ n_gpu_layers=0, # Space gr谩tis N脙O tem GPU
21
+ verbose=False,
22
  )
23
 
24
def respond(message, history):
    """Generate a reply from the local GGUF model for one Gradio chat turn.

    Args:
        message: The latest user message.
        history: Previous turns as (user, assistant) pairs.

    Returns:
        The model's reply text with surrounding whitespace stripped.
    """
    # Rebuild the conversation as a simple chat-style prompt.
    turns = []
    for past_user, past_assistant in history:
        turns.append(f"<|user|>{past_user}\n<|assistant|>{past_assistant}\n")
    turns.append(f"<|user|>{message}\n<|assistant|>")
    prompt = "".join(turns)

    # Run generation; stop before the model invents another user turn.
    completion = model(
        prompt,
        max_tokens=512,
        temperature=0.7,
        top_p=0.95,
        stop=["<|user|>"],  # keeps the model from continuing the dialogue itself
    )

    return completion["choices"][0]["text"].strip()
42
 
43
# Wire the chat function into a Gradio chat UI.
demo = gr.ChatInterface(
    respond,
    title="DeepHat 7B - CPU GGUF Chatbot",
)

if __name__ == "__main__":
    demo.launch()