Ryan-PC committed on
Commit
724aa4e
verified
1 Parent(s): b9c6a12

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -11
app.py CHANGED
@@ -1,34 +1,49 @@
1
  import gradio as gr
 
2
  from llama_cpp import Llama
3
 
4
- # carregue o GGUF local (use o nome correto no Space)
 
 
 
 
 
 
 
 
 
 
5
  model = Llama(
6
- model_path="DeepHat-V1-7B-Q4_K.gguf",
7
  n_ctx=4096,
8
- n_threads=4, # Ajuste conforme CPU do Space
9
- n_gpu_layers=0, # Spaces CPU gratuito não tem GPU
 
10
  )
11
 
12
  def respond(message, history):
 
13
  prompt = ""
14
-
15
- # Constrói prompt estilo chat
16
  for user, assistant in history:
17
- prompt += f"<|user|>{user}<|assistant|>{assistant}"
18
- prompt += f"<|user|>{message}<|assistant|>"
19
 
 
20
  output = model(
21
  prompt,
22
  max_tokens=512,
23
  temperature=0.7,
24
  top_p=0.95,
25
- stop=["<|user|>"]
26
  )
27
 
28
  text = output["choices"][0]["text"]
29
- return text
30
 
31
- demo = gr.ChatInterface(fn=respond)
 
 
 
32
 
33
  if __name__ == "__main__":
34
  demo.launch()
 
1
  import gradio as gr
2
+ from huggingface_hub import hf_hub_download
3
  from llama_cpp import Llama
4
 
5
+ # Nome do arquivo GGUF recomendado
6
+ FILENAME = "DeepHat-V1-7B-Q4_K_M.gguf"
7
+
8
+ # Baixa automaticamente o GGUF do Hugging Face
9
+ model_path = hf_hub_download(
10
+ repo_id="mradermacher/DeepHat-V1-7B-GGUF",
11
+ filename=FILENAME,
12
+ local_dir=".",
13
+ )
14
+
15
+ # Carrega o modelo com par芒metros ideais para CPU fraca do Spaces
16
  model = Llama(
17
+ model_path=model_path,
18
  n_ctx=4096,
19
+ n_threads=4, # Pode ajustar para 2 ou 3 se ficar lento
20
+ n_gpu_layers=0, # Space gr谩tis N脙O tem GPU
21
+ verbose=False,
22
  )
23
 
24
def respond(message, history):
    """Generate a reply from the local GGUF model for one Gradio chat turn.

    Args:
        message: The latest user message.
        history: Previous turns as (user, assistant) pairs.

    Returns:
        The model's reply text with surrounding whitespace stripped.
    """
    # Rebuild the conversation as a simple chat-style prompt.
    turns = []
    for past_user, past_assistant in history:
        turns.append(f"<|user|>{past_user}\n<|assistant|>{past_assistant}\n")
    turns.append(f"<|user|>{message}\n<|assistant|>")
    prompt = "".join(turns)

    # Run generation; stop before the model invents another user turn.
    completion = model(
        prompt,
        max_tokens=512,
        temperature=0.7,
        top_p=0.95,
        stop=["<|user|>"],  # keeps the model from continuing the dialogue itself
    )

    return completion["choices"][0]["text"].strip()
42
 
43
# Wire the chat function into a Gradio chat UI.
demo = gr.ChatInterface(
    respond,
    title="DeepHat 7B - CPU GGUF Chatbot",
)

if __name__ == "__main__":
    demo.launch()