Spaces:

Ryan-PC
/

Deephat03

Runtime error

Ryan-PC committed on Nov 25, 2025

Commit

9397cee

verified ·

1 Parent(s): b216acf

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,22 +1,38 @@
 import gradio as gr
 from llama_cpp import Llama
 import os
-# Baixe o GGUF e suba pro Space (via Files > Upload files)
-MODEL_PATH = "DeepHat-V1-7B.Q4_K_M.gguf"  # Coloque o arquivo aqui
-# Carregue o modelo (ajuste n_ctx pra contexto, n_threads pra CPU cores)
-llm = Llama(
-    model_path=MODEL_PATH,
-    n_ctx=2048,  # Contexto pra prompts longos (ex.: tutoriais hacking)
-    n_threads=4,  # Use mais se sua máquina tiver
-    verbose=False
-)
 def generate_response(prompt, max_tokens=500):
-    # Prompt template pro DeepHat (ajuste se precisar)
     full_prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
     output = llm(
         full_prompt,
         max_tokens=max_tokens,
@@ -26,7 +42,7 @@ def generate_response(prompt, max_tokens=500):
     )
     return output['choices'][0]['text'].strip()
-# Interface Gradio simples pra chat
 with gr.Blocks(title="DeepHat Uncensored Chat") as demo:
     gr.Markdown("# DeepHat - IA Uncensored pra Cibersegurança & Hacking Ético")
     chatbot = gr.Chatbot()

 import gradio as gr
 from llama_cpp import Llama
+from huggingface_hub import snapshot_download
 import os
+# Baixe o GGUF direto do Hub no runtime (pula limite de 1GB upload)
+MODEL_REPO = "mradermacher/DeepHat-V1-7B-GGUF"
+MODEL_FILE = "DeepHat-V1-7B.Q4_K_M.gguf"  # ~4.8GB, baixa uma vez e cacheia
+LOCAL_PATH = "./models/"  # Pasta local no Space
+# Função pra carregar modelo (roda na init)
+def load_model():
+    os.makedirs(LOCAL_PATH, exist_ok=True)
+    model_path = snapshot_download(
+        repo_id=MODEL_REPO,
+        filename=MODEL_FILE,
+        local_dir=LOCAL_PATH,
+        local_dir_use_symlinks=False  # Evita links quebrados
+    )
+    full_path = os.path.join(model_path, MODEL_FILE)
+    llm = Llama(
+        model_path=full_path,
+        n_ctx=2048,
+        n_threads=4,
+        verbose=False
+    )
+    return llm
+# Carregue na init (leva ~5-10 min na primeira build, depois cache)
+print("Baixando DeepHat... (pode demorar na CPU)")
+llm = load_model()
 def generate_response(prompt, max_tokens=500):
     full_prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
     output = llm(
         full_prompt,
         max_tokens=max_tokens,
     )
     return output['choices'][0]['text'].strip()
+# Interface Gradio
 with gr.Blocks(title="DeepHat Uncensored Chat") as demo:
     gr.Markdown("# DeepHat - IA Uncensored pra Cibersegurança & Hacking Ético")
     chatbot = gr.Chatbot()