Ryan-PC committed on
Commit
9397cee
·
verified ·
1 Parent(s): b216acf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -12
app.py CHANGED
@@ -1,22 +1,38 @@
1
  import gradio as gr
2
  from llama_cpp import Llama
 
3
  import os
4
 
5
- # Baixe o GGUF e suba pro Space (via Files > Upload files)
6
- MODEL_PATH = "DeepHat-V1-7B.Q4_K_M.gguf" # Coloque o arquivo aqui
 
 
7
 
8
- # Carregue o modelo (ajuste n_ctx pra contexto, n_threads pra CPU cores)
9
- llm = Llama(
10
- model_path=MODEL_PATH,
11
- n_ctx=2048, # Contexto pra prompts longos (ex.: tutoriais hacking)
12
- n_threads=4, # Use mais se sua máquina tiver
13
- verbose=False
14
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  def generate_response(prompt, max_tokens=500):
17
- # Prompt template pro DeepHat (ajuste se precisar)
18
  full_prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
19
-
20
  output = llm(
21
  full_prompt,
22
  max_tokens=max_tokens,
@@ -26,7 +42,7 @@ def generate_response(prompt, max_tokens=500):
26
  )
27
  return output['choices'][0]['text'].strip()
28
 
29
- # Interface Gradio simples pra chat
30
  with gr.Blocks(title="DeepHat Uncensored Chat") as demo:
31
  gr.Markdown("# DeepHat - IA Uncensored pra Cibersegurança & Hacking Ético")
32
  chatbot = gr.Chatbot()
 
1
  import gradio as gr
2
  from llama_cpp import Llama
3
+ from huggingface_hub import snapshot_download
4
  import os
5
 
6
+ # Baixe o GGUF direto do Hub no runtime (pula limite de 1GB upload)
7
+ MODEL_REPO = "mradermacher/DeepHat-V1-7B-GGUF"
8
+ MODEL_FILE = "DeepHat-V1-7B.Q4_K_M.gguf" # ~4.8GB, baixa uma vez e cacheia
9
+ LOCAL_PATH = "./models/" # Pasta local no Space
10
 
11
+ # Função pra carregar modelo (roda na init)
12
def load_model():
    """Download the GGUF model file from the Hugging Face Hub and load it.

    Fetches the single file ``MODEL_FILE`` from the ``MODEL_REPO`` repository
    into ``LOCAL_PATH`` (created if missing) and builds a ``Llama`` instance
    from it.

    Returns:
        The ``Llama`` object constructed with ``n_ctx=2048``, ``n_threads=4``
        and ``verbose=False``.

    Raises:
        Any error raised by ``hf_hub_download`` or the ``Llama`` constructor
        is propagated unchanged; nothing is caught here.
    """
    # Imported locally because the file's top-level import only brings in
    # snapshot_download, which is the wrong tool for fetching one file.
    from huggingface_hub import hf_hub_download

    os.makedirs(LOCAL_PATH, exist_ok=True)

    # snapshot_download() has no `filename` parameter (it filters with
    # allow_patterns), so the previous call failed with TypeError.
    # hf_hub_download() fetches exactly one file and returns the path to
    # that file, so no further os.path.join() with MODEL_FILE is needed.
    model_path = hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE,
        local_dir=LOCAL_PATH,
        # Kept from the original call, which passed it to avoid broken links.
        # NOTE(review): deprecated in recent huggingface_hub releases;
        # confirm against the installed version.
        local_dir_use_symlinks=False,
    )

    return Llama(
        model_path=model_path,
        n_ctx=2048,
        n_threads=4,
        verbose=False,
    )
29
+
30
+ # Carregue na init (leva ~5-10 min na primeira build, depois cache)
31
+ print("Baixando DeepHat... (pode demorar na CPU)")
32
+ llm = load_model()
33
 
34
  def generate_response(prompt, max_tokens=500):
 
35
  full_prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
 
36
  output = llm(
37
  full_prompt,
38
  max_tokens=max_tokens,
 
42
  )
43
  return output['choices'][0]['text'].strip()
44
 
45
+ # Interface Gradio
46
  with gr.Blocks(title="DeepHat Uncensored Chat") as demo:
47
  gr.Markdown("# DeepHat - IA Uncensored pra Cibersegurança & Hacking Ético")
48
  chatbot = gr.Chatbot()