jpmendes committed on
Commit 6a814ba · verified · 1 Parent(s): a6c6912

Upload 3 files

Files changed (3)
  1. app.py +16 -18
  2. download_models.py +15 -10
  3. install.sh +8 -1
app.py CHANGED
@@ -1,21 +1,26 @@
 import gradio as gr
 import subprocess
 from pathlib import Path
+import os
+from download_models import MODEL_DIR, MODELS, hf_hub_download
 
-# Model paths
-MODEL_DIR = Path("./models")
-MODELS = {
-    "Q3_K_XL (Inferior)": MODEL_DIR / "Llama-3.2-1B-Instruct-Q3_K_XL.gguf",
-    "Q4_K_M (Alta)": MODEL_DIR / "Llama-3.2-1B-Instruct-Q4_K_M.gguf",
-}
+# Compile llama.cpp if needed
+if not Path("./llama.cpp/main").exists():
+    print("Binário do llama.cpp não encontrado, compilando...")
+    subprocess.run(["bash", "./install.sh"], check=True)
 
-# Global chat history
+# Make sure the models exist
+for key, file_name in MODELS.items():
+    file_path = MODEL_DIR / file_name
+    if not file_path.exists():
+        print(f"Baixando {file_name}...")
+        from download_models import hf_hub_download, REPO_ID
+        hf_hub_download(repo_id=REPO_ID, filename=file_name, local_dir=MODEL_DIR, force_filename=file_name)
+
+# Global chat history, per session
 history_dict = {}
 
 def run_llama(prompt, model_path, max_tokens, temperature):
-    """
-    Runs llama.cpp locally and returns its output
-    """
     cmd = [
         "./llama.cpp/main",
         "-m", str(model_path),
@@ -25,27 +30,21 @@ def run_llama(prompt, model_path, max_tokens, temperature):
         "-temp", str(temperature),
         "--color", "false",
     ]
-
     result = subprocess.run(cmd, capture_output=True, text=True)
     return result.stdout.strip()
 
 def respond(user_message, model_choice, max_tokens, temperature, session_id="default"):
-    """
-    Function called by Gradio; keeps the chat history
-    """
     global history_dict
     if session_id not in history_dict:
         history_dict[session_id] = []
 
     history = history_dict[session_id]
-    # Concatenate the history into a single prompt
+    # Concatenate the history into a prompt
    prompt = "\n".join([f"User: {m['user']}\nAI: {m['ai']}" for m in history])
    prompt += f"\nUser: {user_message}\nAI:"
 
-    # Run llama.cpp
     response = run_llama(prompt, MODELS[model_choice], max_tokens, temperature)
 
-    # Update the history
     history.append({"user": user_message, "ai": response})
     return response
 
@@ -58,7 +57,6 @@ with gr.Blocks() as demo:
     max_tokens_slider = gr.Slider(1, 2048, value=512, step=1, label="Max tokens")
     temperature_slider = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
     output_box = gr.Textbox(label="Resposta")
-
     btn = gr.Button("Enviar")
     btn.click(
         respond,
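
For reference, the prompt that respond() sends to llama.cpp is just the per-session history flattened into alternating User:/AI: lines, with the new user turn appended. The following standalone sketch (not part of the commit; the messages are made up for illustration) rebuilds exactly that string:

# prompt_format_demo.py - illustrative sketch of the prompt format used by respond()
history = [
    {"user": "Hi", "ai": "Hello! How can I help?"},
    {"user": "What is the capital of Brazil?", "ai": "Brasília."},
]
user_message = "And of Argentina?"

# Same formatting as in app.py: one User/AI pair per history entry,
# then the new user message with an open "AI:" for the model to complete.
prompt = "\n".join([f"User: {m['user']}\nAI: {m['ai']}" for m in history])
prompt += f"\nUser: {user_message}\nAI:"
print(prompt)
# Output:
# User: Hi
# AI: Hello! How can I help?
# User: What is the capital of Brazil?
# AI: Brasília.
# User: And of Argentina?
# AI:
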
download_models.py CHANGED
@@ -1,11 +1,12 @@
 from huggingface_hub import hf_hub_download
 from pathlib import Path
+import os
 
-# Create the models folder if it does not exist
+# Models folder
 MODEL_DIR = Path("./models")
 MODEL_DIR.mkdir(exist_ok=True)
 
-# Dictionary with the models and their corresponding GGUF files
+# Models and files
 MODELS = {
     "Q3_K_XL": "Llama-3.2-1B-Instruct-Q3_K_XL.gguf",
     "Q4_K_M": "Llama-3.2-1B-Instruct-Q4_K_M.gguf",
@@ -14,11 +15,15 @@ MODELS = {
 REPO_ID = "bartowski/Llama-3.2-1B-Instruct-GGUF"
 
 for key, file_name in MODELS.items():
-    print(f"Baixando {file_name}...")
-    path = hf_hub_download(
-        repo_id=REPO_ID,
-        filename=file_name,
-        local_dir=MODEL_DIR,
-        force_filename=file_name
-    )
-    print(f"{file_name} salvo em {path}")
+    file_path = MODEL_DIR / file_name
+    if not file_path.exists():
+        print(f"Baixando {file_name}...")
+        hf_hub_download(
+            repo_id=REPO_ID,
+            filename=file_name,
+            local_dir=MODEL_DIR,
+            force_filename=file_name
+        )
+        print(f"{file_name} salvo em {file_path}")
+    else:
+        print(f"{file_name} já existe, pulando download")
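
The updated script reduces to a check-then-download step per file. A minimal sketch of that pattern as a reusable helper is shown below; ensure_model() is a hypothetical name and not part of the commit, and it relies only on the standard repo_id/filename/local_dir parameters of hf_hub_download:

# ensure_model.py - sketch of the check-then-download pattern used in download_models.py
from pathlib import Path
from huggingface_hub import hf_hub_download

REPO_ID = "bartowski/Llama-3.2-1B-Instruct-GGUF"   # same repo as download_models.py
MODEL_DIR = Path("./models")

def ensure_model(file_name: str) -> Path:
    """Return the local path of file_name, downloading it only if it is missing."""
    MODEL_DIR.mkdir(exist_ok=True)
    file_path = MODEL_DIR / file_name
    if not file_path.exists():
        hf_hub_download(repo_id=REPO_ID, filename=file_name, local_dir=MODEL_DIR)
    return file_path

if __name__ == "__main__":
    print(ensure_model("Llama-3.2-1B-Instruct-Q4_K_M.gguf"))
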
install.sh CHANGED
@@ -1,7 +1,14 @@
 #!/bin/bash
+set -e
+
 echo "==== Compilando llama.cpp ===="
 cd llama.cpp || exit 1
+
+# Clean previous builds
 make clean
+
+# Compile with 4 threads
 make -j4 || { echo "Erro ao compilar llama.cpp"; exit 1; }
+
 cd ..
-echo "==== Compilação concluída ===="
+echo "==== Compilação concluída ===="
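
app.py now runs this script automatically whenever ./llama.cpp/main is missing. The same bootstrap can be exercised on its own as a quick sanity check after editing install.sh; the sketch below simply mirrors the check added to app.py and is not part of the commit:

# build_check.py - sketch of the bootstrap added to app.py: build llama.cpp via
# install.sh only when the binary is absent, then confirm it exists.
import subprocess
from pathlib import Path

BINARY = Path("./llama.cpp/main")

if not BINARY.exists():
    subprocess.run(["bash", "./install.sh"], check=True)  # check=True raises if the build fails

print("llama.cpp binary present:", BINARY.exists())
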