Upload 3 files
Browse files
- app.py +16 -18
- download_models.py +15 -10
- install.sh +8 -1
app.py
CHANGED
@@ -1,21 +1,26 @@
 import gradio as gr
 import subprocess
 from pathlib import Path
+import os
+from download_models import MODEL_DIR, MODELS, hf_hub_download
 
-#
-
-
-
-    "Q4_K_M (Alta)": MODEL_DIR / "Llama-3.2-1B-Instruct-Q4_K_M.gguf",
-}
+# Compile llama.cpp if needed
+if not Path("./llama.cpp/main").exists():
+    print("Binário do llama.cpp não encontrado, compilando...")
+    subprocess.run(["bash", "./install.sh"], check=True)
 
-#
+# Make sure the models exist
+for key, file_name in MODELS.items():
+    file_path = MODEL_DIR / file_name
+    if not file_path.exists():
+        print(f"Baixando {file_name}...")
+        from download_models import hf_hub_download, REPO_ID
+        hf_hub_download(repo_id=REPO_ID, filename=file_name, local_dir=MODEL_DIR, force_filename=file_name)
+
+# Global chat history per session
 history_dict = {}
 
 def run_llama(prompt, model_path, max_tokens, temperature):
-    """
-    Runs llama.cpp locally and returns the output
-    """
     cmd = [
         "./llama.cpp/main",
         "-m", str(model_path),
@@ -25,27 +30,21 @@ def run_llama(prompt, model_path, max_tokens, temperature):
         "-temp", str(temperature),
         "--color", "false",
     ]
-
     result = subprocess.run(cmd, capture_output=True, text=True)
     return result.stdout.strip()
 
 def respond(user_message, model_choice, max_tokens, temperature, session_id="default"):
-    """
-    Function called by Gradio, keeps the history
-    """
     global history_dict
     if session_id not in history_dict:
         history_dict[session_id] = []
 
     history = history_dict[session_id]
-    # Concatenate the history into
+    # Concatenate the history into a prompt
     prompt = "\n".join([f"User: {m['user']}\nAI: {m['ai']}" for m in history])
     prompt += f"\nUser: {user_message}\nAI:"
 
-    # Run llama.cpp
     response = run_llama(prompt, MODELS[model_choice], max_tokens, temperature)
 
-    # Update the history
     history.append({"user": user_message, "ai": response})
     return response
 
@@ -58,7 +57,6 @@ with gr.Blocks() as demo:
     max_tokens_slider = gr.Slider(1, 2048, value=512, step=1, label="Max tokens")
     temperature_slider = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
     output_box = gr.Textbox(label="Resposta")
-
    btn = gr.Button("Enviar")
     btn.click(
         respond,
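Note: the diff viewer truncates the middle of the cmd list in run_llama, so the flags between "-m" and "-temp" are not shown above. For orientation only, here is a minimal, self-contained sketch of the same subprocess pattern; the flag names (-p, -n, --temp) follow the stock llama.cpp main/llama-cli interface and are assumptions, not necessarily what this Space passes.

# Hypothetical sketch of the run_llama subprocess call; flag names are assumptions
# based on the stock llama.cpp CLI, not taken from this commit.
import subprocess
from pathlib import Path

def run_llama_sketch(prompt: str, model_path: Path, max_tokens: int, temperature: float) -> str:
    cmd = [
        "./llama.cpp/main",          # CLI binary built by install.sh
        "-m", str(model_path),       # GGUF model file
        "-p", prompt,                # prompt text
        "-n", str(max_tokens),       # number of tokens to generate
        "--temp", str(temperature),  # sampling temperature
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    return result.stdout.strip()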
download_models.py
CHANGED
@@ -1,11 +1,12 @@
 from huggingface_hub import hf_hub_download
 from pathlib import Path
+import os
 
-#
+# Models folder
 MODEL_DIR = Path("./models")
 MODEL_DIR.mkdir(exist_ok=True)
 
-#
+# Model names and files
 MODELS = {
     "Q3_K_XL": "Llama-3.2-1B-Instruct-Q3_K_XL.gguf",
     "Q4_K_M": "Llama-3.2-1B-Instruct-Q4_K_M.gguf",
@@ -14,11 +15,15 @@ MODELS = {
 REPO_ID = "bartowski/Llama-3.2-1B-Instruct-GGUF"
 
 for key, file_name in MODELS.items():
-
-
-
-
-
-
-
-
+    file_path = MODEL_DIR / file_name
+    if not file_path.exists():
+        print(f"Baixando {file_name}...")
+        hf_hub_download(
+            repo_id=REPO_ID,
+            filename=file_name,
+            local_dir=MODEL_DIR,
+            force_filename=file_name
+        )
+        print(f"{file_name} salvo em {file_path}")
+    else:
+        print(f"{file_name} já existe, pulando download")
install.sh
CHANGED
@@ -1,7 +1,14 @@
 #!/bin/bash
+set -e
+
 echo "==== Compilando llama.cpp ===="
 cd llama.cpp || exit 1
+
+# Clean previous builds
 make clean
+
+# Build with 4 threads
 make -j4 || { echo "Erro ao compilar llama.cpp"; exit 1; }
+
 cd ..
-echo "==== Compilação concluída ===="
+echo "==== Compilação concluída ===="