Upload 3 files
Browse files
- app.py +16 -18
- download_models.py +15 -10
- install.sh +8 -1
app.py
CHANGED
@@ -1,21 +1,26 @@
 import gradio as gr
 import subprocess
 from pathlib import Path
+import os
+from download_models import MODEL_DIR, MODELS, hf_hub_download
 
-#
-
-
-
-    "Q4_K_M (Alta)": MODEL_DIR / "Llama-3.2-1B-Instruct-Q4_K_M.gguf",
-}
+# Compile llama.cpp if needed
+if not Path("./llama.cpp/main").exists():
+    print("Binário do llama.cpp não encontrado, compilando...")
+    subprocess.run(["bash", "./install.sh"], check=True)
 
-#
+# Make sure the models exist
+for key, file_name in MODELS.items():
+    file_path = MODEL_DIR / file_name
+    if not file_path.exists():
+        print(f"Baixando {file_name}...")
+        from download_models import hf_hub_download, REPO_ID
+        hf_hub_download(repo_id=REPO_ID, filename=file_name, local_dir=MODEL_DIR, force_filename=file_name)
+
+# Global chat history per session
 history_dict = {}
 
 def run_llama(prompt, model_path, max_tokens, temperature):
-    """
-    Runs llama.cpp locally and returns the output
-    """
     cmd = [
         "./llama.cpp/main",
         "-m", str(model_path),
@@ -25,27 +30,21 @@ def run_llama(prompt, model_path, max_tokens, temperature):
         "-temp", str(temperature),
         "--color", "false",
     ]
-
     result = subprocess.run(cmd, capture_output=True, text=True)
     return result.stdout.strip()
 
 def respond(user_message, model_choice, max_tokens, temperature, session_id="default"):
-    """
-    Function called by Gradio, keeps the history
-    """
     global history_dict
     if session_id not in history_dict:
         history_dict[session_id] = []
 
     history = history_dict[session_id]
-    # Concatenate the history into
+    # Concatenate the history into a prompt
     prompt = "\n".join([f"User: {m['user']}\nAI: {m['ai']}" for m in history])
     prompt += f"\nUser: {user_message}\nAI:"
 
-    # Run llama.cpp
     response = run_llama(prompt, MODELS[model_choice], max_tokens, temperature)
 
-    # Update the history
     history.append({"user": user_message, "ai": response})
     return response
 
@@ -58,7 +57,6 @@ with gr.Blocks() as demo:
     max_tokens_slider = gr.Slider(1, 2048, value=512, step=1, label="Max tokens")
     temperature_slider = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
     output_box = gr.Textbox(label="Resposta")
-
    btn = gr.Button("Enviar")
     btn.click(
         respond,
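Note: the diff viewer truncates the middle of the cmd list in run_llama, so the flags between "-m" and "-temp" are not shown above. For orientation only, here is a minimal, self-contained sketch of the same subprocess pattern; the flag names (-p, -n, --temp) follow the stock llama.cpp main/llama-cli interface and are assumptions, not necessarily what this Space passes.

# Hypothetical sketch of the run_llama subprocess call; flag names are assumptions
# based on the stock llama.cpp CLI, not taken from this commit.
import subprocess
from pathlib import Path

def run_llama_sketch(prompt: str, model_path: Path, max_tokens: int, temperature: float) -> str:
    cmd = [
        "./llama.cpp/main",          # CLI binary built by install.sh
        "-m", str(model_path),       # GGUF model file
        "-p", prompt,                # prompt text
        "-n", str(max_tokens),       # number of tokens to generate
        "--temp", str(temperature),  # sampling temperature
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    return result.stdout.strip()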
download_models.py
CHANGED
@@ -1,11 +1,12 @@
 from huggingface_hub import hf_hub_download
 from pathlib import Path
+import os
 
-#
+# Models folder
 MODEL_DIR = Path("./models")
 MODEL_DIR.mkdir(exist_ok=True)
 
-#
+# Model names and files
 MODELS = {
     "Q3_K_XL": "Llama-3.2-1B-Instruct-Q3_K_XL.gguf",
     "Q4_K_M": "Llama-3.2-1B-Instruct-Q4_K_M.gguf",
@@ -14,11 +15,15 @@ MODELS = {
 REPO_ID = "bartowski/Llama-3.2-1B-Instruct-GGUF"
 
 for key, file_name in MODELS.items():
-
-
-
-
-
-
-
-
+    file_path = MODEL_DIR / file_name
+    if not file_path.exists():
+        print(f"Baixando {file_name}...")
+        hf_hub_download(
+            repo_id=REPO_ID,
+            filename=file_name,
+            local_dir=MODEL_DIR,
+            force_filename=file_name
+        )
+        print(f"{file_name} salvo em {file_path}")
+    else:
+        print(f"{file_name} já existe, pulando download")
install.sh
CHANGED
@@ -1,7 +1,14 @@
 #!/bin/bash
+set -e
+
 echo "==== Compilando llama.cpp ===="
 cd llama.cpp || exit 1
+
+# Clean previous builds
 make clean
+
+# Build with 4 threads
 make -j4 || { echo "Erro ao compilar llama.cpp"; exit 1; }
+
 cd ..
-echo "==== Compilação concluída ===="
+echo "==== Compilação concluída ===="