albertoo85 commited on
Commit
40139ea
verified
1 Parent(s): 8824600

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -14
app.py CHANGED
@@ -3,36 +3,35 @@ from llama_cpp import Llama
3
  from huggingface_hub import hf_hub_download
4
  import os
5
 
6
- # Configuración del modelo Qwen 2.5 7B
7
- print("Descargando modelo GGUF...")
8
- model_path = hf_hub_download(
9
- repo_id="Qwen/Qwen2.5-7B-Instruct-GGUF",
10
- filename="qwen2.5-7b-instruct-q4_k_m.gguf"
11
- )
12
 
13
- # Carga optimizada
14
- print("Cargando modelo en Llama-cpp...")
15
  llm = Llama(
16
  model_path=model_path,
17
- n_ctx=2048,
18
- n_threads=4, # Los Spaces de Docker suelen tener 4 cores
19
  n_batch=512
20
  )
21
 
22
- def predict(message, system_prompt="Responde en español de forma clara."):
 
23
  prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
24
  output = llm(prompt, max_tokens=1024, stop=["<|im_end|>"], echo=False)
25
  return output['choices'][0]['text']
26
 
27
  with gr.Blocks() as demo:
28
- gr.Markdown("# Nodo API Qwen 2.5 (Estable)")
29
  with gr.Row():
30
  msg = gr.Textbox(label="Input")
31
- sys = gr.Textbox(label="System Prompt", value="Responde en español.")
32
  out = gr.Textbox(label="Output")
33
  btn = gr.Button("Generar")
34
 
35
- # API name para tu VM externa
36
  btn.click(predict, [msg, sys], out, api_name="query")
37
 
38
  if __name__ == "__main__":
 
3
  from huggingface_hub import hf_hub_download
4
  import os
5
 
6
+ # CONFIGURACIÓN DEL MODELO - Elegimos el 3B para estabilidad total
7
+ REPO_ID = "Qwen/Qwen2.5-3B-Instruct-GGUF"
8
+ FILENAME = "qwen2.5-3b-instruct-q5_k_m.gguf"
9
+
10
+ print(f"Descargando {FILENAME}...")
11
+ model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
12
 
13
+ print("Cargando modelo...")
 
14
  llm = Llama(
15
  model_path=model_path,
16
+ n_ctx=4096, # Ahora podemos permitirnos más contexto
17
+ n_threads=4, # Aprovecha la CPU del Space
18
  n_batch=512
19
  )
20
 
21
+ def predict(message, system_prompt="Responde en español."):
22
+ # Formato ChatML para Qwen
23
  prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
24
  output = llm(prompt, max_tokens=1024, stop=["<|im_end|>"], echo=False)
25
  return output['choices'][0]['text']
26
 
27
  with gr.Blocks() as demo:
28
+ gr.Markdown(f"# Nodo API estable: {REPO_ID}")
29
  with gr.Row():
30
  msg = gr.Textbox(label="Input")
31
+ sys = gr.Textbox(label="System Prompt", value="Eres un asistente útil.")
32
  out = gr.Textbox(label="Output")
33
  btn = gr.Button("Generar")
34
 
 
35
  btn.click(predict, [msg, sys], out, api_name="query")
36
 
37
  if __name__ == "__main__":