Update app.py
app.py CHANGED
@@ -3,36 +3,35 @@ from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 import os
 
-#
-
-
-
-
-)
+# MODEL CONFIGURATION - we choose the 3B model for total stability
+REPO_ID = "Qwen/Qwen2.5-3B-Instruct-GGUF"
+FILENAME = "qwen2.5-3b-instruct-q5_k_m.gguf"
+
+print(f"Descargando {FILENAME}...")
+model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
 
-
-print("Cargando modelo en Llama-cpp...")
+print("Cargando modelo...")
 llm = Llama(
     model_path=model_path,
-    n_ctx=
-    n_threads=4,
+    n_ctx=4096,   # now we can afford more context
+    n_threads=4,  # takes advantage of the Space's CPU
     n_batch=512
 )
 
-def predict(message, system_prompt="Responde en español
+def predict(message, system_prompt="Responde en español."):
+    # ChatML format for Qwen
     prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
     output = llm(prompt, max_tokens=1024, stop=["<|im_end|>"], echo=False)
     return output['choices'][0]['text']
 
 with gr.Blocks() as demo:
-    gr.Markdown("# Nodo API
+    gr.Markdown(f"# Nodo API estable: {REPO_ID}")
     with gr.Row():
         msg = gr.Textbox(label="Input")
-        sys = gr.Textbox(label="System Prompt", value="
+        sys = gr.Textbox(label="System Prompt", value="Eres un asistente útil.")
         out = gr.Textbox(label="Output")
     btn = gr.Button("Generar")
 
-    # API name for your external VM
     btn.click(predict, [msg, sys], out, api_name="query")
 
 if __name__ == "__main__":
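
Note: predict builds the ChatML prompt by hand. The official Qwen GGUFs embed a chat template, so llama-cpp-python can assemble the same format itself via its chat API. A minimal alternative sketch, not what this commit does:

# Alternative sketch: let llama-cpp-python apply the model's own ChatML
# template instead of hand-assembling the <|im_start|> markers.
def predict_chat(message, system_prompt="Responde en español."):
    output = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": message},
        ],
        max_tokens=1024,
    )
    return output["choices"][0]["message"]["content"]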
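
Because btn.click registers the handler with api_name="query", the Space exposes a named endpoint that the external VM can call with the gradio_client package. A minimal sketch, where "your-user/your-space" is a placeholder Space id to substitute with the real one:

from gradio_client import Client

# "your-user/your-space" is a hypothetical Space id, not the real one.
client = Client("your-user/your-space")
result = client.predict(
    "Hola, ¿quién eres?",       # msg -> predict(message, ...)
    "Eres un asistente útil.",  # sys -> predict(..., system_prompt)
    api_name="/query",          # the endpoint named in btn.click(...)
)
print(result)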