albertoo85 commited on
Commit
40139ea
verified
1 Parent(s): 8824600

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -14
app.py CHANGED
@@ -3,36 +3,35 @@ from llama_cpp import Llama
3
  from huggingface_hub import hf_hub_download
4
  import os
5
 
6
- # Configuración del modelo Qwen 2.5 7B
7
- print("Descargando modelo GGUF...")
8
- model_path = hf_hub_download(
9
- repo_id="Qwen/Qwen2.5-7B-Instruct-GGUF",
10
- filename="qwen2.5-7b-instruct-q4_k_m.gguf"
11
- )
12
 
13
- # Carga optimizada
14
- print("Cargando modelo en Llama-cpp...")
15
  llm = Llama(
16
  model_path=model_path,
17
- n_ctx=2048,
18
- n_threads=4, # Los Spaces de Docker suelen tener 4 cores
19
  n_batch=512
20
  )
21
 
22
- def predict(message, system_prompt="Responde en español de forma clara."):
 
23
  prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
24
  output = llm(prompt, max_tokens=1024, stop=["<|im_end|>"], echo=False)
25
  return output['choices'][0]['text']
26
 
27
  with gr.Blocks() as demo:
28
- gr.Markdown("# Nodo API Qwen 2.5 (Estable)")
29
  with gr.Row():
30
  msg = gr.Textbox(label="Input")
31
- sys = gr.Textbox(label="System Prompt", value="Responde en español.")
32
  out = gr.Textbox(label="Output")
33
  btn = gr.Button("Generar")
34
 
35
- # API name para tu VM externa
36
  btn.click(predict, [msg, sys], out, api_name="query")
37
 
38
  if __name__ == "__main__":
 
3
  from huggingface_hub import hf_hub_download
4
  import os
5
 
6
+ # CONFIGURACIÓN DEL MODELO - Elegimos el 3B para estabilidad total
7
+ REPO_ID = "Qwen/Qwen2.5-3B-Instruct-GGUF"
8
+ FILENAME = "qwen2.5-3b-instruct-q5_k_m.gguf"
9
+
10
+ print(f"Descargando {FILENAME}...")
11
+ model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
12
 
13
+ print("Cargando modelo...")
 
14
  llm = Llama(
15
  model_path=model_path,
16
+ n_ctx=4096, # Ahora podemos permitirnos más contexto
17
+ n_threads=4, # Aprovecha la CPU del Space
18
  n_batch=512
19
  )
20
 
21
+ def predict(message, system_prompt="Responde en español."):
22
+ # Formato ChatML para Qwen
23
  prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
24
  output = llm(prompt, max_tokens=1024, stop=["<|im_end|>"], echo=False)
25
  return output['choices'][0]['text']
26
 
27
  with gr.Blocks() as demo:
28
+ gr.Markdown(f"# Nodo API estable: {REPO_ID}")
29
  with gr.Row():
30
  msg = gr.Textbox(label="Input")
31
+ sys = gr.Textbox(label="System Prompt", value="Eres un asistente útil.")
32
  out = gr.Textbox(label="Output")
33
  btn = gr.Button("Generar")
34
 
 
35
  btn.click(predict, [msg, sys], out, api_name="query")
36
 
37
  if __name__ == "__main__":