albertoo85 commited on
Commit
8824600
verified
1 Parent(s): 4190685

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -15
app.py CHANGED
@@ -3,37 +3,37 @@ from llama_cpp import Llama
3
  from huggingface_hub import hf_hub_download
4
  import os
5
 
6
- # Configuración del modelo
7
- print("Descargando Qwen2.5-7B GGUF...")
8
  model_path = hf_hub_download(
9
  repo_id="Qwen/Qwen2.5-7B-Instruct-GGUF",
10
  filename="qwen2.5-7b-instruct-q4_k_m.gguf"
11
  )
12
 
13
- # Cargamos con n_ctx reducido para ahorrar RAM en el Space gratuito
14
- print("Cargando modelo...")
15
  llm = Llama(
16
  model_path=model_path,
17
- n_ctx=1024, # Reducido de 2048 para mayor estabilidad
18
- n_threads=4, # Aprovecha mejor la CPU del Space
19
  n_batch=512
20
  )
21
 
22
- def predict(message, system_prompt="Responde en español."):
23
  prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
24
- output = llm(prompt, max_tokens=512, stop=["<|im_end|>"], echo=False)
25
  return output['choices'][0]['text']
26
 
27
  with gr.Blocks() as demo:
28
- gr.Markdown("# Qwen 2.5 API Node")
29
  with gr.Row():
30
- input_t = gr.Textbox(label="Input")
31
- sys_t = gr.Textbox(label="System Prompt", value="Responde en español de forma concisa.")
32
- output_t = gr.Textbox(label="Output")
33
- btn = gr.Button("Enviar")
34
 
35
- # Nombre de la API para tu VM externa
36
- btn.click(predict, [input_t, sys_t], output_t, api_name="query")
37
 
38
  if __name__ == "__main__":
39
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
3
  from huggingface_hub import hf_hub_download
4
  import os
5
 
6
+ # Configuraci贸n del modelo Qwen 2.5 7B
7
+ print("Descargando modelo GGUF...")
8
  model_path = hf_hub_download(
9
  repo_id="Qwen/Qwen2.5-7B-Instruct-GGUF",
10
  filename="qwen2.5-7b-instruct-q4_k_m.gguf"
11
  )
12
 
13
+ # Carga optimizada
14
+ print("Cargando modelo en Llama-cpp...")
15
  llm = Llama(
16
  model_path=model_path,
17
+ n_ctx=2048,
18
+ n_threads=4, # Los Spaces de Docker suelen tener 4 cores
19
  n_batch=512
20
  )
21
 
22
def predict(message, system_prompt="Responde en español de forma clara."):
    """Run one chat completion through the local Qwen model.

    Args:
        message: User message text.
        system_prompt: System instruction prepended in ChatML format.
            Default repaired from mojibake ("espa帽ol" -> "español").

    Returns:
        str: The generated assistant text only (prompt is not echoed).
    """
    # Build a ChatML prompt in the format Qwen2.5-Instruct expects.
    prompt = (
        f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
        f"<|im_start|>user\n{message}<|im_end|>\n"
        f"<|im_start|>assistant\n"
    )
    # Stop at the end-of-turn marker; echo=False returns only the completion.
    output = llm(prompt, max_tokens=1024, stop=["<|im_end|>"], echo=False)
    return output['choices'][0]['text']
26
 
27
with gr.Blocks() as demo:
    gr.Markdown("# Nodo API Qwen 2.5 (Estable)")
    with gr.Row():
        # Renamed locals: the original `sys` shadowed the stdlib module name.
        # Mojibake in the default value repaired ("espa帽ol" -> "español").
        msg_box = gr.Textbox(label="Input")
        sys_box = gr.Textbox(label="System Prompt", value="Responde en español.")
        out_box = gr.Textbox(label="Output")
    btn = gr.Button("Generar")

    # Named endpoint so an external VM can invoke this via the Gradio API
    # (e.g. gradio_client calling api_name="query").
    btn.click(predict, [msg_box, sys_box], out_box, api_name="query")

if __name__ == "__main__":
    # Bind to all interfaces on the standard HF Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860)