VeuReu committed on
Commit c306d02 · verified · 1 Parent(s): 2cdac6d

Upload 4 files

Files changed (3):
  1. README.md +12 -10
  2. app.py +171 -53
  3. clients/client_test.py +0 -0
README.md CHANGED
@@ -1,24 +1,26 @@
  ---
- title: veureu-schat
- emoji: 💬
- colorFrom: purple
- colorTo: red
  sdk: gradio
  sdk_version: "4.44.1"
  app_file: app.py
  pinned: false
  ---

- # 💬 veureu-schat (Salamandra-7B-Instruct · ZeroGPU)

  ## Endpoints
- - **`/api/predict`** (Gradio): input `["<prompt>"]` → output `"<text>"`.
  ➜ This is the endpoint used by the **engine** Space.
- - **`/api/generate`** (Gradio): input `[prompt, system, max_new_tokens, temperature, top_p]` → output `"<text>"`.

  ### Environment variables
- - `MODEL_ID` (optional): defaults to `BSC-LT/salamandra-7b-instruct`.

  ### Notes
- - The model uses `chat_template` when available; otherwise a classic prompt is composed with a `system` block.
- - GPU: enabled automatically via `@spaces.GPU` (ZeroGPU).

  ---
+ title: veureu-stools
+ emoji: 🛠️
+ colorFrom: yellow
+ colorTo: yellow
  sdk: gradio
  sdk_version: "4.44.1"
  app_file: app.py
  pinned: false
  ---

+ # 🛠️ veureu-stools (Salamandra-7B-Tools · ZeroGPU)

  ## Endpoints
+ - **`/api/predict`** (Gradio): input `[ "<messages_json>", "<tools_json>" ]` → output `{ "text": "...", "tool_calls": [...], "tool_results": [...] }`.
  ➜ This is the endpoint used by the **engine** Space.
+ - **`/api/chat`** (Gradio): input `[ "<messages_json>", "<tools_json>", max_new_tokens, temperature, top_p ]` → identical output (see the client sketch after the Notes).

  ### Environment variables
+ - `MODEL_ID` (optional): defaults to `BSC-LT/salamandra-7b-tools`.
+   You can point it at `BSC-LT/salamandra-7b-instruct` if you prefer.

  ### Notes
+ - The model does **not** execute real tools, apart from one **local example**: `calculator` (safe).
+   If the model returns `{"tool_calls":[...]}`, the Space will try to execute those calls in a sandbox and append `tool_results`.
+   You can disable execution by setting `EXECUTE_TOOLS=False` in `app.py`.
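
For quick testing, here is a minimal client sketch for `/predict`. It assumes `gradio_client` is installed; the `VeuReu/veureu-stools` Space id is illustrative, so adjust it to wherever the Space is actually hosted:

```python
# Minimal client sketch for /predict — the Space id below is an assumption.
import json
from gradio_client import Client

client = Client("VeuReu/veureu-stools")  # hypothetical Space id
messages = [{"role": "user", "content": "¿Cuánto es (2+2)^3?"}]
tools = [{"type": "function", "function": {
    "name": "calculator",
    "description": "Evalúa expresiones aritméticas básicas.",
    "parameters": {"type": "object",
                   "properties": {"expr": {"type": "string"}},
                   "required": ["expr"]}}}]

result = client.predict(json.dumps(messages), json.dumps(tools), api_name="/predict")
print(result)  # expected shape: {"text": "...", "tool_calls": [...], "tool_results": [...]}
```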
app.py CHANGED
@@ -1,19 +1,15 @@
- # app.py — veureu/schat (Salamandra 7B Instruct · ZeroGPU) — ENGINE-compatible
  from __future__ import annotations
- import os, json
- from typing import List, Dict, Optional, Tuple

  import gradio as gr
  import spaces
  import torch
- from transformers import (
-     AutoTokenizer,
-     AutoModelForCausalLM,
-     TextIteratorStreamer,
- )
-
- # ===== Config =====
- MODEL_ID = os.environ.get("MODEL_ID", "BSC-LT/salamandra-7b-instruct")
  DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

@@ -34,36 +30,121 @@ def _lazy_load() -> Tuple[AutoTokenizer, AutoModelForCausalLM]:
      ).to(DEVICE)
      return _tok, _model

- def _build_prompt(prompt: str, system: Optional[str]) -> str:
      """
-     If the tokenizer has a 'chat_template', use it with [system?, user] messages.
-     Otherwise, build a plain prompt with the system text on top.
      """
      tok, _ = _lazy_load()
-     messages = []
-     if system and system.strip():
-         messages.append({"role": "system", "content": system.strip()})
-     messages.append({"role": "user", "content": prompt})

      chat_template = getattr(tok, "chat_template", None)
      if chat_template:
-         return tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-     # Fallback without a chat template
-     sys_part = (f"<<SYS>>\n{system.strip()}\n<</SYS>>\n\n" if system and system.strip() else "")
-     return sys_part + f"### Instrucción\n{prompt}\n\n### Respuesta\n"

  @spaces.GPU  # uses GPU when available (ZeroGPU)
- def _generate(
-     prompt: str,
-     system: str = "",
      max_new_tokens: int = 512,
      temperature: float = 0.7,
      top_p: float = 0.95,
- ) -> str:
      tok, model = _lazy_load()
-     text = _build_prompt(prompt, system or "")
-     inputs = tok(text, return_tensors="pt").to(DEVICE)

      with torch.inference_mode():
          out = model.generate(
              **inputs,
@@ -74,40 +155,77 @@ def _generate(
              pad_token_id=tok.eos_token_id,
              eos_token_id=tok.eos_token_id,
          )
-     return tok.decode(out[0], skip_special_tokens=True).strip()

- # ------------------- Gradio Endpoints -------------------
- # 1) /predict — what the ENGINE expects (just 'prompt' → string)
- def predict_for_engine(prompt: str) -> str:
-     return _generate(prompt=prompt, system="", max_new_tokens=512, temperature=0.7, top_p=0.95)

- # 2) /generate — more controls (prompt + system + params)
- def generate_advanced(prompt: str, system: str, max_new_tokens: int, temperature: float, top_p: float) -> str:
-     return _generate(prompt=prompt, system=system, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)

- # ------------------- HTTP (optional, for pure clients) -------------------
- # If you want, you can add an HTTP POST /generate endpoint (FastAPI),
- # but the Gradio Client is enough for engine/local use.

- # ------------------- UI -------------------
- with gr.Blocks(title="Salamandra 7B Instruct · ZeroGPU") as demo:
-     gr.Markdown("## Salamandra-7B-Instruct · ZeroGPU\nTexto → respuesta instruccional.")
      with gr.Row():
-         with gr.Column(scale=1):
-             in_system = gr.Textbox(label="System (opcional)", value="")
-             in_prompt = gr.Textbox(label="Prompt", placeholder="Escribe tu instrucción…", lines=6)
              max_new = gr.Slider(16, 2048, value=512, step=16, label="max_new_tokens")
              temp = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="temperature")
-             top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.01, label="top_p")
              btn = gr.Button("Generar", variant="primary")
-         with gr.Column(scale=1):
-             out = gr.Textbox(label="Respuesta", lines=18)

-     btn.click(generate_advanced, [in_prompt, in_system, max_new, temp, top_p], out, api_name="generate")

-     # Minimal endpoint compatible with the ENGINE (/predict: prompt only)
-     in_prompt_engine = gr.Textbox(label="Prompt (ENGINE)", value="Di hola en una frase.")
-     out_engine = gr.Textbox(label="Respuesta (ENGINE)")
-     gr.Button("Probar /predict").click(predict_for_engine, [in_prompt_engine], out_engine, api_name="predict")

  demo.queue(default_concurrency_limit=1, max_size=16).launch()  # Gradio 4.x: 'concurrency_count' → 'default_concurrency_limit'

+ # app.py — veureu/stools (Salamandra 7B Tools · ZeroGPU) — ENGINE-compatible
  from __future__ import annotations
+ import os, json, re
+ from typing import List, Dict, Any, Optional, Tuple

  import gradio as gr
  import spaces
  import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+
+ # ================= Config =================
+ MODEL_ID = os.environ.get("MODEL_ID", "BSC-LT/salamandra-7b-tools")
  DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

      ).to(DEVICE)
      return _tok, _model

+
+ # =============== Helpers ===============
+
+ def _render_tools_md(tools: List[Dict[str, Any]]) -> str:
+     """Converts an OpenAI-style tools spec into a short markdown block for the prompt."""
+     if not tools:
+         return ""
+     lines = ["Herramientas disponibles (formato JSON):"]
+     for t in tools:
+         name = t.get("function", {}).get("name") or t.get("name") or "tool"
+         desc = t.get("function", {}).get("description") or t.get("description") or ""
+         params = t.get("function", {}).get("parameters") or t.get("parameters") or {}
+         lines.append(f"- **{name}**: {desc} | parámetros: {json.dumps(params)[:600]}")
+     return "\n".join(lines)
+
+ def _compose_chat_prompt(messages: List[Dict[str, str]], tools_md: str) -> str:
      """
+     Supports OpenAI-style messages: [{"role":"system|user|assistant", "content":"..."}]
+     Uses chat_template when available.
      """
      tok, _ = _lazy_load()
+     sys_text = ""
+     usr_msgs: List[Dict[str, str]] = []
+     for m in messages:
+         role = m.get("role", "")
+         content = (m.get("content") or "").strip()
+         if role == "system":
+             sys_text += ("\n" + content) if sys_text else content
+         else:
+             usr_msgs.append({"role": role, "content": content})
+
+     # inject the tools description into the system text
+     if tools_md:
+         sys_text = (sys_text + "\n\n" if sys_text else "") + tools_md + \
+             "\n\nSi decides llamar a una herramienta, devuelve un objeto JSON con la clave 'tool_calls' " \
+             "y describe tus razonamientos de forma concisa en 'thought' (opcional)."
+
+     # rebuild the conversation with the system message first
+     conv: List[Dict[str, str]] = []
+     if sys_text:
+         conv.append({"role": "system", "content": sys_text})
+     conv.extend(usr_msgs)

      chat_template = getattr(tok, "chat_template", None)
      if chat_template:
+         return tok.apply_chat_template(conv, tokenize=False, add_generation_prompt=True)
+
+     # Fallback without a chat template
+     rendered = ""
+     if sys_text:
+         rendered += f"<<SYS>>\n{sys_text}\n<</SYS>>\n\n"
+     for m in usr_msgs:
+         if m["role"] == "user":
+             rendered += f"### Usuario\n{m['content']}\n\n"
+         elif m["role"] == "assistant":
+             rendered += f"### Asistente\n{m['content']}\n\n"
+     rendered += "### Asistente\n"
+     return rendered
+
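
For reference, a standalone sketch of what the template-free fallback branch renders (illustrative values only; it mirrors the code above rather than importing the Space):

```python
# Illustrative only: mirrors the fallback branch of _compose_chat_prompt.
sys_text = "Herramientas disponibles (formato JSON):\n- **calculator**: ..."
usr_msgs = [{"role": "user", "content": "¿Cuánto es (2+2)^3?"}]

rendered = f"<<SYS>>\n{sys_text}\n<</SYS>>\n\n"
for m in usr_msgs:
    if m["role"] == "user":
        rendered += f"### Usuario\n{m['content']}\n\n"
    elif m["role"] == "assistant":
        rendered += f"### Asistente\n{m['content']}\n\n"
rendered += "### Asistente\n"  # generation prompt: the model continues from here
print(rendered)
```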
+ # =============== (Optional) Local mini-executor for safe example tools ===============
+ # If the LLM returns {"tool_calls":[{"name":"calculator","arguments":{"expr":"2+2"}}]}
+ # we can execute a few harmless example tools.
+ # Note: keep this very simple/safe. You can disable it by setting EXECUTE_TOOLS=False.
+ EXECUTE_TOOLS = True
+
+ def _safe_calculator(expr: str) -> str:
+     # Allow only digits, spaces, parentheses, and + - * / . % ^ (plus e/E)
+     if not re.fullmatch(r"[0-9\.\s\+\-\*\/\%\(\)\^eE]+", expr.replace("**", "^")):
+         return "Rejected expression."
+     # supports ^ as power -> **
+     expr = expr.replace("^", "**")
+     try:
+         return str(eval(expr, {"__builtins__": {}}, {}))
+     except Exception as e:
+         return f"Error: {e}"
+
+ LOCAL_TOOLBOX = {
+     "calculator": lambda args: _safe_calculator(str(args.get("expr", ""))),
+ }
+
+ def maybe_execute_tool_calls(tool_calls: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+     if not EXECUTE_TOOLS:
+         return []
+     results = []
+     for call in tool_calls:
+         name = call.get("name")
+         args = call.get("arguments", {})
+         fn = LOCAL_TOOLBOX.get(name)
+         if fn is None:
+             results.append({"name": name, "error": "tool_not_available"})
+             continue
+         try:
+             out = fn(args)
+             results.append({"name": name, "output": out})
+         except Exception as e:
+             results.append({"name": name, "error": str(e)})
+     return results
+
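
To make the wire format concrete, here is the shape the executor consumes and produces (a self-contained illustration; the `"64"` output assumes the calculator path above, where `^` becomes `**`):

```python
# Illustrative round trip for the local executor's JSON shapes.
import json

tool_calls = [{"name": "calculator", "arguments": {"expr": "(2+2)^3"}}]
# maybe_execute_tool_calls would evaluate (2+2)**3 and append:
tool_results = [{"name": "calculator", "output": "64"}]
print(json.dumps({"tool_calls": tool_calls, "tool_results": tool_results}, indent=2))
```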
+ # =============== Core generation ===============

  @spaces.GPU  # uses GPU when available (ZeroGPU)
+ def _generate_with_tools(
+     messages: List[Dict[str, str]],
+     tools: List[Dict[str, Any]],
      max_new_tokens: int = 512,
      temperature: float = 0.7,
      top_p: float = 0.95,
+ ) -> Dict[str, Any]:
      tok, model = _lazy_load()
+     tools_md = _render_tools_md(tools)
+     prompt = _compose_chat_prompt(messages, tools_md)

+     inputs = tok(prompt, return_tensors="pt").to(DEVICE)
      with torch.inference_mode():
          out = model.generate(
              **inputs,

              pad_token_id=tok.eos_token_id,
              eos_token_id=tok.eos_token_id,
          )
+     text = tok.decode(out[0], skip_special_tokens=True).strip()
+
+     # If the model returned a JSON block with 'tool_calls', try to extract it.
+     tool_calls: List[Dict[str, Any]] = []
+     try:
+         # keep the last valid JSON object containing "tool_calls"; raw_decode
+         # handles nested braces, which a non-greedy regex would cut short
+         decoder = json.JSONDecoder()
+         for m in re.finditer(r"\{", text):
+             try:
+                 obj, _ = decoder.raw_decode(text, m.start())
+             except Exception:
+                 continue
+             if isinstance(obj, dict) and isinstance(obj.get("tool_calls"), list):
+                 tool_calls = obj["tool_calls"]
+     except Exception:
+         pass
+
+     tool_results = maybe_execute_tool_calls(tool_calls) if tool_calls else []
+
+     return {"text": text, "tool_calls": tool_calls, "tool_results": tool_results}
+
+
+ # =================== Gradio Endpoints ===================
+
+ def predict_for_engine(messages_json: str, tools_json: str) -> Dict[str, Any]:
+     """
+     Endpoint expected by ENGINE (ToolsClient.chat):
+     - messages_json: JSON of [{"role":"user|assistant|system","content":"..."}]
+     - tools_json: OpenAI-like JSON tools spec (optional)
+     Returns: {"text": "...", "tool_calls": [...], "tool_results": [...]}
+     """
+     try:
+         messages = json.loads(messages_json) if messages_json else []
+     except Exception:
+         messages = []
+     try:
+         tools = json.loads(tools_json) if tools_json else []
+     except Exception:
+         tools = []
+     return _generate_with_tools(messages, tools, max_new_tokens=512, temperature=0.7, top_p=0.95)
+
+ def chat_advanced(messages_json: str, tools_json: str, max_new_tokens: int, temperature: float, top_p: float) -> Dict[str, Any]:
+     try:
+         messages = json.loads(messages_json) if messages_json else []
+     except Exception:
+         messages = []
+     try:
+         tools = json.loads(tools_json) if tools_json else []
+     except Exception:
+         tools = []
+     return _generate_with_tools(messages, tools, max_new_tokens=int(max_new_tokens), temperature=float(temperature), top_p=float(top_p))

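A corresponding client sketch for `/chat`, which exposes the sampling parameters (same caveats as the `/predict` sketch in the README: `gradio_client` installed, illustrative Space id):

```python
# Sketch: calling /chat with explicit sampling parameters (assumed Space id).
import json
from gradio_client import Client

client = Client("VeuReu/veureu-stools")  # hypothetical Space id
out = client.predict(
    json.dumps([{"role": "user", "content": "Hola"}]),  # messages_json
    "[]",   # tools_json (no tools)
    256,    # max_new_tokens
    0.7,    # temperature
    0.95,   # top_p
    api_name="/chat",
)
print(out)  # expected: {"text": "...", "tool_calls": [...], "tool_results": [...]}
```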
 
+ # =================== UI ===================

+ with gr.Blocks(title="Salamandra 7B Tools · ZeroGPU") as demo:
+     gr.Markdown("## Salamandra-7B-Tools · ZeroGPU\nChat con especificación de herramientas (function-calling).")

      with gr.Row():
+         with gr.Column():
+             messages = gr.Textbox(label="messages_json", value='[{"role":"user","content":"¿Cuánto es (2+2)^3?"}]', lines=6)
+             tools = gr.Textbox(label="tools_json (opcional)", value='[{"type":"function","function":{"name":"calculator","description":"Evalúa expresiones aritméticas básicas.","parameters":{"type":"object","properties":{"expr":{"type":"string"}},"required":["expr"]}}}]', lines=6)
              max_new = gr.Slider(16, 2048, value=512, step=16, label="max_new_tokens")
              temp = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="temperature")
+             topp = gr.Slider(0.1, 1.0, value=0.95, step=0.01, label="top_p")
              btn = gr.Button("Generar", variant="primary")
+         with gr.Column():
+             out = gr.JSON(label="Salida")

+     btn.click(chat_advanced, [messages, tools, max_new, temp, topp], out, api_name="chat")

+     # Minimal /predict endpoint for ENGINE (messages + tools)
+     gr.Button("Probar /predict").click(predict_for_engine, [messages, tools], out, api_name="predict")

  demo.queue(default_concurrency_limit=1, max_size=16).launch()  # Gradio 4.x: 'concurrency_count' → 'default_concurrency_limit'
clients/client_test.py ADDED
File without changes