Nancy1906 committed on
Commit
5b6cef8
·
verified ·
1 Parent(s): cdbafb9
Files changed (1) hide show
  1. my_tools.py +100 -363
my_tools.py CHANGED
@@ -7,469 +7,206 @@ from io import BytesIO
7
  from bs4 import BeautifulSoup
8
  from pydantic import Field
9
 
10
- # ---------- OPTIONAL & LAZY IMPORTS ----------
11
- # (avoid hard‑failure if libs are absent; import inside tools when needed)
12
-
13
- # ---------- LLM WRAPPER ----------
14
  from llama_index.core.llms import ChatMessage, LLMMetadata, LLM, CompletionResponse
15
  from llama_index.core.agent import ReActAgent
16
  from llama_index.core.callbacks.llama_debug import LlamaDebugHandler
17
  from llama_index.core.tools import FunctionTool
18
- from llama_index.core.schema import Document
19
-
20
  from langchain_community.retrievers import TavilySearchAPIRetriever
21
- import google.generativeai as genai
22
-
23
 
24
  # ---------- BASIC SETUP ----------
25
  HEADERS = {"User-Agent": "Mozilla/5.0"}
26
 
27
-
28
-
29
  def check_required_keys() -> None:
30
- missing = []
31
- if not os.getenv("TAVILY_API_KEY"):
32
- missing.append("TAVILY_API_KEY")
33
- if not os.getenv("GEMINI_API_KEY"):
34
- missing.append("GEMINI_API_KEY")
35
  if missing:
36
- print(
37
- f"⚠️ WARNING: Missing API keys: {', '.join(missing)}. Agent will not function properly!"
38
- )
39
  else:
40
  print("✅ All required API keys are present.")
41
 
42
-
43
  check_required_keys()
44
 
45
- # Monkeypatch requerido por LlamaIndex
46
  ChatMessage.message = property(lambda self: self)
47
 
48
-
49
- # ---------- GEMINI LLM ----------
50
- class GeminiLLM(LLM):
51
- """Wrapper mínimo para Gemini 1.5 que satisface la interfaz de Llama-Index."""
52
-
53
- model_name: str = Field(default="models/gemini-1.5-flash-latest")
54
- temperature: float = Field(default=0.0)
55
-
56
- _model: object = None
57
- _gen_cfg: object = None
58
 
59
  class Config:
60
  extra = "allow"
61
 
62
  def __init__(self, **kwargs):
63
  super().__init__(**kwargs)
64
-
65
- # --------- Normalización rápida (evita FieldInfo) -----------------
66
- if not isinstance(self.model_name, str):
67
- self.model_name = "models/gemini-1.5-pro-latest" #"models/gemini-1.5-flash-latest"
68
- if not isinstance(self.temperature, (float, int)):
69
- self.temperature = 0.0
70
- # ------------------------------------------------------------------
71
-
72
- # Configurar clave y modelo
73
- key = os.getenv("GEMINI_API_KEY")
74
- if not key:
75
- raise ValueError("GEMINI_API_KEY no configurada en variables de entorno")
76
- genai.configure(api_key=key)
77
-
78
- self._gen_cfg = genai.types.GenerationConfig(
79
- temperature=float(self.temperature)
80
- )
81
- self._model = genai.GenerativeModel(
82
- model_name=self.model_name,
83
- generation_config=self._gen_cfg
84
- )
85
-
86
- # callback manager defensivo
87
  if self.callback_manager is None:
88
  from llama_index.core.callbacks.base import CallbackManager
89
  self.callback_manager = CallbackManager([])
90
  if not self.callback_manager.handlers:
91
  self.callback_manager.add_handler(LlamaDebugHandler())
92
 
93
- # -- metadatos ----------------------------------------------------------
94
  @property
95
- def metadata(self) -> LLMMetadata: # type: ignore[override]
96
  return LLMMetadata(
97
- context_window=1_048_576,
98
- num_output=8192,
99
  is_chat_model=True,
100
  is_function_calling_model=True,
101
  model_name=self.model_name,
102
  )
103
 
104
- # ----------------------------------------------------------------------
105
- # 1️⃣ CHAT SINCRONO
106
- # ----------------------------------------------------------------------
107
- def chat(self, messages: list[ChatMessage], **kwargs) -> ChatMessage: # type: ignore[override]
108
- history = [
109
- {"role": "user" if m.role == "user" else "model", "parts": [{"text": str(m.content)}]}
110
- for m in messages[:-1]
111
- ]
112
- session = self._model.start_chat(history=history)
113
- reply = session.send_message(str(messages[-1].content))
114
- return ChatMessage(role="assistant", content=reply.text)
 
 
 
 
115
 
116
- # 1-bis CHAT ASINCRONO
117
- async def achat(self, messages: list[ChatMessage], **kwargs) -> ChatMessage: # type: ignore[override]
118
  return await asyncio.to_thread(self.chat, messages, **kwargs)
119
 
120
- # ----------------------------------------------------------------------
121
- # 2️⃣ COMPLETE SINCRONO (prompt plano)
122
- # ----------------------------------------------------------------------
123
- def complete(self, prompt: str, formatted: bool = False, **kwargs) -> CompletionResponse: # type: ignore[override]
124
- resp = self._model.generate_content(prompt)
125
- return CompletionResponse(text=resp.text)
126
-
127
- # 2-bis COMPLETE ASINCRONO
128
- async def acomplete(self, prompt: str, formatted: bool = False, **kwargs) -> CompletionResponse: # type: ignore[override]
129
- return await asyncio.to_thread(self.complete, prompt, formatted=formatted, **kwargs)
130
-
131
- # ----------------------------------------------------------------------
132
- # 3️⃣ STREAMING DE COMPLETIONS
133
- # ----------------------------------------------------------------------
134
- def stream_complete(self, prompt: str, formatted: bool = False, **kwargs):
135
- stream = self._model.generate_content(prompt, stream=True)
136
-
137
- def generator():
138
- from llama_index.core.llms import CompletionResponse
139
- acc = ""
140
- for chunk in stream:
141
- delta = getattr(chunk, "text", "") or (chunk.parts[0].text if chunk.parts else "")
142
- if delta:
143
- acc += delta
144
- yield CompletionResponse(text=acc, delta=delta)
145
-
146
- return generator()
147
-
148
- async def astream_complete(self, prompt: str, formatted: bool = False, **kwargs):
149
- sync_gen = await asyncio.to_thread(self.stream_complete, prompt, formatted=formatted, **kwargs)
150
-
151
- async def agen():
152
- for item in sync_gen:
153
- yield item
154
-
155
- return agen()
156
-
157
- # ----------------------------------------------------------------------
158
- # 4️⃣ STREAMING DE CHAT
159
- # ----------------------------------------------------------------------
160
- def stream_chat(self, messages: list[ChatMessage], **kwargs):
161
- history = [
162
- {"role": "user" if m.role == "user" else "model", "parts": [{"text": str(m.content)}]}
163
- for m in messages[:-1]
164
- ]
165
- session = self._model.start_chat(history=history)
166
- stream = session.send_message(str(messages[-1].content), stream=True)
167
-
168
- def generator():
169
- acc = ""
170
- for chunk in stream:
171
- delta = getattr(chunk, "text", "") or (chunk.parts[0].text if chunk.parts else "")
172
- if delta:
173
- acc += delta
174
- yield ChatMessage(
175
- role="assistant",
176
- content=acc,
177
- additional_kwargs={"delta": delta},
178
- )
179
-
180
- return generator()
181
-
182
- async def astream_chat(self, messages: list[ChatMessage], **kwargs):
183
- sync_gen = await asyncio.to_thread(self.stream_chat, messages, **kwargs)
184
-
185
- async def agen():
186
- for item in sync_gen:
187
- yield item
188
-
189
- return agen()
190
-
191
-
192
-
193
 
194
  # ---------- TOOLING ----------
 
 
 
 
 
 
195
 
196
  def web_search(query: str, num_results: int = 5) -> str:
197
  """Tavily search -> concatenated, citation‑ready snippet list (includes URL)."""
198
  try:
199
  retriever = TavilySearchAPIRetriever(api_key=os.getenv("TAVILY_API_KEY"), k=num_results)
200
  results = retriever.invoke(query)
201
- formatted = []
202
- for i, doc in enumerate(results, start=1):
203
- formatted.append(
204
- f"Result {i}:\nTitle: {doc.metadata.get('title','')}\nURL: {doc.metadata.get('source','')}\nContent: {doc.page_content}\n"
205
- )
206
  return "\n\n".join(formatted)
207
  except Exception as exc:
208
  return f"Error web_search: {exc}"
209
 
210
-
211
- def reverse_text(text: str) -> str:
212
- return text[::-1]
213
-
214
-
215
- # small util for optional pandas
216
-
217
- def _pd_safe_import():
218
  try:
219
- import pandas as pd # noqa: F401
220
-
221
- return pd
222
- except ModuleNotFoundError:
223
- raise RuntimeError("pandas not available in this environment")
224
-
 
 
 
 
 
225
 
226
  def analyze_markdown_table(table_md: str, question: str) -> str:
 
 
 
227
  try:
228
- pd = _pd_safe_import()
229
-
230
- # 1️⃣ Parseo seguro del markdown
231
- clean = [
232
- ln for ln in table_md.strip().splitlines()
233
- if ln.strip() and not ln.lstrip().startswith("|---")
234
- ]
235
- rows = [ [c.strip() for c in ln.strip("|").split("|")] for ln in clean ]
236
- if len(rows) < 2:
237
- return "Error analyze_table: empty or malformed markdown table"
238
-
239
  df = pd.DataFrame(rows[1:], columns=rows[0])
240
-
241
- # — 2️⃣ ¿Nos piden conmutatividad? —
242
  if "conmut" in question.lower():
243
  offenders: set[str] = set()
244
- header = df.columns[0] # nombre de la columna/índice
245
-
246
- cols = df.columns[1:] # solo las etiquetas
247
  for x in cols:
248
  for y in cols:
249
  try:
250
  val_xy = df.loc[df[header] == x, y].iat[0]
251
  val_yx = df.loc[df[header] == y, x].iat[0]
252
- if val_xy != val_yx:
253
- offenders.update([x, y])
254
- except (IndexError, KeyError):
255
- continue
256
  return ", ".join(sorted(offenders)) or "Conmutativa"
257
-
258
- # — 3️⃣ Si no, devolvemos CSV —
259
  return df.to_csv(index=False)
260
  except Exception as exc:
261
  return f"Error analyze_markdown_table: {exc}"
262
 
263
-
264
-
265
  def execute_code(code: str) -> str:
266
- """Runs arbitrary **short** python code in a sandboxed subprocess."""
267
  try:
268
  res = subprocess.run(["python", "-S", "-c", code], capture_output=True, text=True, timeout=10)
269
  if res.returncode == 0:
270
  output = res.stdout.strip()
271
  return f"Output: {output if output else '(No output)'}"
272
  return f"Error: {res.stderr.strip()}"
273
- except subprocess.TimeoutExpired:
274
- return "Error: execution timeout"
275
  except Exception as exc:
276
  return f"Error execute_code: {exc}"
277
 
 
 
278
 
279
- def read_excel_data(file_path: str, sheet_name: int | str = 0) -> str:
280
- """Downloads or opens an excel file and returns CSV (requires pandas)."""
281
- try:
282
- pd = _pd_safe_import()
283
- if file_path.startswith(("http://", "https://")):
284
- resp = requests.get(file_path, headers=HEADERS, timeout=20)
285
- resp.raise_for_status()
286
- df = pd.read_excel(BytesIO(resp.content), sheet_name=sheet_name)
287
- else:
288
- if not os.path.exists(file_path):
289
- return f"Error read_excel_data: file '{file_path}' not found"
290
- df = pd.read_excel(file_path, sheet_name=sheet_name)
291
- return df.fillna("").to_csv(index=False)
292
- except Exception as exc:
293
- return f"Error read_excel_data: {exc}"
294
-
295
-
296
- # --- botanical classifier (unchanged) ---
297
-
298
- def classify_botanical_foods(items_list_str: str) -> str:
299
- botanical_fruits = {
300
- "tomato",
301
- "bell pepper",
302
- "pepper",
303
- "green beans",
304
- "beans",
305
- "zucchini",
306
- "cucumber",
307
- "eggplant",
308
- "corn",
309
- "peas",
310
- "pea",
311
- "pumpkin",
312
- "squash",
313
- "avocado",
314
- }
315
- botanical_vegetables = {
316
- "broccoli",
317
- "celery",
318
- "lettuce",
319
- "kale",
320
- "spinach",
321
- "sweet potatoes",
322
- "sweet potato",
323
- "potato",
324
- "onion",
325
- "garlic",
326
- "carrot",
327
- "okra",
328
- "cabbage",
329
- "cauliflower",
330
- "beet",
331
- "turnip",
332
- "parsnip",
333
- "leek",
334
- }
335
- vegs, fruits, others = set(), set(), set()
336
- for token in (t.strip().lower() for t in items_list_str.split(",")):
337
- if token in botanical_vegetables and token not in botanical_fruits:
338
- vegs.add(token)
339
- elif token in botanical_fruits:
340
- fruits.add(token)
341
- else:
342
- others.add(token)
343
- return (
344
- f"Vegetables: {', '.join(sorted(vegs))}\n"
345
- f"Fruits: {', '.join(sorted(fruits))}\n"
346
- f"Others: {', '.join(sorted(others))}"
347
- )
348
-
349
-
350
- # --- flexible Wikipedia table scraper ---
351
-
352
- def scrape_wiki_table(page_title: str, section: str | None = None, table_index: int = 0) -> str:
353
- """Returns the requested Wikipedia table in markdown."""
354
- try:
355
- url = f"https://en.wikipedia.org/wiki/{page_title.replace(' ', '_')}"
356
- html = requests.get(url, timeout=15).text
357
- soup = BeautifulSoup(html, "html.parser")
358
-
359
- def _find_tables(s: BeautifulSoup):
360
- return s.find_all("table", class_="wikitable")
361
-
362
- if section:
363
- header_tag = soup.find(lambda tag: tag.name in {"h2", "h3"} and section.lower() in tag.get_text(" ", strip=True).lower())
364
- if not header_tag:
365
- return f"Error scrape_wiki_table: section '{section}' not found"
366
- tables = header_tag.find_all_next("table", class_="wikitable")
367
- else:
368
- tables = _find_tables(soup)
369
- if not tables or table_index >= len(tables):
370
- return f"Error scrape_wiki_table: table index {table_index} out of range (found {len(tables)})"
371
-
372
- pd = _pd_safe_import()
373
- df = pd.read_html(str(tables[table_index]), flavor="bs4")[0]
374
- return df.to_markdown(index=False)
375
- except Exception as exc:
376
- return f"Error scrape_wiki_table: {exc}"
377
-
378
-
379
- # --- generic URL text scraper ---
380
-
381
- def scrape_url_text(url: str) -> str:
382
- """Descarga página y devuelve texto visible (trim-8k)."""
383
- try:
384
- resp = requests.get(url, headers=HEADERS, timeout=20)
385
- if "Just a moment" in resp.text and "cloudflare" in resp.text.lower():
386
- return "Error scrape_url_text: Cloudflare protection detected"
387
- resp.raise_for_status()
388
-
389
- soup = BeautifulSoup(resp.text, "html.parser")
390
- for tag in soup(["script", "style", "noscript"]):
391
- tag.decompose()
392
- text = "\n".join(t.strip() for t in soup.get_text("\n").splitlines() if t.strip())
393
- return text[:8000]
394
- except Exception as exc:
395
- return f"Error scrape_url_text: {exc}"
396
-
397
-
398
-
399
- # ---------- TOOL WRAPPERS ----------
400
-
401
  tool_defs = [
402
  (web_search, "web_search", "Searches the web via Tavily."),
403
- (scrape_wiki_table, "scrape_wiki_table", "Extracts a wikitable from Wikipedia."),
404
  (scrape_url_text, "scrape_url_text", "Fetch any URL and return visible text."),
405
- (analyze_markdown_table, "analyze_markdown_table", "Analyze a markdown table (commutativity etc)."),
406
  (execute_code, "execute_code", "Run short python snippets securely."),
407
- (read_excel_data, "read_excel_data", "Load Excel (URL or local) → CSV."),
408
- (classify_botanical_foods, "classify_botanical_foods", "Botanically classify food list."),
409
  (reverse_text, "reverse_text", "Reverse a text string."),
410
- (lambda q: "I cannot answer with the available tools.", "no_tool_solution", "Fallback answer when stuck."),
411
  ]
412
-
413
  TOOLS = [FunctionTool.from_defaults(fn=fn, name=name, description=desc) for fn, name, desc in tool_defs]
414
 
415
- # ---------- SYSTEM PROMPT ----------
416
- tool_desc_str = "\n".join(f"{t.metadata.name}: {t.metadata.description}" for t in TOOLS)
417
  SYSTEM_PROMPT = f"""
418
- You are Alfred, a ReAct agent. Use the provided tools to answer.
419
- Rules:
420
- 1. Try a relevant tool first when external info is needed.
421
- 2. After a tool call you receive `Observation:`. Your *very next* assistant message **must** be exactly that observation (untouched) *or* the fixed string "I cannot answer with the available tools." – no extra text.
422
- 3. If a tool fails, think why and try an alternative (different params / another tool) once before giving up.
423
- 4. Do not invent facts.
424
- Available tools:
425
- {tool_desc_str}
426
- """
427
 
428
- # ---------- REACT AGENT ----------
429
- llm = GeminiLLM()
430
- agent = ReActAgent.from_tools(
431
- tools=TOOLS,
432
- llm=llm,
433
- system_prompt=SYSTEM_PROMPT,
434
- verbose=True,
435
- max_iterations=25,
436
- callback_manager=llm.callback_manager,
437
- handle_parsing_errors=True,
438
- )
439
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
440
 
441
- # Helper to strip to the last Observation or fallback
442
  def _extract_observation(raw: str) -> str:
443
- """Extrae la ÚLTIMA observación de la cadena de pensamiento."""
444
  if "Observation:" in raw:
445
- # rsplit divide desde la derecha, asegurando que tomamos la última
446
- segment = raw.rsplit("Observation:", 1)[-1]
447
  if "Final Answer:" in segment:
448
  segment = segment.split("Final Answer:", 1)[0]
449
  return segment.strip()
450
  return raw.strip()
451
 
452
-
453
- # Public entry point
454
-
455
  def basic_agent_response(question: str) -> str:
 
456
  try:
457
- # ⚠️ agente nuevo por pregunta
458
- fresh_agent = ReActAgent.from_tools(
459
- tools=TOOLS,
460
- llm=GeminiLLM(),
461
- system_prompt=SYSTEM_PROMPT,
462
- verbose=False,
463
- max_iterations=25,
464
- callback_manager=None, # sin historial previo
465
- handle_parsing_errors=True,
466
- )
467
-
468
- raw = fresh_agent.query(question)
469
- cleaned = _extract_observation(
470
- str(raw.response if hasattr(raw, "response") else raw)
471
- )
472
  return cleaned or "I cannot answer with the available tools."
473
  except Exception as exc:
474
- print(f"[ERROR] {exc}")
475
- return "I cannot answer with the available tools."
 
7
  from bs4 import BeautifulSoup
8
  from pydantic import Field
9
 
10
+ # ----- LlamaIndex & LangChain Imports -----
 
 
 
11
  from llama_index.core.llms import ChatMessage, LLMMetadata, LLM, CompletionResponse
12
  from llama_index.core.agent import ReActAgent
13
  from llama_index.core.callbacks.llama_debug import LlamaDebugHandler
14
  from llama_index.core.tools import FunctionTool
15
+ from llama_index.llms.huggingface import HuggingFaceInferenceAPI
 
16
  from langchain_community.retrievers import TavilySearchAPIRetriever
 
 
17
 
18
  # ---------- BASIC SETUP ----------
19
  HEADERS = {"User-Agent": "Mozilla/5.0"}
20
 
 
 
21
def check_required_keys() -> None:
    """Report on stdout whether the API keys this module relies on are set.

    Looks up ``TAVILY_API_KEY`` and ``HUGGINGFACE_TOKEN`` in the process
    environment. A variable that is unset, empty, or made up solely of
    whitespace counts as missing, because a blank value cannot be a usable
    credential. Nothing is raised and nothing is returned; the outcome is
    only printed.
    """
    required = ("TAVILY_API_KEY", "HUGGINGFACE_TOKEN")
    # ``or ""`` turns an unset variable (None) into a string so it can be stripped.
    missing = [name for name in required if not (os.getenv(name) or "").strip()]
    if missing:
        print(f"⚠️ WARNING: Missing API keys: {', '.join(missing)}")
    else:
        print("✅ All required API keys are present.")
27
 
 
28
  check_required_keys()
29
 
30
+ # Monkey-patch required by LlamaIndex: make ChatMessage.message return the message itself
31
  ChatMessage.message = property(lambda self: self)
32
 
33
+ # ---------- HUGGING FACE LLM WRAPPER (Command R+) ----------
34
class HuggingFaceLLM(LLM):
    """LlamaIndex ``LLM`` adapter for the Hugging Face Inference API.

    The defaults target the Command R+ model. Only the chat entry points
    produce text; the completion entry points raise ``NotImplementedError``.
    Every method of the LlamaIndex LLM interface is defined here (the
    pre-existing Gemini wrapper in this file did the same) so that the class
    is concrete and can be instantiated.
    """

    model_name: str = Field(default="CohereForAI/c4ai-command-r-plus")
    temperature: float = Field(default=0.01)
    # Raised to allow longer responses; also reported as ``num_output``.
    max_new_tokens: int = Field(default=2048)

    # Inference client; populated by ``__init__``.
    _client: HuggingFaceInferenceAPI = None

    class Config:
        extra = "allow"

    def __init__(self, **kwargs):
        """Create the inference client and make sure a callback handler exists.

        Raises:
            ValueError: if ``HUGGINGFACE_TOKEN`` is not set in the environment.
        """
        super().__init__(**kwargs)
        api_key = os.getenv("HUGGINGFACE_TOKEN")
        if not api_key:
            raise ValueError("HUGGINGFACE_TOKEN no configurado en los secrets del Space")
        self._client = HuggingFaceInferenceAPI(model_name=self.model_name, token=api_key)

        # Defensive: guarantee a callback manager with at least one handler.
        if self.callback_manager is None:
            from llama_index.core.callbacks.base import CallbackManager
            self.callback_manager = CallbackManager([])
        if not self.callback_manager.handlers:
            self.callback_manager.add_handler(LlamaDebugHandler())

    @property
    def metadata(self) -> LLMMetadata:
        """Describe the model (context size, output budget, capabilities)."""
        return LLMMetadata(
            context_window=128000,
            num_output=self.max_new_tokens,
            is_chat_model=True,
            is_function_calling_model=True,
            model_name=self.model_name,
        )

    def chat(self, messages: list[ChatMessage], **kwargs) -> ChatMessage:
        """Render ``messages`` with the chat template and generate one reply.

        A failure of the generation call is not raised: it is printed and
        returned as the content of an assistant message.
        """
        # NOTE(review): ``tokenizer`` and ``text_generation`` are used exactly
        # as in the original code; confirm that HuggingFaceInferenceAPI really
        # exposes both attributes in the installed version.
        prompt = self._client.tokenizer.apply_chat_template(
            [{"role": msg.role.value, "content": msg.content} for msg in messages],
            tokenize=False,
            add_generation_prompt=True,
        )
        try:
            response = self._client.text_generation(
                prompt,
                max_new_tokens=self.max_new_tokens,
                # Per the original author, the temperature may not be 0.
                temperature=self.temperature if self.temperature > 0 else 0.01,
                do_sample=True,
                top_p=0.95,
            )
            return ChatMessage(role="assistant", content=response)
        except Exception as e:
            print(f"[ERROR] HuggingFace API call failed: {e}")
            return ChatMessage(role="assistant", content=f"Error: API call failed. Reason: {e}")

    async def achat(self, messages: list[ChatMessage], **kwargs) -> ChatMessage:
        """Async variant of ``chat``; runs the blocking call in a worker thread."""
        return await asyncio.to_thread(self.chat, messages, **kwargs)

    def stream_chat(self, messages: list[ChatMessage], **kwargs):
        """Return a generator that yields the complete reply as a single chunk."""
        reply = self.chat(messages, **kwargs)

        def generator():
            yield reply

        return generator()

    async def astream_chat(self, messages: list[ChatMessage], **kwargs):
        """Async variant of ``stream_chat``: one chunk holding the whole reply."""
        reply = await self.achat(messages, **kwargs)

        async def agen():
            yield reply

        return agen()

    def complete(self, prompt: str, formatted: bool = False, **kwargs) -> CompletionResponse:
        """Plain-prompt completion is not supported by this wrapper."""
        raise NotImplementedError("Use .chat() for this model.")

    async def acomplete(self, prompt: str, formatted: bool = False, **kwargs) -> CompletionResponse:
        """Async completion is not supported by this wrapper."""
        raise NotImplementedError("Use .chat() for this model.")

    def stream_complete(self, prompt: str, formatted: bool = False, **kwargs):
        """Streaming completion is not supported by this wrapper."""
        raise NotImplementedError("Use .chat() for this model.")

    async def astream_complete(self, prompt: str, formatted: bool = False, **kwargs):
        """Async streaming completion is not supported by this wrapper."""
        raise NotImplementedError("Use .chat() for this model.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  # ---------- TOOLING ----------
90
+ def _pd_safe_import():
91
+ try:
92
+ import pandas as pd
93
+ return pd
94
+ except ModuleNotFoundError:
95
+ return None
96
 
97
def web_search(query: str, num_results: int = 5) -> str:
    """Search the web through Tavily and return the hits as numbered text blocks.

    Each block carries the hit's title, source URL and content; blocks are
    separated by blank lines. Any failure is returned as an
    ``Error web_search: ...`` string rather than raised.
    """
    try:
        tavily = TavilySearchAPIRetriever(api_key=os.getenv("TAVILY_API_KEY"), k=num_results)
        blocks = []
        for position, hit in enumerate(tavily.invoke(query), start=1):
            title = hit.metadata.get('title', '')
            source = hit.metadata.get('source', '')
            blocks.append(
                f"Result {position}:\nTitle: {title}\nURL: {source}\nContent: {hit.page_content}\n"
            )
        return "\n\n".join(blocks)
    except Exception as exc:
        return f"Error web_search: {exc}"
106
 
107
def scrape_url_text(url: str) -> str:
    """Download a web page and return its visible text, capped at 8000 characters.

    ``script``, ``style``, ``noscript``, ``header``, ``footer`` and ``nav``
    elements are dropped, blank lines are removed and every remaining line is
    trimmed. Failures are returned as an ``Error ...`` string, never raised.
    """
    try:
        resp = requests.get(url, headers=HEADERS, timeout=20)
        body = resp.text
        # Look for the Cloudflare interstitial BEFORE raise_for_status():
        # challenge pages are commonly served with an error status, in which
        # case raising first would skip this more helpful message.
        if "Just a moment" in body and "cloudflare" in body.lower():
            return "Error: The site is protected by Cloudflare and cannot be scraped directly. Use information from web_search instead."
        resp.raise_for_status()
        soup = BeautifulSoup(body, "html.parser")
        for tag in soup(["script", "style", "noscript", "header", "footer", "nav"]):
            tag.decompose()
        text = "\n".join(t.strip() for t in soup.get_text("\n").splitlines() if t.strip())
        return text[:8000]
    except Exception as exc:
        return f"Error scrape_url_text: {exc}"
121
 
122
def _is_separator_row(cells: list[str]) -> bool:
    """Tell whether ``cells`` form a markdown header-delimiter row (``---``, ``:---:``)."""
    cores = [cell.strip(":") for cell in cells]
    return bool(cores) and all(len(core) >= 3 and set(core) == {"-"} for core in cores)


def analyze_markdown_table(table_md: str, question: str) -> str:
    """Parse a markdown table, then check commutativity or return it as CSV.

    The first remaining row is the header and its first column holds the row
    labels. When ``question`` mentions commutativity (Spanish "conmut..." or
    English "commut..."), the table is read as an operation table: the sorted,
    comma-separated labels involved in any ``x*y != y*x`` mismatch are
    returned, or ``"Conmutativa"`` when there is none. For any other question
    the table is returned as CSV text.

    Errors (pandas missing, malformed table, unexpected exceptions) are
    returned as ``Error ...`` strings rather than raised.
    """
    try:
        import pandas as pd  # lazy import: only this tool needs pandas
    except ImportError:
        return "Error: pandas library is required for this tool but not installed."
    try:
        rows = []
        for line in table_md.strip().splitlines():
            stripped = line.strip()
            if not stripped:
                continue
            # Drop exactly one outer pipe per side so empty edge cells survive.
            cells = [c.strip() for c in stripped.removeprefix("|").removesuffix("|").split("|")]
            # Keep the historical "|---" test and additionally recognise
            # spaced ("| --- |") and aligned ("|:---:|") delimiter rows.
            if stripped.startswith("|---") or _is_separator_row(cells):
                continue
            rows.append(cells)
        if len(rows) < 2:
            return "Error: malformed markdown table"
        df = pd.DataFrame(rows[1:], columns=rows[0])
        asked = question.lower()
        if "conmut" in asked or "commut" in asked:
            offenders: set[str] = set()
            header, cols = df.columns[0], df.columns[1:]
            for x in cols:
                for y in cols:
                    try:
                        val_xy = df.loc[df[header] == x, y].iat[0]
                        val_yx = df.loc[df[header] == y, x].iat[0]
                    except (IndexError, KeyError):
                        # A label without a matching row/column cannot be compared.
                        continue
                    if val_xy != val_yx:
                        offenders.update([x, y])
            return ", ".join(sorted(offenders)) or "Conmutativa"
        return df.to_csv(index=False)
    except Exception as exc:
        return f"Error analyze_markdown_table: {exc}"
145
 
 
 
146
def execute_code(code: str) -> str:
    """Run a short Python snippet in a child interpreter and report its output.

    The snippet is passed to the current interpreter with ``-S -c`` and a
    10-second timeout. Returns ``"Output: ..."`` with the stripped stdout
    (``"(No output)"`` when empty) on exit status 0, otherwise
    ``"Error: ..."`` with the stripped stderr. A timeout or a failure to
    launch is also returned as an ``Error ...`` string; nothing is raised.

    NOTE: the child process is NOT a security sandbox. The snippet runs with
    the privileges of this process, so only pass it code you trust.
    """
    import sys  # local import: only needed to locate the running interpreter

    try:
        res = subprocess.run(
            # sys.executable avoids depending on whatever "python" is on PATH.
            [sys.executable or "python", "-S", "-c", code],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except subprocess.TimeoutExpired:
        return "Error: execution timeout"
    except Exception as exc:
        return f"Error execute_code: {exc}"
    if res.returncode == 0:
        output = res.stdout.strip()
        return f"Output: {output if output else '(No output)'}"
    return f"Error: {res.stderr.strip()}"
156
 
157
+ # ... (other tools such as reverse_text, classify_botanical_foods, etc. go here, unchanged) ...
158
def reverse_text(text: str) -> str:
    """Return ``text`` with its characters in reverse order."""
    return "".join(reversed(text))
159
 
160
+ # ---------- TOOL DEFINITIONS & PROMPT ----------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
def _no_tool_solution(query: str = "") -> str:
    """Fallback tool: return the fixed "cannot answer" reply, ignoring ``query``.

    A named function with an ordinary, optional parameter replaces the former
    ``lambda _: ...``: a parameter called ``_`` has a leading underscore and
    so cannot be exposed as a field of the tool's argument schema.
    """
    return "I cannot answer with the available tools."


# (callable, tool name, description) triples exposed to the agent.
tool_defs = [
    (web_search, "web_search", "Searches the web via Tavily."),
    (scrape_url_text, "scrape_url_text", "Fetch any URL and return visible text."),
    (analyze_markdown_table, "analyze_markdown_table", "Analyze a markdown table."),
    (execute_code, "execute_code", "Run short python snippets securely."),
    (reverse_text, "reverse_text", "Reverse a text string."),
    (_no_tool_solution, "no_tool_solution", "Fallback answer when stuck."),
]

TOOLS = [FunctionTool.from_defaults(fn=fn, name=name, description=desc) for fn, name, desc in tool_defs]

# Plain string: the prompt has no placeholders, so no f-string is needed.
SYSTEM_PROMPT = """
You are Alfred, a ReAct agent. Your goal is to answer questions accurately. Follow these rules STRICTLY.

**OPERATING PROCEDURE:**

1. **TRIAGE:** First, analyze the question. If it involves a local file (image, audio, Excel) or multimedia, IMMEDIATELY use `no_tool_solution`.
2. **INFORMATION GATHERING:** For all other questions, your FIRST step is ALWAYS `web_search`.
3. **ANALYZE SNIPPET:** After `web_search`, CAREFULLY read the `Content:` snippet of each result. If the answer is clearly present, answer immediately. DO NOT use another tool if you already have the information.
4. **DEEP DIVE:** Only if the snippet is incomplete, use `scrape_url_text` on the most promising URL. If `scrape_url_text` fails (e.g., Cloudflare error), go back to the text from `web_search` or give up.
5. **FINAL ANSWER:** Your final response MUST be ONLY the `Observation:` from your last successful tool call, or the phrase "I cannot answer with the available tools."
"""
182
+
183
+ # ---------- AGENT CREATION & EXECUTION ----------
184
def create_fresh_agent():
    """Build and return a brand-new ReAct agent backed by a new ``HuggingFaceLLM``.

    A separate agent (and LLM wrapper) is created on every call, the intent
    being that nothing carries over from one query to the next.
    """
    fresh_llm = HuggingFaceLLM()
    # NOTE(review): confirm that ReActAgent.from_tools actually honours
    # ``system_prompt`` and ``handle_parsing_errors`` in the installed
    # LlamaIndex version rather than silently ignoring them.
    agent_options = dict(
        tools=TOOLS,
        llm=fresh_llm,
        system_prompt=SYSTEM_PROMPT,
        verbose=False,
        max_iterations=20,
        handle_parsing_errors=True,
    )
    return ReActAgent.from_tools(**agent_options)
191
 
 
192
  def _extract_observation(raw: str) -> str:
193
+ """Extracts the LAST observation from the ReAct agent's reasoning dump."""
194
  if "Observation:" in raw:
195
+ segment = raw.rsplit("Observation:", 1)[-1]
 
196
  if "Final Answer:" in segment:
197
  segment = segment.split("Final Answer:", 1)[0]
198
  return segment.strip()
199
  return raw.strip()
200
 
 
 
 
201
def basic_agent_response(question: str) -> str:
    """Answer one question with a freshly created agent (public entry point).

    The agent's reply is reduced to its last observation. If that is empty,
    or if anything raises along the way, the fixed fallback sentence is
    returned instead; exceptions are printed, never propagated.
    """
    fallback = "I cannot answer with the available tools."
    try:
        print(f"[DEBUG] Question: {question}")
        result = create_fresh_agent().query(question)
        # Prefer the ``response`` attribute when the result object has one.
        answer_text = str(result.response) if hasattr(result, "response") else str(result)
        return _extract_observation(answer_text) or fallback
    except Exception as exc:
        print(f"[ERROR] Agent execution failed: {exc}")
        return fallback