Nancy1906 committed on
Commit
5b46a08
·
verified ·
1 Parent(s): b83ef5c
Files changed (1) hide show
  1. my_tools.py +242 -477
my_tools.py CHANGED
@@ -1,598 +1,363 @@
1
  import os
2
  import math
3
- import time
4
  import asyncio
5
  import subprocess
6
  import requests
7
- import pandas as pd
8
- from io import BytesIO, StringIO
9
  from bs4 import BeautifulSoup
10
- import wikipedia
11
  from pydantic import Field
12
- import google.generativeai as genai
13
 
14
- # LlamaIndex imports
 
 
 
15
  from llama_index.core.llms import ChatMessage, LLMMetadata, LLM, CompletionResponse
16
- from llama_index.core.tools import FunctionTool
17
  from llama_index.core.agent import ReActAgent
18
  from llama_index.core.callbacks.llama_debug import LlamaDebugHandler
 
 
19
 
20
  from langchain_community.retrievers import TavilySearchAPIRetriever
21
- from llama_index.core.schema import Document
 
22
 
 
 
23
 
24
- def check_required_keys():
 
25
  missing = []
26
  if not os.getenv("TAVILY_API_KEY"):
27
  missing.append("TAVILY_API_KEY")
28
- # Podés agregar más claves si querés chequear otras
 
29
  if missing:
30
- print(f"⚠️ WARNING: Missing API keys: {', '.join(missing)}. Agent will not function properly!")
 
 
31
  else:
32
  print("✅ All required API keys are present.")
33
 
34
- # Lo llamás apenas arranca:
35
  check_required_keys()
36
- # -------------------------------------------------------------------
37
- # 1) MONKEY-PATCH PARA ChatMessage (por requerimiento de LlamaIndex)
38
- # -------------------------------------------------------------------
39
  ChatMessage.message = property(lambda self: self)
40
 
41
- # -------------------------------------------------------------------
42
- # 2) Clase GeminiLLM personalizada
43
- # -------------------------------------------------------------------
44
  class GeminiLLM(LLM):
45
  model_name: str = Field(default="models/gemini-1.5-flash-latest")
46
  temperature: float = Field(default=0.0)
47
 
48
- _model: object = None
49
- _gen_cfg: object = None
50
 
51
  class Config:
52
  extra = "allow"
53
 
54
  def __init__(self, **kwargs):
55
  super().__init__(**kwargs)
56
- actual_model_name = self.model_name
57
- if not isinstance(actual_model_name, str):
58
- field_def = self.__fields__.get("model_name")
59
- if field_def and hasattr(field_def, 'default'):
60
- actual_model_name = field_def.default
61
- if not isinstance(actual_model_name, str):
62
- actual_model_name = "models/gemini-1.5-flash-latest"
63
-
64
- actual_temperature = self.temperature
65
- if not isinstance(actual_temperature, (float, int)):
66
- temp_field_def = self.__fields__.get("temperature")
67
- if temp_field_def and hasattr(temp_field_def, 'default'):
68
- actual_temperature = temp_field_def.default
69
- if not isinstance(actual_temperature, (float, int)):
70
- actual_temperature = 0.0
71
-
72
- key = os.getenv("GEMINI_API_KEY")
73
- if not key:
74
- raise ValueError("GEMINI_API_KEY no configurada en variables de entorno")
75
- genai.configure(api_key=key)
76
-
77
- self._gen_cfg = genai.types.GenerationConfig(temperature=actual_temperature)
78
  self._model = genai.GenerativeModel(
79
- model_name=actual_model_name,
80
- generation_config=self._gen_cfg
81
  )
82
-
83
  if self.callback_manager is None:
84
  from llama_index.core.callbacks.base import CallbackManager
 
85
  self.callback_manager = CallbackManager([])
86
  if not self.callback_manager.handlers:
87
  self.callback_manager.add_handler(LlamaDebugHandler())
88
 
 
89
  @property
90
  def metadata(self):
91
- actual_model_name_meta = self.model_name
92
- if not isinstance(actual_model_name_meta, str):
93
- field_meta = self.__fields__.get("model_name")
94
- if field_meta and hasattr(field_meta, 'default'):
95
- actual_model_name_meta = field_meta.default
96
- if not isinstance(actual_model_name_meta, str):
97
- actual_model_name_meta = "models/gemini-1.5-flash-latest"
98
  return LLMMetadata(
99
- context_window=1048576,
100
  num_output=8192,
101
  is_chat_model=True,
102
  is_function_calling_model=True,
103
- model_name=actual_model_name_meta,
104
  )
105
 
106
- def chat(self, messages: list[ChatMessage], **kwargs):
107
- hist = []
108
- for m in messages[:-1]:
109
- role = "user" if m.role == "user" else "model"
110
- hist.append({"role": role, "parts": [{"text": str(m.content)}]})
111
- last = str(messages[-1].content)
112
- session = self._model.start_chat(history=hist)
 
113
  try:
114
- resp = session.send_message(last)
115
- return ChatMessage(role="assistant", content=resp.text)
116
- except Exception as e:
117
- return ChatMessage(role="assistant", content=f"Error Gemini chat: {e}")
118
 
 
119
  async def achat(self, messages: list[ChatMessage], **kwargs):
120
  return await asyncio.to_thread(self.chat, messages, **kwargs)
121
 
122
- def stream_complete(self, prompt: str, formatted=False, **kwargs):
123
- stream = self._model.generate_content(str(prompt), stream=True)
124
- def gen():
125
- acc = ""
126
- for chunk in stream:
127
- delta = getattr(chunk, "text", "")
128
- if not delta and hasattr(chunk, 'parts') and chunk.parts:
129
- delta = chunk.parts[0].text
130
- if delta:
131
- acc += delta
132
- yield CompletionResponse(text=acc, delta=delta)
133
- return gen()
134
-
135
- async def astream_complete(self, prompt: str, formatted=False, **kwargs):
136
- sync_gen = await asyncio.to_thread(self.stream_complete, prompt, formatted=formatted, **kwargs)
137
- async def async_gen_wrapper():
138
- for item in sync_gen:
139
- yield item
140
- return async_gen_wrapper()
141
-
142
- def stream_chat(self, messages: list[ChatMessage], **kwargs):
143
- hist = []
144
- for m in messages[:-1]:
145
- role = "user" if m.role == "user" else "model"
146
- hist.append({"role": role, "parts": [{"text": str(m.content)}]})
147
- last = str(messages[-1].content)
148
- session = self._model.start_chat(history=hist)
149
- stream = session.send_message(last, stream=True)
150
- def gen():
151
- acc = ""
152
- for chunk in stream:
153
- delta = getattr(chunk, "text", "")
154
- if not delta and hasattr(chunk, 'parts') and chunk.parts:
155
- delta = chunk.parts[0].text
156
- if delta:
157
- acc += delta
158
- yield ChatMessage(role="assistant", content=acc, additional_kwargs={"delta": delta})
159
- return gen()
160
-
161
- async def astream_chat(self, messages: list[ChatMessage], **kwargs):
162
- sync_gen = await asyncio.to_thread(self.stream_chat, messages, **kwargs)
163
- async def async_gen_wrapper():
164
- for item in sync_gen:
165
- yield item
166
- return async_gen_wrapper()
167
-
168
- def complete(self, prompt: str, formatted=False, **kwargs):
169
  try:
170
  resp = self._model.generate_content(str(prompt))
171
  return CompletionResponse(text=resp.text)
172
- except Exception as e:
173
- return CompletionResponse(text=f"Error Gemini complete: {e}")
174
 
175
- async def acomplete(self, prompt: str, formatted=False, **kwargs):
176
  return await asyncio.to_thread(self.complete, prompt, formatted=formatted, **kwargs)
177
 
178
- # -------------------------------------------------------------------
179
- # 3) HERRAMIENTAS PERSONALIZADAS
180
- # -------------------------------------------------------------------
181
- HEADERS = {'User-Agent': 'Mozilla/5.0'}
182
-
183
- def buscar_web(query, num_results=8):
184
- retriever = TavilySearchAPIRetriever(api_key=os.getenv("TAVILY_API_KEY"), k=num_results)
185
- results = retriever.invoke(query)
186
- # Formatear resultados para pasarlos al LLM
187
- formatted_results = []
188
- for i, doc in enumerate(results):
189
- formatted_results.append(
190
- f"Result {i+1}:\nTitle: {doc.metadata.get('title','')}\nSource: {doc.metadata.get('source','')}\nContent: {doc.page_content}\n"
191
- )
192
- return "\n\n".join(formatted_results)
 
 
193
 
194
  def reverse_text(text: str) -> str:
195
- """Invierte el orden de los caracteres en 'text'."""
196
  return text[::-1]
197
 
198
- def analyze_table(table_md: str, question: str) -> str:
199
- """
200
- Recibe una tabla en Markdown (con pipes y separadores) y, si la pregunta menciona 'conmut',
201
- verifica la conmutatividad de la matriz; en otro caso, devuelve el CSV equivalente.
202
- """
203
  try:
204
- #lines = [l for l in table_md.splitlines() if l.strip() and '---' not in l]
205
- # quitamos separadores y líneas vacías; aseguramos salto tras el encabezado
206
- lines = [
207
- l for l in table_md.strip().splitlines()
208
- if l.strip() and not l.lstrip().startswith('|---')
209
- ]
210
- rows = [[c.strip() for c in l.strip().strip('|').split('|')] for l in lines]
 
 
 
 
 
 
211
  if len(rows) < 2:
212
- return "Tabla Markdown mal formateada o vacía."
213
  df = pd.DataFrame(rows[1:], columns=rows[0])
214
- if 'conmut' in question.lower():
215
  cols = df.columns.tolist()[1:]
216
- counter = set()
217
- for x in cols:
218
- for y in cols:
219
- try:
220
- if df.loc[df[rows[0][0]] == x, y].iat[0] != df.loc[df[rows[0][0]] == y, x].iat[0]:
221
- counter.update([x, y])
222
- except:
223
- continue
224
- return ', '.join(sorted(counter)) or 'Conmutativa'
225
  return df.to_csv(index=False)
226
- except Exception as e:
227
- return f"Error analyze_table: {e}"
 
228
 
229
  def execute_code(code: str) -> str:
230
- """
231
- Primero intenta evaluar con eval() en un entorno protegido; si falla, invoca un subproceso 'python -c'.
232
- """
233
  try:
234
- allowed_globals = {'__builtins__': None, 'math': math}
235
- try:
236
- val = eval(code, allowed_globals, {})
237
- return str(val)
238
- except:
239
- res = subprocess.run(["python", "-S", "-c", code],
240
- capture_output=True, text=True, timeout=10)
241
- if res.returncode != 0:
242
- return f"Error código: {res.stderr.strip()}"
243
- return res.stdout.strip() or "(sin salida)"
244
  except subprocess.TimeoutExpired:
245
- return "Error ejecutar código: timeout"
246
- except Exception as e:
247
- return f"Error crítico: {e}"
248
-
249
- def read_excel_data(file_path: str, sheet_name=0) -> str:
250
- """
251
- Si file_path empieza con 'http', descarga el contenido y lee con pandas.
252
- Si es una ruta local, lee directamente. Devuelve el CSV.
253
- """
254
  try:
255
- if file_path.startswith(('http://', 'https://')):
256
- resp = requests.get(file_path, headers=HEADERS, timeout=30)
 
257
  resp.raise_for_status()
258
  df = pd.read_excel(BytesIO(resp.content), sheet_name=sheet_name)
259
  else:
260
  if not os.path.exists(file_path):
261
- return f"Error read_excel_data: archivo '{file_path}' no encontrado"
262
  df = pd.read_excel(file_path, sheet_name=sheet_name)
263
- df = df.fillna('')
264
- return df.to_csv(index=False)
265
- except Exception as e:
266
- return f"Error read_excel_data: {e}"
267
 
268
- def classify_botanical_foods(items_list_str: str) -> str:
269
- """
270
- Splits an input list of foods (English names) into botanical Vegetables,
271
- Fruits, and Others, and returns the three groups as comma-separated lists.
272
- Nothing that is a botanical fruit appears in the Vegetables list.
273
- """
274
 
275
- # --- botanical criteria -------------------------------------------------
 
 
276
  botanical_fruits = {
277
- "tomato", "bell pepper", "pepper", "green beans", "beans", "zucchini",
278
- "cucumber", "eggplant", "corn", "peas", "pea", "pumpkin", "squash",
279
- "avocado"
 
 
 
 
 
 
 
 
 
 
 
280
  }
281
-
282
  botanical_vegetables = {
283
- "broccoli", "celery", "lettuce", "kale", "spinach", "sweet potatoes",
284
- "sweet potato", "potato", "onion", "garlic", "carrot", "okra",
285
- "cabbage", "cauliflower", "beet", "turnip", "parsnip", "leek"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  }
287
-
288
- # -----------------------------------------------------------------------
289
- raw_items = [token.strip().lower() for token in items_list_str.split(",")]
290
-
291
- vegetables = []
292
- fruits = []
293
- others = []
294
-
295
- for item in raw_items:
296
- if item in botanical_vegetables and item not in botanical_fruits:
297
- vegetables.append(item)
298
- elif item in botanical_fruits:
299
- fruits.append(item)
300
  else:
301
- others.append(item)
302
-
303
- # De-duplicate and alphabetise
304
- vegetables = sorted(set(vegetables))
305
- fruits = sorted(set(fruits))
306
- others = sorted(set(others))
307
-
308
  return (
309
- f"Vegetables: {', '.join(vegetables)}\n"
310
- f"Fruits: {', '.join(fruits)}\n"
311
- f"Others: {', '.join(others)}"
312
  )
313
 
314
 
315
- def scrape_wikipedia_table_deprecated(page_title: str, section: str, table_index: int = 0) -> str:
316
- """
317
- Busca una sección en una página de Wikipedia y extrae la tabla indicada (por índice).
318
- Devuelve el CSV.
319
- """
320
- try:
321
- wikipedia.set_lang("en")
322
- page = wikipedia.page(page_title, auto_suggest=False)
323
- soup = BeautifulSoup(page.html(), 'html.parser')
324
- header = next(
325
- (h for h in soup.find_all(['h2', 'h3']) if section.lower() in h.get_text(strip=True).lower()),
326
- None
327
- )
328
- if not header:
329
- return f"Sección '{section}' no encontrada en '{page_title}'"
330
- tables = []
331
- for sib in header.find_next_siblings():
332
- if sib.name in ['h2', 'h3']:
333
- break
334
- if sib.name == 'table' and 'wikitable' in sib.get('class', []):
335
- tables.append(sib)
336
- if table_index >= len(tables):
337
- return f"Tabla índice {table_index} fuera de rango (solo {len(tables)} tablas)."
338
- df = pd.read_html(str(tables[table_index]))[0]
339
- return df.to_csv(index=False)
340
- except Exception as e:
341
- return f"Error scrape_wiki_table: {e}"
342
-
343
- def scrape_wikipedia_table(page_title: str,
344
- section: str | None = None,
345
- table_index: int = 0) -> str:
346
- """
347
- Devuelve la tabla pedida en Markdown.
348
- Si `section` es None ⇒ busca en toda la página.
349
- """
350
- base_url = "https://en.wikipedia.org/wiki/"
351
- url = base_url + page_title.replace(" ", "_")
352
-
353
- html = requests.get(url, timeout=15).text
354
- soup = BeautifulSoup(html, "html.parser")
355
-
356
- # encontrar tablas
357
- if section:
358
- header = soup.find(id=section)
359
- if not header:
360
- raise ValueError(f"Section '{section}' not found.")
361
- tables = header.find_all_next("table", class_="wikitable")
362
- else:
363
- tables = soup.find_all("table", class_="wikitable")
364
 
365
- if not tables or table_index >= len(tables):
366
- raise ValueError(f"Table index {table_index} out of range (found {len(tables)})")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
 
368
- df = pd.read_html(str(tables[table_index]), flavor="bs4")[0]
369
- return df.to_markdown(index=False)
 
 
 
370
 
371
- # -------------------------------------------------------------------
372
- # 4) ENVUELTORES DE HERRAMIENTAS (FunctionTool)
373
- # -------------------------------------------------------------------
374
- search_tool = FunctionTool.from_defaults(
375
- fn=buscar_web,
376
- name="web_search",
377
- description="Searches the web using TavilySearch API."
378
- )
379
-
380
- reverse_tool = FunctionTool.from_defaults(
381
- fn=reverse_text,
382
- name="reverse_text",
383
- description="Invierte el texto recibido."
384
- )
385
- table_tool = FunctionTool.from_defaults(
386
- fn=analyze_table,
387
- name="analyze_markdown_table",
388
- description="Procesa tabla Markdown y verifica conmutatividad si se menciona 'conmut'."
389
- )
390
- code_tool = FunctionTool.from_defaults(
391
- fn=execute_code,
392
- name="execute_code",
393
- description="Ejecuta código Python de forma segura."
394
- )
395
- excel_tool = FunctionTool.from_defaults(
396
- fn=read_excel_data,
397
- name="read_excel_data",
398
- description="Lee un archivo Excel (local o URL) y devuelve CSV."
399
- )
400
- botanical_tool = FunctionTool.from_defaults(
401
- fn=classify_botanical_foods,
402
- name="classify_botanical_foods",
403
- description="Clasifica botánicamente una lista de alimentos."
404
- )
405
- scrape_tool = FunctionTool.from_defaults(
406
- fn=scrape_wikipedia_table,
407
- name="scrape_wiki_table",
408
- description="Extrae tabla de sección específica de Wikipedia."
409
- )
410
 
411
- fallback_tool = FunctionTool.from_defaults(
412
- fn=lambda q: "I cannot answer with the available tools.",
413
- name="no_tool_solution",
414
- description="Returns the standard sentence when no tool can help."
415
- )
416
 
417
- all_tools = [
418
- search_tool,
419
- scrape_tool,
420
- table_tool,
421
- code_tool,
422
- excel_tool,
423
- botanical_tool,
424
- reverse_tool,
425
- fallback_tool
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
426
  ]
427
 
428
- # -------------------------------------------------------------------
429
- # 5) DESCRIPCIONES DE HERRAMIENTAS (con ejemplos)
430
- # -------------------------------------------------------------------
431
- tool_descriptions = "\n".join([
432
- f"{t.metadata.name}: {t.metadata.description} "
433
- + {
434
- "classify_botanical_foods": "(Ej: classify_botanical_foods('milk, eggs, broccoli, celery, lettuce'))",
435
- "read_excel_data": "(Ej: read_excel_data('ventas.xlsx', sheet_name=0))",
436
- "analyze_markdown_table": "(Ej: analyze_markdown_table('| A | B |\\n|---|---|\\n|1|2|', '¿Es conmut?'))",
437
- "web_search": "(Ej: web_search('Hokkaido Nippon-Ham Fighters roster'))",
438
- "scrape_wiki_table": "(Ej: scrape_wiki_table('Malko Competition', 'Winners', 0))",
439
- "reverse_text": "(Ej: reverse_text('Hola'))",
440
- "execute_code": "(Ej: execute_code('5*7'))",
441
- }.get(t.metadata.name, "")
442
- for t in all_tools
443
- ])
444
- # -------------------------------------------------------------------
445
- # 6) PROMPT DE SISTEMA MEJORADO with few-shot examples
446
- # -------------------------------------------------------------------
447
- system_prompt = f"""
448
- You are Alfred, a ReAct agent. Your goal is to answer correctly using the available tools.
449
-
450
- Strict guidelines:
451
- STOP: After you output "Observation:", your *very next* message **must** be the final answer and **must** be EXACTLY the observation text unchanged, or the sentence "I cannot answer with the available tools." No extra words.
452
-
453
- 1️. ALWAYS use the available tools first if the question requires information you cannot deduce internally.
454
- 2️. When a tool is used, ONLY answer based on the tool output. DO NOT add or invent any content not explicitly present in the tool output.
455
- 3️. If a tool fails, you may explain the failure clearly. DO NOT fabricate the answer.
456
- 4️. If no tool can help and you don't know, say "I cannot answer with the available tools."
457
-
458
- Flow:
459
-
460
- - **READ the question carefully.**
461
- - **SELECT the most appropriate tool:**
462
- - `classify_botanical_foods` → grocery list, vegetables, fruits
463
- - `read_excel_data` → Excel or attached Excel
464
- - `scrape_wiki_table` → Wikipedia, featured articles, tables
465
- - `analyze_markdown_table` → Markdown table, commutativity
466
- - `reverse_text` → reverse text
467
- - `execute_code` → math, code
468
- - `web_search` → all other general questions
469
- - **CALL the tool → COPY its output EXACTLY**
470
- - **When answering, ONLY use the tool output. DO NOT add any interpretation unless the tool explicitly asked you to process it.**
471
-
472
- Few-shot examples:
473
-
474
- ### Example: classify_botanical_foods
475
- User: "milk, eggs, broccoli, celery, lettuce"
476
- Agent:
477
- {{
478
- "tool": "classify_botanical_foods",
479
- "input": "milk, eggs, broccoli, celery, lettuce"
480
- }}
481
- Observation: Verduras: broccoli, celery, lettuce
482
- Frutas:
483
- Otros: eggs, milk
484
- Final Answer: "broccoli, celery, lettuce"
485
-
486
- ### Example: analyze_markdown_table
487
- User: "Check commutativity"
488
- Agent:
489
- {{
490
- "tool": "analyze_markdown_table",
491
- "input": "|A|B|C|\\n|---|---|---|\\n|A|A|B|C|..."
492
- }}
493
- Observation: a, b
494
- Final Answer: "a, b"
495
-
496
- ---
497
-
498
- ONLY respond following this flow. DO NOT answer using your internal knowledge if a tool is required and available.
499
- If unsure, default to using the most appropriate tool first.
500
-
501
  Available tools:
502
-
503
- {tool_descriptions}
504
  """
505
 
506
-
507
- # -------------------------------------------------------------------
508
- # 7) INICIALIZAR EL AGENTE ReActAgent
509
- # -------------------------------------------------------------------
510
  llm = GeminiLLM()
511
- alfred_agent = ReActAgent.from_tools(
512
- tools=all_tools,
513
  llm=llm,
514
- system_prompt=system_prompt,
515
  verbose=True,
516
  max_iterations=25,
517
  callback_manager=llm.callback_manager,
518
- handle_parsing_errors=True
519
  )
520
 
521
- # --- auxiliar: extraer observación limpia o fallback ----------
 
 
522
  def _extract_observation(raw: str) -> str:
523
- """
524
- Si el agente produjo un paso con 'Observation:', devuelve exactamente
525
- ese texto (sin espacios iniciales/finales). De lo contrario devuelve raw.
526
- """
527
  if "Observation:" in raw:
528
- # ejemplo: "Observation: Verduras: ...\nFinal Answer: ..."
529
  obs = raw.split("Observation:", 1)[1].strip()
530
- # cortamos si accidentalmente quedó un "Final Answer:" concatenado
531
  if "Final Answer:" in obs:
532
  obs = obs.split("Final Answer:", 1)[0].strip()
533
- # si el fallback-tool fue llamado, obs ya contiene la frase estándar
534
  return obs
535
  return raw.strip()
536
 
537
- # --------------------------------------------------------------
538
 
539
- def basic_agent_response(question: str) -> str:
540
- """
541
- - Maneja el caso especial de Excel adjunto.
542
- - Ejecuta el ReActAgent y limpia la salida para cumplir las reglas SAIA.
543
- """
544
- try:
545
- lower_q = question.lower()
546
-
547
- # 1) Caso Excel adjunto ------------------------------------------------
548
- if "attached excel" in lower_q or "archivo excel" in lower_q:
549
- excel_result = read_excel_data("data/attached.xlsx")
550
- return (
551
- excel_result
552
- if "Error" not in excel_result
553
- else "The Excel file is not available."
554
- )
555
-
556
- # 2) Ejecutar agente ---------------------------------------------------
557
- print(f"[DEBUG] ➜ Pregunta: {question}")
558
- raw_resp = alfred_agent.query(question) # puede ser ChatMessage o str
559
-
560
- # 3) Normalizar respuesta ---------------------------------------------
561
- # a) si es ChatMessage
562
- if hasattr(raw_resp, "response") and raw_resp.response is not None:
563
- cleaned = _extract_observation(str(raw_resp.response))
564
- else:
565
- cleaned = _extract_observation(str(raw_resp))
566
-
567
- # 4) Garantizar fallback único -----------------------------------------
568
- if not cleaned:
569
- cleaned = "I cannot answer with the available tools."
570
 
571
- return cleaned
572
-
573
- # 5) Manejo de errores -----------------------------------------------------
574
- except Exception as e:
575
- print(f"[ERROR] {e}")
576
- return "I cannot answer with the available tools."
577
-
578
- '''
579
  def basic_agent_response(question: str) -> str:
580
- """
581
- Detecta "Excel adjunto" o usa ReActAgent.query para el resto.
582
- """
583
  try:
584
- if "attached excel" in question.lower() or "archivo excel" in question.lower():
585
- excel_result = read_excel_data("data/attached.xlsx")
586
- if "Error" in excel_result:
587
- return "The Excel file is not available."
588
- return excel_result
589
-
590
- resp = alfred_agent.query(question)
591
- if hasattr(resp, 'response') and resp.response is not None:
592
- return str(resp.response)
593
- elif resp is not None:
594
- return str(resp)
595
- return "No se generó una respuesta válida."
596
- except Exception as e:
597
- return f"Error crítico del agente: {e}"
598
- '''
 
1
  import os
2
  import math
 
3
  import asyncio
4
  import subprocess
5
  import requests
6
+ from io import BytesIO
 
7
  from bs4 import BeautifulSoup
 
8
  from pydantic import Field
 
9
 
10
+ # ---------- OPTIONAL & LAZY IMPORTS ----------
11
+ # (avoid hard‑failure if libs are absent; import inside tools when needed)
12
+
13
+ # ---------- LLM WRAPPER ----------
14
  from llama_index.core.llms import ChatMessage, LLMMetadata, LLM, CompletionResponse
 
15
  from llama_index.core.agent import ReActAgent
16
  from llama_index.core.callbacks.llama_debug import LlamaDebugHandler
17
+ from llama_index.core.tools import FunctionTool
18
+ from llama_index.core.schema import Document
19
 
20
  from langchain_community.retrievers import TavilySearchAPIRetriever
21
+ import google.generativeai as genai
22
+
23
 
24
+ # ---------- BASIC SETUP ----------
25
+ HEADERS = {"User-Agent": "Mozilla/5.0"}
26
 
27
+
28
def check_required_keys(
    required_keys: tuple[str, ...] = ("TAVILY_API_KEY", "GEMINI_API_KEY"),
) -> list[str]:
    """Report which required API keys are absent from the environment.

    Prints a warning naming every missing key, or a confirmation when all of
    them are present.

    Args:
        required_keys: Names of the environment variables to check. Defaults
            to the two keys this module reads.

    Returns:
        The names of the variables that are unset or blank, in the order
        given (empty list when nothing is missing).
    """
    # A variable set to an empty or whitespace-only value is as unusable as
    # an unset one, so treat both as missing.
    missing = [key for key in required_keys if not (os.getenv(key) or "").strip()]
    if missing:
        print(
            f"⚠️ WARNING: Missing API keys: {', '.join(missing)}. Agent will not function properly!"
        )
    else:
        print("✅ All required API keys are present.")
    return missing
40
 
41
+
42
  check_required_keys()
43
+
44
+ # Monkey-patch required by LlamaIndex: expose `ChatMessage.message` as the message itself
 
45
  ChatMessage.message = property(lambda self: self)
46
 
47
+
48
+ # ---------- GEMINI LLM ----------
 
49
class GeminiLLM(LLM):
    """LlamaIndex ``LLM`` adapter backed by a ``google.generativeai`` model.

    The API key is read from the ``GEMINI_API_KEY`` environment variable when
    the instance is created. ``chat`` and ``complete`` never raise on a
    failed request: the error is returned as the text of the reply.
    """

    model_name: str = Field(default="models/gemini-1.5-flash-latest")
    temperature: float = Field(default=0.0)

    # Underlying genai.GenerativeModel, created in __init__.
    _model = None

    class Config:
        extra = "allow"

    def __init__(self, **kwargs):
        """Configure the Gemini client and make sure a debug handler exists.

        Raises:
            ValueError: If ``GEMINI_API_KEY`` is not set.
        """
        super().__init__(**kwargs)
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            raise ValueError("GEMINI_API_KEY not set in environment")
        genai.configure(api_key=api_key)

        self._model = genai.GenerativeModel(
            model_name=self.model_name,
            generation_config=genai.types.GenerationConfig(temperature=self.temperature),
        )
        if self.callback_manager is None:
            from llama_index.core.callbacks.base import CallbackManager

            self.callback_manager = CallbackManager([])
        if not self.callback_manager.handlers:
            self.callback_manager.add_handler(LlamaDebugHandler())

    # ----- metadata -----
    @property
    def metadata(self):
        """Return the static capability description reported to LlamaIndex."""
        return LLMMetadata(
            context_window=1_048_576,
            num_output=8192,
            is_chat_model=True,
            is_function_calling_model=True,
            model_name=self.model_name,
        )

    # ----- helpers -----
    def _split_messages(self, messages: list[ChatMessage]):
        """Split ``messages`` into (Gemini history dicts, text of the last message).

        Every message whose role is not ``"user"`` is sent with the role
        ``"model"``.
        NOTE(review): this also maps system messages to "model" — confirm
        that is the intended treatment.
        """
        history = [
            {"role": ("user" if m.role == "user" else "model"), "parts": [{"text": str(m.content)}]}
            for m in messages[:-1]
        ]
        return history, str(messages[-1].content)

    # ----- sync chat -----
    def chat(self, messages: list[ChatMessage], **kwargs) -> ChatMessage:
        """Send a conversation to Gemini and return the assistant reply.

        All messages but the last become the chat history; the last one is
        sent as the new turn. Any failure (including an empty ``messages``
        list) is reported as an ``"Error Gemini chat: ..."`` reply instead of
        an exception.
        """
        if not messages:
            return ChatMessage(role="assistant", content="Error Gemini chat: no messages provided")
        try:
            history, last_user_msg = self._split_messages(messages)
            # start_chat is inside the try so that a failure creating the
            # session is reported the same way as a failed send.
            session = self._model.start_chat(history=history)
            response = session.send_message(last_user_msg)
            return ChatMessage(role="assistant", content=response.text)
        except Exception as exc:
            return ChatMessage(role="assistant", content=f"Error Gemini chat: {exc}")

    # ----- async chat -----
    async def achat(self, messages: list[ChatMessage], **kwargs):
        """Run :meth:`chat` in a worker thread."""
        return await asyncio.to_thread(self.chat, messages, **kwargs)

    # ----- streaming -----
    # The previous revision of this class implemented the four streaming
    # methods below, and LlamaIndex's base LLM interface declares them.
    # NOTE(review): they appear to be abstract in llama_index.core, in which
    # case the class cannot be instantiated without them — confirm against
    # the installed version.
    def stream_chat(self, messages: list[ChatMessage], **kwargs):
        """Stream a chat reply; yields messages carrying the accumulated text.

        Each yielded ``ChatMessage`` holds the text received so far in
        ``content`` and the newest piece in ``additional_kwargs["delta"]``.
        """
        history, last_user_msg = self._split_messages(messages)
        session = self._model.start_chat(history=history)
        stream = session.send_message(last_user_msg, stream=True)

        def gen():
            acc = ""
            for chunk in stream:
                delta = getattr(chunk, "text", "")
                if not delta and hasattr(chunk, "parts") and chunk.parts:
                    delta = chunk.parts[0].text
                if delta:
                    acc += delta
                    yield ChatMessage(role="assistant", content=acc, additional_kwargs={"delta": delta})

        return gen()

    async def astream_chat(self, messages: list[ChatMessage], **kwargs):
        """Async wrapper around :meth:`stream_chat`."""
        sync_gen = await asyncio.to_thread(self.stream_chat, messages, **kwargs)

        async def async_gen_wrapper():
            for item in sync_gen:
                yield item

        return async_gen_wrapper()

    def stream_complete(self, prompt: str, formatted: bool = False, **kwargs):
        """Stream a completion; yields responses carrying the accumulated text."""
        stream = self._model.generate_content(str(prompt), stream=True)

        def gen():
            acc = ""
            for chunk in stream:
                delta = getattr(chunk, "text", "")
                if not delta and hasattr(chunk, "parts") and chunk.parts:
                    delta = chunk.parts[0].text
                if delta:
                    acc += delta
                    yield CompletionResponse(text=acc, delta=delta)

        return gen()

    async def astream_complete(self, prompt: str, formatted: bool = False, **kwargs):
        """Async wrapper around :meth:`stream_complete`."""
        sync_gen = await asyncio.to_thread(self.stream_complete, prompt, formatted=formatted, **kwargs)

        async def async_gen_wrapper():
            for item in sync_gen:
                yield item

        return async_gen_wrapper()

    # ----- completion helpers (rarely used) -----
    def complete(self, prompt: str, formatted: bool = False, **kwargs):
        """Return a single completion for ``prompt``.

        A failed request is reported as an ``"Error Gemini complete: ..."``
        response instead of an exception. ``formatted`` is accepted for
        interface compatibility and is not used.
        """
        try:
            resp = self._model.generate_content(str(prompt))
            return CompletionResponse(text=resp.text)
        except Exception as exc:
            return CompletionResponse(text=f"Error Gemini complete: {exc}")

    async def acomplete(self, prompt: str, formatted: bool = False, **kwargs):
        """Run :meth:`complete` in a worker thread."""
        return await asyncio.to_thread(self.complete, prompt, formatted=formatted, **kwargs)
114
 
115
+
116
+ # ---------- TOOLING ----------
117
+
118
def web_search(query: str, num_results: int = 5) -> str:
    """Run a Tavily search and return the hits as one numbered text block.

    Each hit is rendered with its title, source URL and content. Any failure
    is returned as an ``"Error web_search: ..."`` string rather than raised.
    """

    def _render(position: int, doc) -> str:
        # One entry per hit; missing metadata fields fall back to "".
        title = doc.metadata.get("title", "")
        source = doc.metadata.get("source", "")
        return f"Result {position}:\nTitle: {title}\nURL: {source}\nContent: {doc.page_content}\n"

    try:
        tavily = TavilySearchAPIRetriever(api_key=os.getenv("TAVILY_API_KEY"), k=num_results)
        hits = tavily.invoke(query)
        return "\n\n".join([_render(pos, hit) for pos, hit in enumerate(hits, start=1)])
    except Exception as exc:
        return f"Error web_search: {exc}"
131
+
132
 
133
def reverse_text(text: str) -> str:
    """Return ``text`` with its characters in reverse order."""
    return "".join(reversed(text))
135
 
136
+
137
+ # small util for optional pandas
138
+
139
+ def _pd_safe_import():
 
140
  try:
141
+ import pandas as pd # noqa: F401
142
+
143
+ return pd
144
+ except ModuleNotFoundError:
145
+ raise RuntimeError("pandas not available in this environment")
146
+
147
+
148
def analyze_markdown_table(table_md: str, question: str) -> str:
    """Check an operation table for commutativity, or convert it to CSV.

    The first table row is the header and the first column holds the row
    labels. When ``question`` contains ``"conmut"`` (case-insensitive), every
    pair of header elements ``x``, ``y`` is compared: cell (row ``x``,
    column ``y``) against cell (row ``y``, column ``x``).

    Args:
        table_md: Pipe-delimited Markdown table.
        question: Text of the question being answered.

    Returns:
        For a commutativity question: the sorted, comma-separated elements
        involved in at least one mismatching pair, or ``"Conmutativa"`` when
        there is none. Otherwise the table as CSV (requires pandas).
        Any failure is returned as an ``"Error analyze_markdown_table: ..."``
        string rather than raised.
    """
    import re  # local import: the module header does not import re

    # A delimiter cell such as ---, :---, ---: or :---:
    separator_cell = re.compile(r":?-{3,}:?")
    try:
        rows = []
        for line in table_md.strip().splitlines():
            stripped = line.strip()
            if not stripped:
                continue
            cells = [cell.strip() for cell in stripped.strip("|").split("|")]
            # Skip the header/body delimiter row, including the spaced
            # ("| --- |") and aligned ("|:---:|") spellings.
            if all(separator_cell.fullmatch(cell) for cell in cells):
                continue
            rows.append(cells)
        if len(rows) < 2:
            return "Error analyze_markdown_table: empty or malformed markdown table"

        header, body = rows[0], rows[1:]
        if "conmut" in question.lower():
            elements = header[1:]
            # Row label -> {column name: cell}; the first row with a given
            # label wins.
            table = {}
            for row in body:
                table.setdefault(row[0], dict(zip(header, row)))
            missing = [element for element in elements if element not in table]
            if missing:
                return f"Error analyze_markdown_table: no row found for {', '.join(missing)}"
            # Both members of a mismatching pair are reported.
            offenders = {
                element
                for x in elements
                for y in elements
                if table[x][y] != table[y][x]
                for element in (x, y)
            }
            return ", ".join(sorted(offenders)) or "Conmutativa"

        # Only the CSV conversion needs pandas.
        pd = _pd_safe_import()
        return pd.DataFrame(body, columns=header).to_csv(index=False)
    except Exception as exc:
        return f"Error analyze_markdown_table: {exc}"
169
+
170
 
171
def execute_code(code: str) -> str:
    """Run a short Python snippet in a separate interpreter process.

    The snippet is executed with ``-S -c`` by the same interpreter that runs
    this module, with a 10-second timeout.

    SECURITY NOTE: this is process isolation only, NOT a sandbox. The child
    process has the same file-system, network and environment access as the
    caller, so ``code`` must be treated as fully trusted.

    Args:
        code: Python source to execute.

    Returns:
        ``"Output: <stdout>"`` (or ``"Output: (No output)"``) when the child
        exits with status 0, ``"Error: <stderr>"`` when it does not,
        ``"Error: execution timeout"`` on timeout, and
        ``"Error execute_code: ..."`` if the process could not be run.
    """
    import sys  # local import: the module header does not import sys

    try:
        # sys.executable avoids depending on a "python" entry on PATH, which
        # may be absent or point at a different interpreter.
        res = subprocess.run(
            [sys.executable or "python", "-S", "-c", code],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except subprocess.TimeoutExpired:
        return "Error: execution timeout"
    except Exception as exc:
        return f"Error execute_code: {exc}"

    if res.returncode == 0:
        output = res.stdout.strip()
        return f"Output: {output if output else '(No output)'}"
    return f"Error: {res.stderr.strip()}"
183
+
184
+
185
def read_excel_data(file_path: str, sheet_name: int | str = 0) -> str:
    """Load one sheet of an Excel workbook and return it as CSV text.

    ``file_path`` is fetched over HTTP when it starts with ``http://`` or
    ``https://``; otherwise it is read from the local file system. Empty
    cells are written as empty strings. Any failure, including a missing
    local file, is returned as an ``"Error read_excel_data: ..."`` string.
    """
    try:
        pandas_mod = _pd_safe_import()
        is_remote = file_path.startswith(("http://", "https://"))
        if not is_remote:
            if not os.path.exists(file_path):
                return f"Error read_excel_data: file '{file_path}' not found"
            workbook = file_path
        else:
            response = requests.get(file_path, headers=HEADERS, timeout=20)
            response.raise_for_status()
            # pandas reads the downloaded bytes from an in-memory buffer.
            workbook = BytesIO(response.content)
        frame = pandas_mod.read_excel(workbook, sheet_name=sheet_name)
        return frame.fillna("").to_csv(index=False)
    except Exception as exc:
        return f"Error read_excel_data: {exc}"
 
200
 
 
 
 
 
 
 
201
 
202
+ # --- botanical classifier (unchanged) ---
203
+
204
def classify_botanical_foods(items_list_str: str) -> str:
    """Sort a comma-separated food list into vegetables, fruits and others.

    Items are trimmed and lower-cased, then looked up in two fixed sets.
    Anything found in neither set is reported under "Others". Blank items
    (from an empty input, doubled commas or a trailing comma) are ignored.

    Args:
        items_list_str: Food names separated by commas.

    Returns:
        Three lines, ``Vegetables: ...``, ``Fruits: ...`` and ``Others: ...``,
        each holding a de-duplicated, alphabetically sorted, comma-separated
        list.
    """
    botanical_fruits = {
        "tomato",
        "bell pepper",
        "pepper",
        "green beans",
        "beans",
        "zucchini",
        "cucumber",
        "eggplant",
        "corn",
        "peas",
        "pea",
        "pumpkin",
        "squash",
        "avocado",
    }
    # NOTE(review): okra pods are usually described as botanical fruits —
    # confirm whether "okra" belongs in this set.
    botanical_vegetables = {
        "broccoli",
        "celery",
        "lettuce",
        "kale",
        "spinach",
        "sweet potatoes",
        "sweet potato",
        "potato",
        "onion",
        "garlic",
        "carrot",
        "okra",
        "cabbage",
        "cauliflower",
        "beet",
        "turnip",
        "parsnip",
        "leek",
    }
    vegs, fruits, others = set(), set(), set()
    for token in (t.strip().lower() for t in items_list_str.split(",")):
        if not token:
            # An empty item would otherwise show up as "" under Others.
            continue
        # Fruits take precedence, so a botanical fruit never lands in the
        # vegetables group.
        if token in botanical_fruits:
            fruits.add(token)
        elif token in botanical_vegetables:
            vegs.add(token)
        else:
            others.add(token)
    return (
        f"Vegetables: {', '.join(sorted(vegs))}\n"
        f"Fruits: {', '.join(sorted(fruits))}\n"
        f"Others: {', '.join(sorted(others))}"
    )
254
 
255
 
256
+ # --- flexible Wikipedia table scraper ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
 
258
def scrape_wiki_table(page_title: str, section: str | None = None, table_index: int = 0) -> str:
    """Return one ``wikitable`` from an English Wikipedia article as markdown.

    Args:
        page_title: Article title; spaces are converted to underscores to
            build the ``en.wikipedia.org/wiki/`` URL.
        section: Optional case-insensitive substring of an ``h2``/``h3``
            heading. When given, only ``wikitable`` tables that appear after
            the first matching heading are considered.
        table_index: Position of the wanted table within the candidate list.

    Returns:
        The table rendered with ``DataFrame.to_markdown(index=False)``, or a
        string starting with ``"Error scrape_wiki_table:"`` when the page
        cannot be fetched, the section or table is missing, or parsing fails.
    """
    # Imported locally so this block does not depend on a module-level import.
    from io import StringIO

    try:
        url = f"https://en.wikipedia.org/wiki/{page_title.replace(' ', '_')}"
        # Send the same HEADERS as the other fetchers in this file and fail
        # loudly on HTTP errors, so a missing page is reported as such rather
        # than as "table index out of range (found 0)".
        resp = requests.get(url, headers=HEADERS, timeout=15)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")

        if section:
            wanted = section.lower()
            header_tag = soup.find(
                lambda tag: tag.name in {"h2", "h3"}
                and wanted in tag.get_text(" ", strip=True).lower()
            )
            if not header_tag:
                return f"Error scrape_wiki_table: section '{section}' not found"
            tables = header_tag.find_all_next("table", class_="wikitable")
        else:
            tables = soup.find_all("table", class_="wikitable")
        if not tables or table_index >= len(tables):
            return f"Error scrape_wiki_table: table index {table_index} out of range (found {len(tables)})"

        pd = _pd_safe_import()
        # Wrap the markup in StringIO: passing literal HTML to read_html is
        # deprecated in recent pandas releases.
        table_html = StringIO(str(tables[table_index]))
        df = pd.read_html(table_html, flavor="bs4")[0]
        return df.to_markdown(index=False)
    except Exception as exc:
        return f"Error scrape_wiki_table: {exc}"
283
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
 
285
+ # --- generic URL text scraper ---
 
 
 
 
286
 
287
def scrape_url_text(url: str) -> str:
    """Fetch a web page and return its visible text, capped at 8000 characters.

    ``script``, ``style`` and ``noscript`` elements are removed before the
    text is extracted; every remaining line is trimmed and blank lines are
    dropped.

    Args:
        url: Address of the page to download.

    Returns:
        The cleaned text (at most 8000 characters), or a string starting
        with ``"Error scrape_url_text:"`` if anything goes wrong.
    """
    try:
        response = requests.get(url, headers=HEADERS, timeout=20)
        document = BeautifulSoup(response.text, "html.parser")

        # Drop elements whose content is never rendered as page text.
        for node in document(["script", "style", "noscript"]):
            node.decompose()

        kept_lines = []
        for line in document.get_text("\n").splitlines():
            trimmed = line.strip()
            if trimmed:
                kept_lines.append(trimmed)
        return "\n".join(kept_lines)[:8000]
    except Exception as exc:
        return f"Error scrape_url_text: {exc}"
298
+
299
+
300
+ # ---------- TOOL WRAPPERS ----------
301
+
302
# Registry of (callable, tool name, description) triples exposed to the agent.
# The last entry is a fallback that ignores its argument and returns the fixed
# "cannot answer" message.
tool_defs = [
    (web_search, "web_search", "Searches the web via Tavily."),
    (scrape_wiki_table, "scrape_wiki_table", "Extracts a wikitable from Wikipedia."),
    (scrape_url_text, "scrape_url_text", "Fetch any URL and return visible text."),
    (analyze_markdown_table, "analyze_markdown_table", "Analyze a markdown table (commutativity etc)."),
    (execute_code, "execute_code", "Run short python snippets securely."),
    (read_excel_data, "read_excel_data", "Load Excel (URL or local) → CSV."),
    (classify_botanical_foods, "classify_botanical_foods", "Botanically classify food list."),
    (reverse_text, "reverse_text", "Reverse a text string."),
    (lambda q: "I cannot answer with the available tools.", "no_tool_solution", "Fallback answer when stuck."),
]

# Wrap every registry entry in a FunctionTool, keeping the registry order.
TOOLS = [
    FunctionTool.from_defaults(fn=func, name=tool_name, description=summary)
    for func, tool_name, summary in tool_defs
]
315
+
316
+ # ---------- SYSTEM PROMPT ----------
317
# One "name: description" line per registered tool, in TOOLS order.
tool_desc_str = "\n".join(
    f"{tool.metadata.name}: {tool.metadata.description}" for tool in TOOLS
)

# Instructions given to the agent; the tool listing is interpolated at the end.
SYSTEM_PROMPT = f"""
You are Alfred, a ReAct agent. Use the provided tools to answer.
Rules:
1. Try a relevant tool first when external info is needed.
2. After a tool call you receive `Observation:`. Your *very next* assistant message **must** be exactly that observation (untouched) *or* the fixed string "I cannot answer with the available tools." – no extra text.
3. If a tool fails, think why and try an alternative (different params / another tool) once before giving up.
4. Do not invent facts.
Available tools:
{tool_desc_str}
"""
328
 
329
+ # ---------- REACT AGENT ----------
 
 
 
330
# Language model that drives the agent (GeminiLLM is defined earlier in this file).
llm = GeminiLLM()

# ReAct agent built from the registered tools and the system prompt.
# NOTE(review): confirm that the installed llama_index ReActAgent.from_tools
# actually consumes `system_prompt` and `handle_parsing_errors`; if it only
# accepts them through **kwargs they may have no effect.
agent = ReActAgent.from_tools(
    tools=TOOLS,
    llm=llm,
    max_iterations=25,
    verbose=True,
    system_prompt=SYSTEM_PROMPT,
    handle_parsing_errors=True,
    callback_manager=llm.callback_manager,
)
340
 
341
+
342
+ # Helper to strip to the last Observation or fallback
343
+
344
def _extract_observation(raw: str) -> str:
    """Reduce an agent transcript to the text of its last observation.

    Args:
        raw: Agent output that may contain ``Observation:`` and
            ``Final Answer:`` markers.

    Returns:
        The text following the *last* ``Observation:`` marker, cut off at the
        first ``Final Answer:`` that follows it and stripped of surrounding
        whitespace. When no ``Observation:`` marker is present, the whole
        input is returned stripped.
    """
    # rpartition keeps only what follows the final marker, so earlier
    # Thought/Action/Observation steps of a multi-step trace are discarded.
    _, marker, tail = raw.rpartition("Observation:")
    if not marker:
        return raw.strip()
    return tail.partition("Final Answer:")[0].strip()
351
 
 
352
 
353
+ # Public entry point
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
354
 
 
 
 
 
 
 
 
 
355
def basic_agent_response(question: str) -> str:
    """Ask the module-level agent a question and return a cleaned answer.

    Args:
        question: The user question forwarded to ``agent.query``.

    Returns:
        The agent's reply after ``_extract_observation`` has been applied, or
        the fixed string ``"I cannot answer with the available tools."`` when
        the cleaned reply is empty or any exception is raised.
    """
    fallback = "I cannot answer with the available tools."
    try:
        print(f"[DEBUG] Question: {question}")
        result = agent.query(question)
        # Prefer the `.response` attribute when the result object has one.
        reply_text = str(getattr(result, "response", result))
        answer = _extract_observation(reply_text)
    except Exception as exc:
        print(f"[ERROR] {exc}")
        return fallback
    return answer if answer else fallback