Nancy1906 committed on
Commit
44d2a9f
·
verified ·
1 Parent(s): 7ca40c9

Update xxxx

Browse files
Files changed (1) hide show
  1. my_tools.py +113 -30
my_tools.py CHANGED
@@ -18,6 +18,11 @@ from llama_index.core.tools import FunctionTool
18
  from llama_index.core.agent import ReActAgent
19
  from llama_index.core.callbacks.llama_debug import LlamaDebugHandler
20
 
 
 
 
 
 
21
  # -------------------------------------------------------------------
22
  # 1) MONKEY-PATCH PARA ChatMessage (por requerimiento de LlamaIndex)
23
  # -------------------------------------------------------------------
@@ -163,30 +168,43 @@ class GeminiLLM(LLM):
163
  # -------------------------------------------------------------------
164
  # 3) HERRAMIENTAS PERSONALIZADAS
165
  # -------------------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  HEADERS = {'User-Agent': 'Mozilla/5.0'}
167
 
168
- def buscar_web(query: str, max_attempts: int = 2, num_results: int = 5) -> str:
169
- """
170
- Usa DuckDuckGo (vía duckduckgo_search) para devolver hasta 'num_results' resultados.
171
- """
172
- for i in range(max_attempts):
173
- try:
174
- with DDGS(headers=HEADERS, timeout=25) as ddgs:
175
- results = list(ddgs.text(query, region='es-es', safesearch='moderate', max_results=num_results))
176
- if results:
177
- salida = []
178
- for idx, r in enumerate(results):
179
- titulo = r.get('title', 'Sin título')
180
- enlace = r.get('href', 'N/A')
181
- cuerpo = r.get('body', '')
182
- salida.append(f"Fuente {idx+1}: Título: {titulo}\nEnlace: {enlace}\nCuerpo: {cuerpo}")
183
- return "\n\n".join(salida)
184
- return "No se encontraron resultados relevantes."
185
- except Exception as e:
186
- if i < max_attempts - 1:
187
- time.sleep(2 * (i + 1))
188
- else:
189
- return f"Error buscar_web tras {max_attempts} intentos: {e}"
190
 
191
  def reverse_text(text: str) -> str:
192
  """Invierte el orden de los caracteres en 'text'."""
@@ -297,10 +315,10 @@ def classify_botanical(items_list_str: str) -> str:
297
  others_sorted = sorted(set(others))
298
 
299
  return (
300
- f"Verduras: {', '.join(vegs_sorted)}\n"
301
- f"Frutas: {', '.join(fruits_sorted)}\n"
302
- f"Otros: {', '.join(others_sorted)}"
303
- )
304
 
305
  def scrape_wikipedia_table(page_title: str, section: str, table_index: int = 0) -> str:
306
  """
@@ -308,7 +326,7 @@ def scrape_wikipedia_table(page_title: str, section: str, table_index: int = 0)
308
  Devuelve el CSV.
309
  """
310
  try:
311
- wikipedia.set_lang("es")
312
  page = wikipedia.page(page_title, auto_suggest=False)
313
  soup = BeautifulSoup(page.html(), 'html.parser')
314
  header = next(
@@ -336,8 +354,9 @@ def scrape_wikipedia_table(page_title: str, section: str, table_index: int = 0)
336
  search_tool = FunctionTool.from_defaults(
337
  fn=buscar_web,
338
  name="web_search",
339
- description="Búsqueda DuckDuckGo (máximo 5 resultados)."
340
  )
 
341
  reverse_tool = FunctionTool.from_defaults(
342
  fn=reverse_text,
343
  name="reverse_text",
@@ -405,7 +424,7 @@ tool_descriptions = "\n".join([
405
  # -------------------------------------------------------------------
406
  # 6) PROMPT DE SISTEMA MEJORADO with few-shot examples
407
  # -------------------------------------------------------------------
408
- system_prompt = f"""
409
  Eres Alfred, un agente ReAct eficiente y preciso. Tu objetivo es responder correctamente usando las herramientas disponibles.
410
  A continuación tienes ejemplos de cómo usar cada herramienta:
411
 
@@ -452,6 +471,65 @@ Herramientas disponibles (USAR EXÁCTAMENTE estos nombres):
452
  {tool_descriptions}
453
  """
454
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
455
  # -------------------------------------------------------------------
456
  # 7) INICIALIZAR EL AGENTE ReActAgent
457
  # -------------------------------------------------------------------
@@ -472,7 +550,11 @@ def basic_agent_response(question: str) -> str:
472
  """
473
  try:
474
  if "attached excel" in question.lower() or "archivo excel" in question.lower():
475
- return read_excel_data("data/attached.xlsx")
 
 
 
 
476
  resp = alfred_agent.query(question)
477
  if hasattr(resp, 'response') and resp.response is not None:
478
  return str(resp.response)
@@ -481,3 +563,4 @@ def basic_agent_response(question: str) -> str:
481
  return "No se generó una respuesta válida."
482
  except Exception as e:
483
  return f"Error crítico del agente: {e}"
 
 
18
  from llama_index.core.agent import ReActAgent
19
  from llama_index.core.callbacks.llama_debug import LlamaDebugHandler
20
 
21
+ from langchain_community.retrievers import TavilySearchAPIRetriever
22
+ from llama_index.core.schema import Document
23
+ from app.utils.search_utils import search_cache, add_documents_with_filtered_metadata
24
+
25
+
26
  # -------------------------------------------------------------------
27
  # 1) MONKEY-PATCH PARA ChatMessage (por requerimiento de LlamaIndex)
28
  # -------------------------------------------------------------------
 
168
  # -------------------------------------------------------------------
169
  # 3) HERRAMIENTAS PERSONALIZADAS
170
  # -------------------------------------------------------------------
171
def retrieve_with_fallback(query: str):
    """Return formatted search results for *query*, preferring the local cache.

    ``search_cache`` is consulted first. On a cache miss the query is sent to
    the Tavily retriever (top 5 hits), the retrieved documents are stored in
    the cache, and the result is rendered with ``format_docs``.

    Args:
        query: Free-text search query.

    Returns:
        Whatever ``format_docs`` produces for the cached or freshly retrieved
        documents.
    """
    # NOTE(review): `format_docs` is neither defined nor imported in the
    # visible part of this file — confirm it exists at module level.
    cached_results = search_cache.search(query)
    if cached_results:
        print(f"✅ Retrieved {len(cached_results)} documents from cache")
        return format_docs(cached_results)

    print("🔍 No cache hit, performing web search")

    retriever = TavilySearchAPIRetriever(api_key=os.getenv("TAVILY_API_KEY"), k=5)
    search_results = retriever.invoke(query)

    # The retriever returns LangChain documents, while the module-level
    # `Document` name is LlamaIndex's class. An isinstance() check against it
    # never matches, and re-wrapping a hit with `page_content=` drops its text.
    # Accept anything document-shaped as-is instead.
    docs = []
    for result in search_results:
        if hasattr(result, "page_content"):
            docs.append(result)
        else:
            print(f"⚠️ Skipping unexpected search result of type {type(result).__name__}")

    # Nothing to cache on an empty result set.
    if docs:
        add_documents_with_filtered_metadata(search_cache, docs)

    return format_docs(docs)
193
+
194
+
195
+
196
  HEADERS = {'User-Agent': 'Mozilla/5.0'}
197
 
198
def buscar_web(query: str, num_results: int = 5) -> str:
    """Search the web through the Tavily API and return the hits as plain text.

    This function is registered as the ``web_search`` agent tool, so it always
    returns a string: failures and empty result sets are reported as messages
    instead of raising or returning an empty string.

    Args:
        query: Free-text search query.
        num_results: Maximum number of results requested from Tavily.

    Returns:
        One ``Result N`` section per hit (title, source, content) separated by
        blank lines, a "no results" message, or an ``Error ...`` message when
        the search itself failed.
    """
    try:
        retriever = TavilySearchAPIRetriever(
            api_key=os.getenv("TAVILY_API_KEY"), k=num_results
        )
        results = retriever.invoke(query)
    except Exception as e:
        # Tool boundary: hand the failure to the agent as text so it can react,
        # rather than letting the exception abort the whole query.
        return f"Error in web_search: {e}"

    if not results:
        return "No relevant results found."

    # Format the results so they can be passed to the LLM.
    return "\n\n".join(
        f"Result {i}:\nTitle: {doc.metadata.get('title','')}\nSource: {doc.metadata.get('source','')}\nContent: {doc.page_content}\n"
        for i, doc in enumerate(results, start=1)
    )
 
 
 
 
 
 
 
 
 
 
 
 
208
 
209
  def reverse_text(text: str) -> str:
210
  """Invierte el orden de los caracteres en 'text'."""
 
315
  others_sorted = sorted(set(others))
316
 
317
  return (
318
+ f"Vegetables: {', '.join(vegs_sorted)}\n"
319
+ f"Fruits: {', '.join(fruits_sorted)}\n"
320
+ f"Others: {', '.join(others_sorted)}"
321
+ )
322
 
323
  def scrape_wikipedia_table(page_title: str, section: str, table_index: int = 0) -> str:
324
  """
 
326
  Devuelve el CSV.
327
  """
328
  try:
329
+ wikipedia.set_lang("en")
330
  page = wikipedia.page(page_title, auto_suggest=False)
331
  soup = BeautifulSoup(page.html(), 'html.parser')
332
  header = next(
 
354
  search_tool = FunctionTool.from_defaults(
355
  fn=buscar_web,
356
  name="web_search",
357
+ description="Searches the web using TavilySearch API."
358
  )
359
+
360
  reverse_tool = FunctionTool.from_defaults(
361
  fn=reverse_text,
362
  name="reverse_text",
 
424
  # -------------------------------------------------------------------
425
  # 6) PROMPT DE SISTEMA MEJORADO with few-shot examples
426
  # -------------------------------------------------------------------
427
+ system_prompt_deprecated = f"""
428
  Eres Alfred, un agente ReAct eficiente y preciso. Tu objetivo es responder correctamente usando las herramientas disponibles.
429
  A continuación tienes ejemplos de cómo usar cada herramienta:
430
 
 
471
  {tool_descriptions}
472
  """
473
 
474
# System prompt for the ReAct agent. This is an f-string: `{tool_descriptions}`
# is interpolated at import time and doubled braces render as literal braces.
# The few-shot Observation uses the same English labels that
# classify_botanical returns, because the prompt tells the agent to copy tool
# output exactly.
system_prompt = f"""
You are Alfred, a ReAct agent. Your goal is to answer correctly using the available tools.

Strict guidelines:

1. ALWAYS use the available tools first if the question requires information you cannot deduce internally.
2. When a tool is used, ONLY answer based on the tool output. DO NOT add or invent any content not explicitly present in the tool output.
3. If a tool fails, you may explain the failure clearly. DO NOT fabricate the answer.
4. If no tool can help and you don't know, say "I cannot answer with the available tools."

Flow:

- **READ the question carefully.**
- **SELECT the most appropriate tool:**
    - `classify_botanical_foods` → grocery list, vegetables, fruits
    - `read_excel_data` → Excel or attached Excel
    - `scrape_wiki_table` → Wikipedia, featured articles, tables
    - `analyze_markdown_table` → Markdown table, commutativity
    - `reverse_text` → reverse text
    - `execute_code` → math, code
    - `web_search` → all other general questions
- **CALL the tool → COPY its output EXACTLY**
- **When answering, ONLY use the tool output. DO NOT add any interpretation unless the tool explicitly asked you to process it.**

Few-shot examples:

### Example: classify_botanical_foods
User: "milk, eggs, broccoli, celery, lettuce"
Agent:
{{
  "tool": "classify_botanical_foods",
  "input": "milk, eggs, broccoli, celery, lettuce"
}}
Observation: Vegetables: broccoli, celery, lettuce
Fruits:
Others: eggs, milk
Final Answer: "broccoli, celery, lettuce"

### Example: analyze_markdown_table
User: "Check commutativity"
Agent:
{{
  "tool": "analyze_markdown_table",
  "input": "|A|B|C|\\n|---|---|---|\\n|A|A|B|C|..."
}}
Observation: a, b
Final Answer: "a, b"

---

ONLY respond following this flow. DO NOT answer using your internal knowledge if a tool is required and available.
If unsure, default to using the most appropriate tool first.

Available tools:

{tool_descriptions}
"""
531
+
532
+
533
  # -------------------------------------------------------------------
534
  # 7) INICIALIZAR EL AGENTE ReActAgent
535
  # -------------------------------------------------------------------
 
550
  """
551
  try:
552
  if "attached excel" in question.lower() or "archivo excel" in question.lower():
553
+ excel_result = read_excel_data("data/attached.xlsx")
554
+ if "Error" in excel_result:
555
+ return "The Excel file is not available."
556
+ return excel_result
557
+
558
  resp = alfred_agent.query(question)
559
  if hasattr(resp, 'response') and resp.response is not None:
560
  return str(resp.response)
 
563
  return "No se generó una respuesta válida."
564
  except Exception as e:
565
  return f"Error crítico del agente: {e}"
566
+