Cmuroc27 committed on
Commit
fc7df56
·
1 Parent(s): 5d35885

funcion read mejorada

Browse files
Files changed (3) hide show
  1. agents.py +18 -23
  2. requirements.txt +2 -1
  3. tools.py +80 -15
agents.py CHANGED
@@ -2,7 +2,7 @@ import asyncio
2
  import os
3
  from llama_index.core.agent.workflow import AgentWorkflow
4
  from llama_index.llms.openai import OpenAI
5
- from tools import (image_analyzer_tool, youtube_transcript_tool, calculator_tool, read_document_tool, search_tool)
6
  from llama_index.core.workflow import (Workflow, Context, step, StartEvent, StopEvent, Event)
7
  from llama_index.core.agent import ReActAgent
8
  from dotenv import load_dotenv
@@ -19,33 +19,28 @@ class AlfredAdvancedWorkflow(AgentWorkflow):
19
 
20
  description="Main agent that routes to the correct tool based on question type.",
21
 
22
- system_prompt="""You are Alfred, a precise assistant for the GAIA benchmark.
23
 
24
- CRITICAL RULES:
25
- 1. Your FINAL answer must be EXTREMELY concise - typically just a number, name, date, or single sentence.
26
- 2. Available tools:
27
- - web_search: For factual questions requiring web search (people, places, current events, papers)
28
- - read_document: For files (.xlsx, .pdf, .csv, .txt, .docx, .json) and audio files (.mp3, .wav, .m4a, .flac, .ogg)
29
- - youtube_transcript: For YouTube URLs
30
- - analyze_image: For images (.jpg, .png, .gif, .webp)
31
- - calculator: For mathematical calculations
 
 
32
 
33
- 3. MULTI-STEP STRATEGY:
34
- - Use tools AS MANY TIMES as needed to get the complete answer
35
- - For complex questions: read file → extract info → calculate → verify
36
- - Don't stop after first tool call if more information is needed
37
 
38
- 4. ANSWER FORMAT:
39
- - Numbers: Return exact value (e.g., "42" not "approximately 42")
40
- - Dates: Use format in question (e.g., "January 15, 2024" or "2024-01-15")
41
- - Names: Full name as it appears (e.g., "Marie Curie")
42
- - Lists: Use commas (e.g., "Paris, London, Berlin")
43
 
44
- 5. If tool fails: Try alternative approach or return "Unable to determine"
45
 
46
- REMEMBER: Extract the EXACT answer. No explanations, no context, no preamble.""",
47
-
48
- tools=[search_tool, read_document_tool, image_analyzer_tool, youtube_transcript_tool, calculator_tool],
49
  llm=llm,
50
  verbose=True,
51
  max_iterations=15)
 
2
  import os
3
  from llama_index.core.agent.workflow import AgentWorkflow
4
  from llama_index.llms.openai import OpenAI
5
+ from tools import (image_analyzer_tool,python_tool, youtube_transcript_tool, calculator_tool, read_document_tool, search_tool)
6
  from llama_index.core.workflow import (Workflow, Context, step, StartEvent, StopEvent, Event)
7
  from llama_index.core.agent import ReActAgent
8
  from dotenv import load_dotenv
 
19
 
20
  description="Main agent that routes to the correct tool based on question type.",
21
 
22
+ system_prompt="""You are Alfred, an elite autonomous agent for the GAIA benchmark.
23
 
24
+ STRATEGY FOR SUCCESS:
25
+ 1. **PYTHON FILES (.py)**: If asked about a python file output:
26
+ - First, use `read_document` to see the code.
27
+ - Second, use `python_interpreter` to execute that exact code and get the answer.
28
+
29
+ 2. **COMPLEX SEARCH**: For questions like "Pitcher before X...", do not give up.
30
+ - Step 1: Find the specific team/roster and year mentioned.
31
+ - Step 2: Find the list/order of players.
32
+ - Step 3: Extract the names.
33
+ - If `web_search` returns generic info, refine the query (e.g., "Taishō Tamai roster 2023 numbers").
34
 
35
+ 3. **FILES**: Always trust the filename provided in the prompt. If `read_document` fails initially, check the extension.
 
 
 
36
 
37
+ 4. **FINAL ANSWER**:
38
+ - Must be concise.
39
+ - No preamble like "The answer is". Just the value.
 
 
40
 
41
+ REMEMBER: Extract the EXACT answer. No explanations, no context, no PREAMBLES.""",
42
 
43
+ tools=[search_tool,python_tool, read_document_tool, image_analyzer_tool, youtube_transcript_tool, calculator_tool],
 
 
44
  llm=llm,
45
  verbose=True,
46
  max_iterations=15)
requirements.txt CHANGED
@@ -17,4 +17,5 @@ whisper
17
  pydub
18
  python-docx
19
  pillow
20
- pandas
 
 
17
  pydub
18
  python-docx
19
  pillow
20
+ pandas
21
+ openpyxl
tools.py CHANGED
@@ -23,6 +23,32 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
23
  TAVILY = os.getenv("TAVILY_KEY")
24
  client = InferenceClient(HF_TOKEN)
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  def transcribe_audio_openai(audio_path: str) -> str:
27
  """Transcribe audio using OpenAI Whisper API - compatible with Spaces"""
28
  try:
@@ -75,25 +101,58 @@ def get_youtube_transcript(video_url: str) -> str:
75
 
76
 
77
 
78
- def read_document(file_path: str) -> str:
79
  """
80
- file_spec can be:
81
- - a local file path (e.g., "data.xlsx")
82
- - a task_id (e.g., "abc123") to download from GAIA
 
83
  """
84
- name = "read_file"
85
- description = "Reads a file and returns its content"
86
- inputs = {
87
- "file_path": {"type": "string", "description": "Path to the file to read"},
88
- }
89
- output_type = "string"
90
-
91
- def forward(self, file_path: str) -> str:
 
 
 
 
92
  try:
93
- with open(file_path, "r") as file:
94
- return file.read()
 
 
 
 
 
95
  except Exception as e:
96
- return f"Error reading file: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
 
99
 
@@ -169,6 +228,12 @@ async def search_web(query: str) -> str:
169
 
170
 
171
  # Definimos las tool
 
 
 
 
 
 
172
  image_analyzer_tool = FunctionTool.from_defaults(
173
  fn = analyze_image,
174
  name = "analyze_image",
 
23
  TAVILY = os.getenv("TAVILY_KEY")
24
  client = InferenceClient(HF_TOKEN)
25
 
26
def python_interpreter(code: str) -> str:
    """
    Execute Python code and return what it printed to stdout.

    Use this when you need to perform complex calculations, string manipulations,
    or when asked to determine the output of a python script.

    WARNING: The code is executed with ``exec`` inside the current process and
    environment. There is no sandbox, so only pass code you are willing to run
    with this process's privileges.

    Args:
        code: Python source code to execute.

    Returns:
        - the captured stdout, if the code printed anything;
        - otherwise a summary of the names the code defined;
        - otherwise a fixed "no output" notice;
        - if the code raised, an ``Error executing code: ...`` message, followed
          by whatever had been printed before the failure (if anything).
    """
    # Imported locally so the tool does not rely on module-level imports.
    import contextlib
    import io

    # Buffer that captures everything the executed code prints.
    buffer = io.StringIO()

    # A single dict is used as BOTH globals and locals, like a real script.
    # With separate dicts, top-level functions/imports land in the locals dict
    # and are invisible from inside other functions (NameError on helper calls
    # and recursion). "__name__" is set so `if __name__ == "__main__":` runs.
    injected_names = ("__builtins__", "__name__")
    namespace = {"__name__": "__main__"}

    try:
        with contextlib.redirect_stdout(buffer):
            # SECURITY: this runs arbitrary, model-supplied code unsandboxed.
            exec(code, namespace)
    except SystemExit:
        # sys.exit()/exit() is a normal way for a script to finish; it must
        # not propagate and terminate the host process.
        pass
    except Exception as e:
        message = f"Error executing code: {str(e)}"
        partial_output = buffer.getvalue()
        if partial_output:
            # Keep what was printed before the failure; it is often the clue.
            message += f"\nOutput before the error:\n{partial_output}"
        return message

    output = buffer.getvalue()
    if output:
        return output

    # Nothing was printed: report the names the code defined, hiding the
    # entries that exec/this function injected into the namespace.
    variables = {
        key: value for key, value in namespace.items()
        if key not in injected_names
    }
    if variables:
        return f"Code executed successfully. Variables: {variables}"
    return "Code executed. No output produced."
51
+
52
  def transcribe_audio_openai(audio_path: str) -> str:
53
  """Transcribe audio using OpenAI Whisper API - compatible with Spaces"""
54
  try:
 
101
 
102
 
103
 
104
def read_document(file_name: str) -> str:
    """
    Fetch a GAIA attachment (if not already on disk) and return its content.

    Any directory part of ``file_name`` is discarded; the file is looked up in,
    and downloaded to, the current working directory.

    - If it's a text file (.txt, .py, .csv, .json, .md, ...), returns the
      content, truncated to 10000 characters.
    - If it's a binary file (.xlsx, .pdf, .png, .mp3, .docx, ...), returns a
      message saying the file was saved locally and which tools to use on it
      (python_interpreter or analyze_image).

    Args:
        file_name: Name (or path) of the file as given in the question.

    Returns:
        The text content, a hand-off message for binary files, or a string
        starting with "Error" describing what went wrong. Never raises for
        download or read failures.
    """
    import os

    # Keep only the bare file name (the LLM sometimes hallucinates paths).
    file_name = os.path.basename(file_name)
    if not file_name:
        # Nothing left after stripping directories: do not hit the network.
        return "Error: No file name was provided."

    # 1. Download the file if it is not already present locally.
    if not os.path.exists(file_name):
        # Official GAIA validation split location on the Hugging Face Hub.
        base_url = "https://huggingface.co/datasets/gaia-benchmark/GAIA/resolve/main/2023/validation"
        file_url = f"{base_url}/{file_name}"
        try:
            # Imported lazily: reading an already-local file needs no HTTP client.
            import requests

            print(f"📥 Downloading {file_name} from {file_url}...")
            # NOTE(review): the GAIA dataset appears to be access-gated on the
            # Hub, so the token is sent when HF_TOKEN is set — confirm.
            token = os.environ.get("HF_TOKEN")
            headers = {"Authorization": f"Bearer {token}"} if token else None
            # Without a timeout, a stalled connection would block the agent forever.
            response = requests.get(file_url, headers=headers, timeout=30)
            if response.status_code != 200:
                return f"Error: File {file_name} not found in GAIA source (Status {response.status_code})."
            with open(file_name, "wb") as f:
                f.write(response.content)
        except Exception as e:
            return f"Error downloading file: {str(e)}"

    # 2. Decide how to handle the file based on its extension.
    ext = os.path.splitext(file_name)[1].lower()

    # Formats that must NOT be read as plain text (decoding them yields junk).
    binary_extensions = {
        '.xlsx', '.xls', '.docx', '.pptx', '.pdf', '.zip',
        '.png', '.jpg', '.jpeg', '.gif', '.webp',
        '.mp3', '.wav', '.m4a', '.flac', '.ogg',
    }

    if ext in binary_extensions:
        return (f"File '{file_name}' has been downloaded and saved locally. "
                f"It is a binary file ({ext}). DO NOT read it as text. "
                f"Use 'python_interpreter' (with pandas for excel) or 'analyze_image' to process it.")

    # 3. Text file: read it and return the content.
    max_chars = 10000
    try:
        with open(file_name, "r", encoding='utf-8', errors='ignore') as file:
            content = file.read()
    except Exception as e:
        return f"Error reading text file: {str(e)}"

    # Truncate very long files so they do not overflow the model's context.
    if len(content) > max_chars:
        return content[:max_chars] + "\n...[Content Truncated]..."
    return content
156
 
157
 
158
 
 
228
 
229
 
230
  # Definimos las tool
231
# Agent-facing wrapper around python_interpreter, registered under the
# tool name "python_interpreter".
python_tool = FunctionTool.from_defaults(
    fn=python_interpreter,
    name="python_interpreter",
    description=(
        "Executes Python code. "
        "Use this to run code found in files or to perform complex logic."
    ),
)
236
+
237
  image_analyzer_tool = FunctionTool.from_defaults(
238
  fn = analyze_image,
239
  name = "analyze_image",