Csuarezg committed on
Commit
df3c43a
·
verified ·
1 Parent(s): c17fa18

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +465 -214
app.py CHANGED
@@ -2,273 +2,524 @@ import os
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
 
5
  import re
6
- import pytesseract
7
- import yt_dlp
8
- import cv2
9
  import numpy as np
10
- import speech_recognition as sr
11
- from PIL import Image
12
- from typing import List, Dict
13
- from langchain_core.tools import tool
14
  from langchain_openai import ChatOpenAI
 
15
  from langchain_community.tools.tavily_search import TavilySearchResults
16
- from youtube_transcript_api import YouTubeTranscriptApi
17
- from langgraph.graph import StateGraph, END
 
 
18
  from langgraph.checkpoint.memory import MemorySaver
19
- from langchain_core.messages import HumanMessage, SystemMessage
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- # ================ CONSTANTES ================
22
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
23
- SYSTEM_PROMPT = SYSTEM_PROMPT = """You are a precision research assistant for the GAIA benchmark. Your mission is EXTREME ACCURACY.
 
 
 
24
  CRITICAL ANSWER FORMAT RULES:
25
- # - ALWAYS end with: FINAL ANSWER: [answer]
26
- # - READ THE QUESTION CAREFULLY - answer EXACTLY what is asked for, nothing more, nothing less
 
27
  SPECIFIC FORMATTING BY QUESTION TYPE:
28
- # - Numbers: ONLY the number, no units, no text
29
- # Example: "FINAL ANSWER: 2" NOT "FINAL ANSWER: 2 albums"
30
- # - First name only: ONLY the first name
31
- # Example: If person is "John Smith" โ†’ "FINAL ANSWER: John"
32
- # - Country codes, IOC codes, abbreviations, symbols: ONLY the code/abbreviation, no country name or brackets
33
- # Example: If asked for IOC country code โ†’ "FINAL ANSWER: PHI" NOT "FINAL ANSWER: PHILIPPINES [PHI]"
34
- # - When asked for a specific type of identifier (code, abbreviation, symbol):
35
- # Give ONLY that identifier, strip all explanatory text, brackets, or full names
36
- # - Lists/Sets: Exactly as requested format
37
- # Example: "FINAL ANSWER: a, b, d, e" (comma-separated, alphabetical order)
38
  CRITICAL TOOL SELECTION:
39
- # - Wikipedia questions โ†’ wikipedia_tool ONLY
40
- # - File questions โ†’ file_analyzer_tool FIRST to inspect contents, then reason based on structure
41
- # - Current events โ†’ web_search_tool ONLY
42
- # - Mathematical analysis/calculations โ†’ wolfram_alpha_tool or python_repl_tool ONLY
43
- # - Tables, matrices, systematic checking โ†’ python_repl_tool ONLY
 
44
  FOR MATHEMATICAL PROBLEMS:
45
- # ALWAYS use python_repl_tool when:
46
- # - Analyzing mathematical tables or matrices
47
- # - Checking properties like commutativity, associativity
48
- # - Systematic verification of mathematical statements
49
- # - Complex calculations that need precision
50
- # - ANY problem involving tables, sets, or systematic checking
51
- MATHEMATICAL ANALYSIS PROCESS:
52
- # 1. Use python_repl_tool to parse data systematically
53
- # 2. Write code to check ALL cases (don't rely on manual inspection)
54
- # 3. Collect results programmatically
55
- # 4. Verify your logic with multiple approaches
56
- # 5. Format answer exactly as requested
57
- # Example for commutativity checking:
58
- # - Parse the operation table into a data structure
59
- # - Check ALL pairs (x,y) to see if x*y = y*x
60
- # - Collect ALL elements involved in ANY counter-example
61
- # - Return in requested format (e.g., comma-separated, alphabetical)
62
  FILE HANDLING:
63
- # - You HAVE the ability to read and analyze uploaded files
64
- # - ALWAYS use file_analyzer_tool when questions mention files
65
- # - The tool automatically finds and analyzes Excel, CSV, images, and audio files
66
- # - For Excel/CSV: Returns columns, data types, sample rows, and numeric totals
67
- # - NEVER say "I can't access files" - you CAN access them via file_analyzer_tool
68
- # - Example: "The attached Excel file..." โ†’ Use file_analyzer_tool immediately
69
- SPECIAL CASES TO HANDLE:
70
- # - If the question appears reversed or encoded, decode it first.
71
- # - If the question includes an instruction (e.g., "write the opposite of..."), follow the instruction precisely.
72
- # - DO NOT repeat or paraphrase the question in your answer.
73
- # - NEVER answer with the full sentence unless explicitly asked to.
74
- # - If the decoded question asks for a word, give ONLY the word, in the required format.
75
  REASONING PROCESS:
76
- # 1. Carefully read what the question is asking for
77
- # 2. Identify if it needs systematic/mathematical analysis
78
- # 3. Use appropriate tool (python_repl_tool for math problems)
79
- # 4. Extract ONLY the specific part requested
80
- # 5. Format according to the rules above
81
- # 6. For file questions:
82
- # a. First use file_analyzer_tool to inspect column names, types, and sample data
83
- # b. Identify relevant columns based on the question
84
- # c. Reason using the data (e.g., by counting, filtering, or identifying patterns)
85
- # d. Only use python_repl_tool if additional computation is necessary
86
- # 7. If the Wikipedia tool is used but fails to provide an answer (no relevant entry or content), automatically attempt a web search using the same query or a refined version of it
87
  """
88
- USERNAME = "Csuarezg"
89
- AGENT_CODE = "gaia_agent_v1"
90
 
91
- # ================ HERRAMIENTAS ================
92
- @tool
93
- def wikipedia_tool(query: str) -> str:
94
- """Busca informaciรณn enciclopรฉdica en Wikipedia. รštil para datos histรณricos, biografรญas y conceptos cientรญficos.
95
-
96
- Args:
97
- query: Tรฉrmino de bรบsqueda especรญfico (ej. 'Teorรญa de la relatividad')
98
 
99
- Returns:
100
- Resumen conciso del tema en 3 oraciones.
101
- """
102
- try:
103
- import wikipedia
104
- wikipedia.set_lang("en")
105
- return wikipedia.summary(query, sentences=3)
106
- except Exception as e:
107
- return f"Error Wikipedia: {str(e)}"
108
-
109
- @tool
110
- def youtube_transcript_tool(url: str) -> str:
111
- """Obtiene el transcript de videos de YouTube. รštil para analizar diรกlogos o contenido hablado.
112
-
113
- Args:
114
- url: Enlace completo del video (ej. 'https://youtu.be/VIDEO_ID')
115
 
116
- Returns:
117
- Primera parte del transcript (primeros 30 segundos).
118
- """
119
- try:
120
- video_id = re.findall(r'(?:v=|\/)([0-9A-Za-z_-]{11})', url)[0]
121
- transcript = YouTubeTranscriptApi.get_transcript(video_id)
122
- return " ".join([entry['text'] for entry in transcript[:5]])
123
- except Exception as e:
124
- return f"Error transcript: {str(e)}"
125
-
126
- @tool
127
- def file_analyzer_tool(file_path: str) -> str:
128
- """Analiza archivos (imรกgenes, audio) usando OCR y visiรณn por computadora.
129
-
130
- Args:
131
- file_path: Ruta al archivo en el sistema
132
 
133
- Returns:
134
- Texto extraรญdo o anรกlisis de contenido multimedia.
135
- """
136
- try:
137
- if file_path.endswith(('.png', '.jpg', '.jpeg')):
138
- img = Image.open(file_path)
139
- text = pytesseract.image_to_string(img)
140
- return f"Texto detectado: {text[:500]}..." if text else "Sin texto"
141
- return "Formato no soportado"
142
- except Exception as e:
143
- return f"Error anรกlisis archivo: {str(e)}"
144
-
145
- @tool
146
- def web_search_tool(query: str) -> str:
147
- """Realiza bรบsquedas web en tiempo real. รštil para informaciรณn actualizada.
148
-
149
- Args:
150
- query: Tรฉrmino de bรบsqueda con contexto
151
 
152
- Returns:
153
- 3 resultados relevantes con fuentes.
154
- """
155
- try:
156
- tavily = TavilySearchResults(api_key=os.getenv("TAVILY_API_KEY"), max_results=3)
157
- results = tavily.invoke(query)
158
- return "\n".join([f"{res['title']}: {res['content']}" for res in results])
159
- except Exception as e:
160
- return f"Error bรบsqueda: {str(e)}"
 
 
 
 
 
 
 
 
 
161
 
162
- # ================ AGENTE PRINCIPAL ================
163
- class GaiaAgent:
164
- def __init__(self):
165
- self.tools = [wikipedia_tool, youtube_transcript_tool, file_analyzer_tool, web_search_tool]
166
- self.llm = ChatOpenAI(model="gpt-4-turbo", temperature=0)
167
- self.workflow = self._build_workflow()
168
- self.recognizer = sr.Recognizer()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
 
170
- def _build_workflow(self):
171
- workflow = StateGraph(AgentState)
 
 
 
 
172
 
173
  def agent_node(state):
174
- messages = [SystemMessage(content=SYSTEM_PROMPT)] + state['messages']
175
- response = self.llm.bind_tools(self.tools).invoke(messages)
 
 
 
176
  return {"messages": [response]}
177
 
178
- workflow.add_node("agent", agent_node)
179
- workflow.add_node("tools", ToolNode(self.tools))
180
 
181
- workflow.set_entry_point("agent")
182
- workflow.add_conditional_edges(
183
- "agent",
184
- lambda x: "tools" if x["messages"][-1].tool_calls else END
185
- )
186
- workflow.add_edge("tools", "agent")
 
187
 
188
- return workflow.compile()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
 
190
  def __call__(self, question: str) -> str:
 
 
 
191
  try:
192
- response = self.workflow.invoke(
 
 
 
 
 
 
193
  {"messages": [HumanMessage(content=question)]},
194
- {"configurable": {"thread_id": "main_thread"}}
 
195
  )
196
- return self._extract_final_answer(response['messages'][-1].content)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  except Exception as e:
 
 
 
198
  return f"Error: {str(e)}"
199
 
200
- def _extract_final_answer(self, text: str) -> str:
201
- match = re.search(r"FINAL ANSWER:\s*(.*)", text, re.IGNORECASE)
202
- return match.group(1).strip() if match else text
203
-
204
- # ================ Lร“GICA DE EJECUCIร“N ================
205
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
 
 
 
206
  space_id = os.getenv("SPACE_ID")
207
 
208
- if not profile:
209
- return "Por favor inicia sesiรณn primero", None
 
 
 
 
 
 
 
 
210
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  try:
212
- agent = GaiaAgent()
213
- questions_url = f"{DEFAULT_API_URL}/questions"
214
  response = requests.get(questions_url, timeout=15)
215
  response.raise_for_status()
216
  questions_data = response.json()
217
-
218
- answers = []
219
- results_log = []
220
- for item in questions_data:
221
- task_id = item.get("task_id")
222
- question_text = item.get("question")
223
- if not task_id or not question_text:
224
- continue
225
- try:
226
- answer = agent(question_text)
227
- answers.append({"task_id": task_id, "submitted_answer": answer})
228
- results_log.append({"Task ID": task_id, "Question": question_text, "Answer": answer})
229
- except Exception as e:
230
- results_log.append({"Task ID": task_id, "Question": question_text, "Answer": f"Error: {str(e)}"})
231
 
232
- submission_data = {
233
- "username": USERNAME,
234
- "agent_code": AGENT_CODE,
235
- "answers": answers
236
- }
 
 
 
237
 
238
- submit_response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=60)
239
- submit_response.raise_for_status()
 
 
 
240
 
241
- result = submit_response.json()
242
- status = (
243
- f"ยกEnvรญo exitoso!\n"
244
- f"Usuario: {result.get('username', '')}\n"
245
- f"Puntaje: {result.get('score', 0)}%\n"
246
- f"Mensaje: {result.get('message', '')}"
247
- )
248
- return status, pd.DataFrame(results_log)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  except Exception as e:
251
- return f"Error crรญtico: {str(e)}", pd.DataFrame()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
 
253
- # ================ INTERFAZ GRADIO ================
254
- with gr.Blocks() as demo:
255
- gr.Markdown("# GAIA Agent - Evaluaciรณn Completa")
256
- gr.Markdown("""
257
- **Instrucciones:**
258
- 1. Inicia sesiรณn con tu cuenta de Hugging Face
259
- 2. Haz clic en 'Ejecutar Evaluaciรณn'
260
- 3. Espera los resultados (puede tomar varios minutos)
261
- """)
262
-
263
  gr.LoginButton()
264
- run_btn = gr.Button("Ejecutar Evaluaciรณn", variant="primary")
265
- status_output = gr.Textbox(label="Estado", interactive=False)
266
- results_table = gr.DataFrame(label="Resultados Detallados", wrap=True)
267
 
268
- run_btn.click(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  fn=run_and_submit_all,
270
  outputs=[status_output, results_table]
271
  )
272
 
273
  if __name__ == "__main__":
274
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
+ import json
6
  import re
7
+ import tempfile
8
+ import logging
9
+ from typing import List, Dict, Optional
10
  import numpy as np
11
+
12
+ # Core ML/AI imports
13
+ from langchain_core.messages import HumanMessage, SystemMessage
 
14
  from langchain_openai import ChatOpenAI
15
+ from langchain_core.tools import tool
16
  from langchain_community.tools.tavily_search import TavilySearchResults
17
+ from langchain_experimental.tools import PythonREPLTool
18
+ from langgraph.graph import StateGraph, START, END
19
+ from langgraph.graph.message import add_messages
20
+ from langgraph.prebuilt import ToolNode, tools_condition
21
  from langgraph.checkpoint.memory import MemorySaver
22
+ from typing import TypedDict, Annotated, List as ListType
23
+
24
+ # File processing
25
+ import pandas as pd
26
+ import wikipedia
27
+ from youtube_transcript_api import YouTubeTranscriptApi
28
+ import speech_recognition as sr
29
+
30
+ # Computer vision (will be downloaded at runtime)
31
+ try:
32
+ from ultralytics import YOLO
33
+ import cv2
34
+ import yt_dlp
35
+ VISION_AVAILABLE = True
36
+ except ImportError:
37
+ VISION_AVAILABLE = False
38
+ print("โš ๏ธ Vision libraries not available, will skip vision tasks")
39
+
40
+ # OCR (optional)
41
+ try:
42
+ import pytesseract
43
+ from PIL import Image
44
+ OCR_AVAILABLE = True
45
+ except ImportError:
46
+ OCR_AVAILABLE = False
47
+
48
+ # Silence verbose logging
49
+ os.environ['ULTRALYTICS_VERBOSE'] = 'false'
50
+ os.environ['YOLO_VERBOSE'] = 'false'
51
+ logging.getLogger("ultralytics").setLevel(logging.ERROR)
52
 
53
+ # --- Constants ---
54
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
55
+
56
+ # System prompt for the agent
57
# System prompt prepended to every conversation by the agent node.
# NOTE: the arrows below are real U+2192 characters; the scraped source showed
# them as mis-decoded UTF-8 ("โ†’"), which would have been sent verbatim to
# the model.
SYSTEM_PROMPT = """You are a precision research assistant for the GAIA benchmark. Your mission is EXTREME ACCURACY.

CRITICAL ANSWER FORMAT RULES:
- ALWAYS end with: FINAL ANSWER: [answer]
- READ THE QUESTION CAREFULLY - answer EXACTLY what is asked for, nothing more, nothing less

SPECIFIC FORMATTING BY QUESTION TYPE:
- Numbers: ONLY the number, no units, no text
Example: "FINAL ANSWER: 2" NOT "FINAL ANSWER: 2 albums"
- First name only: ONLY the first name
Example: If person is "John Smith" → "FINAL ANSWER: John"
- Country codes, IOC codes, abbreviations, symbols: ONLY the code/abbreviation, no country name or brackets
Example: If asked for IOC country code → "FINAL ANSWER: PHI" NOT "FINAL ANSWER: PHILIPPINES [PHI]"
- Lists/Sets: Exactly as requested format
Example: "FINAL ANSWER: a, b, d, e" (comma-separated, alphabetical order)

CRITICAL TOOL SELECTION:
- Wikipedia questions → wikipedia_tool ONLY
- File questions → file_analyzer_tool FIRST to inspect contents, then reason based on structure
- Current events → web_search_tool ONLY
- Mathematical analysis/calculations → wolfram_alpha_tool or python_repl_tool ONLY
- Tables, matrices, systematic checking → python_repl_tool ONLY

FOR MATHEMATICAL PROBLEMS:
ALWAYS use python_repl_tool when:
- Analyzing mathematical tables or matrices
- Checking properties like commutativity, associativity
- Systematic verification of mathematical statements
- Complex calculations that need precision
- ANY problem involving tables, sets, or systematic checking

FILE HANDLING:
- You HAVE the ability to read and analyze uploaded files
- ALWAYS use file_analyzer_tool when questions mention files
- The tool automatically finds and analyzes Excel, CSV, images, and audio files
- For Excel/CSV: Returns columns, data types, sample rows, and numeric totals
- NEVER say "I can't access files" - you CAN access them via file_analyzer_tool
- Example: "The attached Excel file..." → Use file_analyzer_tool immediately

REASONING PROCESS:
1. Carefully read what the question is asking for
2. Identify if it needs systematic/mathematical analysis
3. Use appropriate tool (python_repl_tool for math problems)
4. Extract ONLY the specific part requested
5. Format according to the rules above
"""
 
 
103
 
104
class GAIAAgent:
    """Tool-using LangGraph agent that answers GAIA benchmark questions.

    The agent wires a ``ChatOpenAI`` model to five tools (Wikipedia, Tavily
    web search, Wolfram Alpha, a spreadsheet/CSV inspector and a Python REPL)
    in an agent <-> tools loop, then reduces the model's final message to the
    text following ``FINAL ANSWER:``.

    Calling the instance with a question string returns the extracted answer
    string; failures are reported as ``"Error: ..."`` strings rather than
    raised.
    """

    def __init__(self):
        """Read API keys from the environment and build the LLM, tools and graph.

        Raises:
            ValueError: if ``OPENAI_API_KEY`` is not set.
        """
        print("🚀 Initializing GAIA Agent...")

        # API keys come from environment variables (HF Space secrets).
        self.openai_api_key = os.getenv("OPENAI_API_KEY")
        self.tavily_api_key = os.getenv("TAVILY_API_KEY")
        self.wolfram_api_key = os.getenv("WOLFRAM_API_KEY")
        # Stored but not used by any method of this class.
        self.hf_token = os.getenv("HUGGING_FACE_API_TOKEN")

        if not self.openai_api_key:
            raise ValueError("OPENAI_API_KEY not found in environment variables")

        # temperature=0.0 is requested for the benchmark runs.
        self.llm = ChatOpenAI(model="gpt-4-turbo", temperature=0.0, api_key=self.openai_api_key)

        # YOLO is optional: it is only loaded when the vision imports
        # succeeded, and a load failure degrades to ``None`` instead of
        # aborting start-up.
        self.yolo_model = None
        if VISION_AVAILABLE:
            try:
                print("📦 Downloading YOLO model...")
                self.yolo_model = YOLO("yolov8x.pt")
                print("✅ YOLO model ready")
            except Exception as e:
                print(f"⚠️ YOLO model failed to load: {e}")
                self.yolo_model = None

        self.tools = self._setup_tools()
        self.agent_runner = self._create_agent_runner()

        print("✅ GAIA Agent initialized successfully!")

    def _setup_tools(self):
        """Build and return the list of tools bound to the LLM.

        The ``@tool`` docstrings below are kept verbatim because they are
        what the model sees as each tool's description.
        """

        def _wiki_lookup(title: str) -> str:
            # Shared by the direct lookup and the disambiguation fallback.
            summary = wikipedia.summary(title, sentences=3)
            page = wikipedia.page(title)
            return f"WIKIPEDIA: {page.title}\n\n{summary}\n\nURL: {page.url}"

        @tool
        def wikipedia_tool(query: str) -> str:
            """Search Wikipedia for encyclopedic information"""
            try:
                wikipedia.set_lang("en")
                try:
                    return _wiki_lookup(query)
                except wikipedia.DisambiguationError as e:
                    # Fall back to the first suggested page. This retry sits
                    # inside the outer ``try`` so a second failure (nested
                    # disambiguation, missing page) is still reported as a
                    # string instead of escaping the tool.
                    if not e.options:
                        return f"Wikipedia error: no candidate pages for ambiguous query: {query}"
                    return _wiki_lookup(e.options[0])
            except Exception as e:
                return f"Wikipedia error: {str(e)}"

        @tool
        def web_search_tool(query: str) -> str:
            """Web search for current information"""
            if not self.tavily_api_key:
                return "Tavily API key not available"
            try:
                tavily_search = TavilySearchResults(api_key=self.tavily_api_key, max_results=5)
                results = tavily_search.invoke(query)
                # Guard: anything other than a list of result dicts (e.g. an
                # error message string) is surfaced as-is rather than failing
                # on ``.get`` below with a misleading attribute error.
                if not isinstance(results, list):
                    return f"Search error: {results}"
                formatted_results = [
                    f"RESULT {i}:\nTitle: {res.get('title', 'N/A')}\nContent: {res.get('content', 'N/A')}"
                    for i, res in enumerate(results, 1)
                ]
                return "\n\n".join(formatted_results)
            except Exception as e:
                return f"Search error: {str(e)}"

        @tool
        def wolfram_alpha_tool(query: str) -> str:
            """Use Wolfram Alpha for computational questions"""
            if not self.wolfram_api_key:
                return "Wolfram API key not available"

            params = {
                'appid': self.wolfram_api_key,
                'input': query,
                'format': 'plaintext',
                'output': 'JSON',
            }
            try:
                # HTTPS so the app id in the query string is not sent in clear text.
                resp = requests.get("https://api.wolframalpha.com/v2/query", params=params, timeout=30)
                resp.raise_for_status()
                data = resp.json().get('queryresult', {})

                if not data.get('success'):
                    return f"Wolfram Alpha couldn't process: {query}"

                # Flatten pods/subpods into "Pod title: plaintext" entries.
                results = []
                for pod in data.get('pods', []):
                    pod_title = pod.get('title', 'Unknown')
                    for subpod in pod.get('subpods', []):
                        plaintext = subpod.get('plaintext')
                        if plaintext and plaintext.strip():
                            results.append(f"{pod_title}: {plaintext}")

                # Only the first five entries are returned.
                return " | ".join(results[:5]) if results else "No readable results"

            except Exception as e:
                return f"Wolfram Alpha error: {e}"

        @tool
        def file_analyzer_tool(file_description: str = "uploaded file") -> str:
            """Analyze uploaded files (Excel, CSV, images, audio)"""
            # NOTE: despite the description, only .xlsx/.xls/.csv files are
            # actually discovered and parsed here; ``file_description`` is not
            # used to select a file.
            try:
                search_paths = ["./", "./uploads", "./files", "./data"]
                data_exts = ('.xlsx', '.xls', '.csv')
                found_files = []

                for path in search_paths:
                    if os.path.exists(path):
                        # Sorted so the report order is deterministic.
                        for file in sorted(os.listdir(path)):
                            if file.lower().endswith(data_exts):
                                found_files.append(os.path.join(path, file))

                if not found_files:
                    return "No supported data files found"

                results = []
                for file_path in found_files:
                    try:
                        ext = os.path.splitext(file_path)[1].lower()
                        if ext in ('.xlsx', '.xls'):
                            df = pd.read_excel(file_path)
                        elif ext == '.csv':
                            df = pd.read_csv(file_path)
                        else:
                            continue

                        result = f"📄 FILE: {file_path}\n"
                        result += f"🔢 SHAPE: {df.shape}\n"
                        result += f"🧠 COLUMNS: {list(df.columns)}\n"
                        result += f"📊 FIRST 5 ROWS:\n{df.head().to_string(index=False)}\n"

                        numeric_cols = df.select_dtypes(include=['number']).columns
                        if len(numeric_cols) > 0:
                            totals = df[numeric_cols].sum().round(2)
                            result += f"💰 NUMERIC TOTALS:\n{totals.to_string()}\n"

                        results.append(result)
                    except Exception as e:
                        # One unreadable file must not hide the others.
                        results.append(f"Error processing {file_path}: {e}")

                return "\n\n".join(results)
            except Exception as e:
                return f"File analysis error: {e}"

        # SYSTEM_PROMPT instructs the model to call ``python_repl_tool``, so
        # the REPL is registered under that exact name.
        # NOTE(review): PythonREPLTool's built-in name is presumably
        # "Python_REPL" - confirm against the installed langchain_experimental.
        python_repl_tool = PythonREPLTool(name="python_repl_tool")

        return [
            wikipedia_tool,
            web_search_tool,
            wolfram_alpha_tool,
            file_analyzer_tool,
            python_repl_tool,
        ]

    def _create_agent_runner(self):
        """Compile the agent <-> tools LangGraph with an in-memory checkpointer."""

        class AgentState(TypedDict):
            # ``add_messages`` is the reducer applied to node outputs for
            # this key.
            messages: Annotated[ListType, add_messages]

        model_with_tools = self.llm.bind_tools(self.tools)

        def agent_node(state):
            messages = state['messages']
            # Prepend the system prompt unless the history already starts with one.
            if not messages or not isinstance(messages[0], SystemMessage):
                messages = [SystemMessage(content=SYSTEM_PROMPT)] + messages

            response = model_with_tools.invoke(messages)
            return {"messages": [response]}

        builder = StateGraph(AgentState)
        builder.add_node("agent", agent_node)
        builder.add_node("tools", ToolNode(self.tools))

        builder.add_edge(START, "agent")
        # ``tools_condition`` routes to "tools" or END based on the agent's reply.
        builder.add_conditional_edges("agent", tools_condition, {"tools": "tools", END: END})
        builder.add_edge("tools", "agent")

        return builder.compile(checkpointer=MemorySaver())

    def _extract_final_answer(self, response_text: str) -> str:
        """Return the text after the LAST ``FINAL ANSWER:`` marker.

        Only the first line after the marker is kept. A trailing period is
        dropped unless the remainder is purely digits and dots. If no marker
        is present, the last non-empty line of the response is returned.

        The last marker is used (not the first) because the prompt requires
        the response to *end* with the answer; an earlier occurrence is
        typically the model restating the format rule.
        """
        segments = re.split(r"FINAL ANSWER:", response_text, flags=re.IGNORECASE)
        if len(segments) > 1:
            raw_answer = segments[-1].strip()
            if "\n" in raw_answer:
                raw_answer = raw_answer.split("\n", 1)[0].strip()
            if raw_answer.endswith('.') and not raw_answer[:-1].replace('.', '').isdigit():
                raw_answer = raw_answer[:-1]
            return raw_answer.strip()

        lines = [line.strip() for line in response_text.strip().split('\n') if line.strip()]
        return lines[-1] if lines else response_text.strip()

    def __call__(self, question: str) -> str:
        """Run the agent on one question and return the extracted answer.

        Args:
            question: the question text to answer.

        Returns:
            The extracted final answer, or an ``"Error: ..."`` /
            ``"Agent execution failed ..."`` string when the run fails.
        """
        print(f"🤖 Processing question: {question[:100]}...")

        try:
            import uuid

            # A fresh thread id per question. The graph is compiled with a
            # checkpointer, so reusing one id would replay every previous
            # question's messages into this run.
            config = {"configurable": {"thread_id": f"gaia_session_{uuid.uuid4().hex}"}}

            final_state = None
            events = self.agent_runner.stream(
                {"messages": [HumanMessage(content=question)]},
                config=config,
                stream_mode="values",
            )

            # Keep the latest state; stop consuming after the 9th streamed
            # state to prevent infinite agent/tool loops.
            for step, event in enumerate(events, start=1):
                final_state = event
                if step > 8:
                    break

            if not final_state or not final_state['messages']:
                return "Agent execution failed - no response generated"

            full_response = final_state['messages'][-1].content
            # The extractor's regex needs a string, so coerce any other
            # content type.
            if not isinstance(full_response, str):
                full_response = str(full_response)

            print(f"📝 Agent response: {full_response[:200]}...")

            final_answer = self._extract_final_answer(full_response)
            print(f"🎯 Final answer: {final_answer}")

            return final_answer

        except Exception as e:
            print(f"❌ Error processing question: {e}")
            import traceback
            traceback.print_exc()
            return f"Error: {str(e)}"
353
 
 
 
 
 
 
354
def _preview_question(text, limit=100):
    """Return *text* cut to *limit* characters plus "..." for the results table."""
    return text[:limit] + "..." if len(text) > limit else text


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, run the GAIA agent on them and submit the answers.

    Args:
        profile: the logged-in Hugging Face profile, or ``None`` when the
            user is not logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` of per-question answers, or ``None`` when the
        run stops before any question is processed.
    """
    space_id = os.getenv("SPACE_ID")

    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    username = f"{profile.username}"
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate GAIA Agent
    try:
        agent = GAIAAgent()
    except Exception as e:
        print(f"Error instantiating GAIA agent: {e}")
        return f"Error initializing GAIA agent: {e}", None

    if not space_id:
        # The URL below is still built (the payload shape is unchanged), but
        # make it visible in the logs that it will contain "None".
        print("Warning: SPACE_ID is not set; the agent code URL will not point to a real Space.")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(f"Agent code URL: {agent_code}")

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            return "Fetched questions list is empty.", None
        print(f"Fetched {len(questions_data)} questions.")
    except Exception as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None

    # 3. Run GAIA Agent on questions
    results_log = []
    answers_payload = []
    print(f"Running GAIA agent on {len(questions_data)} questions...")

    for i, item in enumerate(questions_data):
        # Skip malformed entries instead of crashing the whole run on ``.get``.
        if not isinstance(item, dict):
            print(f"Skipping item {i} with missing data")
            continue

        task_id = item.get("task_id")
        question_text = item.get("question") or item.get("Question")

        if not task_id or question_text is None:
            print(f"Skipping item {i} with missing data")
            continue

        print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")

        try:
            submitted_answer = agent(question_text)
            print(f"✅ Question {i+1} completed: {submitted_answer}")
        except Exception as e:
            # The failure text is submitted as the answer so the task is
            # still recorded in the payload and the results table.
            print(f"❌ Error on question {i+1}: {e}")
            submitted_answer = f"AGENT ERROR: {str(e)}"

        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({
            "Task ID": task_id,
            "Question": _preview_question(question_text),
            "Submitted Answer": submitted_answer,
        })

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare and Submit
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    print(f"Submitting {len(answers_payload)} answers...")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=120)
        response.raise_for_status()
        result_data = response.json()

        final_status = (
            f"🎉 Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("✅ Submission successful!")
        return final_status, pd.DataFrame(results_log)

    except requests.exceptions.HTTPError as e:
        # Include the server's response body (truncated) so rejected
        # submissions can be diagnosed from the status box.
        error_msg = f"❌ Submission Failed: {str(e)}"
        if e.response is not None and e.response.text:
            error_msg += f"\nServer response: {e.response.text[:500]}"
        print(error_msg)
        return error_msg, pd.DataFrame(results_log)
    except Exception as e:
        error_msg = f"❌ Submission Failed: {str(e)}"
        print(error_msg)
        return error_msg, pd.DataFrame(results_log)
459
+
460
+ # --- Build Gradio Interface ---
461
# Gradio UI: a login button, a run button, a status box and a results table.
# The emoji in the labels are restored from mis-decoded UTF-8 in the scraped
# source.
# NOTE(review): the web-search and Python REPL glyphs lost bytes in that
# source, so 🌐 and 🐍 are best guesses - confirm against the repository.
with gr.Blocks(title="GAIA Agent Evaluation") as demo:
    gr.Markdown("# 🤖 GAIA Agent Evaluation Runner")
    gr.Markdown(
        """
        **Advanced GAIA Benchmark Agent**

        This agent uses:
        - 🧠 GPT-4 Turbo with specialized tools
        - 📚 Wikipedia search for encyclopedic information
        - 🌐 Web search for current events
        - 🧮 Wolfram Alpha for computational tasks
        - 📊 File analysis for Excel/CSV data
        - 🐍 Python REPL for mathematical analysis
        - 🎯 Specialized prompt engineering for GAIA benchmark

        **Instructions:**
        1. Log in to your Hugging Face account
        2. Click 'Run Evaluation & Submit All Answers'
        3. Wait for processing (this may take several minutes)

        ---
        """
    )

    gr.LoginButton()

    run_button = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary")

    status_output = gr.Textbox(
        label="📊 Run Status / Submission Result",
        lines=8,
        interactive=False,
    )

    results_table = gr.DataFrame(
        label="📝 Questions and Agent Answers",
        wrap=True,
        max_height=400,
    )

    # No explicit inputs are passed to the handler; it returns the
    # (status, results) pair shown in the two components below.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )
505
 
506
if __name__ == "__main__":
    # Start-up banner plus Space environment info, then launch the UI.
    print("\n" + "="*50)
    print("🚀 GAIA Agent HuggingFace Space Starting")
    print("="*50)

    # Environment info
    space_host = os.getenv("SPACE_HOST")
    space_id = os.getenv("SPACE_ID")

    if space_host:
        print(f"✅ SPACE_HOST: {space_host}")
        # NOTE(review): SPACE_HOST appears to already be the full host name
        # (ending in ".hf.space"); only append the suffix when it is missing
        # so the printed URL is not "...hf.space.hf.space".
        runtime_host = space_host if space_host.endswith(".hf.space") else f"{space_host}.hf.space"
        print(f"   Runtime URL: https://{runtime_host}")

    if space_id:
        print(f"✅ SPACE_ID: {space_id}")
        print(f"   Repo URL: https://huggingface.co/spaces/{space_id}")

    print("="*50 + "\n")
    print("🌟 Launching GAIA Agent Interface...")
    demo.launch(debug=True, share=False)