Mehedi2 committed on
Commit
56d667b
·
verified ·
1 Parent(s): 172b75b

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +195 -172
  2. gaia_api.py +207 -0
  3. requirements.txt +10 -1
app.py CHANGED
@@ -1,196 +1,219 @@
1
  import os
2
- import gradio as gr
 
3
  import requests
4
- import inspect
5
  import pandas as pd
 
 
 
 
 
 
 
 
 
6
 
7
- # (Keep Constants as is)
8
- # --- Constants ---
9
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
- # --- Basic Agent Definition ---
12
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
- class BasicAgent:
14
- def __init__(self):
15
- print("BasicAgent initialized.")
16
- def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
21
-
22
- def run_and_submit_all( profile: gr.OAuthProfile | None):
23
- """
24
- Fetches all questions, runs the BasicAgent on them, submits all answers,
25
- and displays the results.
26
- """
27
- # --- Determine HF Space Runtime URL and Repo URL ---
28
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
29
-
30
- if profile:
31
- username= f"{profile.username}"
32
- print(f"User logged in: {username}")
33
- else:
34
- print("User not logged in.")
35
- return "Please Login to Hugging Face with the button.", None
36
-
37
- api_url = DEFAULT_API_URL
38
- questions_url = f"{api_url}/questions"
39
- submit_url = f"{api_url}/submit"
40
-
41
- # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
43
- agent = BasicAgent()
 
 
 
 
 
 
 
 
 
 
 
 
44
  except Exception as e:
45
- print(f"Error instantiating agent: {e}")
46
- return f"Error initializing agent: {e}", None
47
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
48
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
49
- print(agent_code)
50
-
51
- # 2. Fetch Questions
52
- print(f"Fetching questions from: {questions_url}")
53
  try:
54
- response = requests.get(questions_url, timeout=15)
55
- response.raise_for_status()
56
- questions_data = response.json()
57
- if not questions_data:
58
- print("Fetched questions list is empty.")
59
- return "Fetched questions list is empty or invalid format.", None
60
- print(f"Fetched {len(questions_data)} questions.")
61
- except requests.exceptions.RequestException as e:
62
- print(f"Error fetching questions: {e}")
63
- return f"Error fetching questions: {e}", None
64
- except requests.exceptions.JSONDecodeError as e:
65
- print(f"Error decoding JSON response from questions endpoint: {e}")
66
- print(f"Response text: {response.text[:500]}")
67
- return f"Error decoding server response for questions: {e}", None
68
  except Exception as e:
69
- print(f"An unexpected error occurred fetching questions: {e}")
70
- return f"An unexpected error occurred fetching questions: {e}", None
71
-
72
- # 3. Run your Agent
73
- results_log = []
74
- answers_payload = []
75
- print(f"Running agent on {len(questions_data)} questions...")
76
- for item in questions_data:
77
- task_id = item.get("task_id")
78
- question_text = item.get("question")
79
- if not task_id or question_text is None:
80
- print(f"Skipping item with missing task_id or question: {item}")
81
- continue
82
- try:
83
- submitted_answer = agent(question_text)
84
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
86
- except Exception as e:
87
- print(f"Error running agent on task {task_id}: {e}")
88
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
89
-
90
- if not answers_payload:
91
- print("Agent did not produce any answers to submit.")
92
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
93
 
94
- # 4. Prepare Submission
95
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
96
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
97
- print(status_update)
98
 
99
- # 5. Submit
100
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
 
101
  try:
102
- response = requests.post(submit_url, json=submission_data, timeout=60)
103
- response.raise_for_status()
104
- result_data = response.json()
105
- final_status = (
106
- f"Submission Successful!\n"
107
- f"User: {result_data.get('username')}\n"
108
- f"Overall Score: {result_data.get('score', 'N/A')}% "
109
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
110
- f"Message: {result_data.get('message', 'No message received.')}"
111
- )
112
- print("Submission successful.")
113
- results_df = pd.DataFrame(results_log)
114
- return final_status, results_df
115
- except requests.exceptions.HTTPError as e:
116
- error_detail = f"Server responded with status {e.response.status_code}."
117
  try:
118
- error_json = e.response.json()
119
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
120
- except requests.exceptions.JSONDecodeError:
121
- error_detail += f" Response: {e.response.text[:500]}"
122
- status_message = f"Submission Failed: {error_detail}"
123
- print(status_message)
124
- results_df = pd.DataFrame(results_log)
125
- return status_message, results_df
126
- except requests.exceptions.Timeout:
127
- status_message = "Submission Failed: The request timed out."
128
- print(status_message)
129
- results_df = pd.DataFrame(results_log)
130
- return status_message, results_df
131
- except requests.exceptions.RequestException as e:
132
- status_message = f"Submission Failed: Network error - {e}"
133
- print(status_message)
134
- results_df = pd.DataFrame(results_log)
135
- return status_message, results_df
136
  except Exception as e:
137
- status_message = f"An unexpected error occurred during submission: {e}"
138
- print(status_message)
139
- results_df = pd.DataFrame(results_log)
140
- return status_message, results_df
141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
- # --- Build Gradio Interface using Blocks ---
144
- with gr.Blocks() as demo:
145
- gr.Markdown("# Basic Agent Evaluation Runner")
146
- gr.Markdown(
147
- """
148
- **Instructions:**
149
 
150
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
151
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
152
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
- ---
155
- **Disclaimers:**
156
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
157
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
158
- """
159
- )
160
 
161
- gr.LoginButton()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
- run_button = gr.Button("Run Evaluation & Submit All Answers")
164
 
165
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
166
- # Removed max_rows=10 from DataFrame constructor
167
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
168
 
169
- run_button.click(
170
- fn=run_and_submit_all,
171
- outputs=[status_output, results_table]
172
- )
173
 
174
  if __name__ == "__main__":
175
- print("\n" + "-"*30 + " App Starting " + "-"*30)
176
- # Check for SPACE_HOST and SPACE_ID at startup for information
177
- space_host_startup = os.getenv("SPACE_HOST")
178
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
179
-
180
- if space_host_startup:
181
- print(f"✅ SPACE_HOST found: {space_host_startup}")
182
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
183
- else:
184
- print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
185
-
186
- if space_id_startup: # Print repo URLs if SPACE_ID is found
187
- print(f"✅ SPACE_ID found: {space_id_startup}")
188
- print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
189
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
190
- else:
191
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
192
-
193
- print("-"*(60 + len(" App Starting ")) + "\n")
194
-
195
- print("Launching Gradio Interface for Basic Agent Evaluation...")
196
- demo.launch(debug=True, share=False)
 
1
  import os
2
+ import re
3
+ import json
4
  import requests
 
5
  import pandas as pd
6
+ from pathlib import Path
7
+ from typing import Optional
8
+ from dotenv import load_dotenv
9
+
10
+ from langgraph.prebuilt import create_react_agent
11
+ from langchain_core.messages import HumanMessage
12
+ from langchain_core.tools import tool
13
+ from langchain_openai import ChatOpenAI
14
+ import inspect
15
 
16
+ load_dotenv()
 
 
17
 
18
+
19
class OpenRouterLLM(ChatOpenAI):
    """ChatOpenAI subclass preconfigured for the OpenRouter endpoint.

    The API key is taken from the ``OPENROUTER_API_KEY`` environment
    variable, falling back to ``my_key`` when the former is unset or empty.
    """

    def __init__(self, model: str = "deepseek/deepseek-v3.1-terminus", **kwargs):
        # Resolve the credential up front so the parent call stays declarative.
        key_from_env = os.getenv("OPENROUTER_API_KEY")
        if not key_from_env:
            key_from_env = os.getenv("my_key")

        openrouter_settings = {
            "model": model,
            "openai_api_key": key_from_env,
            "openai_api_base": "https://openrouter.ai/api/v1",
        }
        # Caller-supplied keyword arguments are forwarded unchanged.
        super().__init__(**openrouter_settings, **kwargs)
30
+
31
+
32
+ # ------------------ TOOLS ------------------
33
+
34
@tool
def search_web(query: str) -> str:
    """Search the web using DuckDuckGo for current information."""
    # The docstring above is consumed by the @tool decorator, so it is kept
    # verbatim; implementation notes live in comments instead.
    #
    # Queries the DuckDuckGo Instant Answer API and returns the abstract plus
    # up to three related-topic snippets, one per line. Every failure is
    # reported as a plain string so the calling agent never sees an exception.
    try:
        # Let requests build the query string: interpolating the raw query
        # into the URL breaks on '&', '#', '+', spaces and non-ASCII text.
        response = requests.get(
            "https://api.duckduckgo.com/",
            params={
                "q": query,
                "format": "json",
                "no_html": 1,
                "skip_disambig": 1,
            },
            timeout=10,
        )
        if response.status_code != 200:
            return f"Search failed with status code {response.status_code}"

        data = response.json()
        results = []
        if data.get("AbstractText"):
            results.append(f"Abstract: {data['AbstractText']}")
        # Only entries that are dicts carrying a "Text" field are used;
        # anything else in the list is skipped.
        for topic in (data.get("RelatedTopics") or [])[:3]:
            if isinstance(topic, dict) and topic.get("Text"):
                results.append(f"Related: {topic['Text']}")
        return "\n".join(results) if results else f"No results for '{query}'."
    except Exception as e:
        return f"Search error: {str(e)}"
53
+
54
+
55
@tool
def search_wikipedia(query: str) -> str:
    """Search Wikipedia for factual information."""
    # The docstring above is consumed by the @tool decorator, so it is kept
    # verbatim; implementation notes live in comments instead.
    #
    # Fetches the REST "page summary" for the title derived from `query` and
    # returns at most the first 500 characters of its extract. Failures are
    # reported as plain strings rather than raised.
    from urllib.parse import quote

    max_chars = 500
    try:
        # Percent-encode the title as a single path segment; otherwise
        # characters such as '/', '?' or '#' change the meaning of the URL.
        title = quote(query.strip().replace(" ", "_"), safe="")
        search_url = "https://en.wikipedia.org/api/rest_v1/page/summary/" + title
        response = requests.get(search_url, timeout=10)
        if response.status_code != 200:
            return f"Wikipedia search failed for '{query}'"

        extract = response.json().get("extract", "")
        if not extract:
            return f"No extract for '{query}'."
        # Only mark the text as truncated when something was actually cut.
        ellipsis = "..." if len(extract) > max_chars else ""
        return f"Wikipedia: {extract[:max_chars]}{ellipsis}"
    except Exception as e:
        return f"Wikipedia search error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
 
 
 
 
69
 
70
@tool
def execute_python(code: str) -> str:
    """Execute Python code and return the result."""
    # The docstring above is consumed by the @tool decorator, so it is kept
    # verbatim; implementation notes live in comments instead.
    #
    # Runs `code` with a reduced set of builtins and returns whatever it
    # printed. Errors are returned as strings, never raised.
    #
    # SECURITY NOTE(review): this is NOT a real sandbox. `exec` runs
    # model-generated code in-process, and the exposed modules leak powerful
    # objects through their attributes (e.g. `random._os`). Consider running
    # untrusted code in a separate, locked-down process instead.
    import contextlib
    import datetime
    import io
    import json
    import math
    import random

    allowed_modules = {
        "math": math,
        "json": json,
        "datetime": datetime,
        "random": random,
    }

    def _restricted_import(name, _globals=None, _locals=None, fromlist=(), level=0):
        # Lets snippets write `import math` / `from datetime import date`
        # for the preloaded modules while rejecting everything else.
        if level == 0 and name in allowed_modules:
            return allowed_modules[name]
        raise ImportError(f"Import of '{name}' is not allowed")

    safe_globals = {
        "__builtins__": {
            "print": print, "len": len, "str": str, "int": int, "float": float,
            "bool": bool, "list": list, "dict": dict, "tuple": tuple, "set": set,
            "range": range, "sum": sum, "max": max, "min": min, "abs": abs,
            "round": round, "sorted": sorted, "enumerate": enumerate, "zip": zip,
            "__import__": _restricted_import,
        },
        **allowed_modules,
    }

    captured = io.StringIO()
    try:
        # redirect_stdout restores sys.stdout even if the snippet raises.
        # It still swaps the process-wide stream, so concurrent calls can
        # interleave their output.
        with contextlib.redirect_stdout(captured):
            exec(code, safe_globals)
    except Exception as e:
        message = f"Python execution error: {str(e)}"
        partial = captured.getvalue()
        if partial:
            # Keep what was printed before the failure; it is often the
            # most useful debugging signal for the agent.
            message += f"\nOutput before error:\n{partial}"
        return message

    output = captured.getvalue()
    return output if output else "Code executed successfully (no output)"
97
+
 
 
98
 
99
@tool
def read_excel_file(file_path: str, sheet_name: Optional[str] = None) -> str:
    """Read an Excel file and return its contents as a formatted string."""
    # The docstring above is consumed by the @tool decorator, so it is kept
    # verbatim; implementation notes live in comments instead.
    #
    # Sheets with more than 20 rows are summarised as the first and last 10
    # rows. Errors are returned as strings, never raised.
    preview_rows = 10
    try:
        if not Path(file_path).exists():
            return f"Error: File not found at {file_path}"

        # Missing or blank sheet names select the first sheet. A blank string
        # previously fell through and was handed to pandas as a sheet name.
        if sheet_name is None or not sheet_name.strip():
            sheet = 0
        elif sheet_name.strip().isdigit():
            # All-digit names are treated as a zero-based sheet index.
            sheet = int(sheet_name.strip())
        else:
            sheet = sheet_name

        df = pd.read_excel(file_path, sheet_name=sheet)
        total_rows = len(df)
        result = f"Excel file with {total_rows} rows and {len(df.columns)} columns:\n\n"
        if total_rows > 2 * preview_rows:
            result += "First 10 rows:\n" + df.head(preview_rows).to_string(index=False)
            result += f"\n\n... ({total_rows - 2 * preview_rows} rows omitted) ...\n\n"
            result += "Last 10 rows:\n" + df.tail(preview_rows).to_string(index=False)
        else:
            result += df.to_string(index=False)
        return result
    except Exception as e:
        return f"Error reading Excel file: {str(e)}"
122
 
 
 
 
 
 
 
123
 
124
@tool
def read_text_file(file_path: str) -> str:
    """Read a text file and return its contents."""
    # The docstring above is consumed by the @tool decorator, so it is kept
    # verbatim; implementation notes live in comments instead.
    #
    # Reads the file once as bytes and decodes it with the first encoding
    # that succeeds. Errors are returned as strings, never raised.
    try:
        file_path_obj = Path(file_path)
        if not file_path_obj.exists():
            return f"Error: File not found at {file_path}"

        raw = file_path_obj.read_bytes()
        if raw.startswith((b"\xff\xfe", b"\xfe\xff")):
            # Only trust UTF-16 when a byte-order mark is present: without
            # one, most even-length 8-bit files "decode" as UTF-16 garbage.
            encodings = ["utf-16"]
        else:
            # cp1252 must precede iso-8859-1, which accepts every byte
            # sequence and would otherwise make cp1252 unreachable.
            encodings = ["utf-8", "cp1252"]
        encodings.append("iso-8859-1")

        for encoding in encodings:
            try:
                text = raw.decode(encoding)
            except UnicodeDecodeError:
                continue
            # Match text-mode reading, which normalises line endings.
            text = text.replace("\r\n", "\n").replace("\r", "\n")
            return f"File content ({encoding} encoding):\n\n{text}"
        return "Error: Could not decode file with any standard encoding"
    except Exception as e:
        return f"Error reading file: {str(e)}"
141
+
142
 
143
+ # ------------------ GAIA AGENT ------------------
 
 
 
 
 
144
 
145
class GaiaAgent:
    """LangGraph-based agent for GAIA tasks using OpenRouter DeepSeek.

    The instance is callable: ``agent(task_id, question)`` runs the ReAct
    agent on the question and returns a cleaned-up answer string.
    """

    # Lead-in phrases removed from the start of a model reply (matched
    # case-insensitively, first match wins).
    _ANSWER_PREFIXES = (
        "The answer is", "Answer:", "Result:", "Solution:",
        "Based on", "Therefore", "In conclusion", "So the answer is",
    )

    def __init__(self):
        """Build the LLM, register the tools and create the ReAct agent."""
        print("Initializing GaiaAgent with LangGraph and OpenRouter DeepSeek...")
        self.llm = OpenRouterLLM(
            model="deepseek/deepseek-v3.1-terminus",
            temperature=0.1,
            max_tokens=2000,
        )
        self.tools = [search_web, search_wikipedia, execute_python, read_excel_file, read_text_file]
        prompt_modifier = self._get_system_prompt()

        # The keyword carrying the system prompt differs between LangGraph
        # releases. Probe newest-first so a version that accepts several
        # spellings is not driven through a deprecated one.
        accepted = inspect.signature(create_react_agent).parameters.keys()
        kwargs = {}
        for candidate in ("prompt", "state_modifier", "messages_modifier"):
            if candidate in accepted:
                kwargs[candidate] = prompt_modifier
                break
        else:
            # Make the degraded configuration visible instead of silently
            # running without a system prompt.
            print("Warning: create_react_agent accepts no known prompt keyword; "
                  "running without a system prompt.")

        self.agent = create_react_agent(self.llm, self.tools, **kwargs)
        print("GaiaAgent initialized successfully!")

    def _get_system_prompt(self) -> str:
        """Return the system prompt handed to the ReAct agent."""
        # NOTE(review): the second line looks like a leftover placeholder
        # rather than real instructions — confirm and restore the full prompt.
        return (
            "You are an advanced AI agent designed to answer complex questions...\n"
            "(keep your original system prompt here)"
        )

    @staticmethod
    def _message_text(content) -> str:
        """Flatten message content into a plain string.

        Strings pass through unchanged. For a list, string items and the
        ``"text"`` field of dict items are concatenated; other items are
        ignored. Anything else is converted with ``str``.
        """
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            pieces = []
            for part in content:
                if isinstance(part, str):
                    pieces.append(part)
                elif isinstance(part, dict):
                    pieces.append(str(part.get("text", "")))
            return "".join(pieces)
        return str(content)

    def __call__(self, task_id: str, question: str) -> str:
        """Run the agent on one question.

        Args:
            task_id: Identifier used only for logging.
            question: The question text sent to the agent.

        Returns:
            The cleaned answer, or ``"Agent error: ..."`` if anything fails.
        """
        try:
            print(f"Processing task {task_id}: {question[:100]}...")
            messages = [HumanMessage(content=question)]
            result = self.agent.invoke({"messages": messages})
            # The last message of the run is taken as the final reply.
            final_message = result["messages"][-1]
            answer = self._message_text(final_message.content)
            return self._clean_answer(answer)
        except Exception as e:
            return f"Agent error: {e}"

    def _clean_answer(self, answer: str) -> str:
        """Reduce a verbose model reply to the bare answer.

        Keeps only the text after the last "final answer:" marker, removes
        one known lead-in phrase, and trims quotes and a trailing period
        from answers of three words or fewer.
        """
        answer = answer.strip()

        # Keep only what follows the last "final answer:" marker, if any.
        parts = re.split(r'final answer:', answer, flags=re.IGNORECASE)
        if len(parts) > 1:
            answer = parts[-1].strip()

        lowered = answer.lower()
        for prefix in self._ANSWER_PREFIXES:
            if lowered.startswith(prefix.lower()):
                # Also drop the punctuation that usually follows the lead-in
                # ("Therefore, X" / "The answer is: X").
                answer = answer[len(prefix):].strip().lstrip(":,").strip()
                break

        if len(answer.split()) <= 3:
            # Strip quotes on both sides but periods only on the right, so
            # values such as ".5" keep their leading dot.
            answer = answer.strip("\"'").rstrip(".").strip("\"'")
        return answer
205
+
206
+
207
+ # ------------------ ENTRYPOINT ------------------
208
+
209
+ import gradio as gr
210
 
211
# Constructed once at import time so every request reuses the same agent.
agent = GaiaAgent()


def run_agent(prompt: str) -> str:
    # The text UI has no task ids, so a fixed label is passed for logging.
    task_label = "gaia_task"
    return agent(task_label, prompt)


demo = gr.Interface(
    fn=run_agent,
    inputs="text",
    outputs="text",
    title="GAIA Agent",
)

if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
gaia_api.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Tuple
2
+ import re
3
+ import tempfile
4
+ from pathlib import Path
5
+ import pandas as pd
6
+ import requests
7
+ from pandas import DataFrame
8
+
9
+ # --- Constants ---
10
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
+ QUESTIONS_URL = f"{DEFAULT_API_URL}/questions"
12
+ SUBMIT_URL = f"{DEFAULT_API_URL}/submit"
13
+ FILE_PATH = f"{DEFAULT_API_URL}/files/"
14
+
15
+
16
+ # --- Helper Methods ---
17
def fetch_all_questions() -> List[Dict]:
    """Fetch all questions from the scoring API.

    Returns:
        The decoded JSON payload of ``QUESTIONS_URL`` (annotated as a list of
        question dictionaries, matching how ``run_agent`` consumes it).

    Raises:
        UserWarning: If the request fails, the response is not valid JSON, or
            the payload is empty. The original exception, when there is one,
            is attached as ``__cause__``.
    """
    print(f"Fetching questions from: {QUESTIONS_URL}")
    try:
        # The request itself is inside the try block so connection errors and
        # timeouts are reported through the documented UserWarning.
        response = requests.get(QUESTIONS_URL, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except requests.exceptions.JSONDecodeError as e:
        # Listed before RequestException: requests' JSONDecodeError derives
        # from it, so the opposite order makes this handler unreachable.
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        raise UserWarning(f"Error decoding server response for questions: {e}") from e
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        raise UserWarning(f"Error fetching questions: {e}") from e
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        raise UserWarning(f"An unexpected error occurred fetching questions: {e}") from e

    # Checked outside the try block so this warning is not caught and
    # re-wrapped as an "unexpected error".
    if not questions_data:
        print("Fetched questions list is empty.")
        raise UserWarning("Fetched questions list is empty or invalid format.")

    print(f"Fetched {len(questions_data)} questions.")
    return questions_data
52
+
53
+
54
def submit_answers(submission_data: dict, results_log: list) -> Tuple[str, DataFrame]:
    """Post the answers to the scoring API and report the outcome.

    Args:
        submission_data (dict): Payload sent as JSON to ``SUBMIT_URL``.
        results_log (list): Per-question log rows, returned as a DataFrame.

    Returns:
        Tuple[str, DataFrame]: A human-readable status message (success
        summary or failure description) and the results log as a DataFrame.
    """
    try:
        reply = requests.post(SUBMIT_URL, json=submission_data, timeout=60)
        reply.raise_for_status()
        payload = reply.json()
        outcome = (
            f"Submission Successful!\n"
            f"User: {payload.get('username')}\n"
            f"Overall Score: {payload.get('score', 'N/A')}% "
            f"({payload.get('correct_count', '?')}/"
            f"{payload.get('total_attempted', '?')} correct)\n"
            f"Message: {payload.get('message', 'No message received.')}"
        )
        print("Submission successful.")
    except requests.exceptions.HTTPError as http_err:
        detail = f"Server responded with status {http_err.response.status_code}."
        try:
            # Prefer the structured "detail" field when the body is JSON.
            body = http_err.response.json()
            detail += f" Detail: {body.get('detail', http_err.response.text)}"
        except requests.exceptions.JSONDecodeError:
            detail += f" Response: {http_err.response.text[:500]}"
        outcome = f"Submission Failed: {detail}"
        print(outcome)
    except requests.exceptions.Timeout:
        outcome = "Submission Failed: The request timed out."
        print(outcome)
    except requests.exceptions.RequestException as net_err:
        outcome = f"Submission Failed: Network error - {net_err}"
        print(outcome)
    except Exception as unexpected:
        outcome = f"An unexpected error occurred during submission: {unexpected}"
        print(outcome)

    # Every path returns the same table, so it is built once here.
    return outcome, pd.DataFrame(results_log)
116
+
117
+
118
def run_agent(gaia_agent, questions_data: List[Dict]) -> Tuple[List[Dict], List[Dict]]:
    """Run the agent on every valid question and collect the outcomes.

    Args:
        gaia_agent: Callable invoked as ``gaia_agent(task_id, question_text)``
            that returns the answer for one question.
        questions_data (List[Dict]): Question records; each is read through
            its ``task_id`` and ``question`` keys.

    Returns:
        Tuple[List[Dict], List[Dict]]: ``(results_log, answers_payload)``.
        ``results_log`` has one row per processed question with the keys
        ``Task ID``, ``Question`` and ``Submitted Answer`` (an
        ``AGENT ERROR: ...`` text when the agent raised). ``answers_payload``
        holds ``task_id`` / ``submitted_answer`` pairs for successful runs only.
    """
    results_log = []
    answers_payload = []

    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        # Validate before touching the file endpoint: otherwise a missing
        # task_id requests ".../files/None", and a missing question can come
        # back from process_file as the string "None..." and pass this check.
        if not task_id or question_text is None:
            print(f"Skipping invalid item (missing task_id or question): {item}")
            continue

        # Adds the local path of any attachment to the question text.
        question_text = process_file(task_id, question_text)
        try:
            submitted_answer = gaia_agent(task_id, question_text)
            answers_payload.append(
                {"task_id": task_id, "submitted_answer": submitted_answer}
            )
        except Exception as e:
            print(f"Error running agent on task {task_id}: {e}")
            # Failures are logged for display but not submitted.
            submitted_answer = f"AGENT ERROR: {e}"

        results_log.append(
            {
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": submitted_answer,
            }
        )
    return results_log, answers_payload
166
+
167
+
168
def process_file(task_id: str, question_text: str) -> str:
    """Download the file attached to a task, if any, and reference it in the question.

    Args:
        task_id: Task identifier appended to ``FILE_PATH`` to build the URL.
        question_text: The original question text.

    Returns:
        ``question_text`` unchanged when the download fails for any reason
        (connection error, timeout, or any non-success HTTP status, including
        404). Otherwise the question text followed by a note giving the local
        path where the file was saved.
    """
    file_url = f"{FILE_PATH}{task_id}"

    try:
        response = requests.get(file_url, timeout=30)
        response.raise_for_status()
    except requests.exceptions.RequestException as exc:
        # Best effort: any download failure is treated as "no attachment".
        print(f"Exception in download_file>> {str(exc)}")
        return question_text  # Unable to get the file

    # Determine filename from 'Content-Disposition' header, fallback to task_id
    content_disposition = response.headers.get("content-disposition", "")
    match = re.search(r'filename="([^"]+)"', content_disposition)
    requested_name = match.group(1) if match else str(task_id)

    # The header is server-controlled: keep only the final path component so
    # a name like "../../x" cannot escape the cache directory.
    filename = Path(requested_name.replace("\\", "/")).name
    if filename in ("", ".", ".."):
        filename = str(task_id)

    # Save file in a temp directory
    temp_storage_dir = Path(tempfile.gettempdir()) / "gaia_cached_files"
    temp_storage_dir.mkdir(parents=True, exist_ok=True)

    file_path = temp_storage_dir / filename
    file_path.write_bytes(response.content)

    print(f"Downloaded file for task {task_id}: {filename}")

    return (
        f"{question_text}\n\n"
        f"---\n"
        f"A file was downloaded for this task and saved locally at:\n"
        f"{str(file_path)}\n"
        f"---\n\n"
    )
requirements.txt CHANGED
@@ -1,2 +1,11 @@
1
  gradio
2
- requests
 
 
 
 
 
 
 
 
 
 
1
  gradio
2
+ gradio[oauth]
3
+ requests
4
+ python-dotenv
5
+ pandas
6
+ openpyxl
7
+ xlrd
8
+ langgraph>=0.2.0
9
+ langchain-core>=0.3.0
10
+ langchain-openai>=0.2.0
11
+ langchain-community>=0.3.0