Mehedi2 committed on
Commit
a26e4d2
·
verified ·
1 Parent(s): 7d38b5f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -160
app.py CHANGED
@@ -4,14 +4,14 @@ import json
4
  import requests
5
  import pandas as pd
6
  from pathlib import Path
7
- from typing import Optional, Union, Dict, Any, List
8
  from dotenv import load_dotenv
9
 
10
- from langgraph.graph import StateGraph, MessagesState
11
  from langgraph.prebuilt import create_react_agent
12
- from langchain_core.messages import HumanMessage, SystemMessage
13
  from langchain_core.tools import tool
14
  from langchain_openai import ChatOpenAI
 
15
 
16
  load_dotenv()
17
 
@@ -21,7 +21,6 @@ class OpenRouterLLM(ChatOpenAI):
21
 
22
  def __init__(self, model: str = "deepseek/deepseek-v3.1-terminus", **kwargs):
23
  api_key = os.getenv("OPENROUTER_API_KEY") or os.getenv("my_key")
24
-
25
  super().__init__(
26
  model=model,
27
  openai_api_key=api_key,
@@ -30,34 +29,25 @@ class OpenRouterLLM(ChatOpenAI):
30
  )
31
 
32
 
 
 
33
  @tool
34
  def search_web(query: str) -> str:
35
  """Search the web using DuckDuckGo for current information."""
36
  try:
37
- # Simple web search using DuckDuckGo
38
  search_url = f"https://api.duckduckgo.com/?q={query}&format=json&no_html=1&skip_disambig=1"
39
  response = requests.get(search_url, timeout=10)
40
-
41
  if response.status_code == 200:
42
  data = response.json()
43
-
44
- # Extract results
45
  results = []
46
  if data.get("AbstractText"):
47
  results.append(f"Abstract: {data['AbstractText']}")
48
-
49
  if data.get("RelatedTopics"):
50
  for topic in data["RelatedTopics"][:3]:
51
  if isinstance(topic, dict) and topic.get("Text"):
52
  results.append(f"Related: {topic['Text']}")
53
-
54
- if results:
55
- return "\n".join(results)
56
- else:
57
- return f"Search performed for '{query}' but no specific results found."
58
- else:
59
- return f"Search failed with status code {response.status_code}"
60
-
61
  except Exception as e:
62
  return f"Search error: {str(e)}"
63
 
@@ -66,20 +56,13 @@ def search_web(query: str) -> str:
66
  def search_wikipedia(query: str) -> str:
67
  """Search Wikipedia for factual information."""
68
  try:
69
- # Wikipedia API search
70
  search_url = "https://en.wikipedia.org/api/rest_v1/page/summary/" + query.replace(" ", "_")
71
  response = requests.get(search_url, timeout=10)
72
-
73
  if response.status_code == 200:
74
  data = response.json()
75
  extract = data.get("extract", "")
76
- if extract:
77
- return f"Wikipedia: {extract[:500]}..."
78
- else:
79
- return f"Wikipedia page found for '{query}' but no extract available."
80
- else:
81
- return f"Wikipedia search failed for '{query}'"
82
-
83
  except Exception as e:
84
  return f"Wikipedia search error: {str(e)}"
85
 
@@ -88,51 +71,27 @@ def search_wikipedia(query: str) -> str:
88
  def execute_python(code: str) -> str:
89
  """Execute Python code and return the result."""
90
  try:
91
- # Create a safe execution environment
92
  safe_globals = {
93
  '__builtins__': {
94
- 'print': print,
95
- 'len': len,
96
- 'str': str,
97
- 'int': int,
98
- 'float': float,
99
- 'bool': bool,
100
- 'list': list,
101
- 'dict': dict,
102
- 'tuple': tuple,
103
- 'set': set,
104
- 'range': range,
105
- 'sum': sum,
106
- 'max': max,
107
- 'min': min,
108
- 'abs': abs,
109
- 'round': round,
110
- 'sorted': sorted,
111
- 'enumerate': enumerate,
112
- 'zip': zip,
113
  },
114
  'math': __import__('math'),
115
  'json': __import__('json'),
116
  'datetime': __import__('datetime'),
117
  'random': __import__('random'),
118
  }
119
-
120
- # Capture output
121
- import io
122
- import sys
123
-
124
  old_stdout = sys.stdout
125
  sys.stdout = mystdout = io.StringIO()
126
-
127
  try:
128
- # Execute the code
129
  exec(code, safe_globals)
130
  output = mystdout.getvalue()
131
  finally:
132
  sys.stdout = old_stdout
133
-
134
  return output if output else "Code executed successfully (no output)"
135
-
136
  except Exception as e:
137
  return f"Python execution error: {str(e)}"
138
 
@@ -144,30 +103,20 @@ def read_excel_file(file_path: str, sheet_name: Optional[str] = None) -> str:
144
  file_path_obj = Path(file_path)
145
  if not file_path_obj.exists():
146
  return f"Error: File not found at {file_path}"
147
-
148
- # Try to read the Excel file
149
  if sheet_name and sheet_name.isdigit():
150
  sheet_name = int(sheet_name)
151
  elif sheet_name is None:
152
  sheet_name = 0
153
-
154
  df = pd.read_excel(file_path, sheet_name=sheet_name)
155
-
156
- # Convert to string representation
157
  if len(df) > 20:
158
- # Show first 10 and last 10 rows for large datasets
159
  result = f"Excel file with {len(df)} rows and {len(df.columns)} columns:\n\n"
160
- result += "First 10 rows:\n"
161
- result += df.head(10).to_string(index=False)
162
  result += f"\n\n... ({len(df) - 20} rows omitted) ...\n\n"
163
- result += "Last 10 rows:\n"
164
- result += df.tail(10).to_string(index=False)
165
  else:
166
  result = f"Excel file with {len(df)} rows and {len(df.columns)} columns:\n\n"
167
  result += df.to_string(index=False)
168
-
169
  return result
170
-
171
  except Exception as e:
172
  return f"Error reading Excel file: {str(e)}"
173
 
@@ -179,153 +128,92 @@ def read_text_file(file_path: str) -> str:
179
  file_path_obj = Path(file_path)
180
  if not file_path_obj.exists():
181
  return f"Error: File not found at {file_path}"
182
-
183
- # Try different encodings
184
  encodings = ['utf-8', 'utf-16', 'iso-8859-1', 'cp1252']
185
-
186
  for encoding in encodings:
187
  try:
188
  with open(file_path_obj, 'r', encoding=encoding) as f:
189
- content = f.read()
190
- return f"File content ({encoding} encoding):\n\n{content}"
191
  except UnicodeDecodeError:
192
  continue
193
-
194
- return f"Error: Could not decode file with any standard encoding"
195
-
196
  except Exception as e:
197
  return f"Error reading file: {str(e)}"
198
 
199
 
 
 
200
  class GaiaAgent:
201
  """LangGraph-based agent for GAIA tasks using OpenRouter DeepSeek"""
202
 
203
  def __init__(self):
204
  print("Initializing GaiaAgent with LangGraph and OpenRouter DeepSeek...")
205
-
206
- # Initialize the LLM
207
  self.llm = OpenRouterLLM(
208
  model="deepseek/deepseek-v3.1-terminus",
209
  temperature=0.1,
210
  max_tokens=2000
211
  )
212
-
213
- # Define available tools
214
- self.tools = [
215
- search_web,
216
- search_wikipedia,
217
- execute_python,
218
- read_excel_file,
219
- read_text_file,
220
- ]
221
-
222
- # Create the agent
223
- self.agent = create_react_agent(
224
- self.llm,
225
- self.tools,
226
- state_modifier=self._get_system_prompt()
227
- )
228
-
229
  print("GaiaAgent initialized successfully!")
230
-
231
- def _get_system_prompt(self) -> str:
232
- """Get the system prompt for the agent"""
233
- return """You are an advanced AI agent designed to answer complex questions that may require:
234
 
235
- 1. Web searches for current information
236
- 2. Mathematical calculations using Python
237
- 3. File analysis (Excel, text files)
238
- 4. Multi-step reasoning and problem solving
239
-
240
- For GAIA evaluation:
241
- - Provide EXACT, DIRECT answers
242
- - Use tools when necessary to gather information or perform calculations
243
- - For math problems, show your calculation but end with just the number
244
- - For yes/no questions, answer just "Yes" or "No"
245
- - For factual questions, provide just the fact
246
-
247
- When you encounter files:
248
- - Use read_excel_file for .xlsx, .xls files
249
- - Use read_text_file for text-based files
250
- - Analyze the file content to answer the question
251
-
252
- Be thorough in your analysis but concise in your final answer."""
253
 
254
  def __call__(self, task_id: str, question: str) -> str:
255
- """Process a question and return the answer"""
256
  try:
257
  print(f"Processing task {task_id}: {question[:100]}...")
258
-
259
- # Create the input state
260
  messages = [HumanMessage(content=question)]
261
-
262
- # Run the agent
263
  result = self.agent.invoke({"messages": messages})
264
-
265
- # Extract the final answer
266
  final_message = result["messages"][-1]
267
  answer = final_message.content
268
-
269
- # Clean up the answer for GAIA evaluation
270
- clean_answer = self._clean_answer(answer)
271
-
272
- print(f"Agent answer for {task_id}: {clean_answer}")
273
- return clean_answer
274
-
275
  except Exception as e:
276
- error_msg = f"Agent error: {str(e)}"
277
- print(f"Error processing task {task_id}: {error_msg}")
278
- return error_msg
279
-
280
  def _clean_answer(self, answer: str) -> str:
281
- """Clean the answer to extract the final result"""
282
  answer = answer.strip()
283
-
284
- # Look for "Final Answer:" pattern
285
  if "final answer:" in answer.lower():
286
  parts = re.split(r'final answer:', answer, flags=re.IGNORECASE)
287
  if len(parts) > 1:
288
  answer = parts[-1].strip()
289
-
290
- # Remove common prefixes
291
- prefixes = [
292
- "The answer is", "Answer:", "Result:", "Solution:",
293
- "Based on", "Therefore", "In conclusion", "So the answer is"
294
- ]
295
-
296
  for prefix in prefixes:
297
  if answer.lower().startswith(prefix.lower()):
298
  answer = answer[len(prefix):].strip()
299
  if answer.startswith(':'):
300
  answer = answer[1:].strip()
301
  break
302
-
303
- # Remove quotes and periods from short answers
304
  if len(answer.split()) <= 3:
305
  answer = answer.strip('"\'.')
306
-
307
  return answer
308
 
309
 
 
 
310
  import gradio as gr
311
 
312
- # Create a single global agent instance
313
  agent = GaiaAgent()
314
 
315
  def run_agent(prompt: str) -> str:
316
- """
317
- Simple wrapper so GAIA and Hugging Face Spaces can call the agent.
318
- GAIA usually passes only a prompt (not task_id), so we use a dummy ID.
319
- """
320
  return agent("gaia_task", prompt)
321
 
322
- # Expose via Gradio
323
- demo = gr.Interface(
324
- fn=run_agent,
325
- inputs="text",
326
- outputs="text",
327
- title="GAIA Agent"
328
- )
329
 
330
  if __name__ == "__main__":
331
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
4
  import requests
5
  import pandas as pd
6
  from pathlib import Path
7
+ from typing import Optional
8
  from dotenv import load_dotenv
9
 
 
10
  from langgraph.prebuilt import create_react_agent
11
+ from langchain_core.messages import HumanMessage
12
  from langchain_core.tools import tool
13
  from langchain_openai import ChatOpenAI
14
+ import inspect
15
 
16
  load_dotenv()
17
 
 
21
 
22
  def __init__(self, model: str = "deepseek/deepseek-v3.1-terminus", **kwargs):
23
  api_key = os.getenv("OPENROUTER_API_KEY") or os.getenv("my_key")
 
24
  super().__init__(
25
  model=model,
26
  openai_api_key=api_key,
 
29
  )
30
 
31
 
32
+ # ------------------ TOOLS ------------------
33
+
@tool
def search_web(query: str) -> str:
    """Search the web using DuckDuckGo for current information.

    Queries the DuckDuckGo Instant Answer API and returns the abstract plus
    up to three related topics, one per line.

    Args:
        query: Free-text search query.

    Returns:
        The joined result lines, a "No results" message when the API returned
        nothing usable, or a message describing the HTTP/search failure.
    """
    # Let requests build the query string so characters such as "&", "#",
    # "+" or spaces in the query are percent-encoded instead of corrupting
    # the URL (the query used to be interpolated into the URL verbatim).
    params = {"q": query, "format": "json", "no_html": 1, "skip_disambig": 1}
    try:
        response = requests.get(
            "https://api.duckduckgo.com/", params=params, timeout=10
        )
        if response.status_code != 200:
            return f"Search failed with status code {response.status_code}"

        data = response.json()
        results = []
        if data.get("AbstractText"):
            results.append(f"Abstract: {data['AbstractText']}")
        # RelatedTopics may contain non-dict or text-less entries; skip those.
        for topic in (data.get("RelatedTopics") or [])[:3]:
            if isinstance(topic, dict) and topic.get("Text"):
                results.append(f"Related: {topic['Text']}")
        return "\n".join(results) if results else f"No results for '{query}'."
    except Exception as e:
        # Tools report failures as text so the agent can react to them.
        return f"Search error: {str(e)}"
53
 
 
def search_wikipedia(query: str) -> str:
    """Search Wikipedia for factual information.

    Looks up the page summary for the title derived from ``query`` via the
    Wikipedia REST API and returns the first 500 characters of its extract.

    Args:
        query: Page title to look up; spaces are converted to underscores.

    Returns:
        The truncated extract, a "No extract" message when the page has none,
        or a message describing the lookup failure.
    """
    from urllib.parse import quote

    try:
        # The title is a single path segment: percent-encode everything,
        # including "/", "?" and "#", so it cannot alter the request path.
        title = quote(query.replace(" ", "_"), safe="")
        search_url = "https://en.wikipedia.org/api/rest_v1/page/summary/" + title
        response = requests.get(search_url, timeout=10)
        if response.status_code != 200:
            return f"Wikipedia search failed for '{query}'"

        extract = response.json().get("extract", "")
        if not extract:
            return f"No extract for '{query}'."
        return f"Wikipedia: {extract[:500]}..."
    except Exception as e:
        # Tools report failures as text so the agent can react to them.
        return f"Wikipedia search error: {str(e)}"
68
 
 
def execute_python(code: str) -> str:
    """Execute Python code and return the result.

    The code runs with a small whitelist of builtins plus the ``math``,
    ``json``, ``datetime`` and ``random`` modules. Everything it prints is
    captured and returned.

    Args:
        code: Python source to execute.

    Returns:
        The captured standard output, a fixed success message when nothing
        was printed, or a "Python execution error: ..." message if the code
        raised (followed by any output produced before the failure).
    """
    import contextlib
    import io

    # NOTE(security): a restricted ``__builtins__`` dict is not a real
    # sandbox; ``exec`` on model-generated code can still escape it through
    # object introspection. Only run this tool in an isolated environment.
    safe_globals = {
        '__builtins__': {
            'print': print, 'len': len, 'str': str, 'int': int, 'float': float,
            'bool': bool, 'list': list, 'dict': dict, 'tuple': tuple, 'set': set,
            'range': range, 'sum': sum, 'max': max, 'min': min, 'abs': abs,
            'round': round, 'sorted': sorted, 'enumerate': enumerate, 'zip': zip,
        },
        'math': __import__('math'),
        'json': __import__('json'),
        'datetime': __import__('datetime'),
        'random': __import__('random'),
    }

    captured = io.StringIO()
    try:
        # redirect_stdout restores sys.stdout on every exit path.
        with contextlib.redirect_stdout(captured):
            exec(code, safe_globals)
    except Exception as e:
        message = f"Python execution error: {str(e)}"
        # Keep whatever was printed before the failure; it is often the
        # most useful debugging information for the caller.
        partial = captured.getvalue()
        if partial:
            message += f"\nOutput before error:\n{partial}"
        return message

    output = captured.getvalue()
    return output if output else "Code executed successfully (no output)"
97
 
 
103
  file_path_obj = Path(file_path)
104
  if not file_path_obj.exists():
105
  return f"Error: File not found at {file_path}"
 
 
106
  if sheet_name and sheet_name.isdigit():
107
  sheet_name = int(sheet_name)
108
  elif sheet_name is None:
109
  sheet_name = 0
 
110
  df = pd.read_excel(file_path, sheet_name=sheet_name)
 
 
111
  if len(df) > 20:
 
112
  result = f"Excel file with {len(df)} rows and {len(df.columns)} columns:\n\n"
113
+ result += "First 10 rows:\n" + df.head(10).to_string(index=False)
 
114
  result += f"\n\n... ({len(df) - 20} rows omitted) ...\n\n"
115
+ result += "Last 10 rows:\n" + df.tail(10).to_string(index=False)
 
116
  else:
117
  result = f"Excel file with {len(df)} rows and {len(df.columns)} columns:\n\n"
118
  result += df.to_string(index=False)
 
119
  return result
 
120
  except Exception as e:
121
  return f"Error reading Excel file: {str(e)}"
122
 
 
128
  file_path_obj = Path(file_path)
129
  if not file_path_obj.exists():
130
  return f"Error: File not found at {file_path}"
 
 
131
  encodings = ['utf-8', 'utf-16', 'iso-8859-1', 'cp1252']
 
132
  for encoding in encodings:
133
  try:
134
  with open(file_path_obj, 'r', encoding=encoding) as f:
135
+ return f"File content ({encoding} encoding):\n\n{f.read()}"
 
136
  except UnicodeDecodeError:
137
  continue
138
+ return "Error: Could not decode file with any standard encoding"
 
 
139
  except Exception as e:
140
  return f"Error reading file: {str(e)}"
141
 
142
 
143
+ # ------------------ GAIA AGENT ------------------
144
+
class GaiaAgent:
    """LangGraph-based agent for GAIA tasks using OpenRouter DeepSeek.

    Wraps a ReAct agent built from the OpenRouter-backed LLM and the tool
    functions defined in this module. Instances are callable with a task ID
    and a question and return a cleaned-up answer string.
    """

    # Keyword names under which different LangGraph releases accept the
    # system prompt, newest first so deprecated spellings are only used when
    # nothing newer is available.
    _PROMPT_KWARGS = ("prompt", "state_modifier", "messages_modifier")

    # Lead-in phrases stripped from the start of an answer (first match wins).
    _ANSWER_PREFIXES = (
        "The answer is", "Answer:", "Result:", "Solution:",
        "Based on", "Therefore", "In conclusion", "So the answer is",
    )

    def __init__(self):
        """Build the LLM, the tool list and the underlying ReAct agent."""
        print("Initializing GaiaAgent with LangGraph and OpenRouter DeepSeek...")
        self.llm = OpenRouterLLM(
            model="deepseek/deepseek-v3.1-terminus",
            temperature=0.1,
            max_tokens=2000,
        )
        self.tools = [
            search_web,
            search_wikipedia,
            execute_python,
            read_excel_file,
            read_text_file,
        ]

        # Detect which keyword the installed LangGraph version accepts for
        # the system prompt.
        accepted = inspect.signature(create_react_agent).parameters
        kwargs = {}
        for name in self._PROMPT_KWARGS:
            if name in accepted:
                kwargs[name] = self._get_system_prompt()
                break
        else:
            # Do not drop the prompt silently: make the degradation visible.
            print("Warning: create_react_agent accepts no known prompt "
                  "argument; running without a system prompt.")

        self.agent = create_react_agent(self.llm, self.tools, **kwargs)
        print("GaiaAgent initialized successfully!")

    def _get_system_prompt(self) -> str:
        """Return the system prompt that instructs the agent for GAIA tasks."""
        return (
            "You are an advanced AI agent designed to answer complex questions that may require:\n"
            "\n"
            "1. Web searches for current information\n"
            "2. Mathematical calculations using Python\n"
            "3. File analysis (Excel, text files)\n"
            "4. Multi-step reasoning and problem solving\n"
            "\n"
            "For GAIA evaluation:\n"
            "- Provide EXACT, DIRECT answers\n"
            "- Use tools when necessary to gather information or perform calculations\n"
            "- For math problems, show your calculation but end with just the number\n"
            "- For yes/no questions, answer just \"Yes\" or \"No\"\n"
            "- For factual questions, provide just the fact\n"
            "\n"
            "When you encounter files:\n"
            "- Use read_excel_file for .xlsx, .xls files\n"
            "- Use read_text_file for text-based files\n"
            "- Analyze the file content to answer the question\n"
            "\n"
            "Be thorough in your analysis but concise in your final answer."
        )

    def __call__(self, task_id: str, question: str) -> str:
        """Process a question and return the cleaned answer.

        Args:
            task_id: Identifier used only for logging.
            question: The question text sent to the agent.

        Returns:
            The cleaned final answer, or an "Agent error: ..." message if the
            agent run raised.
        """
        try:
            print(f"Processing task {task_id}: {question[:100]}...")
            messages = [HumanMessage(content=question)]
            result = self.agent.invoke({"messages": messages})
            # The last message in the returned state is the agent's reply.
            final_message = result["messages"][-1]
            answer = final_message.content
            return self._clean_answer(answer)
        except Exception as e:
            # Log the failure so it is visible in the server output, then
            # return it as text so the caller always gets a string.
            print(f"Error processing task {task_id}: {e}")
            return f"Agent error: {e}"

    def _clean_answer(self, answer: str) -> str:
        """Reduce a verbose model reply to the bare final answer.

        Keeps only the text after the last "final answer:" marker (case
        insensitive), strips one known lead-in phrase and an optional colon
        after it, and removes surrounding quotes and periods from answers of
        three words or fewer.
        """
        answer = answer.strip()
        if "final answer:" in answer.lower():
            parts = re.split(r'final answer:', answer, flags=re.IGNORECASE)
            if len(parts) > 1:
                answer = parts[-1].strip()

        lowered = answer.lower()
        for prefix in self._ANSWER_PREFIXES:
            if lowered.startswith(prefix.lower()):
                answer = answer[len(prefix):].strip()
                if answer.startswith(':'):
                    answer = answer[1:].strip()
                break

        if len(answer.split()) <= 3:
            answer = answer.strip('"\'.')
        return answer
205
 
206
 
207
+ # ------------------ ENTRYPOINT ------------------
208
+
209
  import gradio as gr
210
 
 
# Single shared agent instance, created once when the module is imported.
agent = GaiaAgent()


def run_agent(prompt: str) -> str:
    """Answer one prompt with the shared agent.

    Only a prompt is supplied at this entry point, so a fixed placeholder
    task ID is passed along to the agent.
    """
    return agent("gaia_task", prompt)


# Expose the agent through a plain text-in / text-out Gradio interface.
demo = gr.Interface(
    fn=run_agent,
    inputs="text",
    outputs="text",
    title="GAIA Agent",
)


if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)