Mehedi2 committed on
Commit
5966015
·
verified ·
1 Parent(s): 28c303d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +292 -179
app.py CHANGED
@@ -1,194 +1,307 @@
1
  import os
 
2
  import json
3
- import time
4
  import requests
5
- import gradio as gr
6
-
7
- # ===============================
8
- # 1. LLM Wrapper (Your Original)
9
- # ===============================
10
- class OpenRouterLLM:
11
- def __init__(self, api_key=None, model="deepseek/deepseek-v3.1-terminus"):
12
- self.api_key = api_key or os.getenv("OPENROUTER_API_KEY")
13
- self.model = model
14
- self.base_url = "https://openrouter.ai/api/v1"
15
- if not self.api_key:
16
- raise ValueError("Missing OpenRouter API key. Set OPENROUTER_API_KEY environment variable.")
 
 
 
 
17
 
18
- def generate(self, prompt, system_prompt="You are a helpful AI agent."):
19
- headers = {
20
- "Authorization": f"Bearer {self.api_key}",
21
- "Content-Type": "application/json",
22
- }
23
- payload = {
24
- "model": self.model,
25
- "messages": [
26
- {"role": "system", "content": system_prompt},
27
- {"role": "user", "content": prompt}
28
- ],
29
- "temperature": 0.1,
30
- "max_tokens": 500
31
- }
32
- try:
33
- response = requests.post(f"{self.base_url}/chat/completions", headers=headers, json=payload)
34
- response.raise_for_status()
35
- data = response.json()
36
- return data["choices"][0]["message"]["content"].strip()
37
- except Exception as e:
38
- return f"Error: {e}"
39
 
40
- # ===============================
41
- # 2. GAIA API Loader
42
- # ===============================
43
- GAIA_API_BASE = "https://gaia-benchmark-hf.fly.dev"
44
 
45
- class GAIAAgent:
46
- def __init__(self, llm: OpenRouterLLM):
47
- self.llm = llm
48
- self.questions = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
- def fetch_questions(self):
51
- try:
52
- resp = requests.get(f"{GAIA_API_BASE}/questions", timeout=30)
53
- if resp.status_code == 200:
54
- self.questions = resp.json()
55
- return f" Loaded {len(self.questions)} GAIA questions"
 
 
 
 
 
 
 
56
  else:
57
- return f"⚠️ Failed to fetch questions: {resp.status_code}"
58
- except Exception as e:
59
- return f"⚠️ Error fetching questions: {e}"
 
 
 
60
 
61
- def fetch_random_question(self):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  try:
63
- resp = requests.get(f"{GAIA_API_BASE}/random-question", timeout=10)
64
- if resp.status_code == 200:
65
- return resp.json()
66
- else:
67
- return {}
68
- except:
69
- return {}
 
 
 
70
 
71
- def clean_answer(self, answer: str):
72
- answer = answer.strip()
73
- prefixes = ["Answer:", "Final answer:", "The answer is:"]
74
- for prefix in prefixes:
75
- if answer.lower().startswith(prefix.lower()):
76
- answer = answer[len(prefix):].strip()
77
- return answer.strip()
78
 
79
- def answer_question(self, question_obj):
80
- q = question_obj.get("Question", "")
81
- system_prompt = (
82
- "You are solving GAIA benchmark questions. "
83
- "Provide ONLY the final answer, no reasoning."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  )
85
- raw_answer = self.llm.generate(q, system_prompt)
86
- return self.clean_answer(raw_answer)
87
-
88
- def evaluate_all(self):
89
- if not self.questions:
90
- return {"error": "No questions loaded"}
91
- results = []
92
- correct = 0
93
- for q in self.questions:
94
- expected = str(q.get("Final answer", "")).strip()
95
- answer = self.answer_question(q)
96
- is_correct = answer.strip() == expected
97
- if is_correct:
98
- correct += 1
99
- results.append({
100
- "task_id": q.get("task_id"),
101
- "question": q.get("Question"),
102
- "expected": expected,
103
- "answer": answer,
104
- "correct": is_correct
105
- })
106
- score = (correct / len(results)) * 100 if results else 0
107
- return {"score": score, "results": results, "correct": correct, "total": len(results)}
108
-
109
- def submit_answers(self, username, agent_code, answers):
 
 
 
110
  try:
111
- payload = {
112
- "username": username,
113
- "agent_code": agent_code,
114
- "answers": answers
115
- }
116
- resp = requests.post(f"{GAIA_API_BASE}/submit", json=payload, timeout=60)
117
- if resp.status_code == 200:
118
- return resp.json()
119
- else:
120
- return {"error": f"Submission failed: {resp.status_code}"}
 
 
 
 
 
 
 
 
121
  except Exception as e:
122
- return {"error": str(e)}
123
-
124
- # ===============================
125
- # 3. Gradio UI
126
- # ===============================
127
- llm = OpenRouterLLM()
128
- agent = GAIAAgent(llm)
129
-
130
- def load_questions_ui():
131
- return agent.fetch_questions()
132
-
133
- def test_random_question_ui():
134
- q = agent.fetch_random_question()
135
- if not q:
136
- return "Failed to fetch a random question"
137
- ans = agent.answer_question(q)
138
- return f"Question: {q.get('Question')}\nAnswer: {ans}"
139
-
140
- def run_full_evaluation_ui(username):
141
- if not agent.questions:
142
- return "Please load questions first."
143
- results_data = agent.evaluate_all()
144
- if "error" in results_data:
145
- return results_data["error"]
146
-
147
- answers_payload = [
148
- {"task_id": r["task_id"], "submitted_answer": r["answer"]}
149
- for r in results_data["results"]
150
- ]
151
- agent_code = f"https://huggingface.co/spaces/{username}/Gaia-Test-Agent/tree/main"
152
- submission_result = agent.submit_answers(username, agent_code, answers_payload)
153
- score = submission_result.get("score", 0)
154
- return f"Score: {score}%\nAnswers submitted: {len(answers_payload)}\nLeaderboard info: {submission_result}"
155
-
156
- def manual_test_ui(question_text):
157
- return agent.answer_question({"Question": question_text})
158
-
159
- def build_gradio_app():
160
- with gr.Blocks() as app:
161
- gr.Markdown("# 🤖 GAIA Benchmark Agent")
162
-
163
- with gr.Tab("Load Questions"):
164
- out_load = gr.Textbox(label="Status")
165
- btn_load = gr.Button("Load GAIA Questions")
166
- btn_load.click(load_questions_ui, outputs=out_load)
167
-
168
- with gr.Tab("Random Question Test"):
169
- out_test = gr.Textbox(label="Result", lines=6)
170
- btn_test = gr.Button("Test Random Question")
171
- btn_test.click(test_random_question_ui, outputs=out_test)
172
-
173
- with gr.Tab("Full Evaluation & Submit"):
174
- username_input = gr.Textbox(label="Your HF Username")
175
- out_eval = gr.Textbox(label="Evaluation Result", lines=10)
176
- btn_eval = gr.Button("Run Evaluation & Submit")
177
- btn_eval.click(run_full_evaluation_ui, inputs=username_input, outputs=out_eval)
178
-
179
- with gr.Tab("Manual Test"):
180
- manual_input = gr.Textbox(label="Enter Question")
181
- manual_output = gr.Textbox(label="Agent Answer", lines=4)
182
- manual_btn = gr.Button("Get Answer")
183
- manual_btn.click(manual_test_ui, inputs=manual_input, outputs=manual_output)
184
- return app
185
-
186
- # ===============================
187
- # 4. Main
188
- # ===============================
189
- if __name__ == "__main__":
190
- app = build_gradio_app()
191
- if os.getenv("SPACE_ID"):
192
- app.launch(server_name="0.0.0.0", server_port=7860)
193
- else:
194
- app.launch(share=True)
 
1
  import os
2
+ import re
3
  import json
 
4
  import requests
5
+ import pandas as pd
6
+ from pathlib import Path
7
+ from typing import Optional, Union, Dict, Any, List
8
+ from dotenv import load_dotenv
9
+
10
+ from langgraph.graph import StateGraph, MessagesState
11
+ from langgraph.prebuilt import create_react_agent
12
+ from langchain_core.messages import HumanMessage, SystemMessage
13
+ from langchain_core.tools import tool
14
+ from langchain_openai import ChatOpenAI
15
+
16
+ load_dotenv()
17
+
18
+
19
class OpenRouterLLM(ChatOpenAI):
    """ChatOpenAI client pre-configured for the OpenRouter endpoint.

    The API key is read from the ``OPENROUTER_API_KEY`` environment variable,
    falling back to ``my_key``.
    """

    def __init__(self, model: str = "deepseek/deepseek-v3.1-terminus", **kwargs):
        """Create the client.

        Args:
            model: OpenRouter model identifier.
            **kwargs: Forwarded unchanged to ``ChatOpenAI`` (callers in this
                file pass ``temperature`` and ``max_tokens``).

        Raises:
            ValueError: If neither environment variable holds a key.
        """
        api_key = os.getenv("OPENROUTER_API_KEY") or os.getenv("my_key")
        if not api_key:
            # Fail early with an actionable message instead of handing
            # ``None`` to the client and surfacing an opaque auth error later.
            raise ValueError(
                "Missing OpenRouter API key. "
                "Set OPENROUTER_API_KEY environment variable."
            )

        super().__init__(
            model=model,
            openai_api_key=api_key,
            openai_api_base="https://openrouter.ai/api/v1",
            **kwargs,
        )
 
 
 
 
 
 
 
 
 
 
 
 
31
 
 
 
 
 
32
 
33
@tool
def search_web(query: str) -> str:
    """Search the web using DuckDuckGo for current information."""
    # NOTE(review): @tool presumably exposes the docstring above to the model
    # as the tool description, so its wording is left untouched.
    #
    # Calls the DuckDuckGo JSON endpoint and returns the abstract plus up to
    # three related-topic snippets, one per line. Every failure is reported as
    # a plain string so the calling agent never sees an exception.
    try:
        # Let requests build the query string: interpolating `query` by hand
        # corrupts the request as soon as it contains '&', '#', '+' or
        # non-ASCII characters.
        response = requests.get(
            "https://api.duckduckgo.com/",
            params={
                "q": query,
                "format": "json",
                "no_html": 1,
                "skip_disambig": 1,
            },
            timeout=10,
        )

        if response.status_code != 200:
            return f"Search failed with status code {response.status_code}"

        data = response.json()

        results = []
        if data.get("AbstractText"):
            results.append(f"Abstract: {data['AbstractText']}")

        # Only the first three entries are inspected; entries that are not
        # dicts or carry no "Text" are skipped.
        for topic in (data.get("RelatedTopics") or [])[:3]:
            if isinstance(topic, dict) and topic.get("Text"):
                results.append(f"Related: {topic['Text']}")

        if results:
            return "\n".join(results)
        return f"Search performed for '{query}' but no specific results found."

    except Exception as e:
        # Tool boundary: deliberately best-effort, report instead of raising.
        return f"Search error: {str(e)}"
63
+
64
 
65
@tool
def search_wikipedia(query: str) -> str:
    """Search Wikipedia for factual information."""
    # NOTE(review): @tool presumably exposes the docstring above to the model
    # as the tool description, so its wording is left untouched.
    #
    # Fetches the English Wikipedia REST "page summary" for `query` (treated
    # as a page title) and returns the first 500 characters of its extract.
    # Every failure is reported as a plain string rather than raised.
    from urllib.parse import quote

    try:
        # The title is a single URL path segment: percent-encode everything,
        # including '/', so titles such as "AC/DC" or ones containing '?',
        # '#' or '%' do not alter the request path or query.
        title = quote(query.replace(" ", "_"), safe="")
        search_url = "https://en.wikipedia.org/api/rest_v1/page/summary/" + title
        # NOTE(review): Wikimedia's User-Agent policy may reject the default
        # python-requests agent; consider sending an identifying header.
        response = requests.get(search_url, timeout=10)

        if response.status_code != 200:
            return f"Wikipedia search failed for '{query}'"

        extract = response.json().get("extract", "")
        if extract:
            return f"Wikipedia: {extract[:500]}..."
        return f"Wikipedia page found for '{query}' but no extract available."

    except Exception as e:
        # Tool boundary: deliberately best-effort, report instead of raising.
        return f"Wikipedia search error: {str(e)}"
85
 
86
+
87
+ @tool
88
+ def execute_python(code: str) -> str:
89
+ """Execute Python code and return the result."""
90
+ try:
91
+ # Create a safe execution environment
92
+ safe_globals = {
93
+ '__builtins__': {
94
+ 'print': print,
95
+ 'len': len,
96
+ 'str': str,
97
+ 'int': int,
98
+ 'float': float,
99
+ 'bool': bool,
100
+ 'list': list,
101
+ 'dict': dict,
102
+ 'tuple': tuple,
103
+ 'set': set,
104
+ 'range': range,
105
+ 'sum': sum,
106
+ 'max': max,
107
+ 'min': min,
108
+ 'abs': abs,
109
+ 'round': round,
110
+ 'sorted': sorted,
111
+ 'enumerate': enumerate,
112
+ 'zip': zip,
113
+ },
114
+ 'math': __import__('math'),
115
+ 'json': __import__('json'),
116
+ 'datetime': __import__('datetime'),
117
+ 'random': __import__('random'),
118
+ }
119
+
120
+ # Capture output
121
+ import io
122
+ import sys
123
+
124
+ old_stdout = sys.stdout
125
+ sys.stdout = mystdout = io.StringIO()
126
+
127
  try:
128
+ # Execute the code
129
+ exec(code, safe_globals)
130
+ output = mystdout.getvalue()
131
+ finally:
132
+ sys.stdout = old_stdout
133
+
134
+ return output if output else "Code executed successfully (no output)"
135
+
136
+ except Exception as e:
137
+ return f"Python execution error: {str(e)}"
138
 
 
 
 
 
 
 
 
139
 
140
+ @tool
141
def read_excel_file(file_path: str, sheet_name: Optional[str] = None) -> str:
    """Read an Excel file and return its contents as a formatted string."""
    # NOTE(review): @tool presumably exposes the docstring above to the model
    # as the tool description, so its wording is left untouched.
    #
    # `sheet_name` selects the sheet: omitted -> first sheet, an all-digit
    # string -> that positional index, anything else -> passed through as a
    # sheet name. Sheets with more than 20 rows are abbreviated to their first
    # and last 10 rows. Failures are returned as strings, never raised.
    try:
        if not Path(file_path).exists():
            return f"Error: File not found at {file_path}"

        if sheet_name is None:
            sheet = 0
        elif sheet_name and sheet_name.isdigit():
            sheet = int(sheet_name)
        else:
            sheet = sheet_name

        frame = pd.read_excel(file_path, sheet_name=sheet)
        row_count = len(frame)
        header = f"Excel file with {row_count} rows and {len(frame.columns)} columns:\n\n"

        # Small sheets are rendered in full.
        if row_count <= 20:
            return header + frame.to_string(index=False)

        # Large sheets: head and tail only, with the gap size spelled out.
        pieces = [
            header,
            "First 10 rows:\n",
            frame.head(10).to_string(index=False),
            f"\n\n... ({row_count - 20} rows omitted) ...\n\n",
            "Last 10 rows:\n",
            frame.tail(10).to_string(index=False),
        ]
        return "".join(pieces)

    except Exception as e:
        return f"Error reading Excel file: {str(e)}"
173
+
174
+
175
+ @tool
176
def read_text_file(file_path: str) -> str:
    """Read a text file and return its contents."""
    # NOTE(review): @tool presumably exposes the docstring above to the model
    # as the tool description, so its wording is left untouched.
    #
    # Reads the file once as bytes, picks an encoding, and returns the decoded
    # text prefixed with the encoding that was used. Failures are returned as
    # strings, never raised.
    try:
        path = Path(file_path)
        if not path.exists():
            return f"Error: File not found at {file_path}"

        raw = path.read_bytes()

        # Trust a byte-order mark when present. UTF-16 is attempted ONLY with
        # a BOM: tried blindly, it "successfully" decodes many even-length
        # non-UTF-8 files into garbage.
        if raw.startswith(b"\xef\xbb\xbf"):
            candidates = ["utf-8-sig"]
        elif raw.startswith((b"\xff\xfe", b"\xfe\xff")):
            candidates = ["utf-16"]
        else:
            candidates = []
        # cp1252 must precede iso-8859-1: the latter accepts every byte
        # sequence, so anything listed after it can never be reached.
        candidates += ["utf-8", "cp1252", "iso-8859-1"]

        for encoding in candidates:
            try:
                content = raw.decode(encoding)
            except UnicodeDecodeError:
                continue
            # Normalise line endings the way text-mode reading would.
            content = content.replace("\r\n", "\n").replace("\r", "\n")
            return f"File content ({encoding} encoding):\n\n{content}"

        # Defensive only: iso-8859-1 decodes any input.
        return "Error: Could not decode file with any standard encoding"

    except Exception as e:
        return f"Error reading file: {str(e)}"
198
+
199
+
200
class GaiaAgent:
    """Tool-using agent for GAIA tasks, backed by OpenRouter DeepSeek.

    Construction wires an ``OpenRouterLLM`` and the module's tool functions
    into a prebuilt ReAct agent. Calling the instance with a task id and a
    question runs the agent and returns a cleaned-up answer string.
    """

    def __init__(self):
        """Create the LLM, register the tools and build the agent."""
        print("Initializing GaiaAgent with LangGraph and OpenRouter DeepSeek...")

        # Initialize the LLM
        self.llm = OpenRouterLLM(
            model="deepseek/deepseek-v3.1-terminus",
            temperature=0.1,
            max_tokens=2000,
        )

        # Define available tools
        self.tools = [
            search_web,
            search_wikipedia,
            execute_python,
            read_excel_file,
            read_text_file,
        ]

        # Create the agent.
        # NOTE(review): newer langgraph releases appear to have renamed
        # `state_modifier` to `prompt`; confirm against the pinned version
        # before upgrading.
        self.agent = create_react_agent(
            self.llm,
            self.tools,
            state_modifier=self._get_system_prompt(),
        )

        print("GaiaAgent initialized successfully!")

    def _get_system_prompt(self) -> str:
        """Return the system prompt handed to the agent."""
        return """You are an advanced AI agent designed to answer complex questions that may require:

1. Web searches for current information
2. Mathematical calculations using Python
3. File analysis (Excel, text files)
4. Multi-step reasoning and problem solving

For GAIA evaluation:
- Provide EXACT, DIRECT answers
- Use tools when necessary to gather information or perform calculations
- For math problems, show your calculation but end with just the number
- For yes/no questions, answer just "Yes" or "No"
- For factual questions, provide just the fact

When you encounter files:
- Use read_excel_file for .xlsx, .xls files
- Use read_text_file for text-based files
- Analyze the file content to answer the question

Be thorough in your analysis but concise in your final answer."""

    @staticmethod
    def _message_text(content: object) -> str:
        """Flatten a message's ``content`` into plain text.

        Strings pass through unchanged. For a list, plain strings and dict
        blocks of the form ``{"type": "text", "text": ...}`` are concatenated
        and anything else is ignored. Other values fall back to ``str()``.
        """
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            parts = []
            for block in content:
                if isinstance(block, str):
                    parts.append(block)
                elif (
                    isinstance(block, dict)
                    and block.get("type") == "text"
                    and isinstance(block.get("text"), str)
                ):
                    parts.append(block["text"])
            return "".join(parts)
        return str(content)

    def __call__(self, task_id: str, question: str) -> str:
        """Answer ``question`` and return the cleaned final answer.

        Args:
            task_id: Identifier used only in the progress/error log lines.
            question: The question text sent to the agent as a human message.

        Returns:
            The cleaned answer, or ``"Agent error: ..."`` if anything raised
            (errors are reported as the answer rather than propagated).
        """
        try:
            print(f"Processing task {task_id}: {question[:100]}...")

            # Create the input state
            messages = [HumanMessage(content=question)]

            # Run the agent
            result = self.agent.invoke({"messages": messages})

            # The last message in the returned state is taken as the answer.
            # Its content is flattened first so that a non-string content
            # value does not blow up inside `_clean_answer`.
            final_message = result["messages"][-1]
            answer = self._message_text(final_message.content)

            # Clean up the answer for GAIA evaluation
            clean_answer = self._clean_answer(answer)

            print(f"Agent answer for {task_id}: {clean_answer}")
            return clean_answer

        except Exception as e:
            error_msg = f"Agent error: {str(e)}"
            print(f"Error processing task {task_id}: {error_msg}")
            return error_msg

    def _clean_answer(self, answer: str) -> str:
        """Reduce a raw model reply to the bare answer.

        Steps, in order: keep only the text after the last "Final Answer:"
        marker (case-insensitive); drop one leading boilerplate prefix along
        with the ':' or ',' that follows it; for answers of at most three
        words, strip surrounding quotes and trailing periods.
        """
        answer = answer.strip()

        # re.split returns the whole string as its only element when the
        # marker is absent, so [-1] is correct in both cases.
        answer = re.split(r"final answer:", answer, flags=re.IGNORECASE)[-1].strip()

        # Remove common prefixes (first match only).
        prefixes = [
            "The answer is", "Answer:", "Result:", "Solution:",
            "Based on", "Therefore", "In conclusion", "So the answer is",
        ]
        lowered = answer.lower()
        for prefix in prefixes:
            if lowered.startswith(prefix.lower()):
                # Also drop the punctuation that typically follows the prefix
                # ("The answer is: 5", "Therefore, 5").
                answer = answer[len(prefix):].strip().lstrip(":,").strip()
                break

        # Short answers: strip quotes on both sides but periods only on the
        # right, so a leading decimal point (".5") survives.
        if len(answer.split()) <= 3:
            answer = answer.rstrip("\"'.").lstrip("\"'")

        return answer