Yerma commited on
Commit
481fea5
·
1 Parent(s): 81917a3
Files changed (3) hide show
  1. agent.py +333 -0
  2. app.py +118 -9
  3. requirements.txt +18 -1
agent.py ADDED
@@ -0,0 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import io
4
+ import contextlib
5
+ import requests
6
+ import base64
7
+ import zipfile
8
+ import json
9
+ from typing import TypedDict, Annotated
10
+
11
+ from langgraph.graph import StateGraph, START
12
+ from langgraph.graph.message import add_messages
13
+ from langgraph.prebuilt import ToolNode, tools_condition
14
+
15
+ from langchain_openai import ChatOpenAI
16
+ from langchain_core.messages import AnyMessage, HumanMessage, SystemMessage
17
+ from langchain_core.tools import tool
18
+ from pydantic import BaseModel, Field
19
+
20
+ from dotenv import load_dotenv
21
+ load_dotenv()
22
+
23
+
24
+
25
+
26
# System prompt injected as the first message of every agent run.
# It fixes the tool-usage workflow and — critically — the "FINAL ANSWER: ..."
# output marker that BasicAgent.__call__ parses with a regex downstream.
SYSTEM_PROMPT = """You are a research agent solving questions from the GAIA benchmark.

WORKFLOW:
1. Analyze the question carefully before acting.
2. If the question contains reversed text, reverse it back first using python_executor.
3. If the question references a file (Excel, CSV, Python, etc.), use read_file to read it.
4. If the question references an image file, use analyze_image to look at it.
5. If the question references an audio/mp3 file, use transcribe_audio to get the text.
6. If the question requires math or logic, use python_executor.
7. If the question asks about a YouTube video, first try youtube_transcript. If that fails, use web_search.
8. Use web_search or wikipedia_search for factual questions.
9. If you find a URL that might have the answer, use fetch_webpage to read it.


RULES:
- NEVER call the same tool with the same query twice.
- If a tool fails, try a DIFFERENT approach.
- For math/logic problems with tables, use python_executor to check ALL pairs systematically.

- For math — ALWAYS use python_executor, never calculate in your head.
- Keep search queries short: 2-5 words.
- NEVER say "I cannot access" or "I'm unable to" — always try tools first, then give your best guess.
- For botany questions: bell peppers, corn, green beans, zucchini, tomatoes, pumpkins are botanical FRUITS, not vegetables.


CRITICAL — ANSWER FORMAT:
Your response must end with exactly:
FINAL ANSWER: [your answer]

The answer must be:
- CONCISE: a number, name, date, or short phrase
- EXACT: no extra words like "The answer is..."
- If a number: just the number
- If a name: just the name
- If a list: comma-separated values
"""

# Hard cap on tool invocations per question before the agent is forced to answer.
MAX_TOOL_CALLS = 10
# LangGraph recursion limit passed by the caller (app.py) when invoking the graph.
RECURSION_LIMIT = 40
65
+
66
@tool
def web_search(query: str) -> str:
    """Search the web for current events, facts, people, etc.
    Args:
        query: search query string (keep it short and specific)
    """
    try:
        from langchain_tavily import TavilySearch
        hits = TavilySearch(max_results=3).invoke(query)

        # TavilySearch may hand back either a list of result dicts or a string.
        if not isinstance(hits, list):
            return str(hits)[:5000]

        snippets = [
            f"Source: {hit.get('url', '')}\n{hit.get('content', '')}"
            for hit in hits
        ]
        return "\n\n---\n\n".join(snippets)[:5000]
    except Exception as e:
        return f"Search failed: {e}"
88
+
89
@tool
def wikipedia_search(query: str) -> str:
    """Search Wikipedia for factual information about people, places, history, science.
    Args:
        query: topic to search on Wikipedia
    """
    try:
        from langchain_community.utilities import WikipediaAPIWrapper
        # Two articles, capped at 4000 chars each, keep the context small.
        wrapper = WikipediaAPIWrapper(top_k_results=2, doc_content_chars_max=4000)
        return wrapper.run(query)
    except Exception as e:
        return f"Wikipedia search failed: {e}"
101
+
102
+
103
@tool
def arxiv_search(query: str) -> str:
    """Search academic papers on ArXiv for scientific/research questions.
    Args:
        query: search query for academic papers
    """
    try:
        from langchain_community.document_loaders import ArxivLoader
        papers = ArxivLoader(query=query, load_max_docs=2).load()
        if not papers:
            return "No results found."
        entries = []
        for paper in papers:
            heading = paper.metadata.get("Title", "No title")
            entries.append(f"**{heading}**\n{paper.page_content[:1500]}")
        return "\n\n---\n\n".join(entries)
    except Exception as e:
        return f"ArXiv search failed: {e}"
119
+
120
+
121
@tool
def fetch_webpage(url: str) -> str:
    """Fetch and read content from a URL/webpage.
    Args:
        url: full URL to fetch
    """
    try:
        resp = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=15)
        resp.raise_for_status()

        from bs4 import BeautifulSoup
        soup = BeautifulSoup(resp.text, "html.parser")

        # Drop boilerplate elements that carry no answerable content.
        for element in soup(["script", "style", "nav", "footer", "header"]):
            element.decompose()

        page_text = soup.get_text(separator="\n", strip=True)
        return page_text[:8000]
    except Exception as e:
        return f"Failed to fetch URL: {e}"
141
+
142
# Shared globals dict so variables persist across python_executor calls.
python_state = {
    "__builtins__": __builtins__,
    "import_module": __import__
}

@tool
def python_executor(code: str) -> str:
    """
    Execute Python code with persistent state across calls.
    Use print() to see results. All variables are saved for the next call.
    """
    # Strip Markdown code fences if the model wrapped the code in them.
    # FIX: also match fence openers without a language tag (``` or ```py) —
    # the previous pattern (^```python\n|```$) only removed ```python openers,
    # so bare-fenced code reached exec() with the fence line intact.
    code = re.sub(r'^```[A-Za-z]*[ \t]*\n|```[ \t]*$', '', code, flags=re.MULTILINE)

    output = io.StringIO()
    try:
        with contextlib.redirect_stdout(output):
            # Always exec against the same python_state dict (persistence).
            exec(code, python_state)

        result = output.getvalue().strip()
        if not result:
            return "Code executed successfully, but produced no output. Remember to use print()."
        return result
    except Exception as e:
        return f"Python Error: {str(e)}"
168
+
169
@tool
def read_file(file_path: str) -> str:
    """
    Read content of files: TXT, CSV, JSON, PY, XLSX, PDF, or ZIP.
    For ZIP: lists files inside. For PDF: extracts text.
    For Tables: returns a summary and first 15 rows.
    """
    if not os.path.exists(file_path):
        return f"Error: File '{file_path}' not found."

    ext = file_path.lower().split('.')[-1]

    try:
        # 1. Tables (Excel, CSV)
        if ext in ['xlsx', 'xls', 'csv']:
            import pandas as pd
            df = pd.read_excel(file_path) if ext.startswith('xls') else pd.read_csv(file_path)
            summary = f"Rows: {len(df)}, Columns: {df.columns.tolist()}\n"
            return summary + df.head(15).to_string()

        # 2. PDF (via PyMuPDF / fitz)
        elif ext == 'pdf':
            import fitz
            doc = fitz.open(file_path)
            try:
                text = []
                # Cap at 10 pages. Explicit load_page avoids relying on
                # Document slicing (doc[:10]), which varies across PyMuPDF versions.
                for i in range(min(10, doc.page_count)):
                    text.append(f"--- Page {i+1} ---\n{doc.load_page(i).get_text()}")
                return "\n".join(text)[:15000]
            finally:
                # FIX: the document was never closed before — leaked file handle.
                doc.close()

        # 3. ZIP archives
        elif ext == 'zip':
            with zipfile.ZipFile(file_path, 'r') as z:
                files = z.namelist()
            return f"ZIP Archive contains: {files}. Use python_executor to extract if needed."

        # 4. JSON
        elif ext == 'json':
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            return json.dumps(data, indent=2)[:10000]

        # 5. Plain-text fallback
        else:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                return f.read(15000)  # first 15k characters only

    except Exception as e:
        return f"Error processing file {file_path}: {str(e)}"
217
+
218
+
219
@tool
def analyze_image(image_path: str, question: str) -> str:
    """Analyze an image using GPT-4o vision. Use for photos, charts, chess positions, diagrams.
    Args:
        image_path: path to the image file (png, jpg, etc.)
        question: what you want to know about the image
    """
    try:
        with open(image_path, "rb") as f:
            encoded = base64.b64encode(f.read()).decode("utf-8")

        # Choose a mime type from the file extension, defaulting to PNG.
        mime_map = {
            "png": "image/png",
            "jpg": "image/jpeg",
            "jpeg": "image/jpeg",
            "gif": "image/gif",
            "webp": "image/webp",
        }
        suffix = image_path.lower().split(".")[-1]
        mime_type = mime_map.get(suffix, "image/png")

        from openai import OpenAI
        user_content = [
            {"type": "text", "text": question},
            {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{encoded}"}},
        ]
        response = OpenAI().chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": user_content}],
            max_tokens=1000,
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Image analysis failed: {e}"
253
+
254
+
255
@tool
def transcribe_audio(file_path: str) -> str:
    """Transcribe an audio file (mp3, wav, m4a) to text using OpenAI Whisper.
    Args:
        file_path: path to the audio file
    """
    try:
        from openai import OpenAI
        client = OpenAI()
        with open(file_path, "rb") as audio:
            result = client.audio.transcriptions.create(model="whisper-1", file=audio)
        # Cap the transcript so it fits comfortably in the agent's context.
        return result.text[:8000]
    except Exception as e:
        return f"Transcription failed: {e}"
272
+
273
+
274
# Chat models: the cheap/fast model drives the agent loop.
llm_fast = ChatOpenAI(model="gpt-4o-mini", temperature=0)  # main agent
# NOTE(review): llm_strong is instantiated but not referenced anywhere in this
# module — presumably reserved for future escalation; confirm before removing.
llm_strong = ChatOpenAI(model="gpt-4o", temperature=0)


# Every tool exposed to the agent; consumed both by bind_tools below and by
# the ToolNode built in agent_func().
tools = [
    web_search,
    wikipedia_search,
    python_executor,
    arxiv_search,
    read_file,
    fetch_webpage,
    analyze_image,
    transcribe_audio,
]

# LLM wrapper that can emit tool calls for the tools above.
llm_with_tools = llm_fast.bind_tools(tools)
290
+
291
+
292
class AgentState(TypedDict):
    """Graph state: the running message history of one agent invocation."""
    # add_messages makes LangGraph append new messages instead of replacing the list.
    messages: Annotated[list[AnyMessage], add_messages]
294
+
295
def assistant(state: AgentState):
    """LLM node: respond or request tools; forces an answer once the tool budget is spent."""
    used_tools = sum(msg.type == "tool" for msg in state["messages"])

    if used_tools >= MAX_TOOL_CALLS:
        # Budget exhausted — let the plain (tool-free) model wrap up now.
        force = SystemMessage(
            content="Provide your FINAL ANSWER now. Format: FINAL ANSWER: [answer]."
        )
        reply = llm_fast.invoke(state["messages"] + [force])
    else:
        reply = llm_with_tools.invoke(state["messages"])

    return {"messages": [reply]}
305
+
306
+
307
class FinalAnswer(BaseModel):
    """Structured-output schema for extracting a clean final answer from raw model text."""
    answer: str = Field(description="The exact final answer — concise, no extra words")


# Fallback parser used by app.py when the "FINAL ANSWER:" marker is missing.
answer_extractor = llm_fast.with_structured_output(FinalAnswer)
312
+
313
def agent_func():
    """Build and compile the LangGraph agent: an assistant/tools loop."""
    graph = StateGraph(AgentState)

    # Nodes do the work.
    graph.add_node("assistant", assistant)
    graph.add_node("tools", ToolNode(tools, handle_tool_errors=True))

    # Control flow: start at the assistant; when its latest message carries
    # tool calls, route to the tool node, then loop back for the next turn.
    graph.add_edge(START, "assistant")
    graph.add_conditional_edges("assistant", tools_condition)
    graph.add_edge("tools", "assistant")

    return graph.compile()
app.py CHANGED
@@ -1,24 +1,125 @@
 
1
  import os
2
  import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
 
 
 
 
 
 
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
-
11
  # --- Basic Agent Definition ---
12
  # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
 
13
  class BasicAgent:
14
  def __init__(self):
 
 
 
 
 
 
 
15
  print("BasicAgent initialized.")
16
- def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
24
  Fetches all questions, runs the BasicAgent on them, submits all answers,
@@ -80,12 +181,20 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
82
  try:
83
- submitted_answer = agent(question_text)
84
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
86
  except Exception as e:
87
- print(f"Error running agent on task {task_id}: {e}")
88
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
 
 
89
 
90
  if not answers_payload:
91
  print("Agent did not produce any answers to submit.")
 
1
+
2
  import os
3
  import gradio as gr
4
  import requests
5
  import inspect
6
  import pandas as pd
7
+ from langchain_core.messages import AnyMessage, HumanMessage, SystemMessage
8
+
9
+ from agent import agent_func, SYSTEM_PROMPT, answer_extractor
10
+ import re
11
+ import time
12
+ import csv
13
+ from datetime import datetime
14
 
15
  # (Keep Constants as is)
16
  # --- Constants ---
17
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
+ RECURSION_LIMIT = 40
19
  # --- Basic Agent Definition ---
20
  # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
21
+
22
class BasicAgent:
    """Wraps the LangGraph agent: downloads task attachments, runs the graph,
    extracts a clean final answer, and logs every Q/A pair to a per-session CSV."""

    def __init__(self):
        self.agent = agent_func()
        self.log_file = f"logs_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
        # Create the log file with its header row up front.
        with open(self.log_file, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(["task_id", "question", "raw_answer", "final_answer", "duration_sec", "error"])
        print(f"Logging to {self.log_file}")
        print("BasicAgent initialized.")

    def __call__(self, question: str, task_id: str = None) -> str:
        """Answer one benchmark question.

        Args:
            question: the question text.
            task_id: optional task id used to fetch an attached file.
        Returns:
            The extracted final answer string (never raises).
        """
        # FIX: removed redundant in-function imports (`from agent import ...`,
        # `import time`) that shadowed the identical module-level imports.
        start = time.time()
        error = ""
        raw = ""
        final = ""

        # Download the attached file, if the task has one.
        file_info = ""
        if task_id:
            try:
                file_path = self._download_file(task_id)
                if file_path:
                    file_info = (
                        f"\n\n[Attached file downloaded to: {file_path}. "
                        f"Use the appropriate tool: read_file for text/excel/csv/python, "
                        f"analyze_image for images, transcribe_audio for mp3/wav.]"
                    )
            except Exception as e:
                error = f"File download: {e}"

        messages = [
            SystemMessage(content=SYSTEM_PROMPT),
            HumanMessage(content=question + file_info),
        ]

        try:
            response = self.agent.invoke(
                {"messages": messages},
                config={"recursion_limit": RECURSION_LIMIT},
            )
            raw = response["messages"][-1].content.strip()
        except Exception as e:
            error = str(e)
            raw = f"Error: {e}"

        # Prefer the explicit "FINAL ANSWER:" marker; fall back to structured
        # extraction, and finally to the raw response itself.
        match = re.search(r"FINAL ANSWER:\s*(.+)", raw, re.IGNORECASE | re.DOTALL)
        if match:
            final = match.group(1).strip()
        else:
            try:
                structured = answer_extractor.invoke(
                    f"Question: {question}\nResponse: {raw}\n"
                    f"Extract ONLY the final answer."
                )
                final = structured.answer.strip()
            except Exception:
                final = raw

        duration = round(time.time() - start, 1)

        # Append one log row per question.
        with open(self.log_file, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow([task_id, question[:200], raw[:500], final, duration, error])

        print(f"[{duration}s] Q: {question[:80]}...")
        print(f"  Raw: {raw[:150]}")
        print(f"  Final: {final}")

        return final

    def _download_file(self, task_id: str) -> str:
        """Download the task's attachment to /tmp; return its path, or None if absent."""
        url = f"{DEFAULT_API_URL}/files/{task_id}"
        try:
            resp = requests.get(url, timeout=15)
            print(f"  File request for {task_id}: status={resp.status_code}")
            if resp.status_code != 200:
                print(f"  No file for this task")
                return None
            cd = resp.headers.get("Content-Disposition", "")
            filename = "attached_file"
            if "filename=" in cd:
                filename = cd.split("filename=")[-1].strip('"').strip("'")
            # FIX: basename() guards against path traversal via a
            # server-supplied filename like "../../etc/passwd".
            filename = os.path.basename(filename) or "attached_file"

            file_path = os.path.join("/tmp", filename)
            with open(file_path, "wb") as f:
                f.write(resp.content)

            print(f"Downloaded: {file_path} ({len(resp.content)} bytes)")
            return file_path
        except Exception as e:
            print(f"File download error: {e}")
            return None
+
123
  def run_and_submit_all( profile: gr.OAuthProfile | None):
124
  """
125
  Fetches all questions, runs the BasicAgent on them, submits all answers,
 
181
  print(f"Skipping item with missing task_id or question: {item}")
182
  continue
183
  try:
184
+ submitted_answer = agent(question_text, task_id=task_id)
185
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
186
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
187
+ time.sleep(3) # пауза между вопросами чтобы не упереться в лимит
188
  except Exception as e:
189
+ print(f"Error on task {task_id}: {e}")
190
+ time.sleep(5) # больше пауза после ошибки
191
+ # Retry once
192
+ try:
193
+ submitted_answer = agent(question_text, task_id=task_id)
194
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
195
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
196
+ except Exception as e2:
197
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e2}"})
198
 
199
  if not answers_payload:
200
  print("Agent did not produce any answers to submit.")
requirements.txt CHANGED
@@ -1,2 +1,19 @@
1
  gradio
2
- requests
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  gradio
2
+ requests
3
+ langchain
4
+ langchain-community
5
+ langchain-core
6
+ langchain-openai
7
+ langchain-google-genai
8
+ langchain-huggingface
9
+ langchain-groq
10
+ langchain-tavily
11
+ langgraph
12
+ huggingface_hub
13
+ supabase
14
+ arxiv
15
+ pymupdf
16
+ wikipedia
17
+ pgvector
18
+ python-dotenv
19
+ gradio[oauth]