naclfish Claude Sonnet 4.6 committed on
Commit
2628a0b
·
1 Parent(s): 81917a3

Add tools/ folder, fix agent answer format and Wikipedia proxy

Browse files

- Add tools/ with web_search (Serper), wikipedia_search, python_repl, file_handler
- Fix Wikipedia 403 by adding User-Agent header
- Fix answer format: enforce ENTIRE response = only final answer
- Fix list output: comma-separated, no Python brackets
- Add [Task ID] prefix to questions so agent uses correct task_id
- Increase max iterations from 10 to 15
- Add .gitignore to protect .env, data/, __pycache__

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ .env
2
+ __pycache__/
3
+ *.pyc
4
+ *.pyo
5
+ data/
6
+ 记忆.md
app.py CHANGED
@@ -1,23 +1,150 @@
1
  import os
 
 
 
2
  import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
 
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  # --- Basic Agent Definition ---
12
  # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
  class BasicAgent:
14
  def __init__(self):
15
- print("BasicAgent initialized.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  def __call__(self, question: str) -> str:
17
  print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
@@ -51,7 +178,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
51
  # 2. Fetch Questions
52
  print(f"Fetching questions from: {questions_url}")
53
  try:
54
- response = requests.get(questions_url, timeout=15)
55
  response.raise_for_status()
56
  questions_data = response.json()
57
  if not questions_data:
@@ -80,7 +207,13 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
82
  try:
83
- submitted_answer = agent(question_text)
 
 
 
 
 
 
84
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
86
  except Exception as e:
@@ -99,7 +232,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
99
  # 5. Submit
100
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
101
  try:
102
- response = requests.post(submit_url, json=submission_data, timeout=60)
103
  response.raise_for_status()
104
  result_data = response.json()
105
  final_status = (
 
1
  import os
2
+ import json
3
+ from dotenv import load_dotenv
4
+ load_dotenv()
5
  import gradio as gr
6
  import requests
7
  import inspect
8
  import pandas as pd
9
+ from tools import web_search, wikipedia_search, python_repl, download_and_read_file
10
+ from tools.file_handler import prefetch_file
11
 
12
  # (Keep Constants as is)
13
  # --- Constants ---
14
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
 
16
+ TOOL_FUNCTIONS = {
17
+ "web_search": web_search,
18
+ "wikipedia_search": wikipedia_search,
19
+ "python_repl": python_repl,
20
+ "download_and_read_file": download_and_read_file,
21
+ }
22
+
23
+ TOOL_SCHEMAS = [
24
+ {
25
+ "type": "function",
26
+ "function": {
27
+ "name": "web_search",
28
+ "description": "Search Google for current information. Use for factual questions, recent events, or any topic requiring web search.",
29
+ "parameters": {
30
+ "type": "object",
31
+ "properties": {"query": {"type": "string", "description": "Search query"}},
32
+ "required": ["query"],
33
+ },
34
+ },
35
+ },
36
+ {
37
+ "type": "function",
38
+ "function": {
39
+ "name": "wikipedia_search",
40
+ "description": "Search Wikipedia for encyclopedic or historical information about a topic.",
41
+ "parameters": {
42
+ "type": "object",
43
+ "properties": {"query": {"type": "string", "description": "Topic to search"}},
44
+ "required": ["query"],
45
+ },
46
+ },
47
+ },
48
+ {
49
+ "type": "function",
50
+ "function": {
51
+ "name": "python_repl",
52
+ "description": "Execute Python code for math, calculations, data analysis, or logic. Use print() to output results.",
53
+ "parameters": {
54
+ "type": "object",
55
+ "properties": {"code": {"type": "string", "description": "Python code to execute"}},
56
+ "required": ["code"],
57
+ },
58
+ },
59
+ },
60
+ {
61
+ "type": "function",
62
+ "function": {
63
+ "name": "download_and_read_file",
64
+ "description": "Download and read a file attachment (CSV, Excel, text) for a given task_id from the question.",
65
+ "parameters": {
66
+ "type": "object",
67
+ "properties": {"task_id": {"type": "string", "description": "The task_id of the question"}},
68
+ "required": ["task_id"],
69
+ },
70
+ },
71
+ },
72
+ ]
73
+
74
+ SYSTEM_PROMPT = (
75
+ "You are a precise research assistant. Solve each question step by step using tools.\n\n"
76
+
77
+ "STRICT RULES:\n"
78
+ "1. NEVER search for 'GAIA benchmark', 'GAIA answer', 'HuggingFace discussion', or any meta-search for pre-solved answers. Solve the problem yourself.\n"
79
+ "2. For ANY text manipulation (reversing, encoding, counting characters, etc.), ALWAYS use python_repl — never guess by eye.\n"
80
+ "3. Keep search queries SHORT and targeted (under 8 words). Never enumerate values (e.g. years) in one query.\n"
81
+ "4. If you have an [Attached file content] section, read it directly — do NOT call download_and_read_file again.\n"
82
+ "5. OUTPUT FORMAT: Your ENTIRE response must be ONLY the final answer — no explanation, no reasoning, no 'The answer is', no preamble. A single word, number, or comma-separated list. Nothing else.\n"
83
+ "6. Numbers: digits only (e.g. '42', not 'forty-two'). Names: as they appear in the source.\n"
84
+ "7. If a question involves reversing or encoding text, use python_repl to decode it first before reasoning.\n"
85
+ "8. Lists: output as plain comma-separated values (e.g. 'apple, banana, cherry') — NO brackets, NO quotes, NO Python syntax.\n"
86
+ "9. If the question has a [Task ID: xxx] prefix, use that exact value when calling download_and_read_file.\n"
87
+ "10. If an attached file is audio/image/video (marked [UNSUPPORTED]), do NOT call download_and_read_file — use web_search to find the answer instead.\n"
88
+ )
89
+
90
  # --- Basic Agent Definition ---
91
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
92
class BasicAgent:
    """Tool-calling agent backed by a DeepSeek chat-completions endpoint.

    Runs an OpenAI-style tool loop: the model may request tools (web search,
    Wikipedia, Python REPL, file reader) via TOOL_SCHEMAS; each tool result
    is appended as a "tool" message and the loop continues until the model
    emits a plain final answer or the iteration cap is hit.
    """

    def __init__(self):
        # Credentials and model name come from the environment
        # (.env is loaded at module import via load_dotenv()).
        self.api_key = os.getenv("DEEPSEEK_API_KEY")
        self.model = os.getenv("DEEPSEEK_MODEL", "deepseek-chat")
        self.api_url = "https://www.ggwk1.online/v1/chat/completions"
        print("BasicAgent initialized with DeepSeek (native HTTP).")

    def _call_llm(self, messages: list) -> dict:
        """POST one chat-completion request and return the parsed JSON body.

        Raises:
            requests.HTTPError: on a non-2xx response (via raise_for_status).
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": self.model,
            "messages": messages,
            "tools": TOOL_SCHEMAS,
            "tool_choice": "auto",
            "temperature": 0,  # deterministic output for benchmark scoring
        }
        response = requests.post(
            self.api_url, headers=headers, json=payload, timeout=60,
            proxies={"http": None, "https": None}  # bypass any system proxy
        )
        response.raise_for_status()
        return response.json()

    def __call__(self, question: str) -> str:
        """Answer *question*, iterating tool calls for up to 15 rounds.

        Returns the model's final text answer, or a fallback string if the
        iteration cap is reached without one.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": question},
        ]
        for _ in range(15):  # max iterations
            result = self._call_llm(messages)
            choice = result["choices"][0]
            message = choice["message"]
            messages.append(message)

            if choice["finish_reason"] == "tool_calls":
                for tool_call in message.get("tool_calls", []):
                    fn_name = tool_call["function"]["name"]
                    fn_args = json.loads(tool_call["function"]["arguments"])
                    print(f" -> Tool: {fn_name}({fn_args})")
                    try:
                        tool_result = TOOL_FUNCTIONS[fn_name](**fn_args)
                    except Exception as e:
                        # Feed the failure back to the model (e.g. unknown
                        # tool name or bad arguments) instead of aborting
                        # the whole question with an exception.
                        tool_result = f"Tool error: {e}"
                    print(f" <- Result (first 200): {str(tool_result)[:200]}")
                    messages.append({
                        "role": "tool",
                        "tool_call_id": tool_call["id"],
                        "content": str(tool_result),
                    })
            else:
                # The API may return content: null; coerce to "" so we never
                # submit the literal string "None" as an answer.
                answer = message.get("content") or ""
                print(f"Agent answer: {answer}")
                return answer

        return "Max iterations reached without a final answer."
148
 
149
  def run_and_submit_all( profile: gr.OAuthProfile | None):
150
  """
 
178
  # 2. Fetch Questions
179
  print(f"Fetching questions from: {questions_url}")
180
  try:
181
+ response = requests.get(questions_url, timeout=15, proxies={"http": None, "https": None})
182
  response.raise_for_status()
183
  questions_data = response.json()
184
  if not questions_data:
 
207
  print(f"Skipping item with missing task_id or question: {item}")
208
  continue
209
  try:
210
+ # Pre-fetch file attachment and embed content directly in question
211
+ file_content = prefetch_file(task_id)
212
+ if file_content:
213
+ full_question = f"[Task ID: {task_id}]\n{question_text}\n\n[Attached file content]:\n{file_content}"
214
+ else:
215
+ full_question = f"[Task ID: {task_id}]\n{question_text}"
216
+ submitted_answer = agent(full_question)
217
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
218
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
219
  except Exception as e:
 
232
  # 5. Submit
233
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
234
  try:
235
+ response = requests.post(submit_url, json=submission_data, timeout=60, proxies={"http": None, "https": None})
236
  response.raise_for_status()
237
  result_data = response.json()
238
  final_status = (
requirements.txt CHANGED
@@ -1,2 +1,5 @@
1
  gradio
2
- requests
 
 
 
 
1
  gradio
2
+ requests
3
+ python-dotenv
4
+ pandas
5
+ openpyxl
test_agent.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Quick test script to debug a single question without launching Gradio.
Usage: python test_agent.py
"""
import os
import json
from dotenv import load_dotenv
load_dotenv()  # load DEEPSEEK_API_KEY etc. from .env BEFORE importing app

from app import BasicAgent
from tools.file_handler import prefetch_file

# Constructing the agent only reads env vars; no network call happens yet.
agent = BasicAgent()

# --- Edit these to test any question ---
task_id = "test-task-id" # replace with real task_id if needed
question = 'If we reverse the word "tfel", what is the antonym of the result?'
# ----------------------------------------

# Mirror run_and_submit_all in app.py: prefetch any attachment and embed
# its parsed content directly into the prompt.
file_content = prefetch_file(task_id)
if file_content:
    full_question = f"{question}\n\n[Attached file content]:\n{file_content}"
    print(f"[File found and attached, length={len(file_content)}]")
else:
    full_question = question
    print("[No file attachment]")

print(f"\nQuestion: {full_question[:200]}\n")
answer = agent(full_question)
print(f"\n=== Final Answer ===\n{answer}")
tools/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from .search import web_search
2
+ from .wikipedia import wikipedia_search
3
+ from .calculator import python_repl
4
+ from .file_handler import download_and_read_file
5
+
6
+ __all__ = ["web_search", "wikipedia_search", "python_repl", "download_and_read_file"]
tools/_session.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
import requests

# Shared session that ignores ALL system proxy environment variables.
# trust_env=False makes requests skip environment-based configuration
# (HTTP(S)_PROXY and similar), so calls to the scoring API go out directly.
_session = requests.Session()
_session.trust_env = False
tools/calculator.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import io
3
+ import traceback
4
+
5
+
6
def python_repl(code: str) -> str:
    """Execute a Python snippet and return its captured output.

    Args:
        code: Python source to run (use print() to produce output).

    Returns:
        Captured stdout; stdout+stderr if anything was written to stderr;
        a fixed success message when there is no output; or a traceback
        string prefixed with "Error:" when the snippet raises.
    """
    stdout_capture = io.StringIO()
    stderr_capture = io.StringIO()
    # One shared dict used as BOTH globals and locals. With separate dicts
    # (the original passed a distinct local_vars), a function defined in the
    # snippet could not call a sibling function defined alongside it: its
    # __globals__ would be the bare globals dict, which lacks the sibling.
    namespace = {"__builtins__": __builtins__}
    # Remember the *current* streams rather than restoring sys.__stdout__,
    # so the tool behaves correctly even if stdout was already redirected.
    old_stdout, old_stderr = sys.stdout, sys.stderr
    try:
        sys.stdout = stdout_capture
        sys.stderr = stderr_capture
        exec(compile(code, "<string>", "exec"), namespace)
    except Exception:
        return f"Error:\n{traceback.format_exc()}"
    finally:
        sys.stdout = old_stdout
        sys.stderr = old_stderr
    output = stdout_capture.getvalue()
    err = stderr_capture.getvalue()
    if err:
        return f"Stderr:\n{err}\nStdout:\n{output}"
    return output if output else "Code executed successfully (no output)."
tools/file_handler.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import pandas as pd
4
+ from tools._session import _session
5
+
6
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
7
+ DATA_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data")
8
+ os.makedirs(DATA_DIR, exist_ok=True)
9
+
10
+
11
+ def _parse_file(file_path: str, content_bytes: bytes, ext: str) -> str:
12
+ """Parse file content based on extension and return as string."""
13
+ try:
14
+ if ext == ".csv":
15
+ df = pd.read_csv(file_path)
16
+ return f"CSV file ({len(df)} rows, {len(df.columns)} columns):\n{df.to_string(index=False)}"
17
+ elif ext in (".xlsx", ".xls"):
18
+ # Read all sheets
19
+ xl = pd.ExcelFile(file_path)
20
+ parts = []
21
+ for sheet in xl.sheet_names:
22
+ df = xl.parse(sheet)
23
+ parts.append(f"Sheet '{sheet}' ({len(df)} rows, {len(df.columns)} columns):\n{df.to_string(index=False)}")
24
+ return "\n\n".join(parts)
25
+ elif ext in (".py", ".txt", ".md", ".json", ".xml", ".html", ""):
26
+ return f"File contents:\n{content_bytes.decode('utf-8', errors='replace')[:5000]}"
27
+ else:
28
+ try:
29
+ return f"File contents:\n{content_bytes.decode('utf-8', errors='replace')[:5000]}"
30
+ except Exception:
31
+ return f"Binary file, cannot display as text. Size: {len(content_bytes)} bytes."
32
+ except Exception as e:
33
+ return f"Failed to parse file: {e}"
34
+
35
+
36
def prefetch_file(task_id: str) -> str | None:
    """Download (or load from cache) the attachment for *task_id*.

    Returns:
        Parsed file content as a string, or None when the task has no
        attachment (HTTP 404) or the download fails. Downloads are cached
        in the data/ directory keyed by task_id.
    """
    # Cache hit: match the exact task_id stem, not a bare prefix. The
    # original used f.startswith(task_id), so a task_id that happens to be
    # a prefix of another could return the wrong cached file.
    for fname in os.listdir(DATA_DIR):
        if os.path.splitext(fname)[0] == task_id:
            file_path = os.path.join(DATA_DIR, fname)
            ext = os.path.splitext(fname)[-1].lower()
            with open(file_path, "rb") as f:
                content_bytes = f.read()
            return _parse_file(file_path, content_bytes, ext)

    file_url = f"{DEFAULT_API_URL}/files/{task_id}"
    try:
        response = _session.get(file_url, timeout=30)
        if response.status_code == 404:
            # 404 means "this task has no attachment" — not an error.
            return None
        response.raise_for_status()
    except Exception:
        # Best-effort fetch: any network/HTTP failure is treated as
        # "no attachment" so the caller can proceed without a file.
        return None

    # Determine the extension: prefer the Content-Disposition filename,
    # then fall back to sniffing the Content-Type.
    ext = ""
    cd = response.headers.get("content-disposition", "")
    if "filename=" in cd:
        fname = cd.split("filename=")[-1].strip().strip('"')
        ext = os.path.splitext(fname)[-1].lower()
    if not ext:
        content_type = response.headers.get("content-type", "")
        if "csv" in content_type:
            ext = ".csv"
        elif "excel" in content_type or "spreadsheet" in content_type or "openxmlformats" in content_type:
            ext = ".xlsx"
        elif "text" in content_type:
            ext = ".txt"

    # Persist so later calls (and the agent's download tool) reuse it.
    file_path = os.path.join(DATA_DIR, f"{task_id}{ext}")
    with open(file_path, "wb") as f:
        f.write(response.content)

    return _parse_file(file_path, response.content, ext)
81
+
82
+
83
def download_and_read_file(task_id: str) -> str:
    """Download and read a file attachment for a given task_id.

    Delegates to prefetch_file (which handles caching and parsing) and
    maps its "no attachment" None result to a message the LLM can read.
    Supports CSV, Excel (.xlsx/.xls), and plain text files.
    """
    content = prefetch_file(task_id)
    if content is not None:
        return content
    return "No file attachment found for this task."
tools/search.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from tools._session import _session
4
+
5
+
6
def web_search(query: str) -> str:
    """Search Google via the Serper API for current information.

    Args:
        query: Search query string.

    Returns:
        A formatted string with an optional "Direct answer" line plus up to
        five organic results, "No results found." when empty, or an
        error/diagnostic message.
    """
    api_key = os.getenv("SERPER_API_KEY")
    if not api_key:
        return "SERPER_API_KEY not set."
    try:
        # json= lets requests serialize the payload and set the
        # Content-Type header itself (replaces manual json.dumps + header).
        response = _session.post(
            "https://google.serper.dev/search",
            headers={"X-API-KEY": api_key},
            json={"q": query, "num": 5},
            timeout=15,
        )
        response.raise_for_status()
        data = response.json()

        parts = []
        # Serper's "answer box" often carries a direct answer — surface it
        # first so the LLM sees it before the organic snippets.
        if "answerBox" in data:
            ab = data["answerBox"]
            answer = ab.get("answer") or ab.get("snippet") or ""
            if answer:
                parts.append(f"Direct answer: {answer}")

        for r in data.get("organic", [])[:5]:
            title = r.get("title", "")
            link = r.get("link", "")
            snippet = r.get("snippet", "")
            parts.append(f"Title: {title}\nURL: {link}\nSnippet: {snippet}")

        return "\n---\n".join(parts) if parts else "No results found."
    except Exception as e:
        return f"Search error: {e}"
tools/wikipedia.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+
4
+ # Wikipedia needs the local proxy to be accessible from China
5
+ _PROXY = os.getenv("LOCAL_PROXY", "http://127.0.0.1:7890")
6
+ _wiki_session = requests.Session()
7
+ _wiki_session.proxies = {"http": _PROXY, "https": _PROXY}
8
+ _wiki_session.trust_env = False
9
+ _wiki_session.headers.update({
10
+ "User-Agent": "Mozilla/5.0 (compatible; ResearchAgent/1.0; educational use)"
11
+ })
12
+
13
+
14
def wikipedia_search(query: str) -> str:
    """Search Wikipedia and return the top article's plain-text content.

    Args:
        query: Topic to search for.

    Returns:
        "Wikipedia: <title>\\nURL: <url>\\n\\n<text>" (text capped at 3000
        chars), or an explanatory error message.
    """
    search_url = "https://en.wikipedia.org/w/api.php"
    try:
        # Step 1: find the best-matching page title.
        search_resp = _wiki_session.get(search_url, timeout=15, params={
            "action": "query",
            "list": "search",
            "srsearch": query,
            "srlimit": 3,
            "format": "json",
        })
        search_resp.raise_for_status()
        results = search_resp.json().get("query", {}).get("search", [])
        if not results:
            return "No Wikipedia articles found."

        # Step 2: fetch the full plain-text extract of the top result.
        # NOTE: MediaWiki boolean params count as TRUE merely by being
        # present in the request. The original passed exintro=False, which
        # the API read as exintro enabled and returned only the intro —
        # the opposite of the intent. Omitting the param fetches the full
        # article text.
        title = results[0]["title"]
        content_resp = _wiki_session.get(search_url, timeout=15, params={
            "action": "query",
            "titles": title,
            "prop": "extracts",
            "explaintext": True,
            "format": "json",
        })
        content_resp.raise_for_status()
        pages = content_resp.json().get("query", {}).get("pages", {})
        page = next(iter(pages.values()))
        text = page.get("extract", "")
        if not text:
            return f"Wikipedia article '{title}' has no extractable content."
        url = f"https://en.wikipedia.org/wiki/{title.replace(' ', '_')}"
        return f"Wikipedia: {title}\nURL: {url}\n\n{text[:3000]}"
    except Exception as e:
        return f"Wikipedia search failed: {e}"