dramella commited on
Commit
c1b893b
·
1 Parent(s): 81917a3

basic agent implementation

Browse files
Files changed (6) hide show
  1. .gitignore +5 -0
  2. agent.py +146 -0
  3. app.py +19 -9
  4. metadata.jsonl +0 -0
  5. requirements.txt +152 -2
  6. tools.py +510 -0
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ .env
2
+ *.ipynb
3
+ .venv
4
+ files
5
+ __pycache__/
agent.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.chat_models import init_chat_model
2
+ from urllib.parse import urlparse
3
+ import os
4
+ import base64
5
+ import pandas as pd
6
+ import requests
7
+ from io import BytesIO, StringIO
8
+ from typing import Annotated
9
+ from tools import *
10
+ from typing_extensions import TypedDict
11
+
12
+ from langgraph.graph import StateGraph, START, END
13
+ from langgraph.graph.message import add_messages
14
+ from langchain_core.messages import SystemMessage
15
+
16
+ from langgraph.prebuilt import ToolNode, tools_condition
17
+
18
+ SUPPORTING_FILES_URL = "https://huggingface.co/datasets/gaia-benchmark/GAIA/resolve/main/2023/validation/"
19
+
20
+ system_prompt = """You are a general AI assistant. I will ask you a question.
21
+
22
+ You must:
23
+ 1. Think step-by-step (invisibly to the user).
24
+ 2. End your visible answer with the final answer only — nothing else.
25
+
26
+ Rules for the final answer:
27
+ - If the answer is a number:
28
+ • No commas in the number.
29
+ • No units (e.g., $, %, km) unless the question explicitly asks for them.
30
+ - If the answer is a string:
31
+ • No articles ("a", "an", "the").
32
+ • No abbreviations (e.g., for city names).
33
+ • Write digits as plain words unless instructed otherwise.
34
+ - If the answer is a comma-separated list:
35
+ • Apply the above rules individually to each element.
36
+
37
+ IMPORTANT:
38
+ - Do not add any extra words before or after the final answer.
39
+ - Do not explain your reasoning to the user — keep it hidden.
40
+ - The output must be exactly the final answer following the above rules.
41
+
42
+ Examples:
43
+ Q: Who wrote the novel 1984?
44
+ A: George Orwell
45
+
46
+ Q: How many plays did Shakespeare write?
47
+ A: 38
48
+ """
49
+
50
+
51
class State(TypedDict):
    """LangGraph conversation state: chat history plus optional uploaded-file info."""
    # add_messages is a reducer: node return values are appended to the
    # history rather than replacing it.
    messages: Annotated[list, add_messages]
    # Original file name — used to pick a handler by extension.
    uploaded_filename: str
    # Local path or URL of the uploaded file (empty/absent when no file).
    uploaded_file: str
55
+
56
+ def _is_url(path_or_url: str) -> bool:
57
+ try:
58
+ result = urlparse(path_or_url)
59
+ return result.scheme in ("http", "https")
60
+ except:
61
+ return False
62
+
63
+ def _process_uploaded_file(file_name: str, file_path: str) -> str:
64
+ """Process a single local file or file URL and return context for the question."""
65
+ try:
66
+ if _is_url(file_path):
67
+ response = requests.get(file_path)
68
+ response.raise_for_status()
69
+
70
+ file_ext = os.path.splitext(file_name)[1].lower()
71
+ content_bytes = response.content
72
+
73
+ if file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']:
74
+ return f"[UPLOADED IMAGE: {file_name}] - URL: {file_path}"
75
+
76
+ elif file_ext in ['.txt', '.md', '.py', '.js', '.html', '.css', '.json', '.xml']:
77
+ content_text = content_bytes.decode('utf-8')
78
+ return f"[Code Content:\n{content_text}"
79
+
80
+ elif file_ext == '.csv':
81
+ df = pd.read_csv(StringIO(content_bytes.decode('utf-8')))
82
+ return f"[UPLOADED CSV FILE: {file_name}] : {df}"
83
+
84
+ elif file_ext in ['.xlsx', '.xls']:
85
+ df = pd.read_excel(BytesIO(content_bytes))
86
+ return f"[EXCEL FILE DATAFRAME: {df}"
87
+
88
+ else:
89
+ return f"[UPLOADED FILE: {file_name}] - URL: {file_path}"
90
+
91
+ except Exception as e:
92
+ print(f"Error processing file {file_path}: {e}")
93
+ return f"[ERROR PROCESSING FILE: {os.path.basename(file_path)}] - {str(e)}"
94
+
95
def build_and_compile():
    """Assemble the tool-calling chat graph and return the compiled LangGraph app."""
    tools = [
        web_search, wiki_search, academic_search, python_code, image_info,
        read_mp3_transcript, pdf_text_extractor, ocr_image, math_solver,
        plot_data_tool, unit_converter, date_time_calculator, api_request_tool,
        html_table_extractor, multiply, add, subtract, divide, modulus,
        power, square_root,
    ]

    llm_with_tools = init_chat_model(
        "openai:gpt-4.1-mini", temperature=0, seed=42
    ).bind_tools(tools)

    def chatbot(state: State):
        # Inject uploaded-file context (if any) into the system prompt.
        context = ""
        if state.get("uploaded_file"):
            context = "\n\nAdditional file context:\n" + _process_uploaded_file(
                file_name=state["uploaded_filename"], file_path=state["uploaded_file"]
            )
        reply = llm_with_tools.invoke(
            [SystemMessage(system_prompt + context)] + state["messages"]
        )
        return {"messages": [reply]}

    builder = StateGraph(State)
    builder.add_node("chatbot", chatbot)
    builder.add_node("tools", ToolNode(tools=tools))
    builder.add_edge(START, "chatbot")
    # Route to "tools" when the LLM requested a tool call, otherwise END.
    builder.add_conditional_edges("chatbot", tools_condition)
    # After every tool execution, hand control back to the chatbot.
    builder.add_edge("tools", "chatbot")
    return builder.compile()
app.py CHANGED
@@ -3,6 +3,7 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
@@ -12,12 +13,19 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
13
  class BasicAgent:
14
  def __init__(self):
15
- print("BasicAgent initialized.")
16
- def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
 
 
 
 
 
 
 
21
 
22
  def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
@@ -76,11 +84,13 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
76
  for item in questions_data:
77
  task_id = item.get("task_id")
78
  question_text = item.get("question")
 
 
79
  if not task_id or question_text is None:
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
82
  try:
83
- submitted_answer = agent(question_text)
84
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
86
  except Exception as e:
@@ -91,7 +101,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
91
  print("Agent did not produce any answers to submit.")
92
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
93
 
94
- # 4. Prepare Submission
95
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
96
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
97
  print(status_update)
@@ -193,4 +203,4 @@ if __name__ == "__main__":
193
  print("-"*(60 + len(" App Starting ")) + "\n")
194
 
195
  print("Launching Gradio Interface for Basic Agent Evaluation...")
196
- demo.launch(debug=True, share=False)
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ from agent import build_and_compile
7
 
8
  # (Keep Constants as is)
9
  # --- Constants ---
 
13
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
14
class BasicAgent:
    """Thin callable wrapper around the compiled LangGraph agent."""

    def __init__(self):
        # Build the graph once; each __call__ reuses it.
        self.graph = build_and_compile()

    def __call__(self, question: str, file_name: str = "", file_path: str = "", debug: bool = False) -> str:
        """Answer *question*, optionally with an attached file.

        Returns the final answer text, or the full message history when
        *debug* is True.
        """
        # Build the state once and only attach file fields when a file exists,
        # instead of duplicating the whole dict in two branches.
        state = {"messages": [{"role": "user", "content": question}]}
        if file_path:  # idiomatic truthiness instead of `!= ""`
            state["uploaded_filename"] = file_name
            state["uploaded_file"] = file_path

        messages = self.graph.invoke(state, {"recursion_limit": 100})["messages"]
        if debug:  # was `debug == True` — compare truthiness, not identity to True
            return messages
        return messages[-1].content
29
 
30
  def run_and_submit_all( profile: gr.OAuthProfile | None):
31
  """
 
84
  for item in questions_data:
85
  task_id = item.get("task_id")
86
  question_text = item.get("question")
87
+ file_name = item.get("file_name")
88
+ file_path = f'https://agents-course-unit4-scoring.hf.space/files/{task_id}'
89
  if not task_id or question_text is None:
90
  print(f"Skipping item with missing task_id or question: {item}")
91
  continue
92
  try:
93
+ submitted_answer = agent(question=question_text, file_name=file_name, file_path=file_path)
94
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
95
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
96
  except Exception as e:
 
101
  print("Agent did not produce any answers to submit.")
102
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
103
 
104
+ # 4. Prepare Submission
105
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
106
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
107
  print(status_update)
 
203
  print("-"*(60 + len(" App Starting ")) + "\n")
204
 
205
  print("Launching Gradio Interface for Basic Agent Evaluation...")
206
+ demo.launch(debug=True, share=False)
metadata.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -1,2 +1,152 @@
1
- gradio
2
- requests
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==24.1.0
2
+ aiohappyeyeballs==2.6.1
3
+ aiohttp==3.12.15
4
+ aiosignal==1.4.0
5
+ annotated-types==0.7.0
6
+ anyio==4.10.0
7
+ appnope==0.1.4
8
+ arxiv==2.2.0
9
+ asttokens==3.0.0
10
+ async-timeout==4.0.3
11
+ attrs==25.3.0
12
+ beautifulsoup4==4.13.4
13
+ Brotli==1.1.0
14
+ certifi==2025.8.3
15
+ cffi==1.17.1
16
+ charset-normalizer==3.4.3
17
+ click==8.2.1
18
+ comm==0.2.3
19
+ contourpy==1.3.2
20
+ cryptography==45.0.6
21
+ cycler==0.12.1
22
+ dataclasses-json==0.6.7
23
+ datasets==2.14.6
24
+ debugpy==1.8.16
25
+ decorator==5.2.1
26
+ dill==0.3.7
27
+ distro==1.9.0
28
+ et_xmlfile==2.0.0
29
+ exceptiongroup==1.3.0
30
+ executing==2.2.0
31
+ fastapi==0.116.1
32
+ feedparser==6.0.11
33
+ ffmpy==0.6.1
34
+ filelock==3.18.0
35
+ flexcache==0.3
36
+ flexparser==0.4
37
+ fonttools==4.59.0
38
+ frozenlist==1.7.0
39
+ fsspec==2023.10.0
40
+ gradio==5.42.0
41
+ gradio_client==1.11.1
42
+ greenlet==3.2.4
43
+ groovy==0.1.2
44
+ h11==0.16.0
45
+ hf-xet==1.1.7
46
+ httpcore==1.0.9
47
+ httpx==0.28.1
48
+ httpx-sse==0.4.1
49
+ huggingface-hub==0.34.4
50
+ idna==3.10
51
+ ipykernel==6.30.1
52
+ ipython==8.37.0
53
+ jedi==0.19.2
54
+ Jinja2==3.1.6
55
+ jiter==0.10.0
56
+ jsonpatch==1.33
57
+ jsonpointer==3.0.0
58
+ jupyter_client==8.6.3
59
+ jupyter_core==5.8.1
60
+ kiwisolver==1.4.9
61
+ langchain==0.3.27
62
+ langchain-community==0.3.27
63
+ langchain-core==0.3.74
64
+ langchain-experimental==0.3.4
65
+ langchain-openai==0.3.29
66
+ langchain-tavily==0.2.11
67
+ langchain-text-splitters==0.3.9
68
+ langgraph==0.6.4
69
+ langgraph-checkpoint==2.1.1
70
+ langgraph-prebuilt==0.6.4
71
+ langgraph-sdk==0.2.0
72
+ langsmith==0.4.13
73
+ markdown-it-py==3.0.0
74
+ MarkupSafe==3.0.2
75
+ marshmallow==3.26.1
76
+ matplotlib==3.10.5
77
+ matplotlib-inline==0.1.7
78
+ mdurl==0.1.2
79
+ multidict==6.6.3
80
+ multiprocess==0.70.15
81
+ mypy_extensions==1.1.0
82
+ nest-asyncio==1.6.0
83
+ numpy==1.26.4
84
+ openai==1.99.6
85
+ openpyxl==3.1.5
86
+ orjson==3.11.1
87
+ ormsgpack==1.10.0
88
+ packaging==25.0
89
+ pandas==2.3.1
90
+ parso==0.8.4
91
+ pdfminer.six==20250506
92
+ pdfplumber==0.11.7
93
+ pexpect==4.9.0
94
+ pillow==11.3.0
95
+ Pint==0.24.4
96
+ platformdirs==4.3.8
97
+ prompt_toolkit==3.0.51
98
+ propcache==0.3.2
99
+ psutil==7.0.0
100
+ ptyprocess==0.7.0
101
+ pure_eval==0.2.3
102
+ pyarrow==14.0.1
103
+ pycparser==2.22
104
+ pydantic==2.11.7
105
+ pydantic-settings==2.10.1
106
+ pydantic_core==2.33.2
107
+ pydub==0.25.1
108
+ Pygments==2.19.2
109
+ PyMuPDF==1.26.3
110
+ pyparsing==3.2.3
111
+ pypdfium2==4.30.0
112
+ pytesseract==0.3.13
113
+ python-dateutil==2.9.0.post0
114
+ python-dotenv==1.1.1
115
+ python-multipart==0.0.20
116
+ pytz==2025.2
117
+ PyYAML==6.0.2
118
+ pyzmq==27.0.1
119
+ regex==2025.7.34
120
+ requests==2.32.4
121
+ requests-toolbelt==1.0.0
122
+ rich==14.1.0
123
+ ruff==0.12.8
124
+ safehttpx==0.1.6
125
+ semantic-version==2.10.0
126
+ sgmllib3k==1.0.0
127
+ shellingham==1.5.4
128
+ six==1.17.0
129
+ sniffio==1.3.1
130
+ soupsieve==2.7
131
+ SQLAlchemy==2.0.42
132
+ stack-data==0.6.3
133
+ starlette==0.47.2
134
+ tenacity==9.1.2
135
+ tiktoken==0.11.0
136
+ tomlkit==0.13.3
137
+ tornado==6.5.2
138
+ tqdm==4.67.1
139
+ traitlets==5.14.3
140
+ typer==0.16.0
141
+ typing-inspect==0.9.0
142
+ typing-inspection==0.4.1
143
+ typing_extensions==4.14.1
144
+ tzdata==2025.2
145
+ urllib3==2.5.0
146
+ uvicorn==0.35.0
147
+ wcwidth==0.2.13
148
+ websockets==15.0.1
149
+ wikipedia==1.4.0
150
+ xxhash==3.5.0
151
+ yarl==1.20.1
152
+ zstandard==0.23.0
tools.py ADDED
@@ -0,0 +1,510 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import io
5
+ import re
6
+ import json
7
+ import base64
8
+ import traceback
9
+ from typing import Optional, List, Dict, Any
10
+ import cmath
11
+
12
+ import requests
13
+ import pandas as pd
14
+ from PIL import Image
15
+
16
+ # Optional deps
17
+ try:
18
+ import pdfplumber
19
+ except Exception:
20
+ pdfplumber = None
21
+
22
+ try:
23
+ import pytesseract
24
+ except Exception:
25
+ pytesseract = None
26
+
27
+ try:
28
+ import sympy as sp
29
+ except Exception:
30
+ sp = None
31
+
32
+ try:
33
+ import matplotlib
34
+ matplotlib.use("Agg")
35
+ import matplotlib.pyplot as plt
36
+ except Exception:
37
+ plt = None
38
+
39
+ try:
40
+ from pint import UnitRegistry
41
+ _ureg = UnitRegistry()
42
+ except Exception:
43
+ _ureg = None
44
+
45
+ try:
46
+ from dateutil import parser as dtparser
47
+ from dateutil.relativedelta import relativedelta
48
+ except Exception:
49
+ dtparser = None
50
+ relativedelta = None
51
+
52
+
53
+ # LangChain bits
54
+ from langchain_core.tools import tool
55
+ from langchain_tavily.tavily_search import TavilySearch
56
+ from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
57
+ from langchain_experimental.utilities import PythonREPL
58
+
59
+
60
+ # ------------ helpers (formatting, env, truncation, errors) ------------
61
+
62
+ def _env(name: str, default: Optional[str] = None) -> Optional[str]:
63
+ return os.getenv(name, default)
64
+
65
+ def _truncate(txt: str, max_len: int = 4000) -> str:
66
+ if txt is None:
67
+ return ""
68
+ if len(txt) <= max_len:
69
+ return txt
70
+ head = max_len - 200
71
+ return txt[:head] + f"\n... [truncated {len(txt) - head} chars]"
72
+
73
+ def _fmt_block(tag: str, attrs: Dict[str, Any] | None, body: str) -> str:
74
+ attrs = attrs or {}
75
+ attr_str = " ".join(f'{k}="{attrs[k] if attrs[k] is not None else ""}"' for k in attrs)
76
+ if attr_str:
77
+ return f"<{tag} {attr_str}>\n{body}\n</{tag}>"
78
+ return f"<{tag}>\n{body}\n</{tag}>"
79
+
80
def _fmt_error(tool_name: str, err: Exception) -> str:
    """Render an exception (message plus current traceback) as a <ToolError> block."""
    details = f"{err}\n{traceback.format_exc()}"
    attrs = {"tool": tool_name, "type": type(err).__name__}
    return _fmt_block("ToolError", attrs, _truncate(details, 1600))
86
+
87
+
88
+
89
@tool("web_search")
def web_search(query: str) -> str:
    """Search the web (Tavily). Returns up to 3 results in <Document> blocks + optional <Answer>."""
    try:
        # Key comes from the environment; may be None, in which case the
        # request fails and is reported via the ToolError path below.
        api_key = _env("TAVILY_API_KEY")
        tavily = TavilySearch(
            tavily_api_key=api_key,
            max_results=3,
            include_answer=True,  # ask Tavily to synthesize a short answer too
            search_depth="advanced",
            topic="general",
            include_raw_content=False,
        )
        # NOTE(review): _run is a private LangChain method — confirm it stays
        # stable across langchain-tavily versions; invoke() is the public API.
        res = tavily._run(query=query)
        docs = res.get("results", []) or []
        blocks: List[str] = []
        for d in docs:
            blocks.append(
                _fmt_block(
                    "Document",
                    {"source": d.get("url", ""), "title": d.get("title", "")},
                    _truncate(d.get("content", "") or ""),
                )
            )
        # Assemble: optional synthesized <Answer>, then the documents.
        parts = []
        ans = res.get("answer")
        if ans:
            parts.append(_fmt_block("Answer", {}, _truncate(str(ans), 1000)))
        if blocks:
            parts.append("\n\n---\n\n".join(blocks))
        if not parts:
            return _fmt_block("WebResults", {"query": query}, "No results.")
        return _fmt_block("WebResults", {"query": query}, "\n\n".join(parts))
    except Exception as e:
        # All failures are reported in-band so the agent loop can continue.
        return _fmt_error("web_search", e)
124
+
125
+
126
@tool("wiki_search")
def wiki_search(query: str) -> str:
    """Search Wikipedia and return up to 2 articles as <Document> blocks."""
    try:
        documents = WikipediaLoader(query=query, load_max_docs=2).load()
        rendered = []
        for document in documents:
            metadata = getattr(document, "metadata", {}) or {}
            attrs = {
                "source": metadata.get("source", ""),
                "page": str(metadata.get("page", "")),
            }
            rendered.append(
                _fmt_block("Document", attrs, _truncate(document.page_content or "", 4000))
            )
        # Empty result set gets an explicit marker instead of an empty body.
        body = "\n\n---\n\n".join(rendered) if rendered else "No results."
        return _fmt_block("WikiResults", {"query": query}, body)
    except Exception as e:
        return _fmt_error("wiki_search", e)
147
+
148
+
149
@tool("academic_search")
def academic_search(query: str) -> str:
    """Search arXiv and return up to 3 papers as <Document> blocks."""
    try:
        papers = ArxivLoader(query=query, load_max_docs=3).load()
        rendered = []
        for paper in papers:
            metadata = getattr(paper, "metadata", {}) or {}
            # ArxivLoader metadata keys vary by version; accept both casings.
            attrs = {
                "title": str(metadata.get("Title") or metadata.get("title") or ""),
                "date": str(metadata.get("Published") or metadata.get("published") or ""),
            }
            rendered.append(
                _fmt_block("Document", attrs, _truncate(paper.page_content or "", 3000))
            )
        body = "\n\n---\n\n".join(rendered) if rendered else "No results."
        return _fmt_block("ArxivResults", {"query": query}, body)
    except Exception as e:
        return _fmt_error("academic_search", e)
172
+
173
+
174
@tool("python_code")
def python_code(code: str) -> str:
    """
    Execute Python code in a sandboxed REPL.
    Input should be valid Python code.
    """
    try:
        if code is None:
            return "<ToolError>No code provided to python_code tool.</ToolError>"
        # Fresh REPL per call: no state leaks between tool invocations.
        result = PythonREPL().run(str(code))
        return "(no output)" if result is None else str(result).strip("\n")
    except Exception as e:
        return f"<ToolError>{type(e).__name__}: {e}</ToolError>"
191
+
192
+
193
+
194
@tool("image_info")
def image_info(path: str) -> str:
    """Return basic info about an image (width x height, format)."""
    try:
        # Context manager closes the file handle even on malformed images.
        with Image.open(path) as img:
            description = f"{img.width}x{img.height} ({img.format})"
        return _fmt_block("ImageInfo", {"path": path}, description)
    except Exception as e:
        return _fmt_error("image_info", e)
203
+
204
+
205
@tool("read_mp3_transcript")
def read_mp3_transcript(path: str) -> str:
    """Transcribe an MP3 file (placeholder). Replace with actual ASR."""
    try:
        # No ASR backend wired up yet — always report that fact in-band.
        return _fmt_block("AudioTranscript", {"path": path}, "Transcription not implemented.")
    except Exception as e:
        return _fmt_error("read_mp3_transcript", e)
212
+
213
+
214
@tool("pdf_text_extractor")
def pdf_text_extractor(args: str) -> str:
    """Extract text from a PDF. Usage:
    - 'path/to/file.pdf'
    - 'path/to/file.pdf|pages=1-3' (1-indexed inclusive range)
    Returns a concatenated text excerpt (truncated)."""
    try:
        if pdfplumber is None:
            raise RuntimeError("pdfplumber not installed")
        # Default: whole document; an optional trailing "|pages=A-B" selector
        # (1-indexed, inclusive) narrows the range.
        path, start, end = args, None, None
        m = re.search(r"\|pages=(\d+)-(\d+)$", args.strip())
        if m:
            # Strip the selector off the path; rfind keeps any earlier '|' intact.
            path = args[: args.rfind("|pages=")]
            start, end = int(m.group(1)), int(m.group(2))
        text_parts: List[str] = []
        with pdfplumber.open(path) as pdf:
            total = len(pdf.pages)
            # Clamp the requested 1-indexed range to [1, total].
            s = max(1, start) if start else 1
            e = min(end, total) if end else total
            for p in range(s - 1, e):  # pdfplumber pages are 0-indexed
                page = pdf.pages[p]
                # extract_text() returns None for pages with no text layer.
                text_parts.append(page.extract_text() or "")
        text = "\n".join(text_parts).strip()
        if not text:
            text = "(no extractable text)"
        meta = {"path": path, "pages": f"{start or 1}-{end or 'end'}"}
        return _fmt_block("PDFText", meta, _truncate(text, 4000))
    except Exception as e:
        return _fmt_error("pdf_text_extractor", e)
243
+
244
+
245
@tool("ocr_image")
def ocr_image(path: str) -> str:
    """Run OCR on an image and return extracted text (requires pytesseract + Tesseract installed)."""
    try:
        if pytesseract is None:
            raise RuntimeError("pytesseract not installed or tesseract binary missing")
        with Image.open(path) as img:
            extracted = pytesseract.image_to_string(img).strip()
        # Blank OCR output gets an explicit marker rather than an empty block.
        return _fmt_block("OCRText", {"path": path}, _truncate(extracted or "(no text)", 4000))
    except Exception as e:
        return _fmt_error("ocr_image", e)
256
+
257
+
258
@tool("math_solver")
def math_solver(expr: str) -> str:
    """Solve/compute a math expression with SymPy. Examples:
    - 'integrate(sin(x)/x, (x, 0, 1))'
    - 'solve(x**2 - 5*x + 6, x)'
    - 'simplify((x**2 - 1)/(x - 1))'"""
    try:
        if sp is None:
            raise RuntimeError("sympy not installed")
        # Predeclare every lowercase letter as a symbol so bare names parse.
        local_syms = {name: sp.symbols(name) for name in "abcdefghijklmnopqrstuvwxyz"}
        parsed = sp.sympify(expr, locals=local_syms)
        # Prefer a numeric evaluation when the result supports it.
        result = parsed.evalf() if hasattr(parsed, "evalf") else parsed
        return _fmt_block("MathResult", {}, _truncate(str(result), 4000))
    except Exception as e:
        return _fmt_error("math_solver", e)
275
+
276
+
277
@tool("plot_data_tool")
def plot_data_tool(args: str) -> str:
    """Create a simple plot from CSV.
    Usage (JSON string):
    {
      "path": "data.csv",
      "x": "col_x",   # optional if data has index
      "y": "col_y",   # required
      "kind": "line"  # 'line' or 'scatter'
    }
    Returns <ImageBase64> PNG."""
    try:
        if plt is None:
            raise RuntimeError("matplotlib not available")
        cfg = json.loads(args)
        path = cfg.get("path")
        xcol = cfg.get("x")
        ycol = cfg["y"]  # required — KeyError becomes a ToolError below
        kind = (cfg.get("kind") or "line").lower()

        frame = pd.read_csv(path)
        ys = frame[ycol]
        xs = frame[xcol] if xcol else range(len(ys))

        fig, ax = plt.subplots(figsize=(6, 4))
        # Anything that is not explicitly "scatter" falls back to a line plot.
        draw = ax.scatter if kind == "scatter" else ax.plot
        draw(xs, ys)
        ax.set_xlabel(xcol or "index")
        ax.set_ylabel(ycol)
        ax.set_title(f"{os.path.basename(path)}: {ycol} vs {xcol or 'index'}")

        buf = io.BytesIO()
        fig.tight_layout()
        fig.savefig(buf, format="png")
        plt.close(fig)  # release the figure — Agg backend, nothing to display
        encoded = base64.b64encode(buf.getvalue()).decode("utf-8")
        return _fmt_block("ImageBase64", {"format": "png"}, encoded)
    except Exception as e:
        return _fmt_error("plot_data_tool", e)
313
+
314
+
315
@tool("unit_converter")
def unit_converter(query: str) -> str:
    """Convert units. Examples:
    - '12 inch to cm'
    - '5 miles to kilometer'
    - '32 degF to degC'"""
    try:
        if _ureg is None:
            raise RuntimeError("pint not installed")
        match = re.match(r"\s*([\-0-9\.]+)\s+([A-Za-z\/\^\*\s]+)\s+to\s+([A-Za-z\/\^\*\s]+)\s*$", query)
        if not match:
            raise ValueError("Format: '<value> <from_units> to <to_units>'")
        value = float(match.group(1))
        src_unit = match.group(2).strip()
        dst_unit = match.group(3).strip()
        # Build a pint quantity from the source unit, then convert.
        converted = (value * _ureg(src_unit)).to(dst_unit)
        return _fmt_block(
            "UnitConversion",
            {"from": src_unit, "to": dst_unit},
            f"{converted.magnitude} {converted.units}",
        )
    except Exception as e:
        return _fmt_error("unit_converter", e)
335
+
336
+
337
@tool("date_time_calculator")
def date_time_calculator(query: str) -> str:
    """Date/time math. Examples:
    - 'diff 2024-01-01 2025-08-14' → difference (y,m,d)
    - 'add 2025-08-14 + 3 days' → add delta
    - 'add 2025-08-14 - 2 weeks' → subtract delta
    Accepts ISO dates; units: years, months, weeks, days, hours, minutes."""
    try:
        if dtparser is None or relativedelta is None:
            raise RuntimeError("python-dateutil not installed")
        s = query.strip()
        # Two verbs: "diff <date> <date>" and "add <date> +/- <n> <unit>".
        if s.lower().startswith("diff"):
            parts = s.split()
            if len(parts) != 3:
                raise ValueError("Use: diff YYYY-MM-DD YYYY-MM-DD")
            d1 = dtparser.parse(parts[1])
            d2 = dtparser.parse(parts[2])
            # relativedelta gives a calendar-aware difference, not raw seconds.
            rd = relativedelta(d2, d1)
            return _fmt_block(
                "DateDiff",
                {"from": parts[1], "to": parts[2]},
                f"{rd.years} years, {rd.months} months, {rd.days} days, "
                f"{rd.hours} hours, {rd.minutes} minutes",
            )
        elif s.lower().startswith("add"):
            # e.g., "add 2025-08-14 + 3 days" or "add 2025-08-14 - 2 weeks"
            m = re.match(r"add\s+(\S+)\s*([+-])\s*(\d+)\s+(years?|months?|weeks?|days?|hours?|minutes?)", s, re.I)
            if not m:
                raise ValueError("Use: add <date> +/- <n> <unit>")
            base = dtparser.parse(m.group(1))
            sign = 1 if m.group(2) == "+" else -1
            n = int(m.group(3)) * sign
            unit = m.group(4).lower()
            # Map the (possibly singular) unit word onto relativedelta kwargs.
            # The regex guarantees exactly one of these branches matches.
            kwargs = {}
            if "year" in unit: kwargs["years"] = n
            elif "month" in unit: kwargs["months"] = n
            elif "week" in unit: kwargs["weeks"] = n
            elif "day" in unit: kwargs["days"] = n
            elif "hour" in unit: kwargs["hours"] = n
            elif "minute" in unit: kwargs["minutes"] = n
            res = base + relativedelta(**kwargs)
            return _fmt_block("DateAdd", {"base": base.isoformat(), "delta": f"{n} {unit}"}, res.isoformat())
        else:
            raise ValueError("Start with 'diff' or 'add'.")
    except Exception as e:
        return _fmt_error("date_time_calculator", e)
383
+
384
+
385
@tool("api_request_tool")
def api_request_tool(args: str) -> str:
    """Call a JSON REST API.
    Usage (JSON string):
    {
      "method": "GET",
      "url": "https://api.example.com/items",
      "headers": {"Authorization": "Bearer ..."},
      "params": {"q": "search"},
      "json": {"k": "v"},
      "timeout": 20
    }"""
    try:
        cfg = json.loads(args)
        url = cfg["url"]  # required — KeyError becomes a ToolError below
        resp = requests.request(
            (cfg.get("method") or "GET").upper(),
            url,
            headers=cfg.get("headers") or {},
            params=cfg.get("params") or {},
            json=cfg.get("json"),
            timeout=cfg.get("timeout", 20),
        )
        # Pretty-print JSON bodies; fall back to truncated raw text.
        try:
            text = json.dumps(resp.json(), indent=2)[:4000]
        except Exception:
            text = _truncate(resp.text, 4000)
        return _fmt_block("APIResponse", {"status": resp.status_code, "url": url}, text)
    except Exception as e:
        return _fmt_error("api_request_tool", e)
416
+
417
+
418
@tool("html_table_extractor")
def html_table_extractor(url: str) -> str:
    """Extract the first HTML table from a webpage and return CSV preview."""
    try:
        tables = pd.read_html(url)
        if not tables:
            return _fmt_block("HTMLTable", {"url": url}, "No tables found.")
        first = tables[0]
        preview = io.StringIO()
        first.head(15).to_csv(preview, index=False)
        summary = (
            f"Shape: {first.shape[0]} rows x {first.shape[1]} cols\n"
            f"Columns: {list(first.columns)}\n\nHead(15):\n{preview.getvalue()}"
        )
        return _fmt_block("HTMLTable", {"url": url}, _truncate(summary, 4000))
    except Exception as e:
        return _fmt_error("html_table_extractor", e)
432
+
433
@tool
def multiply(a: float, b: float) -> float:
    """
    Multiplies two numbers.
    Args:
        a (float): the first number
        b (float): the second number
    """
    product = a * b
    return product
442
+
443
+
444
@tool
def add(a: float, b: float) -> float:
    """
    Adds two numbers.
    Args:
        a (float): the first number
        b (float): the second number
    """
    total = a + b
    return total
453
+
454
+
455
@tool
def subtract(a: float, b: float) -> float:
    """
    Subtracts two numbers.
    Args:
        a (float): the first number
        b (float): the second number
    """
    # Return annotation fixed: float - float yields a float, not int.
    return a - b
464
+
465
+
466
@tool
def divide(a: float, b: float) -> float:
    """
    Divides two numbers.
    Args:
        a (float): the first float number
        b (float): the second float number
    Raises:
        ValueError: if b is zero.
    """
    if b == 0:
        # Grammar fix in the user-visible message ("Cannot divided" -> "Cannot divide").
        raise ValueError("Cannot divide by zero.")
    return a / b
477
+
478
+
479
@tool
def modulus(a: int, b: int) -> int:
    """
    Get the modulus of two numbers.
    Args:
        a (int): the first number
        b (int): the second number
    """
    remainder = a % b
    return remainder
488
+
489
+
490
@tool
def power(a: float, b: float) -> float:
    """
    Get the power of two numbers.
    Args:
        a (float): the first number
        b (float): the second number
    """
    result = a ** b
    return result
499
+
500
+
501
@tool
def square_root(a: float) -> float | complex:
    """
    Get the square root of a number.
    Args:
        a (float): the number to get the square root of
    """
    if a < 0:
        # Negative input: return the principal complex root.
        return cmath.sqrt(a)
    return a ** 0.5