serverdaun committed on
Commit
a6a9e0f
·
1 Parent(s): fb8f1a6

add agent

Browse files
Files changed (4) hide show
  1. agent.py +60 -22
  2. app.py +46 -24
  3. config.py +7 -1
  4. tools.py +86 -50
agent.py CHANGED
@@ -1,17 +1,38 @@
1
- import os
2
- from typing import TypedDict, Annotated
3
- from dotenv import load_dotenv
4
- from langgraph.graph.message import add_messages
5
- from langchain_core.messages import AnyMessage, HumanMessage, AIMessage
6
  from langgraph.prebuilt import ToolNode, tools_condition
7
  from langgraph.graph import START, StateGraph, MessagesState
8
- from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
9
- from tools import wiki_search, tavily_search, arxiv_search, add, subtract, multiply, divide, power, sqrt, modulus
10
 
 
 
 
 
 
 
11
 
12
- load_dotenv()
13
- HF_TOKEN = os.getenv("HF_TOKEN")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
 
15
  TOOLS = [
16
  wiki_search,
17
  tavily_search,
@@ -22,31 +43,48 @@ TOOLS = [
22
  divide,
23
  power,
24
  sqrt,
25
- modulus
 
 
 
 
 
 
 
 
 
 
26
  ]
27
 
28
- def build_agent():
29
- # Define llm from Hugging Face
30
- llm = HuggingFaceEndpoint(
31
- repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
32
- huggingfacehub_api_token=HF_TOKEN
 
 
 
 
 
 
 
33
  )
34
 
35
- # Define chat interface and the tools
36
- chat = ChatHuggingFace(llm=llm, verbose=True)
37
- chat_w_tools = chat.bind_tools(TOOLS)
38
 
39
- # Node
40
  def assistant(state: MessagesState):
41
  """Assistant node"""
42
  return {"messages": [chat_w_tools.invoke(state["messages"])]}
43
 
44
-
45
  builder = StateGraph(MessagesState)
46
 
 
47
  builder.add_node("assistant", assistant)
48
  builder.add_node("tools", ToolNode(TOOLS))
49
 
 
50
  builder.add_edge(START, "assistant")
51
  builder.add_conditional_edges(
52
  "assistant",
@@ -54,5 +92,5 @@ def build_agent():
54
  )
55
  builder.add_edge("tools", "assistant")
56
 
57
- # Compile graph
58
- return builder.compile()
 
 
 
 
 
 
1
  from langgraph.prebuilt import ToolNode, tools_condition
2
  from langgraph.graph import START, StateGraph, MessagesState
3
+ from langchain_openai import AzureChatOpenAI
 
4
 
5
+ from config import (
6
+ MODEL_ENDPOINT,
7
+ MODEL_KEY,
8
+ MODEL_NAME,
9
+ MODEL_API_VERSION,
10
+ )
11
 
12
+ from tools import (
13
+ wiki_search,
14
+ tavily_search,
15
+ arxiv_search,
16
+ add,
17
+ subtract,
18
+ multiply,
19
+ divide,
20
+ power,
21
+ sqrt,
22
+ modulus,
23
+ scrape_webpage,
24
+ analyze_image,
25
+ is_commutative,
26
+ commutativity_counterexample_pairs,
27
+ commutativity_counterexample_elements,
28
+ find_identity_element,
29
+ find_inverses,
30
+ transcribe_audio,
31
+ execute_source_file,
32
+ interact_tabular,
33
+ )
34
 
35
+ # Define tools
36
  TOOLS = [
37
  wiki_search,
38
  tavily_search,
 
43
  divide,
44
  power,
45
  sqrt,
46
+ modulus,
47
+ scrape_webpage,
48
+ analyze_image,
49
+ is_commutative,
50
+ commutativity_counterexample_pairs,
51
+ commutativity_counterexample_elements,
52
+ find_identity_element,
53
+ find_inverses,
54
+ transcribe_audio,
55
+ execute_source_file,
56
+ interact_tabular
57
  ]
58
 
59
+
60
+ def build_agent() -> StateGraph:
61
+ """
62
+ Build the agent.
63
+ Returns:
64
+ StateGraph: The agent graph.
65
+ """
66
+ llm = AzureChatOpenAI(
67
+ azure_deployment=MODEL_NAME,
68
+ api_version=MODEL_API_VERSION,
69
+ azure_endpoint=MODEL_ENDPOINT,
70
+ api_key=MODEL_KEY,
71
  )
72
 
73
+ chat_w_tools = llm.bind_tools(TOOLS)
 
 
74
 
75
+ # Assistant node
76
  def assistant(state: MessagesState):
77
  """Assistant node"""
78
  return {"messages": [chat_w_tools.invoke(state["messages"])]}
79
 
80
+ # Build graph
81
  builder = StateGraph(MessagesState)
82
 
83
+ # Add nodes
84
  builder.add_node("assistant", assistant)
85
  builder.add_node("tools", ToolNode(TOOLS))
86
 
87
+ # Add edges
88
  builder.add_edge(START, "assistant")
89
  builder.add_conditional_edges(
90
  "assistant",
 
92
  )
93
  builder.add_edge("tools", "assistant")
94
 
95
+ # Compile graph and return it
96
+ return builder.compile()
app.py CHANGED
@@ -1,15 +1,30 @@
1
  import os
2
- import gradio as gr
 
3
  import requests
4
- import inspect
5
  import pandas as pd
6
  from agent import build_agent
7
  from config import SYSTEM_PROMPT, SPACE_ID
8
  from langchain_core.messages import SystemMessage, HumanMessage
9
 
 
10
  # --- Constants ---
11
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  def get_file(task_id: str) -> requests.Response:
15
  """I
@@ -20,6 +35,7 @@ def get_file(task_id: str) -> requests.Response:
20
  response.raise_for_status()
21
  return response
22
 
 
23
  def get_question_data(elem: dict) -> tuple[str, str]:
24
  """
25
  Fetches question text and file path if there are any.
@@ -30,15 +46,18 @@ def get_question_data(elem: dict) -> tuple[str, str]:
30
  """
31
  question_text = elem["question"]
32
  file_name = elem["file_name"]
 
33
 
34
  if file_name != "":
35
  task_id = elem["task_id"]
36
  response = get_file(task_id=task_id)
37
 
38
- file_path = f"data/{file_name}"
 
39
  with open(file_path, "wb") as f:
40
  f.write(response.content)
41
-
 
42
  return file_path, question_text
43
 
44
 
@@ -47,7 +66,8 @@ class BasicAgent:
47
  def __init__(self):
48
  self.agent = build_agent()
49
  print("BasicAgent initialized.")
50
- def __call__(self, question: str, file_path: str=None) -> str:
 
51
  messages = [
52
  SystemMessage(content=SYSTEM_PROMPT),
53
  ]
@@ -65,17 +85,17 @@ class BasicAgent:
65
 
66
  return final_answer
67
 
68
- def run_and_submit_all( profile: gr.OAuthProfile | None):
 
69
  """
70
  Fetches all questions, runs the BasicAgent on them, submits all answers,
71
  and displays the results.
72
  """
73
  # --- Determine HF Space Runtime URL and Repo URL ---
74
- # space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
75
  space_id = SPACE_ID
76
 
77
  if profile:
78
- username= f"{profile.username}"
79
  print(f"User logged in: {username}")
80
  else:
81
  print("User not logged in.")
@@ -102,16 +122,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
102
  response.raise_for_status()
103
  questions_data = response.json()
104
  if not questions_data:
105
- print("Fetched questions list is empty.")
106
- return "Fetched questions list is empty or invalid format.", None
107
  print(f"Fetched {len(questions_data)} questions.")
108
  except requests.exceptions.RequestException as e:
109
  print(f"Error fetching questions: {e}")
110
  return f"Error fetching questions: {e}", None
111
  except requests.exceptions.JSONDecodeError as e:
112
- print(f"Error decoding JSON response from questions endpoint: {e}")
113
- print(f"Response text: {response.text[:500]}")
114
- return f"Error decoding server response for questions: {e}", None
115
  except Exception as e:
116
  print(f"An unexpected error occurred fetching questions: {e}")
117
  return f"An unexpected error occurred fetching questions: {e}", None
@@ -132,14 +152,14 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
132
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
133
  print(f"Task ID: {task_id}, Question: {question_text}, Submitted Answer: {submitted_answer}")
134
  except Exception as e:
135
- print(f"Error running agent on task {task_id}: {e}")
136
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
137
 
138
  if not answers_payload:
139
  print("Agent did not produce any answers to submit.")
140
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
141
 
142
- # 4. Prepare Submission
143
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
144
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
145
  print(status_update)
@@ -219,11 +239,12 @@ with gr.Blocks() as demo:
219
  outputs=[status_output, results_table]
220
  )
221
 
 
222
  if __name__ == "__main__":
223
- print("\n" + "-"*30 + " App Starting " + "-"*30)
224
  # Check for SPACE_HOST and SPACE_ID at startup for information
225
  space_host_startup = os.getenv("SPACE_HOST")
226
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
227
 
228
  if space_host_startup:
229
  print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -231,14 +252,15 @@ if __name__ == "__main__":
231
  else:
232
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
233
 
234
- if space_id_startup: # Print repo URLs if SPACE_ID is found
235
  print(f"✅ SPACE_ID found: {space_id_startup}")
236
- print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
237
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
238
  else:
239
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
 
240
 
241
- print("-"*(60 + len(" App Starting ")) + "\n")
242
 
243
  print("Launching Gradio Interface for Basic Agent Evaluation...")
244
- demo.launch(debug=True, share=False)
 
1
  import os
2
+ import tempfile
3
+ import atexit
4
  import requests
5
+ import gradio as gr
6
  import pandas as pd
7
  from agent import build_agent
8
  from config import SYSTEM_PROMPT, SPACE_ID
9
  from langchain_core.messages import SystemMessage, HumanMessage
10
 
11
+
12
  # --- Constants ---
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
15
+ TEMP_FILES = []
16
+
17
+
18
def cleanup_temp_files():
    """Best-effort removal of every file recorded in TEMP_FILES.

    Registered with atexit so downloaded question files do not outlive
    the process. Failures are printed, never raised, because this runs
    during interpreter shutdown where an exception would be noise.
    """
    for path in TEMP_FILES:
        try:
            os.remove(path)
        except OSError as e:
            # os.remove only raises OSError subclasses (FileNotFoundError,
            # PermissionError, ...); catching Exception here would also
            # hide genuine programming bugs such as a non-str entry.
            print(f"Could not delete temp file {path}: {e}")


atexit.register(cleanup_temp_files)
27
+
28
 
29
  def get_file(task_id: str) -> requests.Response:
30
  """I
 
35
  response.raise_for_status()
36
  return response
37
 
38
+
39
  def get_question_data(elem: dict) -> tuple[str, str]:
40
  """
41
  Fetches question text and file path if there are any.
 
46
  """
47
  question_text = elem["question"]
48
  file_name = elem["file_name"]
49
+ file_path = None
50
 
51
  if file_name != "":
52
  task_id = elem["task_id"]
53
  response = get_file(task_id=task_id)
54
 
55
+ temp_dir = tempfile.gettempdir()
56
+ file_path = os.path.join(temp_dir, file_name)
57
  with open(file_path, "wb") as f:
58
  f.write(response.content)
59
+ TEMP_FILES.append(file_path)
60
+
61
  return file_path, question_text
62
 
63
 
 
66
def __init__(self):
    # Build and compile the LangGraph workflow once; the compiled graph
    # is reused for every question this agent instance answers.
    self.agent = build_agent()
    print("BasicAgent initialized.")
69
+
70
+ def __call__(self, question: str, file_path: str = None) -> str:
71
  messages = [
72
  SystemMessage(content=SYSTEM_PROMPT),
73
  ]
 
85
 
86
  return final_answer
87
 
88
+
89
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
90
  """
91
  Fetches all questions, runs the BasicAgent on them, submits all answers,
92
  and displays the results.
93
  """
94
  # --- Determine HF Space Runtime URL and Repo URL ---
 
95
  space_id = SPACE_ID
96
 
97
  if profile:
98
+ username = f"{profile.username}"
99
  print(f"User logged in: {username}")
100
  else:
101
  print("User not logged in.")
 
122
  response.raise_for_status()
123
  questions_data = response.json()
124
  if not questions_data:
125
+ print("Fetched questions list is empty.")
126
+ return "Fetched questions list is empty or invalid format.", None
127
  print(f"Fetched {len(questions_data)} questions.")
128
  except requests.exceptions.RequestException as e:
129
  print(f"Error fetching questions: {e}")
130
  return f"Error fetching questions: {e}", None
131
  except requests.exceptions.JSONDecodeError as e:
132
+ print(f"Error decoding JSON response from questions endpoint: {e}")
133
+ print(f"Response text: {response.text[:500]}")
134
+ return f"Error decoding server response for questions: {e}", None
135
  except Exception as e:
136
  print(f"An unexpected error occurred fetching questions: {e}")
137
  return f"An unexpected error occurred fetching questions: {e}", None
 
152
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
153
  print(f"Task ID: {task_id}, Question: {question_text}, Submitted Answer: {submitted_answer}")
154
  except Exception as e:
155
+ print(f"Error running agent on task {task_id}: {e}")
156
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
157
 
158
  if not answers_payload:
159
  print("Agent did not produce any answers to submit.")
160
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
161
 
162
+ # 4. Prepare Submission
163
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
164
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
165
  print(status_update)
 
239
  outputs=[status_output, results_table]
240
  )
241
 
242
+
243
  if __name__ == "__main__":
244
+ print("\n" + "-" * 30 + " App Starting " + "-" * 30)
245
  # Check for SPACE_HOST and SPACE_ID at startup for information
246
  space_host_startup = os.getenv("SPACE_HOST")
247
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
248
 
249
  if space_host_startup:
250
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
252
  else:
253
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
254
 
255
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
256
  print(f"✅ SPACE_ID found: {space_id_startup}")
257
+ print(f"Repo URL: https://huggingface.co/spaces/{space_id_startup}")
258
+ print(f"Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
259
  else:
260
+ print("ℹ️ SPACE_ID environment variable not found (running locally?)."
261
+ "Repo URL cannot be determined.")
262
 
263
+ print("-" * (60 + len(" App Starting ")) + "\n")
264
 
265
  print("Launching Gradio Interface for Basic Agent Evaluation...")
266
+ demo.launch(debug=True, share=False)
config.py CHANGED
@@ -10,4 +10,10 @@ SPACE_ID = os.getenv("SPACE_ID")
10
 
11
  with open("system_prompt.yaml", "r") as f:
12
  SYSTEM_PROMPT = yaml.safe_load(f)
13
- SYSTEM_PROMPT = SYSTEM_PROMPT["system_prompt"]
 
 
 
 
 
 
 
10
 
11
  with open("system_prompt.yaml", "r") as f:
12
  SYSTEM_PROMPT = yaml.safe_load(f)
13
+ SYSTEM_PROMPT = SYSTEM_PROMPT["system_prompt"]
14
+
15
+ HF_TOKEN = os.getenv("HF_TOKEN")
16
+ MODEL_ENDPOINT = os.getenv("MODEL_ENDPOINT")
17
+ MODEL_KEY = os.getenv("MODEL_KEY")
18
+ MODEL_NAME = os.getenv("MODEL_NAME")
19
+ MODEL_API_VERSION = os.getenv("MODEL_API_VERSION")
tools.py CHANGED
@@ -1,28 +1,35 @@
1
- from langchain_core.tools import tool
2
- from langchain_community.tools.tavily_search import TavilySearchResults
3
- from langchain_community.document_loaders import WikipediaLoader
4
- from langchain_community.document_loaders import ArxivLoader
5
- from config import TAVILY_API_KEY
6
- import requests
7
- from bs4 import BeautifulSoup
8
- from PIL import Image
9
- from pathlib import Path
10
  import base64
11
- from openai import AzureOpenAI
12
- from config import MODEL_NAME, MODEL_API_VERSION, MODEL_ENDPOINT, MODEL_KEY
13
- from faster_whisper import WhisperModel
14
- from typing import Dict
15
  import shutil
16
  import subprocess as sp
17
  import tempfile
18
- import pandas as pd
19
  import textwrap
20
- import io
21
- import json
 
 
 
 
 
 
 
 
 
 
 
22
 
23
- #=========================================
 
 
 
 
 
 
24
  # Search Tools
25
- #=========================================
 
 
26
  @tool
27
  def wiki_search(query: str) -> str:
28
  """
@@ -38,14 +45,14 @@ def wiki_search(query: str) -> str:
38
  for doc in docs:
39
  # Get the standard wiki summary
40
  wiki_summary = f"\nTitle: {doc.metadata.get('title')}\nURL: {doc.metadata.get('source')}\n\n"
41
-
42
  # Scrape and clean the full webpage
43
  try:
44
  headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
45
  response = requests.get(doc.metadata.get('source'), headers=headers)
46
  response.raise_for_status()
47
  soup = BeautifulSoup(response.text, 'html.parser')
48
-
49
  # Remove unwanted elements
50
  unwanted_elements = [
51
  '.mw-jump-link', '.mw-editsection', '.reference', # Wiki specific
@@ -56,7 +63,7 @@ def wiki_search(query: str) -> str:
56
  ]
57
  for element in soup.select(','.join(unwanted_elements)):
58
  element.decompose()
59
-
60
  # Get main content area
61
  content_div = soup.select_one('#mw-content-text')
62
  if content_div:
@@ -67,18 +74,19 @@ def wiki_search(query: str) -> str:
67
  else:
68
  full_text = soup.get_text(separator='\n', strip=True)
69
 
70
-
71
  # Combine wiki summary with cleaned webpage content
72
  combined_result = f"{wiki_summary}\n### Full Article Content ###\n{full_text}"
73
  results.append(combined_result)
74
-
75
  except Exception as e:
 
76
  results.append(wiki_summary)
77
 
78
  # Join all results with clear separators
79
- formatted_results = "\n\n" + "="*20 + "\n\n".join(results)
80
  return formatted_results
81
 
 
82
  @tool
83
  def tavily_search(query: str) -> str:
84
  """
@@ -101,6 +109,7 @@ def tavily_search(query: str) -> str:
101
 
102
  return formatted_results
103
 
 
104
  @tool
105
  def arxiv_search(query: str) -> str:
106
  """
@@ -123,6 +132,7 @@ def arxiv_search(query: str) -> str:
123
 
124
  return formatted_results
125
 
 
126
  @tool
127
  def scrape_webpage(url: str) -> str:
128
  """
@@ -137,20 +147,23 @@ def scrape_webpage(url: str) -> str:
137
  response = requests.get(url, headers=headers)
138
  response.raise_for_status()
139
  soup = BeautifulSoup(response.text, 'html.parser')
140
-
141
  # Remove script and style elements
142
  for script in soup(['script', 'style']):
143
  script.decompose()
144
-
145
  # Get text content
146
  text = soup.get_text(separator='\n', strip=True)
147
  return text
148
  except Exception as e:
149
  return f"Error scraping webpage: {str(e)}"
150
 
151
- #=========================================
 
152
  # Math Tools
153
- #=========================================
 
 
154
  @tool
155
  def add(x: float, y: float) -> float:
156
  """
@@ -163,6 +176,7 @@ def add(x: float, y: float) -> float:
163
  """
164
  return x + y
165
 
 
166
  @tool
167
  def subtract(x: float, y: float) -> float:
168
  """
@@ -175,6 +189,7 @@ def subtract(x: float, y: float) -> float:
175
  """
176
  return x - y
177
 
 
178
  @tool
179
  def multiply(x: float, y: float) -> float:
180
  """
@@ -187,6 +202,7 @@ def multiply(x: float, y: float) -> float:
187
  """
188
  return x * y
189
 
 
190
  @tool
191
  def divide(x: float, y: float) -> float:
192
  """
@@ -201,6 +217,7 @@ def divide(x: float, y: float) -> float:
201
  raise ValueError("Cannot divide by zero.")
202
  return x / y
203
 
 
204
  @tool
205
  def power(x: float, y: float) -> float:
206
  """
@@ -213,6 +230,7 @@ def power(x: float, y: float) -> float:
213
  """
214
  return x ** y
215
 
 
216
  @tool
217
  def sqrt(x: float) -> float:
218
  """
@@ -226,6 +244,7 @@ def sqrt(x: float) -> float:
226
  raise ValueError("Cannot calculate square root of a negative number.")
227
  return x ** 0.5
228
 
 
229
  @tool
230
  def modulus(x: float, y: float) -> float:
231
  """
@@ -238,6 +257,7 @@ def modulus(x: float, y: float) -> float:
238
  """
239
  return x % y
240
 
 
241
  @tool
242
  def is_commutative(set_elements: list, operation_table: list) -> bool:
243
  """
@@ -255,6 +275,7 @@ def is_commutative(set_elements: list, operation_table: list) -> bool:
255
  return False
256
  return True
257
 
 
258
  @tool
259
  def commutativity_counterexample_pairs(set_elements: list, operation_table: list) -> list:
260
  """
@@ -273,6 +294,7 @@ def commutativity_counterexample_pairs(set_elements: list, operation_table: list
273
  pairs.append((set_elements[i], set_elements[j]))
274
  return pairs
275
 
 
276
  @tool
277
  def commutativity_counterexample_elements(set_elements: list, operation_table: list) -> str:
278
  """
@@ -292,6 +314,7 @@ def commutativity_counterexample_elements(set_elements: list, operation_table: l
292
  involved.add(set_elements[j])
293
  return ",".join(sorted(involved))
294
 
 
295
  @tool
296
  def is_associative(set_elements: list, operation_table: list) -> bool:
297
  """
@@ -317,6 +340,7 @@ def is_associative(set_elements: list, operation_table: list) -> bool:
317
  return False
318
  return True
319
 
 
320
  @tool
321
  def find_identity_element(set_elements: list, operation_table: list) -> str:
322
  """
@@ -339,6 +363,7 @@ def find_identity_element(set_elements: list, operation_table: list) -> str:
339
  return candidate
340
  return ""
341
 
 
342
  @tool
343
  def find_inverses(set_elements: list, operation_table: list) -> dict:
344
  """
@@ -353,8 +378,6 @@ def find_inverses(set_elements: list, operation_table: list) -> dict:
353
  identity = find_identity_element(set_elements, operation_table)
354
  if not identity:
355
  return {e: None for e in set_elements}
356
- idx = {e: i for i, e in enumerate(set_elements)}
357
- identity_idx = idx[identity]
358
  inverses = {}
359
  for i in range(n):
360
  found = None
@@ -365,9 +388,12 @@ def find_inverses(set_elements: list, operation_table: list) -> dict:
365
  inverses[set_elements[i]] = found
366
  return inverses
367
 
368
- #=========================================
 
369
  # Image Tools
370
- #=========================================
 
 
371
  @tool
372
  def analyze_image(question: str, path: str) -> str:
373
  """
@@ -387,7 +413,7 @@ def analyze_image(question: str, path: str) -> str:
387
  p = Path(path).expanduser().resolve()
388
  if not p.exists():
389
  raise ValueError(f"Image file does not exist: {p}")
390
-
391
  mime = "image/png" if p.suffix.lower() == ".png" else "image/jpeg"
392
  with open(p, "rb") as f:
393
  base64_image = f"data:{mime};base64,{base64.b64encode(f.read()).decode('utf-8')}"
@@ -407,9 +433,12 @@ def analyze_image(question: str, path: str) -> str:
407
 
408
  return response.choices[0].message.content.strip()
409
 
410
- #=========================================
 
411
  # Audio Tools
412
- #=========================================
 
 
413
  @tool
414
  def transcribe_audio(path: str) -> str:
415
  """
@@ -433,21 +462,24 @@ def transcribe_audio(path: str) -> str:
433
  text = "".join(seg.text for seg in segments).strip()
434
  return text
435
 
436
- #=========================================
 
437
  # Code Tools
438
- #=========================================
 
439
  LANG_COMMANDS: Dict[str, callable] = {
440
- ".py": lambda s, _:[["python3", s.name]],
441
- ".js": lambda s, _:[["node", s.name]],
442
- ".ts": lambda s, _:[["deno", "run", "-A", s.name]],
443
- ".sh": lambda s, _:[["bash", s.name]],
444
- ".rb": lambda s, _:[["ruby", s.name]],
445
- ".php": lambda s, _:[["php", s.name]],
446
- ".go": lambda s, _:[["go", "run", s.name]]
447
  }
448
 
 
449
  @tool
450
- def execute_source_file(path: str, timeout: int=10) -> str:
451
  """
452
  Run the program contained in *path*
453
  Returns a newline-separated string:
@@ -463,7 +495,7 @@ def execute_source_file(path: str, timeout: int=10) -> str:
463
  src = Path(path).expanduser().resolve(strict=True)
464
  if src.suffix not in LANG_COMMANDS:
465
  raise ValueError(f"Unsupported file extension: {src.suffix}")
466
-
467
  # Temp work dir for the program
468
  work = Path(tempfile.mkdtemp(prefix="exec_tool_"))
469
  shutil.copy(src, work / src.name)
@@ -490,15 +522,18 @@ def execute_source_file(path: str, timeout: int=10) -> str:
490
  f"STDOUT: {full_out}\n"
491
  f"STDERR: {full_err}"
492
  )
493
-
494
  finally:
495
  shutil.rmtree(work)
496
 
497
- #=========================================
 
498
  # Tabular data tools
499
- #=========================================
 
500
  MAX_BYTES_RETURN = 200000
501
 
 
502
  # Helper functions
503
  def _load_table(path: Path, sheet: str) -> pd.DataFrame:
504
  """
@@ -518,6 +553,7 @@ def _load_table(path: Path, sheet: str) -> pd.DataFrame:
518
  return pd.read_parquet(path)
519
  raise ValueError(f"Unsupported file extension: {ext}")
520
 
 
521
  def _safe_truncate(text: str, limit: int = MAX_BYTES_RETURN) -> tuple[str, bool]:
522
  """
523
  Truncate text to a given limit.
@@ -580,7 +616,7 @@ def interact_tabular(file_path: str, operation: str = "summary", sheet: str = "S
580
  result = buf.getvalue()
581
  else:
582
  raise ValueError(f"Unsupported operation: {operation}")
583
-
584
  result, truncated = _safe_truncate(result)
585
 
586
  info = {
 
 
 
 
 
 
 
 
 
 
1
  import base64
2
+ import io
3
+ import json
 
 
4
  import shutil
5
  import subprocess as sp
6
  import tempfile
 
7
  import textwrap
8
+ from pathlib import Path
9
+ from typing import Dict
10
+ import pandas as pd
11
+ import requests
12
+ from bs4 import BeautifulSoup
13
+
14
+ from config import (
15
+ TAVILY_API_KEY,
16
+ MODEL_NAME,
17
+ MODEL_API_VERSION,
18
+ MODEL_ENDPOINT,
19
+ MODEL_KEY,
20
+ )
21
 
22
+ from langchain_core.tools import tool
23
+ from langchain_community.tools.tavily_search import TavilySearchResults
24
+ from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
25
+ from openai import AzureOpenAI
26
+ from faster_whisper import WhisperModel
27
+
28
+ # =========================================
29
  # Search Tools
30
+ # =========================================
31
+
32
+
33
  @tool
34
  def wiki_search(query: str) -> str:
35
  """
 
45
  for doc in docs:
46
  # Get the standard wiki summary
47
  wiki_summary = f"\nTitle: {doc.metadata.get('title')}\nURL: {doc.metadata.get('source')}\n\n"
48
+
49
  # Scrape and clean the full webpage
50
  try:
51
  headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
52
  response = requests.get(doc.metadata.get('source'), headers=headers)
53
  response.raise_for_status()
54
  soup = BeautifulSoup(response.text, 'html.parser')
55
+
56
  # Remove unwanted elements
57
  unwanted_elements = [
58
  '.mw-jump-link', '.mw-editsection', '.reference', # Wiki specific
 
63
  ]
64
  for element in soup.select(','.join(unwanted_elements)):
65
  element.decompose()
66
+
67
  # Get main content area
68
  content_div = soup.select_one('#mw-content-text')
69
  if content_div:
 
74
  else:
75
  full_text = soup.get_text(separator='\n', strip=True)
76
 
 
77
  # Combine wiki summary with cleaned webpage content
78
  combined_result = f"{wiki_summary}\n### Full Article Content ###\n{full_text}"
79
  results.append(combined_result)
80
+
81
  except Exception as e:
82
+ print(f"Error scraping Wikipedia page: {e}")
83
  results.append(wiki_summary)
84
 
85
  # Join all results with clear separators
86
+ formatted_results = "\n\n" + "=" * 20 + "\n\n".join(results)
87
  return formatted_results
88
 
89
+
90
  @tool
91
  def tavily_search(query: str) -> str:
92
  """
 
109
 
110
  return formatted_results
111
 
112
+
113
  @tool
114
  def arxiv_search(query: str) -> str:
115
  """
 
132
 
133
  return formatted_results
134
 
135
+
136
  @tool
137
  def scrape_webpage(url: str) -> str:
138
  """
 
147
  response = requests.get(url, headers=headers)
148
  response.raise_for_status()
149
  soup = BeautifulSoup(response.text, 'html.parser')
150
+
151
  # Remove script and style elements
152
  for script in soup(['script', 'style']):
153
  script.decompose()
154
+
155
  # Get text content
156
  text = soup.get_text(separator='\n', strip=True)
157
  return text
158
  except Exception as e:
159
  return f"Error scraping webpage: {str(e)}"
160
 
161
+
162
+ # =========================================
163
  # Math Tools
164
+ # =========================================
165
+
166
+
167
  @tool
168
  def add(x: float, y: float) -> float:
169
  """
 
176
  """
177
  return x + y
178
 
179
+
180
  @tool
181
  def subtract(x: float, y: float) -> float:
182
  """
 
189
  """
190
  return x - y
191
 
192
+
193
  @tool
194
  def multiply(x: float, y: float) -> float:
195
  """
 
202
  """
203
  return x * y
204
 
205
+
206
  @tool
207
  def divide(x: float, y: float) -> float:
208
  """
 
217
  raise ValueError("Cannot divide by zero.")
218
  return x / y
219
 
220
+
221
  @tool
222
  def power(x: float, y: float) -> float:
223
  """
 
230
  """
231
  return x ** y
232
 
233
+
234
  @tool
235
  def sqrt(x: float) -> float:
236
  """
 
244
  raise ValueError("Cannot calculate square root of a negative number.")
245
  return x ** 0.5
246
 
247
+
248
  @tool
249
  def modulus(x: float, y: float) -> float:
250
  """
 
257
  """
258
  return x % y
259
 
260
+
261
  @tool
262
  def is_commutative(set_elements: list, operation_table: list) -> bool:
263
  """
 
275
  return False
276
  return True
277
 
278
+
279
  @tool
280
  def commutativity_counterexample_pairs(set_elements: list, operation_table: list) -> list:
281
  """
 
294
  pairs.append((set_elements[i], set_elements[j]))
295
  return pairs
296
 
297
+
298
  @tool
299
  def commutativity_counterexample_elements(set_elements: list, operation_table: list) -> str:
300
  """
 
314
  involved.add(set_elements[j])
315
  return ",".join(sorted(involved))
316
 
317
+
318
  @tool
319
  def is_associative(set_elements: list, operation_table: list) -> bool:
320
  """
 
340
  return False
341
  return True
342
 
343
+
344
  @tool
345
  def find_identity_element(set_elements: list, operation_table: list) -> str:
346
  """
 
363
  return candidate
364
  return ""
365
 
366
+
367
  @tool
368
  def find_inverses(set_elements: list, operation_table: list) -> dict:
369
  """
 
378
  identity = find_identity_element(set_elements, operation_table)
379
  if not identity:
380
  return {e: None for e in set_elements}
 
 
381
  inverses = {}
382
  for i in range(n):
383
  found = None
 
388
  inverses[set_elements[i]] = found
389
  return inverses
390
 
391
+
392
+ # =========================================
393
  # Image Tools
394
+ # =========================================
395
+
396
+
397
  @tool
398
  def analyze_image(question: str, path: str) -> str:
399
  """
 
413
  p = Path(path).expanduser().resolve()
414
  if not p.exists():
415
  raise ValueError(f"Image file does not exist: {p}")
416
+
417
  mime = "image/png" if p.suffix.lower() == ".png" else "image/jpeg"
418
  with open(p, "rb") as f:
419
  base64_image = f"data:{mime};base64,{base64.b64encode(f.read()).decode('utf-8')}"
 
433
 
434
  return response.choices[0].message.content.strip()
435
 
436
+
437
+ # =========================================
438
  # Audio Tools
439
+ # =========================================
440
+
441
+
442
  @tool
443
  def transcribe_audio(path: str) -> str:
444
  """
 
462
  text = "".join(seg.text for seg in segments).strip()
463
  return text
464
 
465
+
466
+ # =========================================
467
  # Code Tools
468
+ # =========================================
469
+
470
def _run_with(*argv):
    """Return a command builder: (source_path, work_dir) -> list of argv lists.

    Each builder prepends the fixed interpreter arguments to the copied
    source file's name; the work-dir argument is accepted but unused.
    """
    return lambda src, _work: [[*argv, src.name]]


# Maps a source-file extension to the command(s) that execute it.
LANG_COMMANDS: Dict[str, callable] = {
    ".py": _run_with("python3"),
    ".js": _run_with("node"),
    ".ts": _run_with("deno", "run", "-A"),
    ".sh": _run_with("bash"),
    ".rb": _run_with("ruby"),
    ".php": _run_with("php"),
    ".go": _run_with("go", "run"),
}
479
 
480
+
481
  @tool
482
+ def execute_source_file(path: str, timeout: int = 10) -> str:
483
  """
484
  Run the program contained in *path*
485
  Returns a newline-separated string:
 
495
  src = Path(path).expanduser().resolve(strict=True)
496
  if src.suffix not in LANG_COMMANDS:
497
  raise ValueError(f"Unsupported file extension: {src.suffix}")
498
+
499
  # Temp work dir for the program
500
  work = Path(tempfile.mkdtemp(prefix="exec_tool_"))
501
  shutil.copy(src, work / src.name)
 
522
  f"STDOUT: {full_out}\n"
523
  f"STDERR: {full_err}"
524
  )
525
+
526
  finally:
527
  shutil.rmtree(work)
528
 
529
+
530
+ # =========================================
531
  # Tabular data tools
532
+ # =========================================
533
+
534
  MAX_BYTES_RETURN = 200000
535
 
536
+
537
  # Helper functions
538
  def _load_table(path: Path, sheet: str) -> pd.DataFrame:
539
  """
 
553
  return pd.read_parquet(path)
554
  raise ValueError(f"Unsupported file extension: {ext}")
555
 
556
+
557
  def _safe_truncate(text: str, limit: int = MAX_BYTES_RETURN) -> tuple[str, bool]:
558
  """
559
  Truncate text to a given limit.
 
616
  result = buf.getvalue()
617
  else:
618
  raise ValueError(f"Unsupported operation: {operation}")
619
+
620
  result, truncated = _safe_truncate(result)
621
 
622
  info = {