RalphThings committed on
Commit
1d46a94
·
verified ·
1 Parent(s): 7d89fac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +437 -73
app.py CHANGED
@@ -1,10 +1,12 @@
1
  import os, re, requests, pandas as pd, gradio as gr
2
- from transformers import pipeline
3
  from langchain_huggingface import HuggingFacePipeline, ChatHuggingFace
 
4
  from langchain.tools import tool
5
  from langchain_core.output_parsers import JsonOutputParser
6
  from langchain.agents import AgentExecutor, create_react_agent, initialize_agent, AgentType
7
  from youtube_transcript_api import YouTubeTranscriptApi
 
8
  import chess, chess.engine
9
  from bs4 import BeautifulSoup
10
  from SPARQLWrapper import SPARQLWrapper, JSON
@@ -14,81 +16,423 @@ from SPARQLWrapper import SPARQLWrapper, JSON
14
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
  HF_TOKEN = os.getenv("HF_TOKEN", None)
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  @tool
18
  def wiki_get_page(title: str) -> str:
19
- """Fetch raw wikitext for a given Wikipedia page title"""
 
 
 
 
20
  API = "https://en.wikipedia.org/w/api.php"
21
- params = {"action": "query", "format": "json", "prop": "revisions", "rvprop": "content", "rvslots": "*", "titles": title}
22
- data = requests.get(API, params=params, timeout=10).json()
23
- page = next(iter(data["query"]["pages"].values()))
24
- return page["revisions"][0]["slots"]["main"]["*"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  @tool
27
  def youtube_transcript(video_id: str) -> str:
28
- """Retrieve transcript for a YouTube video ID"""
29
- transcript = YouTubeTranscriptApi().fetch_transcript(video_id)
30
- return " ".join(t["text"] for t in transcript)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  @tool
33
  def reverse_text(text: str) -> str:
34
- """Reverse the input string"""
 
 
35
  return text[::-1]
36
 
37
  @tool
38
- def find_non_commutative(table: dict) -> list:
39
- """Elements involved in non-commutativity"""
40
- elems = set(x for x,_ in table.keys())
41
- bad = set()
42
- for x in elems:
43
- for y in elems:
44
- if table[(x,y)] != table[(y,x)]:
45
- bad.update([x,y])
46
- return sorted(bad)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  @tool
49
  def libretext_extract(query: str) -> str:
50
- """Input 'url||css_selector', returns the first matching element's text"""
51
- url, selector = query.split("||", 1)
52
- r = requests.get(url, timeout=10)
53
- soup = BeautifulSoup(r.text, "html.parser")
54
- return soup.select_one(selector).get_text(strip=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  @tool
57
- def classify_vegetables(items: list) -> list:
58
- """Alphabetize true vegetables"""
59
- VEGETABLE_SET = {"bell pepper","broccoli","celery","green beans","lettuce","zucchini","sweet potatoes"}
60
- return sorted([i for i in items if i in VEGETABLE_SET])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  @tool
 
 
63
  def execute_code(code: str) -> str:
64
- """Execute code and return `output`"""
 
 
 
 
 
 
 
65
  local_ns = {}
66
- exec(code, {"__builtins__": {}}, local_ns)
67
- return str(local_ns.get("output", ""))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  @tool
70
  def least_athletes_olympics(year: int) -> str:
71
- """IOC code of least-athlete country"""
72
- url = f"https://en.wikipedia.org/wiki/{year}_Summer_Olympics"
73
- r = requests.get(url)
74
- soup = BeautifulSoup(r.text,"html.parser")
75
- table = soup.find("table","wikitable")
76
- rows = table.find_all("tr")[1:]
77
- data = [(r.find_all("td")[0].get_text(strip=True), int(r.find_all("td")[1].get_text(strip=True))) for r in rows]
78
- min_val = min(c for _,c in data)
79
- candidates = sorted([code for code,count in data if count==min_val])
80
- return candidates[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  @tool
83
  def get_nasa_award_number(qid: str) -> str:
84
- """NASA award number for Wikidata QID"""
 
 
 
 
 
 
 
85
  sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
86
- sparql.setQuery(f'SELECT ?award WHERE {{ wd:{qid} wdt:P496 ?award. }}')
 
 
 
 
 
 
 
 
 
87
  sparql.setReturnFormat(JSON)
88
- res = sparql.query().convert()
89
- return res["results"]["bindings"][0]["award"]["value"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  TOOLS = [
 
 
 
 
 
92
  wiki_get_page,
93
  youtube_transcript,
94
  reverse_text,
@@ -101,6 +445,7 @@ TOOLS = [
101
  ]
102
 
103
  SYSTEM_MESSAGE = """You are a concise AI assistant with access to the following tools:
 
104
  - wiki_get_page(title: string) → string
105
  - youtube_transcript(video_id: string) → string
106
  - reverse_text(text: string) → string
@@ -110,9 +455,15 @@ SYSTEM_MESSAGE = """You are a concise AI assistant with access to the following
110
  - execute_code(code: string) → string
111
  - least_athletes_olympics(year: int) → string
112
  - get_nasa_award_number(qid: string) → string
 
 
 
 
113
  When you need to use a tool, respond exactly with:
114
  Action: <tool_name>(<arg_name>=<value>, ...)
115
  Then wait for the tool’s output before continuing.
 
 
116
  Once you have all the information, provide your final answer in as few words as possible, with no extra commentary or prefixes.
117
  """
118
 
@@ -123,45 +474,58 @@ class BasicAgent:
123
  # initialize HF inference pipeline once
124
  if HF_TOKEN is None:
125
  raise ValueError("HF_TOKEN not set in environment")
126
- pipe = pipeline("text-generation", model="EleutherAI/gpt-neo-125M", max_new_tokens=16)
127
- self.llm = HuggingFacePipeline(pipeline=pipe) #.bind_tools(TOOLS)
128
- #hf_pipe = HuggingFacePipeline.from_model_id(
129
- # model_id="EleutherAI/gpt-neo-125M",
130
- # task="text-generation",
131
- # pipeline_kwargs={"max_new_tokens":16},
132
- #)
133
- #chat = ChatHuggingFace(llm=hf_pipe) # wrap in chat‐model
134
- #self.llm = chat.bind_tools(TOOLS) # now this works :contentReference[oaicite:0]{index=0}
135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  self.agent = initialize_agent(
137
  tools=TOOLS,
138
  llm=self.llm,
139
  agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
140
- system_message=SYSTEM_MESSAGE,
141
  verbose=True,
142
- handle_parsing_errors=True
 
143
  )
144
-
145
- # The GAIA system prompt (no "FINAL ANSWER:" at the end)
146
- #self.system_prompt = SYSTEM_MESSAGE
147
  print("BasicAgent initialized with LLM.")
148
 
149
  # --- Core dispatcher/fallback ---
150
  def __call__(self, question: str) -> str:
151
- #prompt = f"{self.system_prompt}Q: {question}\nA:"
152
- #out = self.generator(prompt, max_new_tokens=16, return_full_text=False)
153
- #return out[0]["generated_text"].strip()
154
- # build a zero-shot-react-description agent for LLM+tools
155
- #agent_executor = initialize_agent(tools=TOOLS, llm=self.llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
156
- # simply run the agent on the user’s question
157
- #answer = agent_executor.run(question)
158
- #return answer.strip()
159
- return self.agent.run(question).strip()
160
- #agent = create_react_agent(llm=self.llm, tools=TOOLS, prompt=prompt)
161
- #agent = AgentExecutor(agent=agent, tools=TOOLS, verbose=True, return_intermediate_steps=False)
162
- #agent = AgentExecutor(agent=self.llm, tools=TOOLS, prompt=prompt, verbose=False, return_intermediate_steps=False)
163
- #result = agent.invoke({"input": question})
164
- #return JsonOutputParser().parse(result)
165
 
166
  def run_and_submit_all( profile: gr.OAuthProfile | None):
167
  """
 
1
  import os, re, requests, pandas as pd, gradio as gr
2
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
3
  from langchain_huggingface import HuggingFacePipeline, ChatHuggingFace
4
+ from langchain_community.tools import DuckDuckGoSearchRun
5
  from langchain.tools import tool
6
  from langchain_core.output_parsers import JsonOutputParser
7
  from langchain.agents import AgentExecutor, create_react_agent, initialize_agent, AgentType
8
  from youtube_transcript_api import YouTubeTranscriptApi
9
+ import whisper
10
  import chess, chess.engine
11
  from bs4 import BeautifulSoup
12
  from SPARQLWrapper import SPARQLWrapper, JSON
 
16
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
17
  HF_TOKEN = os.getenv("HF_TOKEN", None)
18
 
19
@tool
def web_search(query: str) -> str:
    """Run a DuckDuckGo web search and return the results as text.

    Args:
        query: Free-text search query.

    Returns:
        The search results as a single string, or an error message string if
        the search backend raises.
    """
    try:
        return DuckDuckGoSearchRun().run(query)
    except Exception as e:
        # Tool boundary: report the failure as text (like the other tools in
        # this file) so the agent can react instead of the whole run aborting.
        return f"Error running web search for '{query}': {e}"
24
+
25
@tool
def read_file(file_path: str) -> str:
    """Return the complete contents of a UTF-8 encoded text file.

    Any failure while opening or reading is reported as an error message
    string rather than raised.
    """
    try:
        with open(file_path, mode='r', encoding='utf-8') as handle:
            contents = handle.read()
    except Exception as err:
        return f"Error reading file {file_path}: {err}"
    return contents
33
+
34
@tool
def read_excel_cell(file_path: str, sheet_name: str | int = 0, row: int = 1, col: int = 1) -> str:
    """Read a single cell from an Excel sheet.

    Args:
        file_path: Path to the workbook.
        sheet_name: Sheet name or 0-based sheet position (default: first sheet).
        row: 1-based row number of the cell (row 1 is the first sheet row).
        col: 1-based column number of the cell (column 1 is column A).

    Returns:
        The cell value converted to ``str``, or an error message string.
    """
    # NOTE: `row` and `col` need defaults because they follow the defaulted
    # `sheet_name`; without them the signature is a SyntaxError.
    try:
        if row < 1 or col < 1:
            # iloc would silently wrap non-positive indices to the end.
            raise ValueError(f"row and col must be >= 1 (got row={row}, col={col})")
        # header=None keeps the first sheet row as data, so (1, 1) is cell A1
        # instead of the first row being consumed as column names.
        df = pd.read_excel(file_path, sheet_name=sheet_name, header=None)
        return str(df.iloc[row - 1, col - 1])
    except Exception as e:
        return f"Error reading Excel file {file_path}: {e}"
42
+
43
@tool
def transcribe_audio(file_path: str) -> str:
    """Transcribe the speech in an audio file using Whisper's "base" model.

    Returns the recognized text, or an error message string if loading the
    model or transcribing fails.
    """
    try:
        # The model is loaded on every call; "base" could be swapped for
        # tiny, small, medium or large.
        recognizer = whisper.load_model("base")
        return recognizer.transcribe(file_path)["text"]
    except Exception as err:
        return f"Error transcribing audio file {file_path}: {err}"
53
+
54
@tool
def analyze_sales_data(file_path: str) -> str:
    """Sum the 'Total Sales' column of an Excel file over all non-drink rows.

    The workbook is expected to have 'Category' and 'Total Sales' columns;
    rows whose category is exactly 'Drink' are excluded. The total is returned
    formatted as USD with two decimals, or an error message string on failure.
    """
    try:
        sheet = pd.read_excel(file_path)
        is_food = sheet['Category'] != 'Drink'
        total = sheet[is_food]['Total Sales'].sum()
        return f"${total:.2f}"
    except Exception as err:
        return f"Error processing sales data from {file_path}: {err}"
64
+
65
@tool
def find_chess_mate_move(fen: str, engine_path: str = "/usr/bin/stockfish") -> str:
    """
    Given a FEN string for a chess position with Black to move, ask a UCI
    engine (Stockfish) for Black's best move.

    Args:
        fen: Position in Forsyth-Edwards Notation; Black must be the side to move.
        engine_path: Path to the Stockfish executable.

    Returns:
        The chosen move in UCI coordinate notation (e.g. 'd8h4'), or an error
        message string if the FEN is invalid, it is not Black's turn, or the
        engine cannot be started or queried.
    """
    try:
        # Validate the position before spawning the engine so the early
        # return cannot leave an engine process running.
        board = chess.Board(fen)
        if board.turn != chess.BLACK:
            return "Error: It's not Black's turn in the provided FEN."

        # The context manager shuts the engine down on every exit path,
        # including exceptions raised while analysing.
        with chess.engine.SimpleEngine.popen_uci(engine_path) as engine:
            info = engine.analyse(board, chess.engine.Limit(time=2.0))
            score = info.get("score")
            pv = info.get("pv")

            if score is not None and pv:
                if score.is_mate():
                    # From White's point of view a negative mate distance
                    # means White is the side being mated.
                    black_winning = score.white().mate() < 0
                else:
                    # `relative` is from the side to move (Black here), so a
                    # Black advantage of 5+ pawns is a score above +500.
                    black_winning = score.relative.score(mate_score=10000) > 500
                if black_winning:
                    return pv[0].uci()

            # No forced win found quickly: return the engine's best move anyway.
            result = engine.play(board, chess.engine.Limit(time=1.0))
            return result.move.uci()

    except Exception as e:
        return f"Chess engine error: {e}. Is Stockfish installed at {engine_path} and is the FEN valid?"
99
+
100
  @tool
101
  def wiki_get_page(title: str) -> str:
102
+ """
103
+ Fetch raw wikitext content for a given English Wikipedia page title.
104
+ Returns the page content as a string or an error message.
105
+ Note: Raw wikitext can be complex to parse.
106
+ """
107
  API = "https://en.wikipedia.org/w/api.php"
108
+ params = {
109
+ "action": "query",
110
+ "format": "json",
111
+ "prop": "revisions",
112
+ "rvprop": "content",
113
+ "rvslots": "*",
114
+ "titles": title,
115
+ "redirects": 1 # Automatically follow redirects
116
+ }
117
+ try:
118
+ response = requests.get(API, params=params, timeout=REQUESTS_TIMEOUT, headers=HEADERS)
119
+ response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
120
+ data = response.json()
121
+ page = next(iter(data["query"]["pages"].values()))
122
+
123
+ if "missing" in page:
124
+ return f"Error: Wikipedia page '{title}' not found."
125
+ if "invalid" in page:
126
+ return f"Error: Invalid page title '{title}' requested."
127
+ if "revisions" not in page or not page["revisions"]:
128
+ return f"Error: No revisions found for page '{title}' (page might be empty or protected)."
129
+
130
+ # Access content safely
131
+ content = page["revisions"][0].get("slots", {}).get("main", {}).get("*")
132
+ if content is None:
133
+ return f"Error: Could not extract main content slot for page '{title}'."
134
+ return content
135
+
136
+ except requests.exceptions.RequestException as e:
137
+ return f"Error fetching Wikipedia page '{title}': Network error - {e}"
138
+ except KeyError as e:
139
+ return f"Error parsing Wikipedia response for '{title}': Unexpected structure - missing key {e}"
140
+ except Exception as e:
141
+ return f"An unexpected error occurred fetching Wikipedia page '{title}': {e}"
142
 
143
  @tool
144
  def youtube_transcript(video_id: str) -> str:
145
+ """
146
+ Retrieve the English transcript for a given YouTube video ID.
147
+ Returns the transcript as a single string or an error message.
148
+ """
149
+ try:
150
+ # Fetch available transcripts and prioritize English
151
+ transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
152
+ transcript = transcript_list.find_generated_transcript(['en']) # Prefer generated English
153
+ # You could add fallbacks here for manual 'en' or other languages if needed
154
+ # transcript = transcript_list.find_manually_created_transcript(['en'])
155
+ # transcript = transcript_list.find_transcript(['en', 'en-US', ...])
156
+
157
+ full_transcript = transcript.fetch()
158
+ return " ".join(t["text"] for t in full_transcript)
159
+ except (TranscriptsDisabled, NoTranscriptFound):
160
+ return f"Error: Transcripts are disabled or no English transcript found for YouTube video ID '{video_id}'."
161
+ except Exception as e:
162
+ # Catch other potential errors from the API or network issues
163
+ return f"An unexpected error occurred fetching transcript for YouTube video ID '{video_id}': {e}"
164
 
165
  @tool
166
  def reverse_text(text: str) -> str:
167
+ """Reverses the input string character by character."""
168
+ if not isinstance(text, str):
169
+ return "Error: Input must be a string."
170
  return text[::-1]
171
 
172
  @tool
173
+ def find_non_commutative(table: dict) -> str:
174
+ """
175
+ Given a dictionary representing a multiplication table (keys are tuples (row_elem, col_elem)),
176
+ finds all elements involved in non-commutative pairs (where table[(x,y)] != table[(y,x)]).
177
+ Returns a comma-separated list of these elements in alphabetical order, or an error message.
178
+ Example input: {('a','a'):'a', ('a','b'):'c', ('b','a'):'b', ...}
179
+ """
180
+ try:
181
+ if not isinstance(table, dict):
182
+ return "Error: Input must be a dictionary."
183
+ if not all(isinstance(k, tuple) and len(k) == 2 for k in table.keys()):
184
+ return "Error: Dictionary keys must be tuples of length 2, e.g., ('a', 'b')."
185
+
186
+ elems = sorted(list(set(x for k in table.keys() for x in k))) # Get all unique elements alphabetically
187
+ bad_elements = set()
188
+
189
+ for x in elems:
190
+ for y in elems:
191
+ # Check if both pairs exist in the table before comparing
192
+ pair_xy = (x, y)
193
+ pair_yx = (y, x)
194
+ if pair_xy in table and pair_yx in table:
195
+ if table[pair_xy] != table[pair_yx]:
196
+ bad_elements.add(x)
197
+ bad_elements.add(y)
198
+ # Optional: Handle cases where one pair exists but the other doesn't,
199
+ # depending on how strictly commutativity should be defined for partial tables.
200
+ # else:
201
+ # # If one exists and the other doesn't, it could be considered non-commutative
202
+ # # or simply an incomplete table. Current logic ignores this.
203
+ # pass
204
+
205
+
206
+ if not bad_elements:
207
+ return "Result: The operation defined by the table is commutative for all checked pairs."
208
+ return ",".join(sorted(list(bad_elements)))
209
+
210
+ except Exception as e:
211
+ return f"An unexpected error occurred processing the table: {e}"
212
+
213
 
214
  @tool
215
  def libretext_extract(query: str) -> str:
216
+ """
217
+ Extracts text content from a web page using a URL and a CSS selector.
218
+ Input must be a string formatted as 'url||css_selector'.
219
+ Returns the text of the first matching element or an error message.
220
+ """
221
+ try:
222
+ if "||" not in query:
223
+ return "Error: Input format must be 'url||css_selector'."
224
+ url, selector = query.split("||", 1)
225
+
226
+ response = requests.get(url, timeout=REQUESTS_TIMEOUT, headers=HEADERS)
227
+ response.raise_for_status()
228
+ soup = BeautifulSoup(response.text, "html.parser")
229
+ element = soup.select_one(selector)
230
+
231
+ if element:
232
+ return element.get_text(strip=True)
233
+ else:
234
+ return f"Error: CSS selector '{selector}' did not find any elements on page {url}."
235
+
236
+ except requests.exceptions.RequestException as e:
237
+ return f"Error fetching URL '{url}': Network error - {e}"
238
+ except Exception as e:
239
+ # Catch potential errors from BeautifulSoup or invalid selectors
240
+ return f"An unexpected error occurred during extraction from {url}: {e}"
241
 
242
  @tool
243
+ def classify_vegetables(items: list) -> str:
244
+ """
245
+ Filters a list of items, keeping only those considered common culinary vegetables.
246
+ Returns a comma-separated, alphabetized list of the identified vegetables.
247
+ Note: This uses a predefined list and may not align perfectly with botanical definitions
248
+ (e.g., tomatoes, bell peppers are botanically fruits but often treated as vegetables).
249
+ Input items should be strings.
250
+ """
251
+ # Using a case-insensitive comparison by converting known veggies to lowercase
252
+ # Added more items, still imperfect and culturally dependent.
253
+ VEGETABLE_SET = {
254
+ "broccoli", "celery", "green beans", "lettuce", "zucchini", "sweet potato", # original + fixed space
255
+ "carrot", "spinach", "kale", "onion", "garlic", "potato", "cabbage", "asparagus",
256
+ "cucumber", # Botanically fruit, culinary vegetable
257
+ "bell pepper", # Botanically fruit, culinary vegetable
258
+ "corn", # Botanically fruit/grain, culinary vegetable
259
+ # Avoid controversial ones like tomato unless explicitly needed
260
+ }
261
+ try:
262
+ if not isinstance(items, list):
263
+ return "Error: Input must be a list of strings."
264
+ # Filter using lowercase comparison
265
+ vegetables = sorted([item for item in items if isinstance(item, str) and item.lower() in VEGETABLE_SET])
266
+ if not vegetables:
267
+ return "Result: No items from the list were classified as vegetables based on the predefined set."
268
+ return ",".join(vegetables)
269
+ except Exception as e:
270
+ return f"An unexpected error occurred classifying vegetables: {e}"
271
 
272
  @tool
273
+ # Optional: Add timeout to prevent runaway code execution
274
+ # @timeout_decorator.timeout(10, timeout_exception=TimeoutError) # Limit execution to 10 seconds
275
  def execute_code(code: str) -> str:
276
+ """
277
+ Executes a given Python code snippet and returns the value of the 'output' variable.
278
+ WARNING: Executes arbitrary code. Use with extreme caution in trusted environments only.
279
+ The code runs in a restricted environment, but vulnerabilities might exist.
280
+ The code should assign its result to a variable named 'output'.
281
+ Example: "output = sum([1, 2, 3])"
282
+ """
283
+ print(f"[!!!] Executing potentially unsafe code:\n---\n{code}\n---") # Log execution
284
  local_ns = {}
285
+ # Restrict builtins more severely for safety. Allow only necessary ones.
286
+ # This is still not perfectly safe. Sandboxing is complex.
287
+ safe_builtins = {
288
+ 'print': print, # Allow print for debugging within the code
289
+ 'range': range, 'len': len, 'list': list, 'dict': dict, 'set': set,
290
+ 'str': str, 'int': int, 'float': float, 'bool': bool, 'sum': sum,
291
+ 'min': min, 'max': max, 'abs': abs, 'pow': pow, 'round': round,
292
+ 'True': True, 'False': False, 'None': None,
293
+ # Add other safe builtins carefully if absolutely required by expected code snippets
294
+ }
295
+ # Also restrict imports if possible, though exec doesn't directly prevent them easily.
296
+
297
+ try:
298
+ # Using exec within a function's local scope
299
+ exec(code, {"__builtins__": safe_builtins}, local_ns)
300
+ # Check if 'output' was assigned, otherwise return empty string or error
301
+ output_val = local_ns.get("output", None)
302
+ if output_val is None:
303
+ return "Result: Code executed, but no variable named 'output' was assigned."
304
+ return str(output_val)
305
+ # except TimeoutError:
306
+ # return "Error: Code execution timed out."
307
+ except Exception as e:
308
+ # Capture and return execution errors
309
+ error_details = traceback.format_exc()
310
+ print(f"Error during code execution: {e}\n{error_details}") # Log full traceback
311
+ return f"Error during code execution: {type(e).__name__}: {e}"
312
+
313
 
314
  @tool
315
  def least_athletes_olympics(year: int) -> str:
316
+ """
317
+ Finds the country (IOC code) that sent the fewest athletes to the specified Summer Olympics year.
318
+ Data is scraped from the English Wikipedia page for that year's Olympics.
319
+ Returns the IOC code as a string. If there's a tie, returns the first code alphabetically.
320
+ Returns an error message if data cannot be retrieved or parsed.
321
+ """
322
+ try:
323
+ if not isinstance(year, int):
324
+ return "Error: Year must be an integer."
325
+
326
+ url = f"https://en.wikipedia.org/wiki/{year}_Summer_Olympics"
327
+ response = requests.get(url, timeout=REQUESTS_TIMEOUT, headers=HEADERS)
328
+ response.raise_for_status()
329
+ soup = BeautifulSoup(response.text, "html.parser")
330
+
331
+ # Find the participating NOCs table - this selector might need adjustment over time
332
+ # Look for tables with captions containing 'Participating National Olympic Committees' or similar
333
+ tables = soup.find_all("table", class_="wikitable")
334
+ noc_table = None
335
+ for table in tables:
336
+ caption = table.find("caption")
337
+ # Check caption text or look for characteristic headers like 'NOC', 'Athletes'
338
+ if caption and "Participating National Olympic" in caption.get_text():
339
+ noc_table = table
340
+ break
341
+ # Fallback: check headers if no caption found or caption doesn't match
342
+ headers = [th.get_text(strip=True).lower() for th in table.find_all("th")]
343
+ if "noc" in headers and "athletes" in headers:
344
+ noc_table = table
345
+ break
346
+
347
+ if noc_table is None:
348
+ return f"Error: Could not find the expected NOC table on the Wikipedia page for {year} Summer Olympics."
349
+
350
+ rows = noc_table.find_all("tr")[1:] # Skip header row
351
+ data = []
352
+ for r in rows:
353
+ cols = r.find_all("td")
354
+ # Adapt column indices based on typical table structure (NOC code, Athletes count)
355
+ # This is fragile and depends on Wikipedia's table layout.
356
+ try:
357
+ # Attempt to find columns by text content or relative position
358
+ # Assuming NOC code is often linked, e.g., inside an <a> tag
359
+ noc_link = cols[0].find("a")
360
+ noc_code = noc_link.get_text(strip=True) if noc_link else cols[0].get_text(strip=True)
361
+ # Clean up potential bracketed numbers like (123) in NOC code cell
362
+ noc_code = re.sub(r'\s*\(\d+\)\s*$', '', noc_code).strip()
363
+
364
+ # Find athletes column - often the next column, check if it's numeric
365
+ athletes_text = cols[1].get_text(strip=True).replace(',', '') # Remove commas
366
+ athletes_count = int(athletes_text)
367
+
368
+ data.append((noc_code, athletes_count))
369
+ except (IndexError, ValueError, AttributeError):
370
+ # Skip rows that don't match the expected format
371
+ print(f"Skipping malformed row in table for {year}: {r.get_text(strip=True)}")
372
+ continue
373
+
374
+ if not data:
375
+ return f"Error: No valid NOC/athlete data parsed from the table for {year}."
376
+
377
+ min_athletes = min(count for _, count in data)
378
+ candidates = sorted([code for code, count in data if count == min_athletes])
379
+
380
+ if not candidates:
381
+ return f"Error: Could not determine country with fewest athletes for {year}."
382
+ return candidates[0]
383
+
384
+ except requests.exceptions.RequestException as e:
385
+ return f"Error fetching Olympics page for {year}: Network error - {e}"
386
+ except Exception as e:
387
+ return f"An unexpected error occurred processing Olympics data for {year}: {e}\n{traceback.format_exc()}"
388
+
389
 
390
  @tool
391
  def get_nasa_award_number(qid: str) -> str:
392
+ """
393
+ Retrieves the NASA award number (property P496) associated with a given Wikidata Item QID.
394
+ Input must be a valid Wikidata QID string (e.g., 'Q42').
395
+ Returns the award number as a string, or an error message.
396
+ """
397
+ if not isinstance(qid, str) or not re.match(r'^Q\d+$', qid):
398
+ return f"Error: Invalid Wikidata QID format provided: '{qid}'. Must be like 'Q42'."
399
+
400
  sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
401
+ sparql.setMethod('POST') # Recommended by Wikidata for robustness
402
+ sparql.agent = HEADERS['User-Agent'] # Set User-Agent for SPARQL queries
403
+
404
+ query = f"""
405
+ SELECT ?award WHERE {{
406
+ wd:{qid} wdt:P496 ?award .
407
+ }}
408
+ LIMIT 1
409
+ """
410
+ sparql.setQuery(query)
411
  sparql.setReturnFormat(JSON)
412
+
413
+ try:
414
+ results = sparql.query().convert()
415
+ bindings = results.get("results", {}).get("bindings", [])
416
+
417
+ if bindings:
418
+ award = bindings[0].get("award", {}).get("value")
419
+ if award:
420
+ return award
421
+ else:
422
+ return f"Error: Found property P496 for {qid}, but the award value is missing."
423
+ else:
424
+ return f"Error: No NASA award number (P496) found for Wikidata item {qid}."
425
+
426
+ except Exception as e:
427
+ # Catch SPARQL query errors, network issues, JSON parsing problems
428
+ return f"An error occurred querying Wikidata for {qid}: {e}"
429
 
430
  TOOLS = [
431
+ web_search,
432
+ read_file,
433
+ transcribe_audio,
434
+ analyze_sales_data, # Or a more general excel tool
435
+ find_chess_mate_move, # Needs image-to-FEN first!
436
  wiki_get_page,
437
  youtube_transcript,
438
  reverse_text,
 
445
  ]
446
 
447
  SYSTEM_MESSAGE = """You are a concise AI assistant with access to the following tools:
448
+ - web_search(query: string) -> string
449
  - wiki_get_page(title: string) → string
450
  - youtube_transcript(video_id: string) → string
451
  - reverse_text(text: string) → string
 
455
  - execute_code(code: string) → string
456
  - least_athletes_olympics(year: int) → string
457
  - get_nasa_award_number(qid: string) → string
458
+ - read_file(file_path: string) -> string
459
+ - transcribe_audio(file_path: string) -> string
460
+ - analyze_sales_data(file_path: string) -> string
461
+ - find_chess_mate_move(fen: string, engine_path: string = "/usr/bin/stockfish") -> string
462
  When you need to use a tool, respond exactly with:
463
  Action: <tool_name>(<arg_name>=<value>, ...)
464
  Then wait for the tool’s output before continuing.
465
+ If a tool requires a file path, assume the file is accessible in the current environment.
466
+ If a question involves an image or audio file, state that you need the content extracted first (e.g., text from audio, FEN from chess image) before you can proceed.
467
  Once you have all the information, provide your final answer in as few words as possible, with no extra commentary or prefixes.
468
  """
469
 
 
474
  # initialize HF inference pipeline once
475
  if HF_TOKEN is None:
476
  raise ValueError("HF_TOKEN not set in environment")
477
+
478
+ # --- Replace with your chosen LLM ---
479
+ model_id = "microsoft/Phi-3-mini-4k-instruct"
 
 
 
 
 
 
480
 
481
+ try:
482
+ tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True) # Some models need trust_remote_code
483
+ model = AutoModelForCausalLM.from_pretrained(
484
+ model_id,
485
+ torch_dtype=torch.float32, # Use float32 for CPU compatibility usually
486
+ device_map=None, # Explicitly set to None or 'cpu' for CPU
487
+ trust_remote_code=True
488
+ )
489
+ model.to('cpu') # Ensure model is on CPU
490
+
491
+ pipe = pipeline(
492
+ "text-generation",
493
+ model=model,
494
+ tokenizer=tokenizer,
495
+ max_new_tokens=512,
496
+ do_sample=False,
497
+ return_full_text=False,
498
+ # No temperature/top_k needed if do_sample=False
499
+ )
500
+ self.llm = HuggingFacePipeline(pipeline=pipe)
501
+
502
+ except ImportError as e:
503
+ raise ImportError(f"Required library not found: {e}. Make sure 'transformers', 'torch', 'accelerate' are installed.")
504
+ except Exception as e:
505
+ # Catch potential issues like model download failure, OOM errors
506
+ raise RuntimeError(f"Failed to initialize HuggingFacePipeline for {model_id}: {e}")
507
+
508
+ # --- Agent Initialization (remains the same) ---
509
  self.agent = initialize_agent(
510
  tools=TOOLS,
511
  llm=self.llm,
512
  agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
513
+ agent_kwargs={'prefix': SYSTEM_MESSAGE},
514
  verbose=True,
515
+ handle_parsing_errors="Check your output and make sure it conforms!",
516
+ max_iterations=10
517
  )
 
 
 
518
  print("BasicAgent initialized with LLM.")
519
 
520
  # --- Core dispatcher/fallback ---
521
  def __call__(self, question: str) -> str:
522
+ try:
523
+ response = self.agent.invoke({"input": question})
524
+ answer = response.get('output', "Agent did not produce an output.")
525
+ return str(answer).strip()
526
+ except Exception as e:
527
+ print(f"Error during agent execution: {e}")
528
+ return f"Agent Error: {e}"
 
 
 
 
 
 
 
529
 
530
  def run_and_submit_all( profile: gr.OAuthProfile | None):
531
  """