RalphThings committed on
Commit
28d2f1c
·
verified ·
1 Parent(s): fc01ba5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -541
app.py CHANGED
@@ -1,473 +1,99 @@
1
- import os, re, requests, pandas as pd, gradio as gr
2
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
3
- from langchain_huggingface import HuggingFacePipeline, ChatHuggingFace
4
- from langchain_community.tools import DuckDuckGoSearchRun
5
- from langchain.tools import tool
6
- from langchain_core.output_parsers import JsonOutputParser
7
- from langchain.agents import AgentExecutor, create_react_agent, initialize_agent, AgentType
8
- from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
9
- import torch
10
- import traceback # For detailed error logging
11
- #import timeout_decorator # Optional: for execute_code timeout (pip install timeout-decorator)
12
- import whisper
13
- import chess, chess.engine
14
- from bs4 import BeautifulSoup
15
- from SPARQLWrapper import SPARQLWrapper, JSON
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  # (Keep Constants as is)
18
  # --- Constants ---
19
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
20
  HF_TOKEN = os.getenv("HF_TOKEN", None)
21
- REQUESTS_TIMEOUT = 15 # Define a standard timeout for requests
22
- HEADERS = {'User-Agent': 'GAIAgent/1.0 (Langchain Agent; +http://example.com/info)'} # Be a good citizen
23
-
24
- @tool
25
- def web_search(query: str) -> str:
26
- """Runs a web search and returns the results."""
27
- search = DuckDuckGoSearchRun()
28
- return search.run(query)
29
-
30
- @tool
31
- def read_file(file_path: str) -> str:
32
- """Reads the content of a text file."""
33
- try:
34
- with open(file_path, 'r', encoding='utf-8') as f:
35
- return f.read()
36
- except Exception as e:
37
- return f"Error reading file {file_path}: {e}"
38
-
39
- @tool
40
- def transcribe_audio(file_path: str) -> str:
41
- """Transcribes audio from a file path."""
42
- try:
43
- # Load model here or use pre-loaded one
44
- model = whisper.load_model("base") # Or tiny, small, medium, large
45
- result = model.transcribe(file_path)
46
- return result["text"]
47
- except Exception as e:
48
- return f"Error transcribing audio file {file_path}: {e}"
49
-
50
- @tool
51
- def analyze_sales_data(file_path: str) -> str:
52
- """Reads the specific sales data Excel file, calculates total food sales."""
53
- try:
54
- df = pd.read_excel(file_path)
55
- # Assuming columns 'Category' and 'Total Sales' exist
56
- food_sales = df[df['Category'] != 'Drink']['Total Sales'].sum()
57
- return f"${food_sales:.2f}" # Format as USD
58
- except Exception as e:
59
- return f"Error processing sales data from {file_path}: {e}"
60
-
61
- @tool
62
- def find_chess_mate_move(fen: str) -> str:
63
- """
64
- Given a FEN string representing a chess position (Black to move),
65
- finds the best move that guarantees a win using Stockfish engine.
66
- Requires Stockfish engine installed at engine_path.
67
- Returns the move in algebraic notation (e.g., 'Qh4').
68
- """
69
- try:
70
- engine = chess.engine.SimpleEngine.popen_uci("/usr/bin/stockfish")
71
- board = chess.Board(fen)
72
- if board.turn != chess.BLACK:
73
- return "Error: It's not Black's turn in the provided FEN."
74
- info = engine.analyse(board, chess.engine.Limit(time=2.0))
75
-
76
- score = info.get("score")
77
- if score is not None and score.is_mate():
78
- mate_score = score.white().mate()
79
- if mate_score < 0:
80
- best_move = info["pv"][0]
81
- engine.quit()
82
- return best_move.uci()
83
- elif score is not None and score.relative.score(mate_score=10000) < -500: # Significant advantage for Black (-5 pawns)
84
- best_move = info["pv"][0]
85
- engine.quit()
86
- return best_move.uci()
87
-
88
- result = engine.play(board, chess.engine.Limit(time=1.0)) # Get a move anyway
89
- engine.quit()
90
- #return f"No guaranteed mate found quickly. Best move found: {result.move.uci()}"
91
- return result.move.uci() # Return best move found even if not provably mate
92
 
93
- except Exception as e:
94
- return f"Chess engine error: {e}. Is Stockfish installed at {engine_path} and is the FEN valid?"
95
-
96
- @tool
97
- def wiki_get_page(title: str) -> str:
98
- """
99
- Fetch raw wikitext content for a given English Wikipedia page title.
100
- Returns the page content as a string or an error message.
101
- Note: Raw wikitext can be complex to parse.
102
- """
103
- API = "https://en.wikipedia.org/w/api.php"
104
- params = {
105
- "action": "query",
106
- "format": "json",
107
- "prop": "revisions",
108
- "rvprop": "content",
109
- "rvslots": "*",
110
- "titles": title,
111
- "redirects": 1 # Automatically follow redirects
 
112
  }
113
- try:
114
- response = requests.get(API, params=params, timeout=REQUESTS_TIMEOUT, headers=HEADERS)
115
- response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
116
- data = response.json()
117
- page = next(iter(data["query"]["pages"].values()))
118
-
119
- if "missing" in page:
120
- return f"Error: Wikipedia page '{title}' not found."
121
- if "invalid" in page:
122
- return f"Error: Invalid page title '{title}' requested."
123
- if "revisions" not in page or not page["revisions"]:
124
- return f"Error: No revisions found for page '{title}' (page might be empty or protected)."
125
-
126
- # Access content safely
127
- content = page["revisions"][0].get("slots", {}).get("main", {}).get("*")
128
- if content is None:
129
- return f"Error: Could not extract main content slot for page '{title}'."
130
- return content
131
-
132
- except requests.exceptions.RequestException as e:
133
- return f"Error fetching Wikipedia page '{title}': Network error - {e}"
134
- except KeyError as e:
135
- return f"Error parsing Wikipedia response for '{title}': Unexpected structure - missing key {e}"
136
- except Exception as e:
137
- return f"An unexpected error occurred fetching Wikipedia page '{title}': {e}"
138
-
139
- @tool
140
- def youtube_transcript(video_id: str) -> str:
141
- """
142
- Retrieve the English transcript for a given YouTube video ID.
143
- Returns the transcript as a single string or an error message.
144
- """
145
- try:
146
- # Fetch available transcripts and prioritize English
147
- transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
148
- transcript = transcript_list.find_generated_transcript(['en']) # Prefer generated English
149
- # You could add fallbacks here for manual 'en' or other languages if needed
150
- # transcript = transcript_list.find_manually_created_transcript(['en'])
151
- # transcript = transcript_list.find_transcript(['en', 'en-US', ...])
152
-
153
- full_transcript = transcript.fetch()
154
- return " ".join(t["text"] for t in full_transcript)
155
- except (TranscriptsDisabled, NoTranscriptFound):
156
- return f"Error: Transcripts are disabled or no English transcript found for YouTube video ID '{video_id}'."
157
- except Exception as e:
158
- # Catch other potential errors from the API or network issues
159
- return f"An unexpected error occurred fetching transcript for YouTube video ID '{video_id}': {e}"
160
-
161
- @tool
162
- def reverse_text(text: str) -> str:
163
- """Reverses the input string character by character."""
164
- if not isinstance(text, str):
165
- return "Error: Input must be a string."
166
- return text[::-1]
167
-
168
- @tool
169
- def find_non_commutative(table: dict) -> str:
170
- """
171
- Given a dictionary representing a multiplication table (keys are tuples (row_elem, col_elem)),
172
- finds all elements involved in non-commutative pairs (where table[(x,y)] != table[(y,x)]).
173
- Returns a comma-separated list of these elements in alphabetical order, or an error message.
174
- Example input: {('a','a'):'a', ('a','b'):'c', ('b','a'):'b', ...}
175
- """
176
- try:
177
- if not isinstance(table, dict):
178
- return "Error: Input must be a dictionary."
179
- if not all(isinstance(k, tuple) and len(k) == 2 for k in table.keys()):
180
- return "Error: Dictionary keys must be tuples of length 2, e.g., ('a', 'b')."
181
-
182
- elems = sorted(list(set(x for k in table.keys() for x in k))) # Get all unique elements alphabetically
183
- bad_elements = set()
184
-
185
- for x in elems:
186
- for y in elems:
187
- # Check if both pairs exist in the table before comparing
188
- pair_xy = (x, y)
189
- pair_yx = (y, x)
190
- if pair_xy in table and pair_yx in table:
191
- if table[pair_xy] != table[pair_yx]:
192
- bad_elements.add(x)
193
- bad_elements.add(y)
194
- # Optional: Handle cases where one pair exists but the other doesn't,
195
- # depending on how strictly commutativity should be defined for partial tables.
196
- # else:
197
- # # If one exists and the other doesn't, it could be considered non-commutative
198
- # # or simply an incomplete table. Current logic ignores this.
199
- # pass
200
-
201
-
202
- if not bad_elements:
203
- return "Result: The operation defined by the table is commutative for all checked pairs."
204
- return ",".join(sorted(list(bad_elements)))
205
-
206
- except Exception as e:
207
- return f"An unexpected error occurred processing the table: {e}"
208
-
209
-
210
- @tool
211
- def libretext_extract(query: str) -> str:
212
- """
213
- Extracts text content from a web page using a URL and a CSS selector.
214
- Input must be a string formatted as 'url||css_selector'.
215
- Returns the text of the first matching element or an error message.
216
- """
217
- try:
218
- if "||" not in query:
219
- return "Error: Input format must be 'url||css_selector'."
220
- url, selector = query.split("||", 1)
221
-
222
- response = requests.get(url, timeout=REQUESTS_TIMEOUT, headers=HEADERS)
223
- response.raise_for_status()
224
- soup = BeautifulSoup(response.text, "html.parser")
225
- element = soup.select_one(selector)
226
-
227
- if element:
228
- return element.get_text(strip=True)
229
- else:
230
- return f"Error: CSS selector '{selector}' did not find any elements on page {url}."
231
-
232
- except requests.exceptions.RequestException as e:
233
- return f"Error fetching URL '{url}': Network error - {e}"
234
- except Exception as e:
235
- # Catch potential errors from BeautifulSoup or invalid selectors
236
- return f"An unexpected error occurred during extraction from {url}: {e}"
237
-
238
- @tool
239
- def classify_vegetables(items: list) -> str:
240
- """
241
- Filters a list of items, keeping only those considered common culinary vegetables.
242
- Returns a comma-separated, alphabetized list of the identified vegetables.
243
- Note: This uses a predefined list and may not align perfectly with botanical definitions
244
- (e.g., tomatoes, bell peppers are botanically fruits but often treated as vegetables).
245
- Input items should be strings.
246
- """
247
- # Using a case-insensitive comparison by converting known veggies to lowercase
248
- # Added more items, still imperfect and culturally dependent.
249
- VEGETABLE_SET = {
250
- "broccoli", "celery", "green beans", "lettuce", "zucchini", "sweet potato", # original + fixed space
251
- "carrot", "spinach", "kale", "onion", "garlic", "potato", "cabbage", "asparagus",
252
- "cucumber", # Botanically fruit, culinary vegetable
253
- "bell pepper", # Botanically fruit, culinary vegetable
254
- "corn", # Botanically fruit/grain, culinary vegetable
255
- # Avoid controversial ones like tomato unless explicitly needed
256
- }
257
- try:
258
- if not isinstance(items, list):
259
- return "Error: Input must be a list of strings."
260
- # Filter using lowercase comparison
261
- vegetables = sorted([item for item in items if isinstance(item, str) and item.lower() in VEGETABLE_SET])
262
- if not vegetables:
263
- return "Result: No items from the list were classified as vegetables based on the predefined set."
264
- return ",".join(vegetables)
265
- except Exception as e:
266
- return f"An unexpected error occurred classifying vegetables: {e}"
267
-
268
- @tool
269
- # Optional: Add timeout to prevent runaway code execution
270
- #@timeout_decorator.timeout(10, timeout_exception=TimeoutError) # Limit execution to 10 seconds
271
- def execute_code(code: str) -> str:
272
- """
273
- Executes a given Python code snippet and returns the value of the 'output' variable.
274
- WARNING: Executes arbitrary code. Use with extreme caution in trusted environments only.
275
- The code runs in a restricted environment, but vulnerabilities might exist.
276
- The code should assign its result to a variable named 'output'.
277
- Example: "output = sum([1, 2, 3])"
278
- """
279
- print(f"[!!!] Executing potentially unsafe code:\n---\n{code}\n---") # Log execution
280
- local_ns = {}
281
- # Restrict builtins more severely for safety. Allow only necessary ones.
282
- # This is still not perfectly safe. Sandboxing is complex.
283
- safe_builtins = {
284
- 'print': print, # Allow print for debugging within the code
285
- 'range': range, 'len': len, 'list': list, 'dict': dict, 'set': set,
286
- 'str': str, 'int': int, 'float': float, 'bool': bool, 'sum': sum,
287
- 'min': min, 'max': max, 'abs': abs, 'pow': pow, 'round': round,
288
- 'True': True, 'False': False, 'None': None,
289
- # Add other safe builtins carefully if absolutely required by expected code snippets
290
- }
291
- # Also restrict imports if possible, though exec doesn't directly prevent them easily.
292
-
293
- try:
294
- # Using exec within a function's local scope
295
- exec(code, {"__builtins__": safe_builtins}, local_ns)
296
- # Check if 'output' was assigned, otherwise return empty string or error
297
- output_val = local_ns.get("output", None)
298
- if output_val is None:
299
- return "Result: Code executed, but no variable named 'output' was assigned."
300
- return str(output_val)
301
- except TimeoutError:
302
- return "Error: Code execution timed out."
303
- except Exception as e:
304
- # Capture and return execution errors
305
- error_details = traceback.format_exc()
306
- print(f"Error during code execution: {e}\n{error_details}") # Log full traceback
307
- return f"Error during code execution: {type(e).__name__}: {e}"
308
-
309
-
310
- @tool
311
- def least_athletes_olympics(year: int) -> str:
312
- """
313
- Finds the country (IOC code) that sent the fewest athletes to the specified Summer Olympics year.
314
- Data is scraped from the English Wikipedia page for that year's Olympics.
315
- Returns the IOC code as a string. If there's a tie, returns the first code alphabetically.
316
- Returns an error message if data cannot be retrieved or parsed.
317
- """
318
- try:
319
- if not isinstance(year, int):
320
- return "Error: Year must be an integer."
321
-
322
- url = f"https://en.wikipedia.org/wiki/{year}_Summer_Olympics"
323
- response = requests.get(url, timeout=REQUESTS_TIMEOUT, headers=HEADERS)
324
- response.raise_for_status()
325
- soup = BeautifulSoup(response.text, "html.parser")
326
-
327
- # Find the participating NOCs table - this selector might need adjustment over time
328
- # Look for tables with captions containing 'Participating National Olympic Committees' or similar
329
- tables = soup.find_all("table", class_="wikitable")
330
- noc_table = None
331
- for table in tables:
332
- caption = table.find("caption")
333
- # Check caption text or look for characteristic headers like 'NOC', 'Athletes'
334
- if caption and "Participating National Olympic" in caption.get_text():
335
- noc_table = table
336
- break
337
- # Fallback: check headers if no caption found or caption doesn't match
338
- headers = [th.get_text(strip=True).lower() for th in table.find_all("th")]
339
- if "noc" in headers and "athletes" in headers:
340
- noc_table = table
341
- break
342
-
343
- if noc_table is None:
344
- return f"Error: Could not find the expected NOC table on the Wikipedia page for {year} Summer Olympics."
345
-
346
- rows = noc_table.find_all("tr")[1:] # Skip header row
347
- data = []
348
- for r in rows:
349
- cols = r.find_all("td")
350
- # Adapt column indices based on typical table structure (NOC code, Athletes count)
351
- # This is fragile and depends on Wikipedia's table layout.
352
- try:
353
- # Attempt to find columns by text content or relative position
354
- # Assuming NOC code is often linked, e.g., inside an <a> tag
355
- noc_link = cols[0].find("a")
356
- noc_code = noc_link.get_text(strip=True) if noc_link else cols[0].get_text(strip=True)
357
- # Clean up potential bracketed numbers like (123) in NOC code cell
358
- noc_code = re.sub(r'\s*\(\d+\)\s*$', '', noc_code).strip()
359
-
360
- # Find athletes column - often the next column, check if it's numeric
361
- athletes_text = cols[1].get_text(strip=True).replace(',', '') # Remove commas
362
- athletes_count = int(athletes_text)
363
-
364
- data.append((noc_code, athletes_count))
365
- except (IndexError, ValueError, AttributeError):
366
- # Skip rows that don't match the expected format
367
- print(f"Skipping malformed row in table for {year}: {r.get_text(strip=True)}")
368
- continue
369
-
370
- if not data:
371
- return f"Error: No valid NOC/athlete data parsed from the table for {year}."
372
-
373
- min_athletes = min(count for _, count in data)
374
- candidates = sorted([code for code, count in data if count == min_athletes])
375
-
376
- if not candidates:
377
- return f"Error: Could not determine country with fewest athletes for {year}."
378
- return candidates[0]
379
-
380
- except requests.exceptions.RequestException as e:
381
- return f"Error fetching Olympics page for {year}: Network error - {e}"
382
- except Exception as e:
383
- return f"An unexpected error occurred processing Olympics data for {year}: {e}\n{traceback.format_exc()}"
384
-
385
-
386
- @tool
387
- def get_nasa_award_number(qid: str) -> str:
388
- """
389
- Retrieves the NASA award number (property P496) associated with a given Wikidata Item QID.
390
- Input must be a valid Wikidata QID string (e.g., 'Q42').
391
- Returns the award number as a string, or an error message.
392
- """
393
- if not isinstance(qid, str) or not re.match(r'^Q\d+$', qid):
394
- return f"Error: Invalid Wikidata QID format provided: '{qid}'. Must be like 'Q42'."
395
-
396
- sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
397
- sparql.setMethod('POST') # Recommended by Wikidata for robustness
398
- sparql.agent = HEADERS['User-Agent'] # Set User-Agent for SPARQL queries
399
-
400
- query = f"""
401
- SELECT ?award WHERE {{
402
- wd:{qid} wdt:P496 ?award .
403
- }}
404
- LIMIT 1
405
- """
406
- sparql.setQuery(query)
407
- sparql.setReturnFormat(JSON)
408
 
409
- try:
410
- results = sparql.query().convert()
411
- bindings = results.get("results", {}).get("bindings", [])
412
-
413
- if bindings:
414
- award = bindings[0].get("award", {}).get("value")
415
- if award:
416
- return award
417
- else:
418
- return f"Error: Found property P496 for {qid}, but the award value is missing."
419
- else:
420
- return f"Error: No NASA award number (P496) found for Wikidata item {qid}."
421
 
422
- except Exception as e:
423
- # Catch SPARQL query errors, network issues, JSON parsing problems
424
- return f"An error occurred querying Wikidata for {qid}: {e}"
425
-
426
- TOOLS = [
427
- web_search,
428
- read_file,
429
- transcribe_audio,
430
- analyze_sales_data, # Or a more general excel tool
431
- find_chess_mate_move, # Needs image-to-FEN first!
432
- wiki_get_page,
433
- youtube_transcript,
434
- reverse_text,
435
- find_non_commutative,
436
- libretext_extract,
437
- classify_vegetables,
438
- execute_code,
439
- least_athletes_olympics,
440
- get_nasa_award_number
441
- ]
442
-
443
- SYSTEM_MESSAGE = """You are a concise AI assistant with access to the following tools:
444
- - web_search(query: string) -> string
445
- - wiki_get_page(title: string) → string
446
- - youtube_transcript(video_id: string) → string
447
- - reverse_text(text: string) → string
448
- - find_non_commutative(table: dict[tuple[string, string]: string]) -> string
449
- - libretext_extract(url: string, selector: string) → string
450
- - classify_vegetables(items: list[string]) → list[string]
451
- - execute_code(code: string) → string
452
- - least_athletes_olympics(year: int) → string
453
- - get_nasa_award_number(qid: string) → string
454
- - read_file(file_path: string) -> string
455
- - transcribe_audio(file_path: string) -> string
456
- - analyze_sales_data(file_path: string) -> string
457
- - find_chess_mate_move(fen: string) -> string
458
- When you need to use a tool, respond exactly with:
459
- Action: <tool_name>(<arg_name>=<value>, ...)
460
-
461
- IMPORTANT FORMATTING:
462
- - For the find_non_commutative tool, the 'table' argument MUST be a valid Python dictionary with tuple keys, like this:
463
- Action: find_non_commutative(table={('a','a'):'a', ('a','b'):'c', ('b','a'):'d', ('b','b'):'e'})
464
-
465
- Then wait for the tool’s output before continuing.
466
- If a tool returns an error message starting with 'Error:', treat that as the observation.
467
- If a tool requires a file path, assume the file is accessible in the current environment.
468
- If a question involves an image or audio file, state that you need the content extracted first (e.g., text from audio, FEN from chess image) before you can proceed.
469
- Once you have all the information, provide your final answer in as few words as possible, with no extra commentary or prefixes.
470
- """
471
 
472
  # --- Basic Agent Definition ---
473
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
@@ -478,91 +104,13 @@ class BasicAgent:
478
  raise ValueError("HF_TOKEN not set in environment")
479
 
480
  # --- Replace with your chosen LLM ---
481
- model_id = "microsoft/Phi-3-mini-4k-instruct"
482
-
483
- try:
484
- tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True) # Some models need trust_remote_code
485
- model = AutoModelForCausalLM.from_pretrained(
486
- model_id,
487
- torch_dtype=torch.float32, # Use float32 for CPU compatibility usually
488
- device_map=None, # Explicitly set to None or 'cpu' for CPU
489
- trust_remote_code=True
490
- )
491
- model.to('cpu') # Ensure model is on CPU
492
-
493
- pipe = pipeline(
494
- "text-generation",
495
- model=model,
496
- tokenizer=tokenizer,
497
- max_new_tokens=512,
498
- do_sample=False,
499
- return_full_text=False,
500
- # No temperature/top_k needed if do_sample=False
501
- )
502
- self.llm = HuggingFacePipeline(pipeline=pipe)
503
-
504
- except ImportError as e:
505
- raise ImportError(f"Required library not found: {e}. Make sure 'transformers', 'torch', 'accelerate' are installed.")
506
- except Exception as e:
507
- # Catch potential issues like model download failure, OOM errors
508
- raise RuntimeError(f"Failed to initialize HuggingFacePipeline for {model_id}: {e}")
509
-
510
- try:
511
- # Construct a simplified test prompt (mimicking agent input)
512
- test_prompt = SYSTEM_MESSAGE + "\nHuman: What is the capital of France?\nAssistant:" # A simple question
513
- # Or use a prompt closer to the problematic one if you know which one it is
514
- # test_prompt = SYSTEM_MESSAGE + "\nHuman: [Insert the non-commutative table question here]\nAssistant:"
515
-
516
- # Use invoke which is standard now
517
- test_response = self.llm.invoke(test_prompt)
518
- print(f"--- Direct LLM Test Response ---:\n{test_response}\n-----------------------------")
519
- if not test_response or len(test_response.strip()) == 0:
520
- print("!!! Direct LLM Test returned empty or whitespace result.")
521
-
522
- except Exception as test_e:
523
- print(f"!!! Direct LLM Test FAILED: {test_e}")
524
- # print traceback for more details
525
- import traceback
526
- traceback.print_exc()
527
  print("BasicAgent initialized with LLM.")
528
 
529
  def __call__(self, question: str) -> str:
530
  # Comment out agent call temporarily if testing in __init__
531
  # Or add the direct test here before calling the agent
532
- print(f"\n>> Processing Question (Agent): {question}")
533
- try:
534
- response = self.agent.invoke({"input": question})
535
- answer = response.get('output', "Agent did not produce an output.")
536
- print(f"<< Agent Answer: {answer}")
537
- return str(answer).strip()
538
- except Exception as e:
539
- print(f"Error during agent execution: {e}")
540
- # Also print traceback here to see where the error originates
541
- import traceback
542
- traceback.print_exc()
543
- return f"Agent Error: {e}"
544
-
545
- # --- Agent Initialization (remains the same) ---
546
- self.agent = initialize_agent(
547
- tools=TOOLS,
548
- llm=self.llm,
549
- agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
550
- agent_kwargs={'prefix': SYSTEM_MESSAGE},
551
- verbose=True,
552
- handle_parsing_errors="Check your output and make sure it conforms!",
553
- max_iterations=10
554
- )
555
- print("BasicAgent initialized with LLM.")
556
-
557
- # --- Core dispatcher/fallback ---
558
- def __call__(self, question: str) -> str:
559
- try:
560
- response = self.agent.invoke({"input": question})
561
- answer = response.get('output', "Agent did not produce an output.")
562
- return str(answer).strip()
563
- except Exception as e:
564
- print(f"Error during agent execution: {e}")
565
- return f"Agent Error: {e}"
566
 
567
  def run_and_submit_all( profile: gr.OAuthProfile | None):
568
  """
 
1
+ import os
2
+ import threading
3
+ import requests
4
+ import pandas as pd
5
+ import gradio as gr
6
+
7
+ from dotenv import load_dotenv
8
+
9
+ from text_inspector_tool import TextInspectorTool
10
+ from text_web_browser import (
11
+ ArchiveSearchTool,
12
+ FinderTool,
13
+ FindNextTool,
14
+ PageDownTool,
15
+ PageUpTool,
16
+ SimpleTextBrowser,
17
+ VisitTool,
18
+ )
19
+ from visual_qa import visualizer
20
+
21
+ from smolagents import (
22
+ CodeAgent,
23
+ GoogleSearchTool,
24
+ LiteLLMModel,
25
+ ToolCallingAgent,
26
+ )
27
+
28
+ load_dotenv(override=True)
29
 
30
  # (Keep Constants as is)
31
  # --- Constants ---
32
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
33
  HF_TOKEN = os.getenv("HF_TOKEN", None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
+ # Browser config copied verbatim
36
+ user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64)…"
37
+ BROWSER_CONFIG = {
38
+ "viewport_size": 5120,
39
+ "downloads_folder": "downloads_folder",
40
+ "request_kwargs": {
41
+ "headers": {"User-Agent": user_agent},
42
+ "timeout": 300,
43
+ },
44
+ "serpapi_key": os.getenv("SERPAPI_API_KEY"),
45
+ }
46
+ os.makedirs(BROWSER_CONFIG["downloads_folder"], exist_ok=True)
47
+
48
+
49
+ def create_agent(model_id="o1"):
50
+ custom_role_conversions = {"tool-call": "assistant", "tool-response": "user"}
51
+ model_params = {
52
+ "model_id": model_id,
53
+ "custom_role_conversions": custom_role_conversions,
54
+ "max_completion_tokens": 8192,
55
  }
56
+ if model_id == "o1":
57
+ model_params["reasoning_effort"] = "high"
58
+ model = LiteLLMModel(**model_params)
59
+
60
+ browser = SimpleTextBrowser(**BROWSER_CONFIG)
61
+ WEB_TOOLS = [
62
+ GoogleSearchTool(provider="serper"),
63
+ VisitTool(browser),
64
+ PageUpTool(browser),
65
+ PageDownTool(browser),
66
+ FinderTool(browser),
67
+ FindNextTool(browser),
68
+ ArchiveSearchTool(browser),
69
+ TextInspectorTool(model, text_limit=100000),
70
+ ]
71
+ text_webbrowser_agent = ToolCallingAgent(
72
+ model=model,
73
+ tools=WEB_TOOLS,
74
+ max_steps=20,
75
+ verbosity_level=2,
76
+ planning_interval=4,
77
+ name="search_agent",
78
+ description="""
79
+ A team member that will search the internet to answer your question.
80
+ Ask him for all your questions that require browsing the web.
81
+ Provide him as much context as possible…
82
+ """,
83
+ provide_run_summary=True,
84
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
+ manager_agent = CodeAgent(
87
+ model=model,
88
+ tools=[visualizer, TextInspectorTool(model, 100000)],
89
+ max_steps=12,
90
+ verbosity_level=2,
91
+ additional_authorized_imports=["*"],
92
+ planning_interval=4,
93
+ managed_agents=[text_webbrowser_agent],
94
+ )
 
 
 
95
 
96
+ return manager_agent
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
  # --- Basic Agent Definition ---
99
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 
104
  raise ValueError("HF_TOKEN not set in environment")
105
 
106
  # --- Replace with your chosen LLM ---
107
+ self.agent = create_agent(model_id="o1")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  print("BasicAgent initialized with LLM.")
109
 
110
  def __call__(self, question: str) -> str:
111
  # Comment out agent call temporarily if testing in __init__
112
  # Or add the direct test here before calling the agent
113
+ return self.agent.run(question).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
  def run_and_submit_all( profile: gr.OAuthProfile | None):
116
  """