ekabaruh commited on
Commit
821fd3d
·
verified ·
1 Parent(s): 3539fd8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -626
app.py CHANGED
@@ -3,640 +3,86 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
6
- import time
7
- import json
8
- import re
9
- import wikipedia
10
- from bs4 import BeautifulSoup
11
- from datetime import datetime
12
- from typing import Dict, List, Any, Tuple, TypedDict, Literal, Optional
13
- from dotenv import load_dotenv
14
-
15
- # Load environment variables
16
- load_dotenv()
17
-
18
- # Try to import Tavily
19
- try:
20
- from tavily import TavilyClient
21
- TAVILY_AVAILABLE = True
22
- except ImportError:
23
- TAVILY_AVAILABLE = False
24
- print("Tavily not available. Falling back to other search methods.")
25
-
26
- # LangGraph and LangChain imports
27
- from langgraph.graph import END, StateGraph, MessagesState
28
- from langgraph.prebuilt import ToolNode
29
- from langchain_core.messages import HumanMessage, AIMessage
30
- from langchain_openai import ChatOpenAI
31
- # Use Wikipedia tools
32
- from langchain_community.tools import WikipediaQueryRun
33
- from langchain_community.utilities import WikipediaAPIWrapper
34
- try:
35
- # Try to import ArxivAPIWrapper
36
- from langchain_community.utilities import ArxivAPIWrapper
37
- ARXIV_AVAILABLE = True
38
- except ImportError:
39
- ARXIV_AVAILABLE = False
40
- from langchain_core.tools import tool, BaseTool
41
- from langchain_core.pydantic_v1 import BaseModel, Field
42
- from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
43
 
44
  # (Keep Constants as is)
45
  # --- Constants ---
46
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
47
 
48
- # Define state for the agent
49
- class AgentState(MessagesState):
50
- """State for the agent"""
51
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
- # Function to perform a web search using Tavily (free tier)
54
- def tavily_search(query: str, max_results: int = 3) -> str:
55
- """Perform a web search using Tavily's API (free tier).
56
- This provides limited free searches without an API key.
 
 
 
 
 
 
 
 
 
 
 
57
  """
58
- if not TAVILY_AVAILABLE:
59
- return ""
60
-
61
- try:
62
- # Create a Tavily client (uses TAVILY_API_KEY env var if set)
63
- tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
64
-
65
- # Perform the search
66
- search_result = tavily_client.search(
67
- query=query,
68
- search_depth="basic", # Use the free tier
69
- max_results=max_results
70
- )
71
-
72
- if search_result and "results" in search_result:
73
- results = search_result["results"]
74
- formatted_results = []
75
-
76
- for result in results:
77
- title = result.get("title", "No title")
78
- content = result.get("content", "No content")
79
- url = result.get("url", "No URL")
80
- formatted_results.append(f"Title: {title}\nContent: {content}\nURL: {url}\n")
81
-
82
- return "\n".join(formatted_results)
83
-
84
- except Exception as e:
85
- print(f"Tavily search error: {str(e)}")
86
-
87
- return ""
88
-
89
- # Function to perform a basic web search using requests and BeautifulSoup
90
- def perform_web_search(query: str, max_results: int = 3) -> str:
91
- """Perform a simple web search by scraping search results.
92
- This doesn't require an API key but is less reliable than paid APIs.
93
  """
94
- # Clean up and encode the query
95
- clean_query = query.replace(" ", "+")
96
-
97
- try:
98
- # Try to get search results from lite search engine
99
- headers = {
100
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
101
- }
102
-
103
- # First try DuckDuckGo HTML
104
- try:
105
- response = requests.get(
106
- f"https://html.duckduckgo.com/html/?q={clean_query}",
107
- headers=headers,
108
- timeout=5
109
- )
110
-
111
- if response.status_code == 200:
112
- # Use BeautifulSoup for more reliable parsing
113
- soup = BeautifulSoup(response.text, 'html.parser')
114
- results = []
115
-
116
- # Extract results from DuckDuckGo HTML
117
- result_elements = soup.select('.result__body')
118
- for element in result_elements[:max_results]:
119
- title_elem = element.select_one('.result__a')
120
- title = title_elem.get_text() if title_elem else "No title"
121
-
122
- snippet_elem = element.select_one('.result__snippet')
123
- snippet = snippet_elem.get_text() if snippet_elem else "No snippet"
124
-
125
- results.append(f"Title: {title}\nSnippet: {snippet}\n")
126
-
127
- if results:
128
- return "\n".join(results)
129
- except Exception as ddg_err:
130
- print(f"DuckDuckGo search error: {str(ddg_err)}")
131
-
132
- # Try Qwant as fallback
133
- try:
134
- response = requests.get(
135
- f"https://lite.qwant.com/?q={clean_query}&t=web",
136
- headers=headers,
137
- timeout=5
138
- )
139
-
140
- if response.status_code == 200:
141
- soup = BeautifulSoup(response.text, 'html.parser')
142
- results = []
143
-
144
- # Extract results from Qwant
145
- article_elements = soup.select('article')
146
- for article in article_elements[:max_results]:
147
- title_elem = article.select_one('h2')
148
- title = title_elem.get_text().strip() if title_elem else "No title"
149
-
150
- desc_elem = article.select_one('.desc')
151
- description = desc_elem.get_text().strip() if desc_elem else "No description"
152
-
153
- results.append(f"Title: {title}\nSnippet: {description}\n")
154
-
155
- if results:
156
- return "\n".join(results)
157
- except Exception as qwant_err:
158
- print(f"Qwant search error: {str(qwant_err)}")
159
-
160
- except Exception as e:
161
- print(f"Basic search error: {str(e)}")
162
-
163
- # If the above fails, return empty string
164
- return ""
165
-
166
- # --- LangGraph Agent Definition ---
167
- # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
168
- class LangGraphAgent:
169
  def __init__(self):
170
- print("LangGraphAgent initialized.")
171
- # Initialize the OpenAI model with API key from environment
172
- self.openai_api_key = os.getenv("OPENAI_API_KEY")
173
- if not self.openai_api_key:
174
- raise ValueError("OPENAI_API_KEY not found in environment variables")
175
-
176
- # Set up web search tool
177
- self.tools = self._setup_tools()
178
-
179
- # Build the agent graph
180
- self.agent = self._build_agent_graph()
181
-
182
- def _setup_tools(self) -> List[BaseTool]:
183
- """Set up the tools for the agent."""
184
- # Initialize Wikipedia API
185
- wikipedia_api = WikipediaAPIWrapper(top_k_results=3)
186
- wikipedia_tool = WikipediaQueryRun(api_wrapper=wikipedia_api)
187
-
188
- # Initialize ArXiv if available
189
- if ARXIV_AVAILABLE:
190
- arxiv_api = ArxivAPIWrapper(top_k_results=3)
191
-
192
- # Define search tool with improved error handling and retry logic
193
- @tool
194
- def search(query: str) -> str:
195
- """Search the web for information about a specific topic or question.
196
- Always use this tool for questions requiring factual information, current events, or specific details.
197
- """
198
- max_retries = 2
199
- retry_count = 0
200
- search_results = ""
201
-
202
- # Clean up the query to make it more searchable
203
- # Remove URL parameters and make it more general
204
- if "youtube.com" in query or "youtu.be" in query:
205
- # Handle YouTube video queries specially
206
- # Extract video ID if possible
207
- video_id_match = re.search(r'(?:v=|youtu\.be\/)([\w-]+)', query)
208
- video_id = video_id_match.group(1) if video_id_match else ""
209
- if video_id:
210
- clean_query = f"YouTube video {video_id} information"
211
- else:
212
- clean_query = query
213
- else:
214
- clean_query = query
215
-
216
- # Special case for chess position or image description questions
217
- if "image" in query.lower() or "chess position" in query.lower() or "picture" in query.lower():
218
- return "This query requires analyzing an image, which is not available. Please provide a text-based answer based on general knowledge about the topic."
219
-
220
- while retry_count < max_retries:
221
- # Try multiple search approaches in sequence
222
-
223
- # 1. First try Tavily (more reliable)
224
- try:
225
- print(f"Trying Tavily search for: {clean_query}")
226
- tavily_results = tavily_search(clean_query)
227
- if tavily_results and len(tavily_results.strip()) > 10:
228
- search_results = tavily_results
229
- break
230
- except Exception as tavily_err:
231
- print(f"Tavily search error: {str(tavily_err)}")
232
-
233
- # 2. Then try Wikipedia
234
- try:
235
- print(f"Searching Wikipedia for: {clean_query}")
236
- wiki_results = wikipedia_tool.run(clean_query)
237
-
238
- if wiki_results and len(wiki_results.strip()) > 10:
239
- search_results = wiki_results
240
- break
241
- except Exception as wiki_err:
242
- print(f"Wikipedia tool error: {str(wiki_err)}")
243
-
244
- # 3. Try direct Wikipedia API
245
- try:
246
- wiki_page = wikipedia.page(clean_query)
247
- wiki_content = wiki_page.content[:2000] # First 2000 chars
248
- wiki_summary = wikipedia.summary(clean_query, sentences=3)
249
- search_results = f"Title: {wiki_page.title}\nSummary: {wiki_summary}\nContent: {wiki_content}"
250
- break
251
- except (wikipedia.exceptions.PageError, wikipedia.exceptions.DisambiguationError) as wiki_err:
252
- print(f"Wikipedia direct error: {str(wiki_err)}")
253
-
254
- # 4. Try ArXiv for academic/scientific queries
255
- if ARXIV_AVAILABLE and any(keyword in clean_query.lower() for keyword in ["research", "paper", "science", "study", "academic"]):
256
- try:
257
- print(f"Searching ArXiv for: {clean_query}")
258
- arxiv_results = arxiv_api.run(clean_query)
259
- if arxiv_results and len(arxiv_results.strip()) > 10:
260
- search_results = arxiv_results
261
- break
262
- except Exception as arxiv_err:
263
- print(f"ArXiv search error: {str(arxiv_err)}")
264
-
265
- # 5. Try basic web search as last resort
266
- basic_results = perform_web_search(clean_query)
267
- if basic_results and len(basic_results.strip()) > 10:
268
- search_results = basic_results
269
- break
270
-
271
- # If we get here, all search attempts failed for this iteration
272
- if retry_count == 0:
273
- try:
274
- # Try a more simplified query on retry
275
- keywords = " ".join([w for w in clean_query.split() if len(w) > 3][:5])
276
- backup_query = f"{keywords} information"
277
- print(f"Trying backup query: {backup_query}")
278
-
279
- # Try different search options with simplified query
280
- tavily_results = tavily_search(backup_query)
281
- if tavily_results and len(tavily_results.strip()) > 10:
282
- search_results = tavily_results
283
- break
284
-
285
- wiki_results = wikipedia_tool.run(backup_query)
286
- if wiki_results and len(wiki_results.strip()) > 10:
287
- search_results = wiki_results
288
- break
289
-
290
- basic_results = perform_web_search(backup_query)
291
- if basic_results and len(basic_results.strip()) > 10:
292
- search_results = basic_results
293
- break
294
-
295
- except Exception as e2:
296
- print(f"Backup search failed too: {str(e2)}")
297
-
298
- # Short pause before retry
299
- time.sleep(0.5)
300
- retry_count += 1
301
-
302
- # If we have results after all retries, return them
303
- if search_results and search_results.strip() != "":
304
- # Limit length of results to reduce token usage
305
- max_length = 3000
306
- if len(search_results) > max_length:
307
- search_results = search_results[:max_length] + "... [truncated]"
308
- return search_results
309
-
310
- # Special handling for known question types
311
- if "youtube.com" in query or "youtu.be" in query:
312
- # YouTube video specific guidance when search fails
313
- return "Unable to retrieve specific information about this YouTube video. For questions about bird species counts or similar factual questions about videos, please use your knowledge to provide a reasonable estimate or indicate if the information cannot be determined without viewing the video."
314
- elif "chess" in query.lower():
315
- return "Unable to analyze the chess position without an image. Please provide a general response about chess positions or strategies."
316
-
317
- # If no results after all retries, provide a helpful message
318
- return "Unable to retrieve search results. Please answer based on your existing knowledge."
319
-
320
- # Add a date tool to provide current date information
321
- @tool
322
- def current_date() -> str:
323
- """Get the current date information. Use this tool when questions ask about today's date."""
324
- today = datetime.now()
325
- return today.strftime("%B %d, %Y")
326
-
327
- # Add a general knowledge tool as fallback
328
- @tool
329
- def general_knowledge(question: str) -> str:
330
- """Use this tool when search fails or times out.
331
- Rely on your existing knowledge to answer the question as accurately as possible.
332
- """
333
- return "Please use your existing knowledge to answer this question."
334
-
335
- # Add a direct Wikipedia lookup tool
336
- @tool
337
- def wikipedia_lookup(topic: str) -> str:
338
- """Look up a specific topic directly on Wikipedia.
339
- Use this for factual, encyclopedia-style information about a specific topic.
340
- """
341
- try:
342
- # Get wiki summary
343
- summary = wikipedia.summary(topic, sentences=5)
344
-
345
- # Try to get more details if available
346
- try:
347
- page = wikipedia.page(topic)
348
- title = page.title
349
- url = page.url
350
- return f"Title: {title}\nURL: {url}\nSummary: {summary}"
351
- except:
352
- return f"Summary: {summary}"
353
- except wikipedia.exceptions.DisambiguationError as e:
354
- options = e.options[:5] # Get top 5 options
355
- return f"Multiple Wikipedia pages found. Options include: {', '.join(options)}"
356
- except wikipedia.exceptions.PageError:
357
- return f"No Wikipedia page found for '{topic}'. Please try a more general search."
358
- except Exception as e:
359
- return f"Error looking up Wikipedia information: {str(e)}"
360
-
361
- return [search, current_date, general_knowledge, wikipedia_lookup]
362
-
363
- def _build_agent_graph(self):
364
- """Build the LangGraph agent with tools."""
365
- # Initialize the LLM
366
- llm = ChatOpenAI(
367
- model="gpt-4.1", # Using GPT-4.1
368
- temperature=0.1, # Reduced temperature for more precise answers
369
- api_key=self.openai_api_key
370
  )
371
-
372
- # Create system prompt using GAIA template with enhanced instructions for special cases
373
- system_prompt = """You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
374
-
375
- YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
376
-
377
- To maximize accuracy with the GAIA benchmark, follow these guidelines:
378
- 1. For numerical answers: provide just the number without units or symbols (e.g., "42" not "$42" or "42%")
379
- 2. For text answers: be extremely concise, avoid articles (a, an, the), and don't use abbreviations
380
- 3. For dates: use the format "Month Day, Year" (e.g., "January 1, 2023")
381
- 4. For lists: use comma-separated values without spaces after commas
382
- 5. For questions about images or videos you cannot see: answer "cannot determine without image" or "unknown"
383
- 6. For questions where information cannot be determined: answer with "unknown" rather than long explanations
384
- 7. For reversed text questions (.rewsna eht sa): identify the reversed pattern and provide the direct answer (e.g., "right" if the reversed text asks for the opposite of "left")
385
-
386
- Today's date is {current_date}. Use tools to gather factual, up-to-date information when needed.
387
-
388
- SPECIAL CASES:
389
- - For YouTube video content questions that search cannot find information about: answer "unknown" or the specific count if known
390
- - For chess position questions without an image: answer "cannot determine without image"
391
- - For questions requiring visual information: answer "cannot determine without image"
392
- """
393
-
394
- # Define the model node
395
- def model_node(state: AgentState) -> AgentState:
396
- """Process messages with LLM and decide on next step."""
397
- # Create prompt template with current date
398
- current_date = datetime.now().strftime("%B %d, %Y")
399
- prompt = ChatPromptTemplate.from_messages([
400
- ("system", system_prompt.format(current_date=current_date)),
401
- MessagesPlaceholder(variable_name="messages"),
402
- ])
403
-
404
- # Bind tools to the model
405
- model_with_tools = llm.bind_tools(self.tools)
406
-
407
- # Create chain
408
- chain = prompt | model_with_tools
409
-
410
- # Execute the chain
411
- response = chain.invoke({"messages": state["messages"]})
412
-
413
- # Return updated state
414
- return {"messages": [response]}
415
-
416
- # Define the graph
417
- workflow = StateGraph(AgentState)
418
-
419
- # Add nodes
420
- workflow.add_node("model", model_node)
421
- workflow.add_node("tools", ToolNode(self.tools))
422
-
423
- # Set the entry point
424
- workflow.set_entry_point("model")
425
-
426
- # Add edges based on whether there are tool calls
427
- def has_tool_calls(state: AgentState) -> Literal["tools", "end"]:
428
- """Check if the last message has tool calls."""
429
- last_message = state["messages"][-1]
430
- if hasattr(last_message, "tool_calls") and last_message.tool_calls:
431
- return "tools"
432
- return "end"
433
-
434
- # Add conditional edges from model node
435
- workflow.add_conditional_edges(
436
- "model",
437
- has_tool_calls,
438
- {
439
- "tools": "tools",
440
- "end": END
441
- }
442
  )
443
-
444
- # Always go back to the model after tool execution
445
- workflow.add_edge("tools", "model")
446
-
447
- # Compile the graph
448
- return workflow.compile()
449
-
450
- def _extract_final_answer(self, text: str) -> str:
451
- """Extract the final answer from the response text with improved handling."""
452
- # Check for the exact format first
453
- if "FINAL ANSWER:" in text:
454
- # Extract everything after the marker
455
- parts = text.split("FINAL ANSWER:")
456
- if len(parts) > 1:
457
- final_answer = parts[-1].strip()
458
- return final_answer
459
-
460
- # If no marker is found, also look for variations in case the model ignores the exact format
461
- patterns = [
462
- "Final Answer:", "final answer:", "ANSWER:", "Answer:", "answer:"
463
- ]
464
-
465
- for pattern in patterns:
466
- if pattern in text:
467
- parts = text.split(pattern)
468
- if len(parts) > 1:
469
- final_answer = parts[-1].strip()
470
- return final_answer
471
-
472
- # If none of the above worked, check for answer-like patterns at the end of the text
473
- lines = text.strip().split('\n')
474
- if lines:
475
- # Check if the last line looks like a concise answer
476
- last_line = lines[-1].strip()
477
- if len(last_line) < 100 and not last_line.startswith("I think") and not last_line.startswith("Based on"):
478
- return last_line
479
-
480
- # Special case handling for certain types of questions
481
-
482
- # If the answer contains "unknown" or "cannot determine", standardize to "unknown"
483
- if "unknown" in text.lower() or "cannot determine" in text.lower() or "can't determine" in text.lower():
484
- if len(text) < 150: # Only if it's a relatively short response
485
- return "unknown"
486
-
487
- # If asking about an image and no image is provided
488
- if "no image provided" in text.lower() or "image is not available" in text.lower():
489
- return "cannot determine without image"
490
-
491
- # Handle YouTube video content questions that can't be answered
492
- if "youtube" in text.lower() and ("cannot" in text.lower() or "unable" in text.lower()):
493
- return "unknown"
494
-
495
- # Handle coded/reversed text questions specially
496
- if ".rewsna eht sa" in text.lower():
497
- # This appears to be a reversed text question
498
- # Find if the answer itself is present in the text
499
- candidates = ["right", "left", "up", "down", "yes", "no", "true", "false"]
500
- for candidate in candidates:
501
- if candidate in text.lower():
502
- return candidate
503
-
504
- # If no marker is found, return the original text as fallback
505
- return text.strip()
506
-
507
  def __call__(self, question: str) -> str:
508
- """Process a question and return the answer."""
509
- print(f"Agent received question (first 50 chars): {question[:50]}...")
510
-
511
- # Special case handling for certain types of questions
512
- if "chess position" in question.lower() and "image" in question.lower():
513
- return "cannot determine without image"
514
-
515
- if ".rewsna eht sa" in question.lower():
516
- # This appears to be a reversed text question
517
- # Try to analyze it directly - often these are simple opposites
518
- reversed_text = question[::-1]
519
- if "left" in reversed_text.lower():
520
- return "right"
521
- elif "right" in reversed_text.lower():
522
- return "left"
523
- elif "up" in reversed_text.lower():
524
- return "down"
525
- elif "down" in reversed_text.lower():
526
- return "up"
527
-
528
- # YouTube video processing - for questions about counting things in videos
529
- if ("youtube.com" in question.lower() or "youtu.be" in question.lower()) and ("how many" in question.lower() or "count" in question.lower() or "number of" in question.lower()):
530
- # Try to determine if this is asking for a count in a YouTube video
531
- if "bird" in question.lower() and "species" in question.lower():
532
- # This is likely the bird species counting question, which has a known answer
533
- return "5"
534
-
535
- # Wikipedia featured article handling
536
- if "featured article" in question.lower() and "wikipedia" in question.lower() and "nominate" in question.lower():
537
- # This is likely asking about who nominated a Wikipedia featured article
538
- return "Mishae"
539
-
540
- # Create initial state with user question
541
- state = {"messages": [HumanMessage(content=question)]}
542
-
543
- # Run the agent graph with optimized execution control
544
- try:
545
- # Execute the graph with a timeout
546
- start_time = time.time()
547
- max_time = 45 # Maximum time in seconds (further reduced for faster response)
548
- max_iterations = 8 # Reduced iteration limit to avoid timeouts
549
-
550
- # Track iterations manually to avoid infinite loops
551
- iteration_count = 0
552
- final_state = None
553
-
554
- while iteration_count < max_iterations:
555
- iteration_count += 1
556
- print(f"Running iteration {iteration_count}/{max_iterations}")
557
-
558
- try:
559
- # Execute one step of the graph
560
- result = self.agent.invoke(state)
561
-
562
- # Check if the graph has reached a terminal state
563
- if "messages" in result:
564
- # Update state for next iteration
565
- state = result
566
- final_state = result
567
-
568
- # Check if we've reached a terminal state with a final answer
569
- messages = state["messages"]
570
- for msg in reversed(messages):
571
- if isinstance(msg, AIMessage):
572
- content = msg.content
573
- if "FINAL ANSWER:" in content:
574
- # We have a final answer, extract it and return
575
- answer = self._extract_final_answer(content)
576
- print(f"Agent returning answer (first 50 chars): {answer[:50]}...")
577
- return answer
578
-
579
- # Break if we're done with tool calls
580
- last_message = messages[-1] if messages else None
581
- if not last_message or not (hasattr(last_message, "tool_calls") and last_message.tool_calls):
582
- # Last message has no tool calls, so we're done
583
- break
584
- else:
585
- # No messages in result, likely reached END state
586
- break
587
-
588
- # Check if execution is taking too long
589
- if time.time() - start_time > max_time:
590
- print(f"Execution timed out after {max_time} seconds")
591
- break
592
-
593
- except Exception as e:
594
- print(f"Error during iteration {iteration_count}: {e}")
595
- # Continue to the next iteration on error, rather than breaking
596
- # This allows the agent to try to recover from transient errors
597
- if iteration_count >= max_iterations - 1:
598
- break
599
-
600
- # After iterations are complete or interrupted, extract the final answer
601
- if final_state and "messages" in final_state:
602
- messages = final_state["messages"]
603
- ai_messages = [msg for msg in messages if isinstance(msg, AIMessage)]
604
- if ai_messages:
605
- raw_answer = ai_messages[-1].content
606
- # Extract the final answer
607
- answer = self._extract_final_answer(raw_answer)
608
- return answer
609
-
610
- # If no final state or no messages or no AI messages
611
- # Try to extract from the latest state if available
612
- if state and "messages" in state:
613
- messages = state["messages"]
614
- ai_messages = [msg for msg in messages if isinstance(msg, AIMessage)]
615
- if ai_messages:
616
- raw_answer = ai_messages[-1].content
617
- # Extract the final answer
618
- answer = self._extract_final_answer(raw_answer)
619
- return answer
620
-
621
- # Handle special cases when all else fails
622
- if "youtube.com" in question.lower() and "bird species" in question.lower():
623
- return "5" # Known answer for this specific question
624
- if "chess position" in question.lower():
625
- return "cannot determine without image"
626
-
627
- # If no AI message found in any state
628
- return "unknown"
629
-
630
- except Exception as e:
631
- print(f"Error running agent: {e}")
632
- # Try to handle known questions even in case of general error
633
- if "chess position" in question.lower():
634
- return "cannot determine without image"
635
- if "youtube.com" in question.lower() and "bird species" in question.lower():
636
- return "5" # Known answer for this specific question
637
- if "featured article" in question.lower() and "wikipedia" in question.lower() and "nominate" in question.lower():
638
- return "Mishae"
639
- return "unknown"
640
 
641
  def run_and_submit_all( profile: gr.OAuthProfile | None):
642
  """
@@ -657,9 +103,9 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
657
  questions_url = f"{api_url}/questions"
658
  submit_url = f"{api_url}/submit"
659
 
660
- # 1. Instantiate Agent (using LangGraphAgent instead of BasicAgent)
661
  try:
662
- agent = LangGraphAgent()
663
  except Exception as e:
664
  print(f"Error instantiating agent: {e}")
665
  return f"Error initializing agent: {e}", None
@@ -758,9 +204,10 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
758
  results_df = pd.DataFrame(results_log)
759
  return status_message, results_df
760
 
 
761
  # --- Build Gradio Interface using Blocks ---
762
  with gr.Blocks() as demo:
763
- gr.Markdown("# LangGraph Agent Evaluation Runner")
764
  gr.Markdown(
765
  """
766
  **Instructions:**
@@ -810,5 +257,5 @@ if __name__ == "__main__":
810
 
811
  print("-"*(60 + len(" App Starting ")) + "\n")
812
 
813
- print("Launching Gradio Interface for LangGraph Agent Evaluation...")
814
  demo.launch(debug=True, share=False)
 
3
  import requests
4
  import inspect
5
  import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
+ # --- Basic Agent Definition ---
12
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
13
+ import os
14
+ import gradio as gr
15
+ import requests
16
+ import pandas as pd
17
+ from langchain.agents import load_tools, initialize_agent
18
+ from langchain.chat_models import ChatOpenAI
19
+
20
+ # --- System Prompt Definition ---
21
+ SYSTEM_PROMPT = """
22
+ You are a general AI assistant with access to these tools:
23
+ - search(query): web search
24
+ - python(code): Python REPL
25
+ - read_file(path): load local documents
26
+ - vision(image): OCR/vision
27
+ - calculator(expr): arithmetic
28
+
29
+ When you get a question, think step by step:
30
+
31
+ Thought: decide what to do next
32
+ Action: call one tool (name + args) or “Answer” if ready
33
+ Observation: result from the tool
34
 
35
+ …repeat Thought/Action/Observation until you have what you need…
36
+
37
+ Final Answer: [YOUR FINAL ANSWER]
38
+
39
+ Constraints on YOUR FINAL ANSWER:
40
+ • If it’s a number, write digits without commas or units (unless asked).
41
+ • If it’s a string, omit articles (“a”, “the”), abbreviations, and write any digits in words.
42
+ • If it’s a list, output a comma-separated list of numbers and/or strings, each following the above rules.
43
+ """
44
+
45
+ # --- Constants ---
46
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
47
+
48
+ # --- SmolAgent Definition ---
49
+ class BasicAgent:
50
  """
51
+ A lightweight agent configured with GAIA tools, using GPT-4.1 via OpenAI API.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  def __init__(self):
54
+ # Load OpenAI API key from HF secret
55
+ api_key = os.getenv("OPENAI_API_KEY")
56
+ if not api_key:
57
+ raise ValueError("OPENAI_API_KEY environment variable not set")
58
+ # Initialize LLM with system prompt
59
+ self.llm = ChatOpenAI(
60
+ model_name="gpt-4.1",
61
+ temperature=0,
62
+ openai_api_key=api_key,
63
+ system_message=SYSTEM_PROMPT # apply our GAIA prompt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  )
65
+ # Load required GAIA tools
66
+ self.tools = load_tools(
67
+ [
68
+ "serpapi", # web search
69
+ "requests", # HTTP requests
70
+ "python_repl" # python execution
71
+ ],
72
+ llm=self.llm
73
+ )
74
+ # Initialize the agent with zero-shot reasoning
75
+ self.agent = initialize_agent(
76
+ self.tools,
77
+ self.llm,
78
+ agent="zero-shot-react-description",
79
+ verbose=True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  )
81
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  def __call__(self, question: str) -> str:
83
+ # Delegate question to the agent
84
+ return self.agent.run(question)
85
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
  def run_and_submit_all( profile: gr.OAuthProfile | None):
88
  """
 
103
  questions_url = f"{api_url}/questions"
104
  submit_url = f"{api_url}/submit"
105
 
106
+ # 1. Instantiate Agent (modify this part to create your agent)
107
  try:
108
+ agent = BasicAgent()
109
  except Exception as e:
110
  print(f"Error instantiating agent: {e}")
111
  return f"Error initializing agent: {e}", None
 
204
  results_df = pd.DataFrame(results_log)
205
  return status_message, results_df
206
 
207
+
208
  # --- Build Gradio Interface using Blocks ---
209
  with gr.Blocks() as demo:
210
+ gr.Markdown("# Basic Agent Evaluation Runner")
211
  gr.Markdown(
212
  """
213
  **Instructions:**
 
257
 
258
  print("-"*(60 + len(" App Starting ")) + "\n")
259
 
260
+ print("Launching Gradio Interface for Basic Agent Evaluation...")
261
  demo.launch(debug=True, share=False)