gabejavitt commited on
Commit
07b9d51
·
verified ·
1 Parent(s): 2358285

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +316 -488
app.py CHANGED
@@ -24,8 +24,6 @@ from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, ToolMes
24
  from langgraph.prebuilt import ToolNode
25
  from langgraph.graph import START, StateGraph
26
  from langgraph.prebuilt import tools_condition
27
- # REMOVED: from langchain_huggingface import ChatHuggingFace
28
- # REMOVED: from langchain_huggingface import HuggingFaceEndpoint
29
  from langchain_community.tools import DuckDuckGoSearchRun
30
  from langchain_core.tools import tool, BaseTool
31
  # --- ADD GROQ IMPORT ---
@@ -33,305 +31,269 @@ from langchain_groq import ChatGroq
33
 
34
  # (Keep Constants as is)
35
  # --- Constants ---
36
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
37
-
38
- # --- LangGraph Agent State ---
39
- class AgentState(TypedDict):
40
- messages: Annotated[list[AnyMessage], add_messages]
41
-
42
-
43
- # --- Basic Agent Definition ---
44
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
45
- class BasicAgent:
46
-
47
- # --- Tool Definitions as Methods ---
48
- # By making tools methods, they can access self.asr_pipeline
49
-
50
- @tool
51
- def search_tool(self, query: str) -> str:
52
- """Calls DuckDuckGo search and returns the results. Use this for recent information or general web searches."""
53
- print(f"--- Calling Search Tool with query: {query} ---")
54
- try:
55
- search = DuckDuckGoSearchRun()
56
- return search.run(query)
57
- except Exception as e:
58
- return f"Error running search: {e}"
59
-
60
- @tool
61
- def code_interpreter(self, code: str) -> str:
62
- """
63
- Executes a string of Python code and returns its stdout, stderr, and any error.
64
- Use this for calculations, data manipulation (including pandas on dataframes read from files), list operations, string manipulations, or any other Python operation.
65
- The code runs in a sandboxed environment. 'pandas' (as pd) and 'openpyxl' are available.
66
- Ensure the code is complete and executable. If printing, use print().
67
- """
68
- print(f"--- Calling Code Interpreter with code:\n{code}\n---")
69
- output_stream = io.StringIO()
70
- error_stream = io.StringIO()
71
-
72
- try:
73
- # Use contextlib to redirect stdout and stderr
74
- with contextlib.redirect_stdout(output_stream), contextlib.redirect_stderr(error_stream):
75
- # Execute the code. Provide 'pd' (pandas) in the globals
76
- exec(code, {"pd": pd}, {})
77
-
78
- stdout = output_stream.getvalue()
79
- stderr = error_stream.getvalue()
80
-
81
- if stderr:
82
- return f"Error: {stderr}\nStdout: {stdout}"
83
- if stdout:
84
- return f"Success:\n{stdout}"
85
- return "Success: Code executed without error and produced no stdout."
86
-
87
- except Exception as e:
88
- # Capture any exception during exec
89
- return f"Execution failed with error: {str(e)}"
90
 
91
- @tool
92
- def read_file(self, path: str) -> str:
93
- """Reads the content of a file at the specified path. Use this to examine files provided in the question."""
94
- print(f"--- Calling Read File Tool at path: {path} ---")
95
- try:
96
- # Try finding the file relative to the app directory first
97
- # Use os.path.dirname(os.path.realpath(__file__)) for robustness in different execution contexts
98
- script_dir = os.path.dirname(os.path.realpath(__file__))
99
- full_path = os.path.join(script_dir, path)
100
- print(f"Attempting to read relative path: {full_path}")
101
- if not os.path.exists(full_path):
102
- # If not found, try the direct path (might be absolute or relative to cwd)
103
- full_path = path
104
- print(f"Attempting to read direct path: {full_path}")
105
- if not os.path.exists(full_path):
106
- # Try basename for GAIA questions providing just the filename
107
- base_path = os.path.basename(path)
108
- print(f"Attempting to read basename path in cwd: {os.path.join(os.getcwd(), base_path)}")
109
- if os.path.exists(base_path): # Check relative to CWD
110
- full_path = base_path
111
- else:
112
- # List files in current and script directory for debugging
113
- try:
114
- cwd_files = os.listdir(".")
115
- except Exception:
116
- cwd_files = ["Error listing CWD"]
117
- try:
118
- script_dir_files = os.listdir(script_dir)
119
- except Exception:
120
- script_dir_files = ["Error listing script dir"]
121
- return (f"Error: File not found.\n"
122
- f"Tried relative path: '{os.path.join(script_dir, path)}'\n"
123
- f"Tried direct path: '{path}'\n"
124
- f"Tried basename in CWD: '{base_path}'\n"
125
- f"Files in current dir (.): {cwd_files}\n"
126
- f"Files in script dir ({script_dir}): {script_dir_files}")
127
-
128
- print(f"Reading file: {full_path}")
129
- with open(full_path, 'r', encoding='utf-8') as f:
130
- return f.read()
131
- except Exception as e:
132
- return f"Error reading file {path}: {str(e)}"
133
 
134
- @tool
135
- def write_file(self, path: str, content: str) -> str:
136
- """Writes the given content to a file at the specified path relative to the app's directory. Creates directories if they don't exist."""
137
- print(f"--- Calling Write File Tool at path: {path} ---")
138
- try:
139
- # Ensure the directory exists
140
- script_dir = os.path.dirname(os.path.realpath(__file__))
141
- full_path = os.path.join(script_dir, path) # Write relative to script dir
142
- print(f"Writing file to: {full_path}")
143
- os.makedirs(os.path.dirname(full_path), exist_ok=True)
144
-
145
- with open(full_path, 'w', encoding='utf-8') as f:
146
- f.write(content)
147
- return f"Successfully wrote to file {path} (relative to app)."
148
- except Exception as e:
149
- return f"Error writing to file {path}: {str(e)}"
150
 
151
- @tool
152
- def list_directory(self, path: str = ".") -> str:
153
- """Lists the contents (files and directories) of a directory at the specified path relative to the app's directory."""
154
- print(f"--- Calling List Directory Tool at path: {path} ---")
155
- try:
156
- script_dir = os.path.dirname(os.path.realpath(__file__))
157
- full_path = os.path.join(script_dir, path) # List relative to script dir
158
- print(f"Listing directory: {full_path}")
159
- if not os.path.isdir(full_path):
160
- return f"Error: '{path}' is not a valid directory relative to the app."
161
- files = os.listdir(full_path)
162
- return "\n".join(files) if files else "Directory is empty."
163
- except Exception as e:
164
- return f"Error listing directory {path}: {str(e)}"
165
-
166
- @tool
167
- def audio_transcription_tool(self, file_path: str) -> str:
168
- """
169
- Transcribes an audio file (like .mp3 or .wav) using Whisper and returns the text content.
170
- Use this for questions involving audio file analysis.
171
- """
172
- print(f"--- Calling Audio Transcription Tool at path: {file_path} ---")
173
- # Access the pipeline via self
174
- if not self.asr_pipeline:
175
- return "Error: Audio transcription pipeline is not available."
176
- try:
177
- # Try finding the file relative to the app directory first
178
- script_dir = os.path.dirname(os.path.realpath(__file__))
179
- full_path = os.path.join(script_dir, file_path)
180
- print(f"Attempting to transcribe relative path: {full_path}")
181
- if not os.path.exists(full_path):
182
- # If not found, try the direct path
183
- full_path = file_path
184
- print(f"Attempting to transcribe direct path: {full_path}")
185
- if not os.path.exists(full_path):
186
- # Try basename for GAIA questions
187
- base_path = os.path.basename(file_path)
188
- print(f"Attempting to transcribe basename path in CWD: {os.path.join(os.getcwd(), base_path)}")
189
- if os.path.exists(base_path): # Check relative to CWD
190
- full_path = base_path
191
- else:
192
- try:
193
- cwd_files = os.listdir(".")
194
- except Exception:
195
- cwd_files = ["Error listing CWD"]
196
- try:
197
- script_dir_files = os.listdir(script_dir)
198
- except Exception:
199
- script_dir_files = ["Error listing script dir"]
200
- return (f"Error: Audio file not found.\n"
201
- f"Tried relative path: '{os.path.join(script_dir, file_path)}'\n"
202
- f"Tried direct path: '{file_path}'\n"
203
- f"Tried basename in CWD: '{base_path}'\n"
204
- f"Files in current dir (.): {cwd_files}\n"
205
- f"Files in script dir ({script_dir}): {script_dir_files}")
206
-
207
- print(f"Transcribing file: {full_path}")
208
- # Important: Ensure the pipeline can handle the file path directly
209
- transcription = self.asr_pipeline(full_path)
210
- print("--- Transcription Complete ---")
211
- # The output structure might vary slightly based on pipeline version
212
- return transcription.get("text", "Error: Transcription failed to produce text.")
213
- except Exception as e:
214
- import traceback
215
- print(f"Error during audio transcription: {e}")
216
- traceback.print_exc()
217
- return f"Error during audio transcription: {str(e)}"
218
-
219
- @tool
220
- def get_youtube_transcript(self, video_url: str) -> str:
221
- """
222
- Fetches the transcript for a given YouTube video URL. Use this for questions about YouTube video content.
223
- """
224
- print(f"--- Calling YouTube Transcript Tool for URL: {video_url} ---")
225
- try:
226
- # Extract video ID from URL more robustly
227
- video_id = None
228
- if "watch?v=" in video_url:
229
- video_id = video_url.split("v=")[1].split("&")[0]
230
- elif "youtu.be/" in video_url:
231
- video_id = video_url.split("youtu.be/")[1].split("?")[0]
232
-
233
- if not video_id:
234
- return f"Error: Could not extract video ID from URL: {video_url}"
235
-
236
- transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
237
-
238
- # Combine all transcript parts into one string
239
- full_transcript = " ".join([item["text"] for item in transcript_list])
240
- print("--- Transcript Fetched ---")
241
- # Return a limited amount to avoid overwhelming the context
242
- return full_transcript[:8000]
243
- except Exception as e:
244
- return f"Error fetching YouTube transcript: {str(e)}"
245
-
246
- @tool
247
- def scrape_web_page(self, url: str) -> str:
248
- """
249
- Fetches the primary text content of a given web page URL, removing navigation, footer, scripts, and styles.
250
- Use this when you need the full content of a webpage found via search.
251
- """
252
- print(f"--- Calling Web Scraper Tool for URL: {url} ---")
253
- try:
254
- headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
255
- response = requests.get(url, headers=headers, timeout=15) # Increased timeout
256
- response.raise_for_status() # Raise an error for bad responses (4xx or 5xx)
257
 
258
- # Check content type to avoid parsing non-HTML
259
- if 'html' not in response.headers.get('Content-Type', '').lower():
260
- return f"Error: URL {url} did not return HTML content."
 
 
261
 
262
- soup = BeautifulSoup(response.text, 'html.parser')
 
 
263
 
264
- # Remove common non-content tags
265
- for tag in soup(["script", "style", "nav", "footer", "aside", "header", "form", "button", "input"]):
266
- tag.extract()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
 
268
- # Attempt to find the main content area (heuristics, may not always work)
269
- main_content = soup.find('main') or soup.find('article') or soup.find('div', role='main') or soup.body
270
- if not main_content:
271
- main_content = soup # Fallback to the whole soup if no main area found
 
 
 
 
 
 
 
 
 
 
 
 
272
 
273
- text = main_content.get_text(separator='\n', strip=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
 
275
- # Clean up excessive whitespace
276
- lines = (line.strip() for line in text.splitlines())
277
- chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
278
- text = '\n'.join(chunk for chunk in chunks if chunk)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
 
280
- print("--- Web Page Scraped ---")
281
- # Limit context size
282
- return text[:8000]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
 
284
- except requests.exceptions.RequestException as e:
285
- return f"Error fetching web page {url}: {str(e)}"
286
- except Exception as e:
287
- return f"Error scraping web page {url}: {str(e)}"
288
 
289
- # --- End of Tool Definitions ---
290
 
 
 
 
291
 
292
  def __init__(self):
293
  print("BasicAgent (LangGraph) initializing...")
294
 
295
- # 1. Initialize ASR Pipeline *inside* init - DELAYED LOADING
296
- self.asr_pipeline = None # Initialize as None first
297
- try:
298
- print("Loading ASR (Whisper) pipeline...")
299
- device = "cuda:0" if torch.cuda.is_available() else "cpu"
300
- print(f"Using device: {device} for ASR.")
301
- self.asr_pipeline = pipeline(
302
- "automatic-speech-recognition",
303
- model="openai/whisper-base",
304
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
305
- device=device
306
- )
307
- print("✅ ASR (Whisper) pipeline loaded successfully.")
308
- except Exception as e:
309
- print(f"⚠️ Warning: Could not load ASR pipeline. Audio tool will not work. Error: {e}")
310
- import traceback
311
- traceback.print_exc() # Print full traceback for ASR load error
312
- self.asr_pipeline = None
313
- # ====================================================
314
-
315
- # 2. Get API Tokens from Space Secrets
316
- # HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN") # Keep if needed elsewhere
317
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
318
  if not GROQ_API_KEY:
319
  raise ValueError("GROQ_API_KEY secret is not set! Please add it to your Space secrets.")
 
 
 
 
320
 
321
- # 3. Collect Tool Methods
322
- self.tools = [
323
- self.search_tool,
324
- self.code_interpreter,
325
- self.read_file,
326
- self.write_file,
327
- self.list_directory,
328
- self.audio_transcription_tool,
329
- self.get_youtube_transcript,
330
- self.scrape_web_page
331
- ]
332
-
333
- # 4. Define the Improved System Prompt with Placeholders
334
  tool_descriptions = "\n".join([f"- {tool.name}: {tool.description}" for tool in self.tools])
 
335
  self.system_prompt = f"""You are a highly intelligent and meticulous AI assistant built to answer questions from the GAIA benchmark.
336
  Your primary goal is to provide **only the concise, factual, and direct answer** to the user's question, exactly matching the format required by the benchmark (e.g., a name, a number, a specific string format, a comma-separated list).
337
 
@@ -366,156 +328,55 @@ You have access to the following tools to gather information and perform actions
366
  7. Once you have derived the final, definitive answer that meets the question's requirements, output **ONLY** that answer and nothing else. Stop the process.
367
  """
368
 
369
- # 5. Initialize the LLM (Using Groq and Mistral Instruct)
370
- # ==================== MODIFIED LLM INIT ====================
371
  print("Initializing Groq LLM Endpoint...")
372
  try:
373
- # Use the specific model name recommended by Groq for mixtral-8x7b-32768
374
  chat_llm = ChatGroq(
375
  temperature=0.01,
376
  groq_api_key=GROQ_API_KEY,
377
- model_name="llama-3.1-8b-instant"
378
  )
379
- print("✅ Groq LLM Endpoint initialized.")
380
  except Exception as e:
381
- print(f"Error initializing Groq LLM: {e}")
382
- raise # Reraise the exception to stop initialization if LLM fails
383
  # ===========================================================
384
 
385
- # 6. Bind tools to the LLM
386
- # We still bind tools, but we'll manually parse if it fails
387
  self.llm_with_tools = chat_llm.bind_tools(self.tools)
388
- print("✅ Tools bound to LLM.")
389
 
390
- # 7. Define the Agent Node with Manual Tool Parsing
391
- # ==================== NODE WITH PLACEHOLDER REGEX ====================
392
  def agent_node(state: AgentState):
393
  print("--- Running Agent Node ---")
394
- messages_with_prompt = state["messages"]
395
-
396
- # Invoke the LLM (which has tools bound)
397
- ai_message: AIMessage = self.llm_with_tools.invoke(messages_with_prompt)
398
  print(f"AI Message Raw Content: {ai_message.content}")
399
-
400
- # --- Manual Tool Call Parsing Logic ---
401
- tool_calls = []
402
- # Check if bind_tools already populated tool_calls (ideal case)
403
- if ai_message.tool_calls:
404
- print(f"SUCCESS: bind_tools correctly parsed tool_calls: {ai_message.tool_calls}")
405
- tool_calls = ai_message.tool_calls
406
- # Fallback: Check if content contains likely JSON for tool calls
407
- # Use regex to find JSON possibly wrapped in markdown
408
- elif isinstance(ai_message.content, str):
409
- print("Attempting manual JSON parsing from content...")
410
- # --- THIS IS THE LINE WITH THE FIRST PLACEHOLDER ---
411
- json_match = re.search(r"...") # Replace this line manually
412
-
413
- if json_match:
414
- # Extract the first valid group that contains JSON
415
- json_str = json_match.group(1) or json_match.group(2)
416
- if json_str:
417
- try:
418
- # Attempt to strip potential leading/trailing non-JSON chars if regex was too broad
419
- json_str_cleaned = json_str.strip()
420
- # Basic validation: starts with { or [ ends with } or ]
421
- if (json_str_cleaned.startswith('{') and json_str_cleaned.endswith('}')) or \
422
- (json_str_cleaned.startswith('[') and json_str_cleaned.endswith(']')):
423
- data = json.loads(json_str_cleaned)
424
- # Check structure for single tool call (dict)
425
- if isinstance(data, dict) and "tool" in data and "tool_input" in data:
426
- tool_name = data.get("tool")
427
- tool_input = data.get("tool_input")
428
- # Basic validation of tool name and input type
429
- if isinstance(tool_name, str) and isinstance(tool_input, dict):
430
- call_id = f"tool_{uuid.uuid4()}" # Generate unique ID
431
- tool_calls.append(ToolCall(name=tool_name, args=tool_input, id=call_id))
432
- print(f"Manually parsed Single Tool Call: ID={call_id}, Name={tool_name}, Args={tool_input}")
433
- ai_message.content = "" # Clear content after successful parse
434
- else:
435
- print("Parsed JSON dict, but incorrect tool name type or tool_input is not a dict.")
436
- # Check structure for multiple tool calls (if model outputs a list)
437
- elif isinstance(data, list):
438
- print("Attempting to parse list as multiple tool calls...")
439
- parsed_list_ok = True
440
- temp_tool_calls = []
441
- for item in data:
442
- if isinstance(item, dict) and "tool" in item and "tool_input" in item:
443
- tool_name = item.get("tool")
444
- tool_input = item.get("tool_input")
445
- if isinstance(tool_name, str) and isinstance(tool_input, dict):
446
- call_id = f"tool_{uuid.uuid4()}"
447
- temp_tool_calls.append(ToolCall(name=tool_name, args=tool_input, id=call_id))
448
- print(f"Manually parsed Multi-Tool Call item: ID={call_id}, Name={tool_name}, Args={tool_input}")
449
- else:
450
- parsed_list_ok = False
451
- print("Parsed JSON list item, but incorrect tool name type or tool_input is not a dict.")
452
- break
453
- else:
454
- parsed_list_ok = False
455
- print("Parsed JSON list item, but not a valid tool call structure (missing 'tool' or 'tool_input').")
456
- break
457
- if parsed_list_ok and temp_tool_calls:
458
- tool_calls.extend(temp_tool_calls)
459
- ai_message.content = "" # Clear content if list successfully parsed
460
- else:
461
- print("Parsed JSON, but incorrect structure (neither dict with tool/tool_input nor list of such dicts).")
462
- else:
463
- print(f"Skipping manual parse: Cleaned JSON string ('{json_str_cleaned[:50]}...') does not start/end correctly with braces/brackets.")
464
- except json.JSONDecodeError as e:
465
- print(f"Manual JSON parsing failed: {e}. String was: '{json_str[:500]}...'") # Log the problematic string
466
- except Exception as e:
467
- print(f"Unexpected error during manual parsing: {e}")
468
- import traceback
469
- traceback.print_exc()
470
- else:
471
- print("Regex matched, but no JSON content found in capture groups.")
472
- else:
473
- print("No JSON block found in content for manual parsing.")
474
- else:
475
- print("AI Message content is not a string, skipping manual parse.")
476
- # --- End Manual Parsing ---
477
-
478
- # Attach manually parsed calls (if any) to the message
479
- # This allows tools_condition to work correctly
480
- if tool_calls and not ai_message.tool_calls:
481
- ai_message.tool_calls = tool_calls
482
- # Also clear invalid_tool_calls if we manually succeeded
483
- ai_message.invalid_tool_calls = [] # Use empty list instead of None
484
-
485
- # Log final interpretation
486
- if ai_message.tool_calls:
487
- print(f"AI Message contains tool calls (after manual check): {ai_message.tool_calls}")
488
- elif ai_message.invalid_tool_calls:
489
- print(f"AI Message contains INVALID tool calls: {ai_message.invalid_tool_calls}")
490
- else:
491
- print(f"AI Message Interpreted Content (no tool calls): {ai_message.pretty_repr()}")
492
-
493
  return {"messages": [ai_message]}
494
  # =======================================================
495
 
496
- # 8. Define the Tool Node
 
497
  tool_node = ToolNode(self.tools)
498
 
499
- # 9. Create the Graph
500
  print("Building agent graph...")
501
  graph_builder = StateGraph(AgentState)
502
  graph_builder.add_node("agent", agent_node)
503
  graph_builder.add_node("tools", tool_node)
504
  graph_builder.add_edge(START, "agent")
505
- graph_builder.add_conditional_edges(
506
- "agent",
507
- tools_condition, # This condition checks ai_message.tool_calls
508
- {
509
- "tools": "tools",
510
- "__end__": "__end__",
511
- },
512
- )
513
  graph_builder.add_edge("tools", "agent")
514
 
515
- # 10. Compile the graph and store it
516
  self.graph = graph_builder.compile()
517
  print("✅ Graph compiled successfully.")
518
 
 
 
 
519
  def __call__(self, question: str) -> str:
520
  print(f"\n--- Starting Agent Run for Question ---")
521
  print(f"Agent received question (first 100 chars): {question[:100]}...")
@@ -602,8 +463,7 @@ You have access to the following tools to gather information and perform actions
602
  traceback.print_exc()
603
  return f"AGENT GRAPH ERROR: {e}"
604
 
605
-
606
- # --- (Original Template Code Starts Here - NO CHANGES NEEDED BELOW THIS LINE) ---
607
 
608
  def run_and_submit_all( profile: gr.OAuthProfile | None):
609
  """
@@ -615,27 +475,19 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
615
  username= f"{profile.username}"
616
  print(f"User logged in: {username}")
617
  else:
618
- # For local testing without login, create a dummy username
619
- username = "local_test_user"
620
- print("User not logged in, using dummy username for testing.")
621
- # return "Please Login to Hugging Face with the button.", None # Don't block local test
622
 
623
  api_url = DEFAULT_API_URL
624
- # questions_url = f"{api_url}/questions" # Skip fetching
625
- submit_url = f"{api_url}/submit" # Keep for context
626
 
627
  print("Instantiating agent...")
628
  try:
629
- # This assumes the BasicAgent class is defined in the same scope
630
- # when this function is actually run in the full app.py
631
- agent = BasicAgent()
632
- if not hasattr(agent, 'asr_pipeline') or agent.asr_pipeline is None:
633
- print("⚠️ ASR Pipeline might not have loaded correctly. Audio questions could fail.")
634
 
635
- except Exception as e:
636
- print(f"Error instantiating agent: {e}")
637
- import traceback; traceback.print_exc()
638
- return f"Error initializing agent: {e}", None
639
  print("Agent instantiated successfully.")
640
 
641
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run"
@@ -644,20 +496,9 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
644
  # ==================== MOCK QUESTIONS SECTION ====================
645
  print("--- USING MOCK QUESTIONS FOR TESTING ---")
646
  mock_questions_data = [
647
- {
648
- "task_id": "mock_task_search_001",
649
- "question": "What is the capital of France?"
650
- },
651
- {
652
- "task_id": "mock_task_code_001",
653
- "question": "Calculate 15 factorial using python. Only output the final number."
654
- },
655
- # Add more mock questions here if needed, e.g., for file/audio/youtube
656
- # {
657
- # "task_id": "mock_task_audio_001",
658
- # "question": "Transcribe the audio file 'sample.mp3'", # Make sure sample.mp3 exists
659
- # "file_path": "sample.mp3"
660
- # },
661
  ]
662
  questions_data = mock_questions_data
663
  print(f"Using {len(questions_data)} mock questions.")
@@ -668,78 +509,81 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
668
  total_questions = len(questions_data)
669
  print(f"Running agent on {total_questions} mock questions...")
670
 
671
- questions_to_run = questions_data # Use the mock data
672
 
673
  for i, item in enumerate(questions_to_run):
674
  task_id = item.get("task_id")
675
  question_text = item.get("question")
676
- if not task_id or question_text is None: print(f"Skipping item {i+1}: missing task_id or question: {item}"); continue
677
 
678
  print(f"\n--- Running Mock Task {i+1}/{len(questions_to_run)} (ID: {task_id}) ---")
679
  try:
680
  file_path = item.get("file_path")
681
  question_text_with_context = question_text
682
  if file_path:
683
- # Check existence relative to script dir first, then CWD
684
  try: script_dir = os.path.dirname(os.path.realpath(__file__))
685
  except NameError: script_dir = os.getcwd()
686
  potential_script_path = os.path.join(script_dir, file_path)
687
- potential_cwd_path = os.path.join(os.getcwd(), file_path) # Check CWD too
688
-
689
  if os.path.exists(potential_script_path): file_context = f"[Attached File (exists): {file_path}]"
690
  elif os.path.exists(potential_cwd_path): file_context = f"[Attached File (exists in cwd): {file_path}]"
691
- else: file_context = f"[Attached File (path provided, NOT FOUND): {file_path}]" # Indicate if not found
692
-
693
  question_text_with_context = f"{question_text}\n\n{file_context}"
694
  print(f"Question includes file reference: {file_path}")
695
 
 
696
  submitted_answer = agent(question_text_with_context)
 
 
697
  submitted_answer_str = str(submitted_answer) if submitted_answer is not None else ""
698
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer_str})
699
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer_str})
700
  print(f"--- Mock Task {task_id} Complete ---")
701
  except Exception as e:
702
- print(f"FATAL ERROR running agent graph on mock task {task_id}: {e}")
703
- import traceback; traceback.print_exc()
704
- submitted_answer = f"AGENT CRASH ERROR: {e}"
 
 
 
 
 
 
705
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
706
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
707
 
708
- if not answers_payload: print("Agent did not produce any answers."); return "Agent did not produce answers.", pd.DataFrame(results_log)
709
 
710
- # 4. Prepare Submission Data (for display only)
711
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
712
- status_update = f"Agent finished processing mock questions. Prepared {len(answers_payload)} answers for user '{username}'."
713
  print(status_update)
714
  print("--- MOCK RUN COMPLETE - SUBMISSION SKIPPED ---")
715
 
716
- # 5. Skip Actual Submission for Mock Run
717
- final_status = "--- MOCK RUN COMPLETE ---\n" + status_update + "\nSubmission to the scoring server was SKIPPED."
718
  results_df = pd.DataFrame(results_log)
719
- # Add a column indicating mock status
720
  results_df['Correct'] = 'N/A (Mock)'
721
 
722
  return final_status, results_df
723
 
 
724
  # --- Build Gradio Interface using Blocks ---
725
  with gr.Blocks() as demo:
726
- gr.Markdown("# GAIA Agent Evaluation Runner (LangGraph + Mistral)") # Updated title
727
- gr.Markdown(
728
- """
729
  **Instructions:**
730
- 1. Log in to your Hugging Face account using the button below.
731
- 2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run the agent, submit answers, and see the score.
732
  ---
733
  **Notes:**
734
- * The full evaluation can take **several hours**. Use the logs tab to monitor progress.
735
- * This agent uses `mistralai/Mistral-7B-Instruct-v0.2` and multiple tools.
736
- * Make sure your `HUGGINGFACEHUB_API_TOKEN` secret is set correctly in Settings.
737
- """
738
- )
739
  gr.LoginButton()
740
- run_button = gr.Button("Run Evaluation & Submit All Answers")
741
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
742
- results_table = gr.DataFrame(label="Questions, Agent Answers, and Results", wrap=True)
743
 
744
  run_button.click(
745
  fn=run_and_submit_all,
@@ -748,35 +592,19 @@ with gr.Blocks() as demo:
748
 
749
if __name__ == "__main__":
    # Script entry point: log the Hugging Face Space environment and some
    # filesystem context, then start the Gradio app.
    print("\n" + "-"*30 + " App Starting " + "-"*30)

    # Check for SPACE_HOST and SPACE_ID at startup for information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # SPACE_HOST normally already ends in ".hf.space"; append the suffix
        # only when it is missing so the logged URL is never doubled.
        if space_host_startup.endswith(".hf.space"):
            runtime_host = space_host_startup
        else:
            runtime_host = f"{space_host_startup}.hf.space"
        print(f" Runtime URL should be: https://{runtime_host}")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        print(f" SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    # Add detailed path info for debugging file access
    print(f"Script directory (__file__): {os.path.dirname(os.path.realpath(__file__))}")
    print(f"Current working directory (os.getcwd()): {os.getcwd()}")
    # Listing is purely informational; any OS-level failure (missing directory,
    # permissions, ...) must not stop the app from launching.
    try:
        print("Files in current working directory:", os.listdir("."))
    except OSError:
        print("Warning: Could not list current working directory.")

    print("-"*(60 + len(" App Starting ")) + "\n")
    print("Launching Gradio Interface for GAIA Agent Evaluation...")
    # queue() serialises repeated clicks; only one run should happen at a time.
    demo.queue().launch(debug=True, share=False)
 
 
24
  from langgraph.prebuilt import ToolNode
25
  from langgraph.graph import START, StateGraph
26
  from langgraph.prebuilt import tools_condition
 
 
27
  from langchain_community.tools import DuckDuckGoSearchRun
28
  from langchain_core.tools import tool, BaseTool
29
  # --- ADD GROQ IMPORT ---
 
31
 
32
  # (Keep Constants as is)
33
  # --- Constants ---
34
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" # This URL is currently not working
35
+
36
# --- Initialize ASR Pipeline (Global Scope) ---
# The Whisper model is loaded once at import time so every tool call reuses it.
# On any failure `asr_pipeline` stays None and the audio tool reports that the
# pipeline is unavailable instead of crashing the app.
asr_pipeline = None
try:
    print("Loading ASR (Whisper) pipeline globally...")
    # The pipeline takes a device index: 0 = first CUDA GPU, -1 = CPU.
    if torch.cuda.is_available():
        device, device_name = 0, "cuda:0"
    else:
        device, device_name = -1, "cpu"
    print(f"Attempting to use device: {device_name} for ASR.")
    asr_pipeline = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-base",
        # Half precision only on the GPU; the CPU path stays in float32.
        torch_dtype=torch.float32 if device != 0 else torch.float16,
        device=device,
    )
    print("✅ ASR (Whisper) pipeline loaded successfully.")
except Exception as e:
    print(f"⚠️ Warning: Could not load ASR pipeline globally. Audio tool will not work. Error: {e}")
    import traceback
    traceback.print_exc()  # Full traceback helps diagnose model/download issues.
    asr_pipeline = None
58
+ # ====================================================
59
+
60
+ # --- Tool Definitions (Standalone Functions) ---
61
+
62
@tool
def search_tool(query: str) -> str:
    """Calls DuckDuckGo search and returns the results. Use this for recent information or general web searches."""
    # The docstring above doubles as the tool description shown to the LLM,
    # so it is kept verbatim.
    print(f"--- Calling Search Tool with query: {query} ---")
    try:
        results = DuckDuckGoSearchRun().run(query)
    except Exception as exc:
        # Failures are returned as text so the agent can react to them.
        return f"Error running search: {exc}"
    return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
@tool
def code_interpreter(code: str) -> str:
    """
    Executes a string of Python code and returns its stdout, stderr, and any error.
    Use this for calculations, data manipulation (including pandas on dataframes read from files), list operations, string manipulations, or any other Python operation.
    The code runs in a sandboxed environment. 'pandas' (as pd) and 'openpyxl' are available.
    Ensure the code is complete and executable. If printing, use print().
    """
    # The docstring above is the tool description shown to the LLM and is kept
    # verbatim.
    #
    # SECURITY NOTE(review): despite the wording of the description, exec()
    # runs the model-generated code inside this process with full access to
    # the filesystem and environment. It is NOT sandboxed; isolate it (e.g. in
    # a subprocess or container) before exposing this to untrusted input.
    print(f"--- Calling Code Interpreter with code:\n{code}\n---")
    output_stream = io.StringIO()
    error_stream = io.StringIO()

    # Use ONE dict as both globals and locals. With separate dicts, exec()
    # runs the code as if it were a class body, so top-level names (variables,
    # imports, helper functions) are invisible inside functions and
    # comprehensions defined by the snippet and raise NameError.
    # __name__ is set so `if __name__ == "__main__":` guards actually run.
    namespace = {"__name__": "__main__", "pd": pd}

    try:
        with contextlib.redirect_stdout(output_stream), contextlib.redirect_stderr(error_stream):
            exec(code, namespace)
    except (Exception, SystemExit) as e:
        # SystemExit is caught too: a snippet calling exit()/sys.exit() must
        # not tear down the agent run. The exception type is included because
        # str(e) alone is often uninformative (e.g. KeyError gives just "'x'").
        return f"Execution failed with error: {type(e).__name__}: {e}"

    stdout = output_stream.getvalue()
    stderr = error_stream.getvalue()

    if stderr:
        return f"Error: {stderr}\nStdout: {stdout}"
    if stdout:
        return f"Success:\n{stdout}"
    return "Success: Code executed without error and produced no stdout."
102
 
103
@tool
def read_file(path: str) -> str:
    """Reads the content of a file at the specified path. Use this to examine files provided in the question."""
    # Resolution order: <cwd>/<path>, then <path> as given, then
    # <cwd>/<basename of path>. The first candidate that exists is read as
    # UTF-8 text. Every failure is returned as a message, never raised.
    print(f"--- Calling Read File Tool at path: {path} ---")
    try:
        script_dir = os.getcwd()
        print(f"Base directory for reading: {script_dir}")

        resolved = None

        relative_candidate = os.path.join(script_dir, path)
        print(f"Attempting to read relative path: {relative_candidate}")
        if os.path.exists(relative_candidate):
            resolved = relative_candidate

        if resolved is None:
            print(f"Attempting to read direct/absolute path: {path}")
            if os.path.exists(path):
                resolved = path

        if resolved is None:
            # Last resort: only the file name, looked up in the working directory.
            cwd_base_path = os.path.join(os.getcwd(), os.path.basename(path))
            print(f"Attempting to read basename path in CWD: {cwd_base_path}")
            if os.path.exists(cwd_base_path):
                resolved = cwd_base_path

        if resolved is None:
            # Include a directory listing so the agent can pick the right name.
            try:
                cwd_files = os.listdir(".")
            except Exception as list_e:
                cwd_files = [f"Error listing CWD: {list_e}"]
            return (f"Error: File not found.\n"
                    f"Tried relative: '{relative_candidate}'\n"
                    f"Tried direct/absolute: '{path}'\n"
                    f"Tried basename in CWD: '{cwd_base_path}'\n"
                    f"Files in CWD (.): {cwd_files}")

        print(f"Reading file: {resolved}")
        with open(resolved, 'r', encoding='utf-8') as handle:
            return handle.read()
    except Exception as err:
        return f"Error reading file {path}: {str(err)}"
140
 
141
@tool
def write_file(path: str, content: str) -> str:
    """Writes the given content to a file at the specified path relative to the app's current directory. Creates directories if they don't exist."""
    # The target is <cwd>/<path>; missing parent directories are created and
    # the file is written (overwritten) as UTF-8. Errors come back as text.
    print(f"--- Calling Write File Tool at path: {path} ---")
    try:
        target = os.path.join(os.getcwd(), path)
        print(f"Writing file to: {target}")
        parent_dir = os.path.dirname(target)
        os.makedirs(parent_dir, exist_ok=True)

        with open(target, 'w', encoding='utf-8') as out:
            out.write(content)
    except Exception as err:
        return f"Error writing to file {path}: {str(err)}"
    return f"Successfully wrote to file {path} (relative to CWD)."
157
 
158
@tool
def list_directory(path: str = ".") -> str:
    """Lists the contents (files and directories) of a directory at the specified path relative to the app's current directory."""
    # Returns one entry per line, a fixed message for an empty directory, or
    # an error message; nothing is raised to the caller.
    print(f"--- Calling List Directory Tool at path: {path} ---")
    try:
        target = os.path.join(os.getcwd(), path)
        print(f"Listing directory: {target}")
        if os.path.isdir(target):
            entries = os.listdir(target)
            if entries:
                return "\n".join(entries)
            return "Directory is empty."
        return f"Error: '{path}' is not a valid directory relative to CWD."
    except Exception as err:
        return f"Error listing directory {path}: {str(err)}"
173
 
174
@tool
def audio_transcription_tool(file_path: str) -> str:
    """
    Transcribes an audio file (like .mp3 or .wav) using Whisper and returns the text content.
    Use this for questions involving audio file analysis.
    """
    # Uses the module-level `asr_pipeline`. The file is resolved in this order:
    # <cwd>/<file_path>, then <file_path> as given, then <cwd>/<basename>.
    print(f"--- Calling Audio Transcription Tool at path: {file_path} ---")
    if asr_pipeline is None:
        return "Error: Audio transcription pipeline is not available or failed to load."
    try:
        script_dir = os.getcwd()
        resolved = None

        relative_candidate = os.path.join(script_dir, file_path)
        print(f"Attempting to transcribe relative path: {relative_candidate}")
        if os.path.exists(relative_candidate):
            resolved = relative_candidate

        if resolved is None:
            print(f"Attempting to transcribe direct/absolute path: {file_path}")
            if os.path.exists(file_path):
                resolved = file_path

        if resolved is None:
            cwd_base_path = os.path.join(os.getcwd(), os.path.basename(file_path))
            print(f"Attempting to transcribe basename path in CWD: {cwd_base_path}")
            if os.path.exists(cwd_base_path):
                resolved = cwd_base_path

        if resolved is None:
            # Include a directory listing so the agent can pick the right name.
            try:
                cwd_files = os.listdir(".")
            except Exception as list_e:
                cwd_files = [f"Error listing CWD: {list_e}"]
            return (f"Error: Audio file not found.\n"
                    f"Tried relative: '{relative_candidate}'\n"
                    f"Tried direct/absolute: '{file_path}'\n"
                    f"Tried basename in CWD: '{cwd_base_path}'\n"
                    f"Files in CWD (.): {cwd_files}")

        print(f"Transcribing file: {resolved}")
        result = asr_pipeline(resolved)
        print("--- Transcription Complete ---")
        # The pipeline result is read as a mapping with a "text" entry.
        return result.get("text", "Error: Transcription failed.")
    except Exception as err:
        import traceback; traceback.print_exc()
        return f"Error during audio transcription: {str(err)}"
214
 
215
def _extract_youtube_video_id(video_url: str):
    """Return the video ID embedded in a YouTube URL, or None if none is found.

    Handles `watch?v=ID` (with `v` anywhere in the query string),
    `youtu.be/ID`, and the `/shorts/ID`, `/embed/ID`, `/live/ID`, `/v/ID`
    path forms. Query strings and fragments never leak into the returned ID.
    """
    from urllib.parse import parse_qs, urlparse

    candidate = video_url.strip()
    if "://" not in candidate:
        # Without a scheme urlparse() would treat the host as part of the path.
        candidate = "https://" + candidate
    parsed = urlparse(candidate)

    ids = parse_qs(parsed.query).get("v")
    if ids and ids[0]:
        return ids[0]

    segments = [part for part in parsed.path.split("/") if part]
    if parsed.netloc.lower().endswith("youtu.be"):
        return segments[0] if segments else None
    if len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
        return segments[1]
    return None


@tool
def get_youtube_transcript(video_url: str) -> str:
    """
    Fetches the transcript for a given YouTube video URL. Use this for questions about YouTube video content.
    """
    # The docstring above is the tool description shown to the LLM and is kept
    # verbatim. All failures are returned as text rather than raised.
    print(f"--- Calling YouTube Transcript Tool for URL: {video_url} ---")
    try:
        video_id = _extract_youtube_video_id(video_url)
        if not video_id:
            return f"Error: Could not extract video ID from URL: {video_url}"
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
        full_transcript = " ".join(item["text"] for item in transcript_list)
        print("--- Transcript Fetched ---")
        return full_transcript[:8000]  # Limit context
    except Exception as e:
        return f"Error fetching YouTube transcript: {str(e)}"
231
+
232
@tool
def scrape_web_page(url: str) -> str:
    """
    Fetches the primary text content of a given web page URL, removing navigation, footer, scripts, and styles.
    Use this when you need the full content of a webpage found via search.
    """
    # The docstring above is the tool description shown to the LLM and is kept
    # verbatim. All failures are returned as text rather than raised.
    print(f"--- Calling Web Scraper Tool for URL: {url} ---")
    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()
        if 'html' not in response.headers.get('Content-Type', '').lower():
            return f"Error: URL {url} did not return HTML."

        soup = BeautifulSoup(response.text, 'html.parser')
        # Drop page chrome and non-content elements before extracting text.
        for tag in soup(["script", "style", "nav", "footer", "aside", "header", "form", "button", "input"]):
            tag.extract()

        # Prefer the most specific content container that is present.
        main_content = soup.find('main') or soup.find('article') or soup.find('div', role='main') or soup.body or soup
        raw_text = main_content.get_text(separator='\n', strip=True)

        # Collapse runs of whitespace inside each line and drop empty lines.
        # Splitting on single spaces and re-joining with newlines would put
        # every word on its own line and destroy the page's line structure.
        cleaned_lines = (" ".join(line.split()) for line in raw_text.splitlines())
        text = '\n'.join(line for line in cleaned_lines if line)

        print("--- Web Page Scraped ---")
        return text[:8000]  # Limit context
    except requests.exceptions.RequestException as e:
        return f"Error fetching web page {url}: {str(e)}"
    except Exception as e:
        return f"Error scraping web page {url}: {str(e)}"
255
+
256
+ # --- End of Tool Definitions ---
257
+
258
# Every standalone tool the agent may call. BasicAgent uses this list for the
# system prompt, for bind_tools() and for the ToolNode.
defined_tools = [
    search_tool,
    code_interpreter,
    read_file,
    write_file,
    list_directory,
    audio_transcription_tool,
    get_youtube_transcript,
    scrape_web_page,
]
269
 
270
# --- LangGraph Agent State ---
class AgentState(TypedDict):
    """State passed between graph nodes: the conversation so far."""

    # `add_messages` is attached as the reducer for this key. Nodes return
    # {"messages": [...]} and the reducer merges that into the existing list.
    messages: Annotated[list[AnyMessage], add_messages]
 
273
 
 
274
 
275
+ # --- Basic Agent Definition ---
276
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
277
+ class BasicAgent:
278
 
279
  def __init__(self):
280
  print("BasicAgent (LangGraph) initializing...")
281
 
282
+ # 1. Get API Tokens from Space Secrets
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
284
  if not GROQ_API_KEY:
285
  raise ValueError("GROQ_API_KEY secret is not set! Please add it to your Space secrets.")
286
+ # Keep HF Token check just in case
287
+ HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
288
+ if not HUGGINGFACEHUB_API_TOKEN:
289
+ print("⚠️ Warning: HUGGINGFACEHUB_API_TOKEN secret not set.")
290
 
291
+ # 2. Use the globally defined tools list
292
+ self.tools = defined_tools # Use the list of functions
293
+
294
+ # 3. Define the Improved System Prompt
 
 
 
 
 
 
 
 
 
295
  tool_descriptions = "\n".join([f"- {tool.name}: {tool.description}" for tool in self.tools])
296
+ # Use placeholders for JSON markers
297
  self.system_prompt = f"""You are a highly intelligent and meticulous AI assistant built to answer questions from the GAIA benchmark.
298
  Your primary goal is to provide **only the concise, factual, and direct answer** to the user's question, exactly matching the format required by the benchmark (e.g., a name, a number, a specific string format, a comma-separated list).
299
 
 
328
  7. Once you have derived the final, definitive answer that meets the question's requirements, output **ONLY** that answer and nothing else. Stop the process.
329
  """
330
 
331
+ # 4. Initialize the LLM (Using Groq and Llama 3.1 8B Instant)
 
332
  print("Initializing Groq LLM Endpoint...")
333
  try:
 
334
  chat_llm = ChatGroq(
335
  temperature=0.01,
336
  groq_api_key=GROQ_API_KEY,
337
+ model_name="llama-3.1-8b-instant" # Use Llama 3.1 8B Instant
338
  )
339
+ print("✅ Groq LLM Endpoint initialized with llama-3.1-8b-instant.")
340
  except Exception as e:
341
+ print(f"Error initializing Groq LLM: {e}"); raise
 
342
  # ===========================================================
343
 
344
+ # 5. Bind tools to the LLM
345
+ # bind_tools should work correctly with ChatGroq and standalone functions
346
  self.llm_with_tools = chat_llm.bind_tools(self.tools)
347
+ print("✅ Tools bound to LLM (using bind_tools).")
348
 
349
+ # 6. Define the Agent Node (Simplified)
 
350
  def agent_node(state: AgentState):
351
  print("--- Running Agent Node ---")
352
+ ai_message: AIMessage = self.llm_with_tools.invoke(state["messages"])
 
 
 
353
  print(f"AI Message Raw Content: {ai_message.content}")
354
+ if ai_message.tool_calls: print(f"AI Message tool calls via bind_tools: {ai_message.tool_calls}")
355
+ elif ai_message.invalid_tool_calls: print(f"AI Message INVALID tool calls via bind_tools: {ai_message.invalid_tool_calls}")
356
+ else: print(f"AI Message content (no calls): {ai_message.pretty_repr()}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  return {"messages": [ai_message]}
358
  # =======================================================
359
 
360
+ # 7. Define the Tool Node
361
+ # Pass the list of standalone functions
362
  tool_node = ToolNode(self.tools)
363
 
364
+ # 8. Create the Graph
365
  print("Building agent graph...")
366
  graph_builder = StateGraph(AgentState)
367
  graph_builder.add_node("agent", agent_node)
368
  graph_builder.add_node("tools", tool_node)
369
  graph_builder.add_edge(START, "agent")
370
+ graph_builder.add_conditional_edges("agent", tools_condition, {"tools": "tools", "__end__": "__end__"})
 
 
 
 
 
 
 
371
  graph_builder.add_edge("tools", "agent")
372
 
373
+ # 9. Compile the graph and store it
374
  self.graph = graph_builder.compile()
375
  print("✅ Graph compiled successfully.")
376
 
377
+ # NOTE: __call__ is still defined directly below; it was not removed.
378
+
379
+
380
  def __call__(self, question: str) -> str:
381
  print(f"\n--- Starting Agent Run for Question ---")
382
  print(f"Agent received question (first 100 chars): {question[:100]}...")
 
463
  traceback.print_exc()
464
  return f"AGENT GRAPH ERROR: {e}"
465
 
466
+ # --- (Original Template Code Starts Here - Modified for Mock Questions) ---
 
467
 
468
  def run_and_submit_all( profile: gr.OAuthProfile | None):
469
  """
 
475
  username= f"{profile.username}"
476
  print(f"User logged in: {username}")
477
  else:
478
+ username = "local_test_user"; print("User not logged in, using dummy username.")
 
 
 
479
 
480
  api_url = DEFAULT_API_URL
481
+ submit_url = f"{api_url}/submit"
 
482
 
483
  print("Instantiating agent...")
484
  try:
485
+ agent = BasicAgent() # Assumes BasicAgent class is defined above
486
+ # Check global asr_pipeline status
487
+ if asr_pipeline is None:
488
+ print("⚠️ ASR Pipeline might not have loaded correctly globally. Audio questions could fail.")
 
489
 
490
+ except Exception as e: print(f"Error instantiating agent: {e}"); import traceback; traceback.print_exc(); return f"Error initializing agent: {e}", None
 
 
 
491
  print("Agent instantiated successfully.")
492
 
493
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run"
 
496
  # ==================== MOCK QUESTIONS SECTION ====================
497
  print("--- USING MOCK QUESTIONS FOR TESTING ---")
498
  mock_questions_data = [
499
+ {"task_id": "mock_task_search_001", "question": "What is the capital of France?"},
500
+ {"task_id": "mock_task_code_001", "question": "Calculate 15 factorial using python. Only output the final number."},
501
+ # {"task_id": "mock_task_audio_001", "question": "Transcribe the audio file 'sample.mp3'", "file_path": "sample.mp3"}, # Make sure sample.mp3 exists
 
 
 
 
 
 
 
 
 
 
 
502
  ]
503
  questions_data = mock_questions_data
504
  print(f"Using {len(questions_data)} mock questions.")
 
509
  total_questions = len(questions_data)
510
  print(f"Running agent on {total_questions} mock questions...")
511
 
512
+ questions_to_run = questions_data
513
 
514
  for i, item in enumerate(questions_to_run):
515
  task_id = item.get("task_id")
516
  question_text = item.get("question")
517
+ if not task_id or question_text is None: print(f"Skipping item {i+1}: missing ID or question."); continue
518
 
519
  print(f"\n--- Running Mock Task {i+1}/{len(questions_to_run)} (ID: {task_id}) ---")
520
  try:
521
  file_path = item.get("file_path")
522
  question_text_with_context = question_text
523
  if file_path:
 
524
  try: script_dir = os.path.dirname(os.path.realpath(__file__))
525
  except NameError: script_dir = os.getcwd()
526
  potential_script_path = os.path.join(script_dir, file_path)
527
+ potential_cwd_path = os.path.join(os.getcwd(), file_path)
528
+ file_context = f"[Attached File (path provided): {file_path}]"
529
  if os.path.exists(potential_script_path): file_context = f"[Attached File (exists): {file_path}]"
530
  elif os.path.exists(potential_cwd_path): file_context = f"[Attached File (exists in cwd): {file_path}]"
531
+ else: file_context = f"[Attached File (path provided, NOT FOUND): {file_path}]"
 
532
  question_text_with_context = f"{question_text}\n\n{file_context}"
533
  print(f"Question includes file reference: {file_path}")
534
 
535
+ # >>>>> This line will now cause an error because agent has no __call__ method <<<<<
536
  submitted_answer = agent(question_text_with_context)
537
+ # <<<<< ERROR HERE <<<<<
538
+
539
  submitted_answer_str = str(submitted_answer) if submitted_answer is not None else ""
540
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer_str})
541
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer_str})
542
  print(f"--- Mock Task {task_id} Complete ---")
543
  except Exception as e:
544
+ # If the error is because __call__ is missing, catch it specifically
545
+ if isinstance(e, TypeError) and "'BasicAgent' object is not callable" in str(e):
546
+ print(f"ERROR: agent object is not callable because __call__ method was removed.")
547
+ submitted_answer = "ERROR: Agent has no __call__ method"
548
+ else:
549
+ print(f"FATAL ERROR running agent graph on mock task {task_id}: {e}")
550
+ import traceback; traceback.print_exc()
551
+ submitted_answer = f"AGENT CRASH ERROR: {e}"
552
+
553
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
554
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
555
 
556
+ if not answers_payload: print("Agent did not produce answers."); return "Agent did not produce answers.", pd.DataFrame(results_log)
557
 
 
558
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
559
+ status_update = f"Agent finished mock questions. Prepared {len(answers_payload)} answers for '{username}'."
560
  print(status_update)
561
  print("--- MOCK RUN COMPLETE - SUBMISSION SKIPPED ---")
562
 
563
+ final_status = "--- MOCK RUN COMPLETE ---\n" + status_update + "\nSubmission SKIPPED."
 
564
  results_df = pd.DataFrame(results_log)
 
565
  results_df['Correct'] = 'N/A (Mock)'
566
 
567
  return final_status, results_df
568
 
569
+
570
  # --- Build Gradio Interface using Blocks ---
571
  with gr.Blocks() as demo:
572
+ gr.Markdown("# GAIA Agent Evaluation Runner (LangGraph + Groq Llama3.1 - MOCK TEST)") # Updated title
573
+ gr.Markdown( """
 
574
  **Instructions:**
575
+ 1. Login is optional for this MOCK test run.
576
+ 2. Click 'Run Mock Evaluation' to run the agent on a few hardcoded questions.
577
  ---
578
  **Notes:**
579
+ * This uses Groq for LLM inference (Llama 3.1 8B Instant). Ensure `GROQ_API_KEY` is set as an environment variable or Space secret.
580
+ * This version **DOES NOT** fetch questions from the official server and **DOES NOT** submit results. It only runs locally on mock questions to test the agent's logic.
581
+ * Check the terminal/logs to see tool calls and agent reasoning.
582
+ """)
 
583
  gr.LoginButton()
584
+ run_button = gr.Button("Run Mock Evaluation") # Changed button text
585
+ status_output = gr.Textbox(label="Run Status / Mock Result", lines=5, interactive=False) # Changed label
586
+ results_table = gr.DataFrame(label="Mock Questions, Agent Answers, and Results", wrap=True) # Changed label
587
 
588
  run_button.click(
589
  fn=run_and_submit_all,
 
592
 
593
if __name__ == "__main__":
    # Script entry point: log the Hugging Face Space environment and some
    # filesystem context, then start the Gradio app.
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        # SPACE_HOST normally already ends in ".hf.space"; append the suffix
        # only when it is missing so the logged URL is never doubled.
        if space_host_startup.endswith(".hf.space"):
            runtime_host = space_host_startup
        else:
            runtime_host = f"{space_host_startup}.hf.space"
        print(f"✅ SPACE_HOST: {space_host_startup}\n Runtime URL: https://{runtime_host}")
    else:
        print("ℹ️ SPACE_HOST env var not found (likely local run).")

    if space_id_startup:
        print(f"✅ SPACE_ID: {space_id_startup}\n Repo URL: https://huggingface.co/spaces/{space_id_startup}\n Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID env var not found (likely local run).")

    # __file__ is undefined in some interactive/embedded contexts; fall back
    # to the working directory there.
    try:
        script_dir = os.path.dirname(os.path.realpath(__file__))
    except NameError:
        script_dir = os.getcwd()
    print(f"Script directory: {script_dir}")
    print(f"CWD: {os.getcwd()}")

    # Listing is purely informational; any OS-level failure (missing directory,
    # permissions, ...) must not stop the app from launching.
    try:
        print("Files in CWD:", os.listdir("."))
    except OSError:
        print("Warning: CWD listing failed.")

    print("-"*(60 + len(" App Starting ")) + "\n")
    print("Launching Gradio Interface for GAIA Agent Mock Evaluation...")
    demo.queue().launch(debug=True, share=False)
610
+