gabejavitt committed on
Commit
c72322b
·
verified ·
1 Parent(s): afbd919

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -250
app.py CHANGED
@@ -22,8 +22,8 @@ from langgraph.graph.message import add_messages
22
  # Make sure to import ToolCall
23
  from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage, ToolCall
24
  from langgraph.prebuilt import ToolNode
25
- from langgraph.graph import START, StateGraph
26
- from langgraph.prebuilt import tools_condition
27
  from langchain_community.tools import DuckDuckGoSearchRun
28
  from langchain_core.tools import tool, BaseTool
29
  # --- ADD GROQ IMPORT ---
@@ -34,26 +34,23 @@ from langchain_groq import ChatGroq
34
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" # This URL is currently not working
35
 
36
  # --- Initialize ASR Pipeline (Moved back to Global Scope) ---
37
- # Load the model once when the app starts for efficiency
38
- asr_pipeline = None # Initialize as None first
39
  try:
40
  print("Loading ASR (Whisper) pipeline globally...")
41
- # Decide device based on availability, default to CPU if unsure
42
- device = 0 if torch.cuda.is_available() else -1 # device=0 for GPU, -1 for CPU
43
  device_name = "cuda:0" if device == 0 else "cpu"
44
  print(f"Attempting to use device: {device_name} for ASR.")
45
  asr_pipeline = pipeline(
46
  "automatic-speech-recognition",
47
  model="openai/whisper-base",
48
- # Use float16 only if CUDA is definitely available and working
49
  torch_dtype=torch.float16 if device == 0 else torch.float32,
50
- device=device # Pass device index or -1
51
  )
52
  print("✅ ASR (Whisper) pipeline loaded successfully.")
53
  except Exception as e:
54
- print(f"⚠️ Warning: Could not load ASR pipeline globally. Audio tool will not work. Error: {e}")
55
  import traceback
56
- traceback.print_exc() # Print full traceback for ASR load error
57
  asr_pipeline = None
58
  # ====================================================
59
 
@@ -81,50 +78,33 @@ def code_interpreter(code: str) -> str:
81
  print(f"--- Calling Code Interpreter with code:\n{code}\n---")
82
  output_stream = io.StringIO()
83
  error_stream = io.StringIO()
84
-
85
  try:
86
- # Use contextlib to redirect stdout and stderr
87
  with contextlib.redirect_stdout(output_stream), contextlib.redirect_stderr(error_stream):
88
- # Execute the code. Provide 'pd' (pandas) in the globals
89
  exec(code, {"pd": pd}, {})
90
-
91
- stdout = output_stream.getvalue()
92
- stderr = error_stream.getvalue()
93
-
94
- if stderr:
95
- return f"Error: {stderr}\nStdout: {stdout}"
96
- if stdout:
97
- return f"Success:\n{stdout}"
98
  return "Success: Code executed without error and produced no stdout."
99
-
100
- except Exception as e:
101
- # Capture any exception during exec
102
- return f"Execution failed with error: {str(e)}"
103
 
104
  @tool
105
  def read_file(path: str) -> str:
106
  """Reads the content of a file at the specified path. Use this to examine files provided in the question."""
107
  print(f"--- Calling Read File Tool at path: {path} ---")
108
  try:
109
- # Use getcwd() as the primary base for relative paths in Spaces
110
- script_dir = os.getcwd() # Changed from __file__ for broader compatibility
111
  print(f"Base directory for reading: {script_dir}")
112
-
113
  full_path = os.path.join(script_dir, path)
114
  print(f"Attempting to read relative path: {full_path}")
115
  if not os.path.exists(full_path):
116
- # If not found, try the direct path (might be absolute)
117
  full_path = path
118
  print(f"Attempting to read direct/absolute path: {full_path}")
119
  if not os.path.exists(full_path):
120
- # Try basename in CWD as last resort (GAIA might just give filename)
121
  base_path = os.path.basename(path)
122
  cwd_base_path = os.path.join(os.getcwd(), base_path)
123
  print(f"Attempting to read basename path in CWD: {cwd_base_path}")
124
- if os.path.exists(cwd_base_path):
125
- full_path = cwd_base_path
126
  else:
127
- # List files for debugging
128
  try: cwd_files = os.listdir(".")
129
  except Exception as list_e: cwd_files = [f"Error listing CWD: {list_e}"]
130
  return (f"Error: File not found.\n"
@@ -132,29 +112,22 @@ def read_file(path: str) -> str:
132
  f"Tried direct/absolute: '{path}'\n"
133
  f"Tried basename in CWD: '{cwd_base_path}'\n"
134
  f"Files in CWD (.): {cwd_files}")
135
-
136
  print(f"Reading file: {full_path}")
137
- with open(full_path, 'r', encoding='utf-8') as f:
138
- return f.read()
139
- except Exception as e:
140
- return f"Error reading file {path}: {str(e)}"
141
 
142
  @tool
143
  def write_file(path: str, content: str) -> str:
144
  """Writes the given content to a file at the specified path relative to the app's current directory. Creates directories if they don't exist."""
145
  print(f"--- Calling Write File Tool at path: {path} ---")
146
  try:
147
- # Ensure the directory exists relative to CWD
148
  base_dir = os.getcwd()
149
  full_path = os.path.join(base_dir, path)
150
  print(f"Writing file to: {full_path}")
151
  os.makedirs(os.path.dirname(full_path), exist_ok=True)
152
-
153
- with open(full_path, 'w', encoding='utf-8') as f:
154
- f.write(content)
155
  return f"Successfully wrote to file {path} (relative to CWD)."
156
- except Exception as e:
157
- return f"Error writing to file {path}: {str(e)}"
158
 
159
  @tool
160
  def list_directory(path: str = ".") -> str:
@@ -164,115 +137,85 @@ def list_directory(path: str = ".") -> str:
164
  base_dir = os.getcwd()
165
  full_path = os.path.join(base_dir, path)
166
  print(f"Listing directory: {full_path}")
167
- if not os.path.isdir(full_path):
168
- return f"Error: '{path}' is not a valid directory relative to CWD."
169
-
170
- files = os.listdir(full_path)
171
- return "\n".join(files) if files else "Directory is empty."
172
- except Exception as e:
173
- return f"Error listing directory {path}: {str(e)}"
174
 
175
  @tool
176
  def audio_transcription_tool(file_path: str) -> str:
177
- """
178
- Transcribes an audio file (like .mp3 or .wav) using Whisper and returns the text content.
179
- Use this for questions involving audio file analysis.
180
- """
181
- print(f"--- Calling Audio Transcription Tool at path: {file_path} ---")
182
- # Access the globally loaded pipeline
183
- if asr_pipeline is None:
184
- return "Error: Audio transcription pipeline is not available or failed to load."
185
  try:
186
- # Use the same path resolution logic as read_file
187
- script_dir = os.getcwd() # Base directory
188
  full_path = os.path.join(script_dir, file_path)
189
- print(f"Attempting to transcribe relative path: {full_path}")
190
  if not os.path.exists(full_path):
191
- full_path = file_path # Try direct/absolute
192
- print(f"Attempting to transcribe direct/absolute path: {full_path}")
193
  if not os.path.exists(full_path):
194
  base_path = os.path.basename(file_path)
195
  cwd_base_path = os.path.join(os.getcwd(), base_path)
196
- print(f"Attempting to transcribe basename path in CWD: {cwd_base_path}")
197
- if os.path.exists(cwd_base_path):
198
- full_path = cwd_base_path
199
- else:
200
- try: cwd_files = os.listdir(".")
201
- except Exception as list_e: cwd_files = [f"Error listing CWD: {list_e}"]
202
- return (f"Error: Audio file not found.\n"
203
- f"Tried relative: '{os.path.join(script_dir, file_path)}'\n"
204
- f"Tried direct/absolute: '{file_path}'\n"
205
- f"Tried basename in CWD: '{cwd_base_path}'\n"
206
- f"Files in CWD (.): {cwd_files}")
207
-
208
- print(f"Transcribing file: {full_path}")
209
  transcription = asr_pipeline(full_path)
210
- print("--- Transcription Complete ---")
211
  return transcription.get("text", "Error: Transcription failed.")
212
- except Exception as e:
213
- import traceback; traceback.print_exc()
214
- return f"Error during audio transcription: {str(e)}"
215
 
216
  @tool
217
  def get_youtube_transcript(video_url: str) -> str:
218
- """
219
- Fetches the transcript for a given YouTube video URL. Use this for questions about YouTube video content.
220
- """
221
- print(f"--- Calling YouTube Transcript Tool for URL: {video_url} ---")
222
  try:
223
  video_id = None
224
  if "watch?v=" in video_url: video_id = video_url.split("v=")[1].split("&")[0]
225
  elif "youtu.be/" in video_url: video_id = video_url.split("youtu.be/")[1].split("?")[0]
226
- if not video_id: return f"Error: Could not extract video ID from URL: {video_url}"
227
  transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
228
  full_transcript = " ".join([item["text"] for item in transcript_list])
229
- print("--- Transcript Fetched ---")
230
- return full_transcript[:8000] # Limit context
231
- except Exception as e: return f"Error fetching YouTube transcript: {str(e)}"
232
 
233
  @tool
234
  def scrape_web_page(url: str) -> str:
235
- """
236
- Fetches the primary text content of a given web page URL, removing navigation, footer, scripts, and styles.
237
- Use this when you need the full content of a webpage found via search.
238
- """
239
- print(f"--- Calling Web Scraper Tool for URL: {url} ---")
240
  try:
241
  headers = {'User-Agent': 'Mozilla/5.0'}
242
- response = requests.get(url, headers=headers, timeout=15)
243
- response.raise_for_status()
244
- if 'html' not in response.headers.get('Content-Type', '').lower(): return f"Error: URL {url} did not return HTML."
245
  soup = BeautifulSoup(response.text, 'html.parser')
246
  for tag in soup(["script", "style", "nav", "footer", "aside", "header", "form", "button", "input"]): tag.extract()
247
  main_content = soup.find('main') or soup.find('article') or soup.find('div', role='main') or soup.body or soup
248
  text = main_content.get_text(separator='\n', strip=True)
249
- lines = (line.strip() for line in text.splitlines())
250
- chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
251
  text = '\n'.join(chunk for chunk in chunks if chunk)
252
- print("--- Web Page Scraped ---")
253
- return text[:8000] # Limit context
254
- except requests.exceptions.RequestException as e: return f"Error fetching web page {url}: {str(e)}"
255
- except Exception as e: return f"Error scraping web page {url}: {str(e)}"
 
 
 
 
 
 
 
 
 
256
 
257
  # --- Helper Function for Cleaning Fences ---
258
- # +++++++++++++++++++ ADDED FUNCTION DEFINITION +++++++++++++++++++
259
  def remove_fences_simple(text):
260
  """Removes triple backtick fences and optional language identifiers."""
261
- original_text = text # Keep original for comparison
262
- text = text.strip() # Remove leading/trailing whitespace
263
  if text.startswith("```") and text.endswith("```"):
264
- text = text[3:-3].strip() # Remove the fences and any inner whitespace
265
- # Attempt to remove language identifier if present
266
  if '\n' in text:
267
  first_line, rest = text.split('\n', 1)
268
- # Simple check: is the first line short and likely a language tag?
269
  if first_line.strip().replace('_','').isalnum() and len(first_line.strip()) < 15:
270
  text = rest.strip()
271
  return text
272
- return original_text # Return original if no fences found
273
- # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
274
-
275
- # --- End of Tool Definitions ---
276
 
277
  # List of standalone tool functions
278
  defined_tools = [
@@ -283,13 +226,29 @@ defined_tools = [
283
  list_directory,
284
  audio_transcription_tool,
285
  get_youtube_transcript,
286
- scrape_web_page
 
287
  ]
288
 
289
  # --- LangGraph Agent State ---
290
  class AgentState(TypedDict):
291
  messages: Annotated[list[AnyMessage], add_messages]
292
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293
 
294
  # --- Basic Agent Definition ---
295
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
@@ -297,64 +256,73 @@ class BasicAgent:
297
 
298
  def __init__(self):
299
  print("BasicAgent (LangGraph) initializing...")
 
300
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
301
  if not GROQ_API_KEY: raise ValueError("GROQ_API_KEY secret is not set!")
302
  HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
303
  if not HUGGINGFACEHUB_API_TOKEN: print("⚠️ Warning: HUGGINGFACEHUB_API_TOKEN secret not set.")
304
 
305
  self.tools = defined_tools
306
- tool_descriptions = "\n".join([f"- {tool.name}: {tool.description}" for tool in self.tools])
 
 
 
 
 
 
307
 
308
  # ==================== MODIFIED SYSTEM PROMPT ====================
309
  self.system_prompt = f"""You are a highly intelligent and meticulous AI assistant built to answer questions from the GAIA benchmark.
310
- Your primary goal is to provide **only the concise, factual, and direct answer** to the user's question, exactly matching the format required by the benchmark (e.g., a name, a number, a specific string format, a comma-separated list).
311
 
312
  **CRITICAL INSTRUCTIONS:**
313
- * **DO NOT** include conversational filler (e.g., "Sure, I can help...", "The answer is...", "Here is the information...").
314
- * **DO NOT** explain your reasoning or the steps you took unless the question *explicitly* asks for it.
315
- * **DO NOT** repeat the question in your final answer.
316
- * **FINAL ANSWER FORMAT:** Your final response must contain *only* the answer itself, with no extra text or formatting like markdown code blocks unless the answer itself is code.
317
 
318
  You have access to the following tools:
319
  {tool_descriptions}
320
 
321
  **TOOL USAGE PROTOCOL:**
322
- * To use a tool, you MUST respond ONLY with a single JSON object formatted exactly like this:
323
- ```json
324
  {{
325
  "tool": "tool_name",
326
  "tool_input": {{ "arg_name1": "value1", ... }}
327
  }}
328
- ```
329
  * Replace `tool_name` with the tool's name. Provide arguments in `tool_input`. Match names/types precisely.
330
- * No text before or after the JSON block.
331
 
332
  **REASONING PROCESS & STOPPING CONDITION:**
333
- 1. Analyze question for required info and format. Check for `[Attached File: ...]`.
334
- 2. Break down problem into steps.
335
- 3. Determine if tools are needed.
336
- 4. If needed, call tool using JSON format. Wait for output.
337
- 5. Analyze tool output.
338
- 6. **DECISION POINT:**
339
- a. **If the tool output directly contains the final answer in the correct format:** Your *next* response MUST be ONLY that answer. **DO NOT** call any more tools. **DO NOT** add explanations. Just output the answer.
340
- b. **If more steps or tools are needed:** Continue reasoning, potentially go back to step 4.
341
- 7. **FINAL OUTPUT:** Once the definitive answer is derived (either directly from reasoning or after processing tool results), output **ONLY** that answer and nothing else. Stop the process immediately after outputting the answer. Do not attempt to verify it with tools unless the verification itself *is* the task.
 
342
  """
343
  # =============================================================
344
 
345
- # Initialize LLM (Using Groq)
346
- print("Initializing Groq LLM...")
347
  try:
348
- chat_llm = ChatGroq(temperature=0.01, groq_api_key=GROQ_API_KEY, model_name="llama-3.1-8b-instant")
349
- print("✅ Groq LLM initialized.")
 
 
 
 
350
  except Exception as e: print(f"Error initializing Groq: {e}"); raise
351
 
352
  self.llm_with_tools = chat_llm.bind_tools(self.tools)
353
- print("✅ Tools bound to LLM.")
354
 
355
- # Define Agent Node
356
  def agent_node(state: AgentState):
357
- # ... (agent_node implementation remains the same) ...
358
  print("--- Running Agent Node ---")
359
  ai_message: AIMessage = self.llm_with_tools.invoke(state["messages"])
360
  print(f"AI Message Raw Content: {ai_message.content}")
@@ -363,39 +331,53 @@ You have access to the following tools:
363
  else: print(f"AI content (no calls): {ai_message.pretty_repr()}")
364
  return {"messages": [ai_message]}
365
 
366
-
367
  tool_node = ToolNode(self.tools)
368
 
369
- # Create Graph
370
  print("Building agent graph...")
371
  graph_builder = StateGraph(AgentState)
372
  graph_builder.add_node("agent", agent_node)
373
  graph_builder.add_node("tools", tool_node)
374
  graph_builder.add_edge(START, "agent")
375
- graph_builder.add_conditional_edges("agent", tools_condition, {"tools": "tools", "__end__": "__end__"})
376
  graph_builder.add_edge("tools", "agent")
377
  self.graph = graph_builder.compile()
378
  print("✅ Graph compiled.")
379
 
 
380
  def __call__(self, question: str) -> str:
381
- # ... (__call__ implementation remains the same, including cleaning) ...
382
- print(f"\n--- Starting Agent Run ---")
383
- print(f"Question (100 chars): {question[:100]}...")
384
- graph_input = {"messages": [SystemMessage(content=self.system_prompt), HumanMessage(content=question)]}
385
- final_answer_content = ""
 
 
 
 
 
386
  try:
387
  for event in self.graph.stream(graph_input, stream_mode="values", config={"recursion_limit": 25}):
388
  last_message = event["messages"][-1]
389
- if isinstance(last_message, AIMessage):
390
- has_calls = bool(last_message.tool_calls or last_message.invalid_tool_calls)
391
- if not has_calls and isinstance(last_message.content, str) and last_message.content.strip():
392
- print(f"Potential Final Response: {last_message.content[:500]}...")
393
- final_answer_content = last_message.content
394
- elif not has_calls: print("AI Message no calls, empty/non-string content.")
 
 
395
  elif isinstance(last_message, ToolMessage):
396
  print(f"Tool Result ({last_message.tool_call_id}): {last_message.content[:500]}...")
 
 
 
 
 
 
 
 
 
397
 
398
- cleaned_answer = final_answer_content.strip()
399
  prefixes_to_remove = ["The answer is:", "Here is the answer:", "Based on the information:", "Final Answer:", "Answer:"]
400
  original_cleaned = cleaned_answer
401
  for prefix in prefixes_to_remove:
@@ -406,26 +388,25 @@ You have access to the following tools:
406
  print(f"Warning: Prefix found but not stripped: '{original_cleaned[:100]}...'")
407
 
408
  looks_like_code = any(kw in cleaned_answer for kw in ["def ", "import ", "print(", "for ", "while ", "if ", "class ", "=>", "dict(", "list["]) or cleaned_answer.count('\n') > 3 or (cleaned_answer.startswith('[') and cleaned_answer.endswith(']')) or (cleaned_answer.startswith('{') and cleaned_answer.endswith('}'))
 
409
  if not looks_like_code:
 
410
  cleaned_answer = remove_fences_simple(cleaned_answer) # Use the helper function
 
411
  if cleaned_answer.startswith("`") and cleaned_answer.endswith("`"):
412
  cleaned_answer = cleaned_answer[1:-1].strip()
413
 
414
  print(f"Agent returning final answer (cleaned): '{cleaned_answer}'")
415
- if not cleaned_answer and final_answer_content:
416
- print("Warning: Cleaned answer empty, falling back to raw.")
417
- return final_answer_content.strip()
418
- return cleaned_answer if cleaned_answer else "AGENT FAILED TO PRODUCE ANSWER"
419
  except Exception as e:
420
  print(f"Error running agent graph: {e}")
421
  import traceback; traceback.print_exc()
422
  return f"AGENT GRAPH ERROR: {e}"
 
423
 
424
 
425
  # --- (Original Template Code - Mock Questions Version) ---
426
- # ... (run_and_submit_all function remains the same) ...
427
- # ... (Gradio UI remains the same) ...
428
- # ... (__main__ block remains the same) ...
429
 
430
  def run_and_submit_all( profile: gr.OAuthProfile | None):
431
  """ MOCK RUN: Runs agent on mock Qs, displays results. DOES NOT SUBMIT. """
@@ -433,100 +414,22 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
433
  username = profile.username if profile else "local_test_user"
434
  print(f"User: {username}{'' if profile else ' (dummy)'}")
435
 
436
- submit_url = f"{DEFAULT_API_URL}/submit" # Keep for context
437
 
438
  print("Instantiating agent...")
439
  try:
440
  agent = BasicAgent()
441
  if asr_pipeline is None: print("⚠️ Global ASR Pipeline failed load.")
442
  except Exception as e: print(f"Error instantiating agent: {e}"); import traceback; traceback.print_exc(); return f"Error initializing agent: {e}", None
443
- print("Agent instantiated.")
444
 
445
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run"
446
  print(f"Agent code URL: {agent_code}")
447
 
448
  print("--- USING MOCK QUESTIONS ---")
449
  mock_questions_data = [
450
- {
451
- "task_id": "mock_level1_001",
452
- "question": r"""Here's a fun riddle that I'd like you to try.\n\nAn adventurer exploring an ancient tomb came across a horde of gold coins, all neatly stacked in columns. As he reached to scoop them into his backpack, a mysterious voice filled the room. \"You have fallen for my trap adventurer,\" the voice began, and suddenly the doorway to the chamber was sealed by a heavy rolling disk of stone. The adventurer tried to move the stone disk but was unable to budge the heavy stone. Trapped, he was startled when the voice again spoke. \n\n\"If you solve my riddle, I will reward you with a portion of my riches, but if you are not clever, you will never leave this treasure chamber. Before you are 200 gold coins. I pose a challenge to you, adventurer. Within these stacks of coins, all but 30 are face-up. You must divide the coins into two piles, one is yours, and one is mine. You may place as many coins as you like in either pile. You may flip any coins over, but you may not balance any coins on their edges. For every face-down coin in your pile, you will be rewarded with two gold coins. But be warned, if both piles do not contain the same number of face-down coins, the door will remain sealed for all eternity!\"\n\nThe adventurer smiled, as this would be an easy task. All he had to do was flip over every coin so it was face down, and he would win the entire treasure! As he moved to the columns of coins, however, the light suddenly faded, and he was left in total darkness. The adventurer reached forward and picked up one of the coins, and was shocked when he realized that both sides felt almost the same. Without the light, he was unable to determine which side of the coin was heads and which side was tails. He carefully replaced the coin in its original orientation and tried to think of a way to solve the puzzle. Finally, out of desperation, the adventurer removed 30 coins to create his pile. 
He then carefully flipped over each coin in his pile, so its orientation was inverted from its original state.\n\n\"I've finished,\" he said, and the lights returned. Looking at the two piles, he noticed that the larger pile contained 14 face-down coins.\n\nWhat was the outcome for the adventurer? If he failed the challenge, please respond with \"The adventurer died.\" Otherwise, please provide the number of coins the adventurer won at the conclusion of the riddle. If the adventurer won any coins, provide your response as the number of coins, with no other text."""
453
- },
454
- {
455
- "task_id": "mock_level1_002",
456
- "question": r"""If you use some of the letters in the given Letter Bank to spell out the sentence "I am a penguin halfway to the moon", which of the remaining unused letters would have to be changed to spell out, "The moon is made of cheese"? Return a comma-separated alphabetized list.\nLetter Bank: {OAMFETIMPECRFSHTDNIWANEPNOFAAIYOOMGUTNAHHLNEHCME}"""
457
- },
458
- {
459
- "task_id": "mock_level1_003",
460
- "question": r"""A data annotator stayed up too late creating test questions to check that a system was working properly and submitted several questions with mathematical errors. On nights when they created 15 test questions, they made 1 error. On nights when they created fewer than 15 questions, they also corrected 3 errors. On nights they created 20 questions, they made 0 errors. On nights when they created 25 or more, they made 4 errors. Over the course of five nights, the worker produced a total of 6 errors. When asked how many nights they created 15 questions, they gave three possible numbers as responses. What are the three numbers, presented in the format x, y, z in ascending order?"""
461
- },
462
- {
463
- "task_id": "mock_level1_004",
464
- "question": r"""Please solve the following crossword:\n\n|1|2|3|4|5|\n|6| | | | |\n|7| | | | |\n|8| | | | |\n|X|9| | | |\n\nI have indicated by numbers where the hints start, so you should replace numbers and spaces by the answers.\nAnd X denotes a black square that isn\u2019t to fill.\n\nACROSS\n- 1 Wooden strips on a bed frame\n- 6 _ Minhaj, Peabody-winning comedian for "Patriot Act"\n- 7 Japanese city of 2.6+ million\n- 8 Stopwatch, e.g.\n- 9 Pain in the neck\n\nDOWN\n- 1 Quick drink of whiskey\n- 2 Eye procedure\n- 3 "Same here," in a three-word phrase\n- 4 Already occupied, as a seat\n- 5 Sarcastically critical commentary. Answer by concatenating the characters you choose to fill the crossword, in row-major order."""
465
- },
466
- {
467
- "task_id": "mock_level1_005",
468
- "question": r"""I wanted to make another batch of cherry melomel. I remember liking the last recipe I tried, but I can't remember it off the top of my head. It was from the Reddit, r/mead. I remember that the user who made it had a really distinct name, I think it was StormBeforeDawn. Could you please look up the recipe for me? I'm not sure if it has been changed, so please make sure that the recipe you review wasn't updated after July 14, 2022. That's the last time I tried the recipe.\n\nWhat I want to know is how many cherries I'm supposed to use. I'm making a 10-gallon batch in two 5-gallon carboys. Please just respond with the integer number of pounds of whole cherries with pits that are supposed to be used for a 10-gallon batch."""
469
- },
470
- {
471
- "task_id": "mock_level1_006",
472
- "question": r"""Verify each of the following ISBN 13 numbers:\n\n1. 9783518188156\n2. 9788476540746\n3. 9788415091004\n4. 9788256014590\n5. 9782046407331\n\nIf any are invalid, correct them by changing the final digit. Then, return the list, comma separated, in the same order as in the question."""
473
- },
474
- {
475
- "task_id": "mock_level1_007",
476
- "question": r"""A porterhouse by any other name is centered around a letter. What does Three Dog Night think about the first natural number that starts with that letter? Give the first line from the lyrics that references it."""
477
- },
478
- {
479
- "task_id": "mock_level1_008",
480
- "question": r"""Bob has genome type Aa, and Linda has genome type Aa. Assuming that a child of theirs also has a child with someone who also has genome type Aa, what is the probability that Bob and Linda's grandchild will have Genome type Aa? Write the answer as a percentage, rounding to the nearest integer if necessary."""
481
- },
482
- {
483
- "task_id": "mock_level1_009",
484
- "question": r"""An array of candy is set out to choose from including gumballs, candy corn, gumdrops, banana taffy, chocolate chips, and gummy bears. There is one bag of each type of candy. The gumballs come in red, orange, yellow, green, blue, and brown. The candy corn is yellow, white, and orange. The gumdrops are red, green, purple, yellow, and orange. The banana taffy is yellow. The chocolate chips are brown and white. The gummy bears are red, green, yellow, and orange. Five people pass through and each selects one bag. The first selects one with only primary colors. The second selects one with no primary colors. The third selects one with all the primary colors. The fourth selects one that has neither the most nor the least colors of the remaining bags. The fifth selects the one with their favorite color, green. A second bag of the candy the first person chose is added to the remaining bag of candy. Which two candies are in the remaining bag after the addition? Give me them in a comma separated list, in alphabetical order"""
485
- },
486
- {
487
- "task_id": "mock_level1_010",
488
- "question": r"""In the year 2020, where were koi fish found in the watershed with the id 02040203? Give only the name of the pond, lake, or stream where the fish were found, and not the name of the city or county."""
489
- },
490
- {
491
- "task_id": "mock_level1_011",
492
- "question": r"""In Sonia Sanchez\u2019s poem \u201cfather\u2019s voice\u201d, what primary colour is evoked by the imagery in the beginning of the tenth stanza? Answer with a capitalized word."""
493
- },
494
- {
495
- "task_id": "mock_level1_012",
496
- "question": r"""According to Papers with Code, what was the name of the first model to go beyond 70% of accuracy on ImageNet ?"""
497
- },
498
- {
499
- "task_id": "mock_level1_013",
500
- "question": r"""What is the dimension of the boundary of the tame twindragon rounded to two decimal places?"""
501
- },
502
- {
503
- "task_id": "mock_level1_014",
504
- "question": r"""In what year was the home village of the subject of British Museum item #Bb,11.118 founded?"""
505
- },
506
- {
507
- "task_id": "mock_level1_015",
508
- "question": r"""What is the ISSN of the journal that included G. Scott's potato article that mentioned both a fast food restaurant and a Chinese politician in the title in a 2012 issue?"""
509
- },
510
- {
511
- "task_id": "mock_level1_016",
512
- "question": r"""VNV Nation has a song that shares its title with the nickname of Louis XV. What album was it released with?"""
513
- },
514
- {
515
- "task_id": "mock_level1_017",
516
- "question": r"""If I combine a Beatle's first name and a type of beer, in what category and year of Nobel Prize do I have a winner? Answer using the format CATEGORY, YEAR."""
517
- },
518
- {
519
- "task_id": "mock_level1_018",
520
- "question": r"""In the version of NumPy where the numpy.msort function was deprecated, which attribute was added to the numpy.polynomial package's polynomial classes?"""
521
- },
522
- {
523
- "task_id": "mock_level1_019",
524
- "question": r"""A word meaning dramatic or theatrical forms a species of duck when appended with two letters and then duplicated. What is that word?"""
525
- },
526
- {
527
- "task_id": "mock_level1_020",
528
- "question": r"""As of August 2023, how many in-text citations on the West African Vodun Wikipedia page reference a source that was cited using Scopus?"""
529
- },
530
  # {"task_id": "mock_audio_001", "question": "Transcribe 'sample.mp3'", "file_path": "sample.mp3"}, # Needs sample.mp3
531
  ]
532
  questions_data = mock_questions_data
@@ -572,6 +475,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
572
  results_df = pd.DataFrame(results_log); results_df['Correct'] = 'N/A (Mock)'
573
  return final_status, results_df
574
 
 
575
  # --- Build Gradio Interface ---
576
  with gr.Blocks() as demo:
577
  gr.Markdown("# GAIA Agent - MOCK TEST (Groq Llama3.1)")
@@ -587,17 +491,18 @@ with gr.Blocks() as demo:
587
 
588
  if __name__ == "__main__":
589
  print("\n" + "-"*30 + " App Starting " + "-"*30)
590
- # Print env info...
591
- space_host = os.getenv("SPACE_HOST"); space_id = os.getenv("SPACE_ID")
592
- if space_host: print(f"✅ SPACE_HOST: {space_host}")
593
  else: print("ℹ️ No SPACE_HOST (local?).")
594
- if space_id: print(f"✅ SPACE_ID: {space_id}")
595
  else: print("ℹ️ No SPACE_ID (local?).")
 
 
 
596
  print(f"CWD: {os.getcwd()}")
597
  try: print("Files in CWD:", os.listdir("."))
598
- except Exception as e: print(f"Warning: Error listing CWD: {e}")
599
  print("-"*(60 + len(" App Starting ")) + "\n")
600
  print("Launching Gradio Interface...")
601
  demo.queue().launch(debug=True, share=False)
602
 
603
-
 
22
  # Make sure to import ToolCall
23
  from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage, ToolCall
24
  from langgraph.prebuilt import ToolNode
25
+ from langgraph.graph import START, END, StateGraph
26
+ # Removed tools_condition, we'll use a custom one
27
  from langchain_community.tools import DuckDuckGoSearchRun
28
  from langchain_core.tools import tool, BaseTool
29
  # --- ADD GROQ IMPORT ---
 
34
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" # This URL is currently not working
35
 
36
  # --- Initialize ASR Pipeline (Moved back to Global Scope) ---
37
asr_pipeline = None  # remains None whenever the Whisper model cannot be loaded
try:
    print("Loading ASR (Whisper) pipeline globally...")
    # Device index 0 selects the first GPU, -1 selects the CPU.
    if torch.cuda.is_available():
        device = 0
        device_name = "cuda:0"
    else:
        device = -1
        device_name = "cpu"
    print(f"Attempting to use device: {device_name} for ASR.")
    asr_pipeline = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-base",
        # Half precision only on the GPU path; full precision on CPU.
        torch_dtype=torch.float16 if device == 0 else torch.float32,
        device=device,
    )
    print("✅ ASR (Whisper) pipeline loaded successfully.")
except Exception as e:
    import traceback

    # Loading is best-effort: report the failure and leave the tool disabled.
    print(f"⚠️ Warning: Could not load ASR pipeline globally. Error: {e}")
    traceback.print_exc()
    asr_pipeline = None
55
  # ====================================================
56
 
 
78
  print(f"--- Calling Code Interpreter with code:\n{code}\n---")
79
  output_stream = io.StringIO()
80
  error_stream = io.StringIO()
 
81
  try:
 
82
  with contextlib.redirect_stdout(output_stream), contextlib.redirect_stderr(error_stream):
 
83
  exec(code, {"pd": pd}, {})
84
+ stdout = output_stream.getvalue(); stderr = error_stream.getvalue()
85
+ if stderr: return f"Error: {stderr}\nStdout: {stdout}"
86
+ if stdout: return f"Success:\n{stdout}"
 
 
 
 
 
87
  return "Success: Code executed without error and produced no stdout."
88
+ except Exception as e: return f"Execution failed with error: {str(e)}"
 
 
 
89
 
90
  @tool
91
  def read_file(path: str) -> str:
92
  """Reads the content of a file at the specified path. Use this to examine files provided in the question."""
93
  print(f"--- Calling Read File Tool at path: {path} ---")
94
  try:
95
+ script_dir = os.getcwd()
 
96
  print(f"Base directory for reading: {script_dir}")
 
97
  full_path = os.path.join(script_dir, path)
98
  print(f"Attempting to read relative path: {full_path}")
99
  if not os.path.exists(full_path):
 
100
  full_path = path
101
  print(f"Attempting to read direct/absolute path: {full_path}")
102
  if not os.path.exists(full_path):
 
103
  base_path = os.path.basename(path)
104
  cwd_base_path = os.path.join(os.getcwd(), base_path)
105
  print(f"Attempting to read basename path in CWD: {cwd_base_path}")
106
+ if os.path.exists(cwd_base_path): full_path = cwd_base_path
 
107
  else:
 
108
  try: cwd_files = os.listdir(".")
109
  except Exception as list_e: cwd_files = [f"Error listing CWD: {list_e}"]
110
  return (f"Error: File not found.\n"
 
112
  f"Tried direct/absolute: '{path}'\n"
113
  f"Tried basename in CWD: '{cwd_base_path}'\n"
114
  f"Files in CWD (.): {cwd_files}")
 
115
  print(f"Reading file: {full_path}")
116
+ with open(full_path, 'r', encoding='utf-8') as f: return f.read()
117
+ except Exception as e: return f"Error reading file {path}: {str(e)}"
 
 
118
 
119
  @tool
120
  def write_file(path: str, content: str) -> str:
121
  """Writes the given content to a file at the specified path relative to the app's current directory. Creates directories if they don't exist."""
122
  print(f"--- Calling Write File Tool at path: {path} ---")
123
  try:
 
124
  base_dir = os.getcwd()
125
  full_path = os.path.join(base_dir, path)
126
  print(f"Writing file to: {full_path}")
127
  os.makedirs(os.path.dirname(full_path), exist_ok=True)
128
+ with open(full_path, 'w', encoding='utf-8') as f: f.write(content)
 
 
129
  return f"Successfully wrote to file {path} (relative to CWD)."
130
+ except Exception as e: return f"Error writing to file {path}: {str(e)}"
 
131
 
132
  @tool
133
  def list_directory(path: str = ".") -> str:
 
137
  base_dir = os.getcwd()
138
  full_path = os.path.join(base_dir, path)
139
  print(f"Listing directory: {full_path}")
140
+ if not os.path.isdir(full_path): return f"Error: '{path}' is not a valid directory."
141
+ files = os.listdir(full_path); return "\n".join(files) if files else "Directory is empty."
142
+ except Exception as e: return f"Error listing directory {path}: {str(e)}"
 
 
 
 
143
 
144
  @tool
145
  def audio_transcription_tool(file_path: str) -> str:
146
+ """Transcribes an audio file (like .mp3 or .wav) and returns the text content."""
147
+ print(f"--- Calling Audio Transcription: {file_path} ---")
148
+ if asr_pipeline is None: return "Error: ASR pipeline unavailable."
 
 
 
 
 
149
  try:
150
+ script_dir = os.getcwd()
 
151
  full_path = os.path.join(script_dir, file_path)
 
152
  if not os.path.exists(full_path):
153
+ full_path = file_path
 
154
  if not os.path.exists(full_path):
155
  base_path = os.path.basename(file_path)
156
  cwd_base_path = os.path.join(os.getcwd(), base_path)
157
+ if os.path.exists(cwd_base_path): full_path = cwd_base_path
158
+ else: return f"Error: Audio file not found."
 
 
 
 
 
 
 
 
 
 
 
159
  transcription = asr_pipeline(full_path)
 
160
  return transcription.get("text", "Error: Transcription failed.")
161
+ except Exception as e: import traceback; traceback.print_exc(); return f"Error transcribing: {e}"
 
 
162
 
163
  @tool
164
  def get_youtube_transcript(video_url: str) -> str:
165
+ """Fetches YouTube transcript."""
166
+ print(f"--- Calling YouTube Transcript: {video_url} ---")
 
 
167
  try:
168
  video_id = None
169
  if "watch?v=" in video_url: video_id = video_url.split("v=")[1].split("&")[0]
170
  elif "youtu.be/" in video_url: video_id = video_url.split("youtu.be/")[1].split("?")[0]
171
+ if not video_id: return f"Error: Invalid YouTube URL."
172
  transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
173
  full_transcript = " ".join([item["text"] for item in transcript_list])
174
+ return full_transcript[:8000]
175
+ except Exception as e: return f"Error getting transcript: {e}"
 
176
 
177
  @tool
178
  def scrape_web_page(url: str) -> str:
179
+ """Fetches primary text content of a webpage."""
180
+ print(f"--- Calling Web Scraper: {url} ---")
 
 
 
181
  try:
182
  headers = {'User-Agent': 'Mozilla/5.0'}
183
+ response = requests.get(url, headers=headers, timeout=15); response.raise_for_status()
184
+ if 'html' not in response.headers.get('Content-Type', '').lower(): return f"Error: Not HTML."
 
185
  soup = BeautifulSoup(response.text, 'html.parser')
186
  for tag in soup(["script", "style", "nav", "footer", "aside", "header", "form", "button", "input"]): tag.extract()
187
  main_content = soup.find('main') or soup.find('article') or soup.find('div', role='main') or soup.body or soup
188
  text = main_content.get_text(separator='\n', strip=True)
189
+ lines = (line.strip() for line in text.splitlines()); chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
 
190
  text = '\n'.join(chunk for chunk in chunks if chunk)
191
+ return text[:8000]
192
+ except Exception as e: return f"Error scraping {url}: {e}"
193
+
194
+ # +++++++++++++++++++ NEW FINAL ANSWER TOOL +++++++++++++++++++
195
@tool
def final_answer_tool(answer: str) -> str:
    """
    Call this tool *only* when you have the final, definitive answer to the user's question.
    The 'answer' argument should be the single, concise, factual answer, formatted exactly as requested by the user's prompt.
    """
    # Pure pass-through: log that the agent finished, then hand the answer
    # back unchanged.
    print("--- AGENT CALLING FINAL ANSWER TOOL ---")
    return answer
203
+ # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
204
 
205
  # --- Helper Function for Cleaning Fences ---
 
206
  def remove_fences_simple(text):
207
  """Removes triple backtick fences and optional language identifiers."""
208
+ original_text = text
209
+ text = text.strip()
210
  if text.startswith("```") and text.endswith("```"):
211
+ text = text[3:-3].strip()
 
212
  if '\n' in text:
213
  first_line, rest = text.split('\n', 1)
 
214
  if first_line.strip().replace('_','').isalnum() and len(first_line.strip()) < 15:
215
  text = rest.strip()
216
  return text
217
+ return original_text
218
+ # --- End Helper ---
 
 
219
 
220
  # List of standalone tool functions
221
  defined_tools = [
 
226
  list_directory,
227
  audio_transcription_tool,
228
  get_youtube_transcript,
229
+ scrape_web_page,
230
+ final_answer_tool # Add the new tool to the list
231
  ]
232
 
233
  # --- LangGraph Agent State ---
234
  class AgentState(TypedDict):
235
  messages: Annotated[list[AnyMessage], add_messages]
236
 
237
+ # --- Custom Conditional Edge ---
238
def should_continue(state: AgentState):
    """Pick the next graph step after the agent node has produced a message.

    Returns "tools" when the newest message is an AIMessage whose first tool
    call names anything other than `final_answer_tool`; returns END in every
    other case.
    """
    newest = state['messages'][-1]
    pending_calls = newest.tool_calls if isinstance(newest, AIMessage) else None

    if not pending_calls:
        # This path should ideally not be taken if the prompt is followed
        print("--- Condition: No tool call detected, ending graph (forcing agent to use final_answer_tool). ---")
        return END

    # Only the first requested call decides the route.
    if pending_calls[0].get("name") == "final_answer_tool":
        print("--- Condition: Saw final_answer_tool, ending graph. ---")
        return END

    print("--- Condition: Saw other tools, calling tools node. ---")
    return "tools"
252
 
253
  # --- Basic Agent Definition ---
254
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 
256
 
257
  def __init__(self):
258
  print("BasicAgent (LangGraph) initializing...")
259
+
260
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
261
  if not GROQ_API_KEY: raise ValueError("GROQ_API_KEY secret is not set!")
262
  HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
263
  if not HUGGINGFACEHUB_API_TOKEN: print("⚠️ Warning: HUGGINGFACEHUB_API_TOKEN secret not set.")
264
 
265
  self.tools = defined_tools
266
+ tool_descriptions = "\n".join([
267
+ f"- {tool.name}: {tool.description}" if tool.name != 'code_interpreter' else
268
+ (f"- {tool.name}: Executes Python code. Use for calculations, data manipulation, or logic puzzles. "
269
+ "**When solving logic puzzles, write out your reasoning steps as comments in the code.** "
270
+ "'pandas' (as pd) is available.")
271
+ for tool in self.tools
272
+ ])
273
 
274
  # ==================== MODIFIED SYSTEM PROMPT ====================
275
  self.system_prompt = f"""You are a highly intelligent and meticulous AI assistant built to answer questions from the GAIA benchmark.
276
+ Your primary goal is to provide **only the concise, factual, and direct answer** to the user's question.
277
 
278
  **CRITICAL INSTRUCTIONS:**
279
+ * **DO NOT** provide the final answer as plain text.
280
+ * **THE ONLY WAY** to provide a final answer is by calling the `final_answer_tool`.
281
+ * **DO NOT** include conversational filler (e.g., "The answer is...").
282
+ * **DO NOT** explain your reasoning unless it's inside a `code_interpreter` comment.
283
 
284
  You have access to the following tools:
285
  {tool_descriptions}
286
 
287
  **TOOL USAGE PROTOCOL:**
288
+ * To call a tool, respond ONLY with a single JSON object formatted exactly like this:
289
+ [[[JSON_MARKER_START]]]
290
  {{
291
  "tool": "tool_name",
292
  "tool_input": {{ "arg_name1": "value1", ... }}
293
  }}
294
+ [[[JSON_MARKER_END]]]
295
  * Replace `tool_name` with the tool's name. Provide arguments in `tool_input`. Match names/types precisely.
296
+ * Do not add any text before or after the JSON block.
297
 
298
  **REASONING PROCESS & STOPPING CONDITION:**
299
+ 1. Analyze the question.
300
+ 2. Break down problem into steps.
301
+ 3. Determine if tools are needed.
302
+ 4. If needed, call tool using JSON format. Wait for output.
303
+ 5. Analyze tool output.
304
+ 6. **DECISION POINT:**
305
+ a. **If the tool output directly contains the final answer (or allows trivial calculation):** Your *next* response MUST be a call to `final_answer_tool` with only that answer. **DO NOT** call any more tools.
306
+ b. **If more steps or tools are needed:** Continue reasoning, go back to step 4.
307
+ c. **If a tool call results in an error:** **Do not** try the exact same tool call again. Re-evaluate, try different arguments, or a different tool. If stuck, call `final_answer_tool` with an error message.
308
+ 7. **FINAL OUTPUT:** The graph will stop *only* when you call `final_answer_tool`.
309
  """
310
  # =============================================================
311
 
312
+ print("Initializing Groq LLM Endpoint...")
 
313
  try:
314
+ chat_llm = ChatGroq(
315
+ temperature=0.01,
316
+ groq_api_key=GROQ_API_KEY,
317
+ model_name="llama-3.1-8b-instant" # Use Llama 3.1 8B Instant
318
+ )
319
+ print("✅ Groq LLM Endpoint initialized with llama-3.1-8b-instant.")
320
  except Exception as e: print(f"Error initializing Groq: {e}"); raise
321
 
322
  self.llm_with_tools = chat_llm.bind_tools(self.tools)
323
+ print("✅ Tools bound to LLM (using bind_tools).")
324
 
 
325
  def agent_node(state: AgentState):
 
326
  print("--- Running Agent Node ---")
327
  ai_message: AIMessage = self.llm_with_tools.invoke(state["messages"])
328
  print(f"AI Message Raw Content: {ai_message.content}")
 
331
  else: print(f"AI content (no calls): {ai_message.pretty_repr()}")
332
  return {"messages": [ai_message]}
333
 
 
334
  tool_node = ToolNode(self.tools)
335
 
 
336
  print("Building agent graph...")
337
  graph_builder = StateGraph(AgentState)
338
  graph_builder.add_node("agent", agent_node)
339
  graph_builder.add_node("tools", tool_node)
340
  graph_builder.add_edge(START, "agent")
341
+ graph_builder.add_conditional_edges("agent", should_continue, {"tools": "tools", END: END})
342
  graph_builder.add_edge("tools", "agent")
343
  self.graph = graph_builder.compile()
344
  print("✅ Graph compiled.")
345
 
346
+ # ++++++++++++++++++++ __call__ METHOD ++++++++++++++++++++
347
  def __call__(self, question: str) -> str:
348
+ print(f"\n--- Starting Agent Run for Question ---")
349
+ print(f"Agent received question (first 100 chars): {question[:100]}...")
350
+
351
+ graph_input = {"messages": [
352
+ SystemMessage(content=self.system_prompt),
353
+ HumanMessage(content=question)
354
+ ]}
355
+
356
+ final_answer = "AGENT FAILED TO PRODUCE ANSWER" # Default answer
357
+
358
  try:
359
  for event in self.graph.stream(graph_input, stream_mode="values", config={"recursion_limit": 25}):
360
  last_message = event["messages"][-1]
361
+
362
+ if isinstance(last_message, AIMessage) and last_message.tool_calls:
363
+ for tool_call in last_message.tool_calls:
364
+ if tool_call.get("name") == "final_answer_tool":
365
+ final_answer = tool_call['args'].get('answer', "ERROR: FINAL_ANSWER_TOOL CALLED WITHOUT ANSWER")
366
+ print(f"--- Final Answer Captured from tool call: '{final_answer}' ---")
367
+ break
368
+
369
  elif isinstance(last_message, ToolMessage):
370
  print(f"Tool Result ({last_message.tool_call_id}): {last_message.content[:500]}...")
371
+ elif isinstance(last_message, AIMessage) and not last_message.tool_calls:
372
+ # This might be an error or the agent failing to call final_answer_tool
373
+ print(f"AI Message (no tool call): {last_message.content[:500]}...")
374
+ # We store this in case the graph ends here, but it's not the ideal path
375
+ if isinstance(last_message.content, str) and last_message.content.strip():
376
+ final_answer = last_message.content # Fallback
377
+
378
+ # --- Cleaning step (for the final answer, wherever it came from) ---
379
+ cleaned_answer = str(final_answer).strip() # Ensure it's a string
380
 
 
381
  prefixes_to_remove = ["The answer is:", "Here is the answer:", "Based on the information:", "Final Answer:", "Answer:"]
382
  original_cleaned = cleaned_answer
383
  for prefix in prefixes_to_remove:
 
388
  print(f"Warning: Prefix found but not stripped: '{original_cleaned[:100]}...'")
389
 
390
  looks_like_code = any(kw in cleaned_answer for kw in ["def ", "import ", "print(", "for ", "while ", "if ", "class ", "=>", "dict(", "list["]) or cleaned_answer.count('\n') > 3 or (cleaned_answer.startswith('[') and cleaned_answer.endswith(']')) or (cleaned_answer.startswith('{') and cleaned_answer.endswith('}'))
391
+
392
  if not looks_like_code:
393
+ # ++++++++++++++++ USING remove_fences_simple ++++++++++++++++
394
  cleaned_answer = remove_fences_simple(cleaned_answer) # Use the helper function
395
+ # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
396
  if cleaned_answer.startswith("`") and cleaned_answer.endswith("`"):
397
  cleaned_answer = cleaned_answer[1:-1].strip()
398
 
399
  print(f"Agent returning final answer (cleaned): '{cleaned_answer}'")
400
+ return cleaned_answer # Return the cleaned answer
401
+
 
 
402
  except Exception as e:
403
  print(f"Error running agent graph: {e}")
404
  import traceback; traceback.print_exc()
405
  return f"AGENT GRAPH ERROR: {e}"
406
+ # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
407
 
408
 
409
  # --- (Original Template Code - Mock Questions Version) ---
 
 
 
410
 
411
  def run_and_submit_all( profile: gr.OAuthProfile | None):
412
  """ MOCK RUN: Runs agent on mock Qs, displays results. DOES NOT SUBMIT. """
 
414
  username = profile.username if profile else "local_test_user"
415
  print(f"User: {username}{'' if profile else ' (dummy)'}")
416
 
417
+ submit_url = f"{DEFAULT_API_URL}/submit"
418
 
419
  print("Instantiating agent...")
420
  try:
421
  agent = BasicAgent()
422
  if asr_pipeline is None: print("⚠️ Global ASR Pipeline failed load.")
423
  except Exception as e: print(f"Error instantiating agent: {e}"); import traceback; traceback.print_exc(); return f"Error initializing agent: {e}", None
424
+ print("Agent instantiated successfully.")
425
 
426
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run"
427
  print(f"Agent code URL: {agent_code}")
428
 
429
  print("--- USING MOCK QUESTIONS ---")
430
  mock_questions_data = [
431
+ {"task_id": "mock_search_001", "question": "What is the capital of France?"},
432
+ {"task_id": "mock_code_001", "question": "Calculate 15 factorial using python. Only output the final number."},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
433
  # {"task_id": "mock_audio_001", "question": "Transcribe 'sample.mp3'", "file_path": "sample.mp3"}, # Needs sample.mp3
434
  ]
435
  questions_data = mock_questions_data
 
475
  results_df = pd.DataFrame(results_log); results_df['Correct'] = 'N/A (Mock)'
476
  return final_status, results_df
477
 
478
+
479
  # --- Build Gradio Interface ---
480
  with gr.Blocks() as demo:
481
  gr.Markdown("# GAIA Agent - MOCK TEST (Groq Llama3.1)")
 
491
 
492
  if __name__ == "__main__":
493
  print("\n" + "-"*30 + " App Starting " + "-"*30)
494
+ space_host_startup = os.getenv("SPACE_HOST"); space_id_startup = os.getenv("SPACE_ID")
495
+ if space_host_startup: print(f"SPACE_HOST: {space_host_startup}\n Runtime URL: https://{space_host_startup}.hf.space")
 
496
  else: print("ℹ️ No SPACE_HOST (local?).")
497
+ if space_id_startup: print(f"✅ SPACE_ID: {space_id_startup}\n Repo URL: https://huggingface.co/spaces/{space_id_startup}\n Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
498
  else: print("ℹ️ No SPACE_ID (local?).")
499
+ try: script_dir = os.path.dirname(os.path.realpath(__file__))
500
+ except NameError: script_dir = os.getcwd()
501
+ print(f"Script directory: {script_dir}")
502
  print(f"CWD: {os.getcwd()}")
503
  try: print("Files in CWD:", os.listdir("."))
504
+ except FileNotFoundError: print("Warning: CWD listing failed.")
505
  print("-"*(60 + len(" App Starting ")) + "\n")
506
  print("Launching Gradio Interface...")
507
  demo.queue().launch(debug=True, share=False)
508