Spaces:

gabejavitt
/

agentCourse

Sleeping

App Files Community

gabejavitt committed on Oct 26, 2025

Commit

07b9d51

verified ·

1 Parent(s): 2358285

Update app.py

Browse files

Files changed (1) hide show

app.py +316 -488

app.py CHANGED Viewed

@@ -24,8 +24,6 @@ from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, ToolMes
 from langgraph.prebuilt import ToolNode
 from langgraph.graph import START, StateGraph
 from langgraph.prebuilt import tools_condition
-# REMOVED: from langchain_huggingface import ChatHuggingFace
-# REMOVED: from langchain_huggingface import HuggingFaceEndpoint
 from langchain_community.tools import DuckDuckGoSearchRun
 from langchain_core.tools import tool, BaseTool
 # --- ADD GROQ IMPORT ---
@@ -33,305 +31,269 @@ from langchain_groq import ChatGroq
 # (Keep Constants as is)
 # --- Constants ---
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- LangGraph Agent State ---
-class AgentState(TypedDict):
-    messages: Annotated[list[AnyMessage], add_messages]
-# --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
-class BasicAgent:
-    # --- Tool Definitions as Methods ---
-    # By making tools methods, they can access self.asr_pipeline
-    @tool
-    def search_tool(self, query: str) -> str:
-        """Calls DuckDuckGo search and returns the results. Use this for recent information or general web searches."""
-        print(f"--- Calling Search Tool with query: {query} ---")
-        try:
-            search = DuckDuckGoSearchRun()
-            return search.run(query)
-        except Exception as e:
-            return f"Error running search: {e}"
-    @tool
-    def code_interpreter(self, code: str) -> str:
-        """
-        Executes a string of Python code and returns its stdout, stderr, and any error.
-        Use this for calculations, data manipulation (including pandas on dataframes read from files), list operations, string manipulations, or any other Python operation.
-        The code runs in a sandboxed environment. 'pandas' (as pd) and 'openpyxl' are available.
-        Ensure the code is complete and executable. If printing, use print().
-        """
-        print(f"--- Calling Code Interpreter with code:\n{code}\n---")
-        output_stream = io.StringIO()
-        error_stream = io.StringIO()
-        try:
-            # Use contextlib to redirect stdout and stderr
-            with contextlib.redirect_stdout(output_stream), contextlib.redirect_stderr(error_stream):
-                # Execute the code. Provide 'pd' (pandas) in the globals
-                exec(code, {"pd": pd}, {})
-            stdout = output_stream.getvalue()
-            stderr = error_stream.getvalue()
-            if stderr:
-                return f"Error: {stderr}\nStdout: {stdout}"
-            if stdout:
-                 return f"Success:\n{stdout}"
-            return "Success: Code executed without error and produced no stdout."
-        except Exception as e:
-            # Capture any exception during exec
-            return f"Execution failed with error: {str(e)}"
-    @tool
-    def read_file(self, path: str) -> str:
-        """Reads the content of a file at the specified path. Use this to examine files provided in the question."""
-        print(f"--- Calling Read File Tool at path: {path} ---")
-        try:
-            # Try finding the file relative to the app directory first
-            # Use os.path.dirname(os.path.realpath(__file__)) for robustness in different execution contexts
-            script_dir = os.path.dirname(os.path.realpath(__file__))
-            full_path = os.path.join(script_dir, path)
-            print(f"Attempting to read relative path: {full_path}")
-            if not os.path.exists(full_path):
-                 # If not found, try the direct path (might be absolute or relative to cwd)
-                 full_path = path
-                 print(f"Attempting to read direct path: {full_path}")
-                 if not os.path.exists(full_path):
-                     # Try basename for GAIA questions providing just the filename
-                     base_path = os.path.basename(path)
-                     print(f"Attempting to read basename path in cwd: {os.path.join(os.getcwd(), base_path)}")
-                     if os.path.exists(base_path): # Check relative to CWD
-                          full_path = base_path
-                     else:
-                         # List files in current and script directory for debugging
-                         try:
-                             cwd_files = os.listdir(".")
-                         except Exception:
-                             cwd_files = ["Error listing CWD"]
-                         try:
-                             script_dir_files = os.listdir(script_dir)
-                         except Exception:
-                             script_dir_files = ["Error listing script dir"]
-                         return (f"Error: File not found.\n"
-                                 f"Tried relative path: '{os.path.join(script_dir, path)}'\n"
-                                 f"Tried direct path: '{path}'\n"
-                                 f"Tried basename in CWD: '{base_path}'\n"
-                                 f"Files in current dir (.): {cwd_files}\n"
-                                 f"Files in script dir ({script_dir}): {script_dir_files}")
-            print(f"Reading file: {full_path}")
-            with open(full_path, 'r', encoding='utf-8') as f:
-                return f.read()
-        except Exception as e:
-            return f"Error reading file {path}: {str(e)}"
-    @tool
-    def write_file(self, path: str, content: str) -> str:
-        """Writes the given content to a file at the specified path relative to the app's directory. Creates directories if they don't exist."""
-        print(f"--- Calling Write File Tool at path: {path} ---")
-        try:
-            # Ensure the directory exists
-            script_dir = os.path.dirname(os.path.realpath(__file__))
-            full_path = os.path.join(script_dir, path) # Write relative to script dir
-            print(f"Writing file to: {full_path}")
-            os.makedirs(os.path.dirname(full_path), exist_ok=True)
-            with open(full_path, 'w', encoding='utf-8') as f:
-                f.write(content)
-            return f"Successfully wrote to file {path} (relative to app)."
-        except Exception as e:
-            return f"Error writing to file {path}: {str(e)}"
-    @tool
-    def list_directory(self, path: str = ".") -> str:
-        """Lists the contents (files and directories) of a directory at the specified path relative to the app's directory."""
-        print(f"--- Calling List Directory Tool at path: {path} ---")
-        try:
-            script_dir = os.path.dirname(os.path.realpath(__file__))
-            full_path = os.path.join(script_dir, path) # List relative to script dir
-            print(f"Listing directory: {full_path}")
-            if not os.path.isdir(full_path):
-                 return f"Error: '{path}' is not a valid directory relative to the app."
-            files = os.listdir(full_path)
-            return "\n".join(files) if files else "Directory is empty."
-        except Exception as e:
-            return f"Error listing directory {path}: {str(e)}"
-    @tool
-    def audio_transcription_tool(self, file_path: str) -> str:
-        """
-        Transcribes an audio file (like .mp3 or .wav) using Whisper and returns the text content.
-        Use this for questions involving audio file analysis.
-        """
-        print(f"--- Calling Audio Transcription Tool at path: {file_path} ---")
-        # Access the pipeline via self
-        if not self.asr_pipeline:
-            return "Error: Audio transcription pipeline is not available."
-        try:
-            # Try finding the file relative to the app directory first
-            script_dir = os.path.dirname(os.path.realpath(__file__))
-            full_path = os.path.join(script_dir, file_path)
-            print(f"Attempting to transcribe relative path: {full_path}")
-            if not os.path.exists(full_path):
-                 # If not found, try the direct path
-                 full_path = file_path
-                 print(f"Attempting to transcribe direct path: {full_path}")
-                 if not os.path.exists(full_path):
-                     # Try basename for GAIA questions
-                     base_path = os.path.basename(file_path)
-                     print(f"Attempting to transcribe basename path in CWD: {os.path.join(os.getcwd(), base_path)}")
-                     if os.path.exists(base_path): # Check relative to CWD
-                         full_path = base_path
-                     else:
-                         try:
-                             cwd_files = os.listdir(".")
-                         except Exception:
-                             cwd_files = ["Error listing CWD"]
-                         try:
-                             script_dir_files = os.listdir(script_dir)
-                         except Exception:
-                             script_dir_files = ["Error listing script dir"]
-                         return (f"Error: Audio file not found.\n"
-                                 f"Tried relative path: '{os.path.join(script_dir, file_path)}'\n"
-                                 f"Tried direct path: '{file_path}'\n"
-                                 f"Tried basename in CWD: '{base_path}'\n"
-                                 f"Files in current dir (.): {cwd_files}\n"
-                                 f"Files in script dir ({script_dir}): {script_dir_files}")
-            print(f"Transcribing file: {full_path}")
-            # Important: Ensure the pipeline can handle the file path directly
-            transcription = self.asr_pipeline(full_path)
-            print("--- Transcription Complete ---")
-            # The output structure might vary slightly based on pipeline version
-            return transcription.get("text", "Error: Transcription failed to produce text.")
-        except Exception as e:
-            import traceback
-            print(f"Error during audio transcription: {e}")
-            traceback.print_exc()
-            return f"Error during audio transcription: {str(e)}"
-    @tool
-    def get_youtube_transcript(self, video_url: str) -> str:
-        """
-        Fetches the transcript for a given YouTube video URL. Use this for questions about YouTube video content.
-        """
-        print(f"--- Calling YouTube Transcript Tool for URL: {video_url} ---")
-        try:
-            # Extract video ID from URL more robustly
-            video_id = None
-            if "watch?v=" in video_url:
-                video_id = video_url.split("v=")[1].split("&")[0]
-            elif "youtu.be/" in video_url:
-                video_id = video_url.split("youtu.be/")[1].split("?")[0]
-            if not video_id:
-                 return f"Error: Could not extract video ID from URL: {video_url}"
-            transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
-            # Combine all transcript parts into one string
-            full_transcript = " ".join([item["text"] for item in transcript_list])
-            print("--- Transcript Fetched ---")
-            # Return a limited amount to avoid overwhelming the context
-            return full_transcript[:8000]
-        except Exception as e:
-            return f"Error fetching YouTube transcript: {str(e)}"
-    @tool
-    def scrape_web_page(self, url: str) -> str:
-        """
-        Fetches the primary text content of a given web page URL, removing navigation, footer, scripts, and styles.
-        Use this when you need the full content of a webpage found via search.
-        """
-        print(f"--- Calling Web Scraper Tool for URL: {url} ---")
-        try:
-            headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
-            response = requests.get(url, headers=headers, timeout=15) # Increased timeout
-            response.raise_for_status() # Raise an error for bad responses (4xx or 5xx)
-            # Check content type to avoid parsing non-HTML
-            if 'html' not in response.headers.get('Content-Type', '').lower():
-                return f"Error: URL {url} did not return HTML content."
-            soup = BeautifulSoup(response.text, 'html.parser')
-            # Remove common non-content tags
-            for tag in soup(["script", "style", "nav", "footer", "aside", "header", "form", "button", "input"]):
-                tag.extract()
-            # Attempt to find the main content area (heuristics, may not always work)
-            main_content = soup.find('main') or soup.find('article') or soup.find('div', role='main') or soup.body
-            if not main_content:
-                 main_content = soup # Fallback to the whole soup if no main area found
-            text = main_content.get_text(separator='\n', strip=True)
-            # Clean up excessive whitespace
-            lines = (line.strip() for line in text.splitlines())
-            chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
-            text = '\n'.join(chunk for chunk in chunks if chunk)
-            print("--- Web Page Scraped ---")
-            # Limit context size
-            return text[:8000]
-        except requests.exceptions.RequestException as e:
-             return f"Error fetching web page {url}: {str(e)}"
-        except Exception as e:
-            return f"Error scraping web page {url}: {str(e)}"
-    # --- End of Tool Definitions ---
     def __init__(self):
         print("BasicAgent (LangGraph) initializing...")
-        # 1. Initialize ASR Pipeline *inside* init - DELAYED LOADING
-        self.asr_pipeline = None # Initialize as None first
-        try:
-            print("Loading ASR (Whisper) pipeline...")
-            device = "cuda:0" if torch.cuda.is_available() else "cpu"
-            print(f"Using device: {device} for ASR.")
-            self.asr_pipeline = pipeline(
-                "automatic-speech-recognition",
-                model="openai/whisper-base",
-                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-                device=device
-            )
-            print("✅ ASR (Whisper) pipeline loaded successfully.")
-        except Exception as e:
-            print(f"⚠️ Warning: Could not load ASR pipeline. Audio tool will not work. Error: {e}")
-            import traceback
-            traceback.print_exc() # Print full traceback for ASR load error
-            self.asr_pipeline = None
-        # ====================================================
-        # 2. Get API Tokens from Space Secrets
-        # HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN") # Keep if needed elsewhere
         GROQ_API_KEY = os.getenv("GROQ_API_KEY")
         if not GROQ_API_KEY:
             raise ValueError("GROQ_API_KEY secret is not set! Please add it to your Space secrets.")
-        # 3. Collect Tool Methods
-        self.tools = [
-            self.search_tool,
-            self.code_interpreter,
-            self.read_file,
-            self.write_file,
-            self.list_directory,
-            self.audio_transcription_tool,
-            self.get_youtube_transcript,
-            self.scrape_web_page
-        ]
-        # 4. Define the Improved System Prompt with Placeholders
         tool_descriptions = "\n".join([f"- {tool.name}: {tool.description}" for tool in self.tools])
         self.system_prompt = f"""You are a highly intelligent and meticulous AI assistant built to answer questions from the GAIA benchmark.
 Your primary goal is to provide **only the concise, factual, and direct answer** to the user's question, exactly matching the format required by the benchmark (e.g., a name, a number, a specific string format, a comma-separated list).
@@ -366,156 +328,55 @@ You have access to the following tools to gather information and perform actions
 7.  Once you have derived the final, definitive answer that meets the question's requirements, output **ONLY** that answer and nothing else. Stop the process.
 """
-        # 5. Initialize the LLM (Using Groq and Mistral Instruct)
-        # ==================== MODIFIED LLM INIT ====================
         print("Initializing Groq LLM Endpoint...")
         try:
-            # Use the specific model name recommended by Groq for mixtral-8x7b-32768
             chat_llm = ChatGroq(
                 temperature=0.01,
                 groq_api_key=GROQ_API_KEY,
-                model_name="llama-3.1-8b-instant"
             )
-            print("✅ Groq LLM Endpoint initialized.")
         except Exception as e:
-            print(f"Error initializing Groq LLM: {e}")
-            raise # Reraise the exception to stop initialization if LLM fails
         # ===========================================================
-        # 6. Bind tools to the LLM
-        # We still bind tools, but we'll manually parse if it fails
         self.llm_with_tools = chat_llm.bind_tools(self.tools)
-        print("✅ Tools bound to LLM.")
-        # 7. Define the Agent Node with Manual Tool Parsing
-        # ==================== NODE WITH PLACEHOLDER REGEX ====================
         def agent_node(state: AgentState):
             print("--- Running Agent Node ---")
-            messages_with_prompt = state["messages"]
-            # Invoke the LLM (which has tools bound)
-            ai_message: AIMessage = self.llm_with_tools.invoke(messages_with_prompt)
             print(f"AI Message Raw Content: {ai_message.content}")
-            # --- Manual Tool Call Parsing Logic ---
-            tool_calls = []
-            # Check if bind_tools already populated tool_calls (ideal case)
-            if ai_message.tool_calls:
-                print(f"SUCCESS: bind_tools correctly parsed tool_calls: {ai_message.tool_calls}")
-                tool_calls = ai_message.tool_calls
-            # Fallback: Check if content contains likely JSON for tool calls
-            # Use regex to find JSON possibly wrapped in markdown
-            elif isinstance(ai_message.content, str):
-                print("Attempting manual JSON parsing from content...")
-                # --- THIS IS THE LINE WITH THE FIRST PLACEHOLDER ---
-                json_match = re.search(r"...") # Replace this line manually
-                if json_match:
-                    # Extract the first valid group that contains JSON
-                    json_str = json_match.group(1) or json_match.group(2)
-                    if json_str:
-                        try:
-                            # Attempt to strip potential leading/trailing non-JSON chars if regex was too broad
-                            json_str_cleaned = json_str.strip()
-                            # Basic validation: starts with { or [ ends with } or ]
-                            if (json_str_cleaned.startswith('{') and json_str_cleaned.endswith('}')) or \
-                               (json_str_cleaned.startswith('[') and json_str_cleaned.endswith(']')):
-                                data = json.loads(json_str_cleaned)
-                                # Check structure for single tool call (dict)
-                                if isinstance(data, dict) and "tool" in data and "tool_input" in data:
-                                    tool_name = data.get("tool")
-                                    tool_input = data.get("tool_input")
-                                    # Basic validation of tool name and input type
-                                    if isinstance(tool_name, str) and isinstance(tool_input, dict):
-                                         call_id = f"tool_{uuid.uuid4()}" # Generate unique ID
-                                         tool_calls.append(ToolCall(name=tool_name, args=tool_input, id=call_id))
-                                         print(f"Manually parsed Single Tool Call: ID={call_id}, Name={tool_name}, Args={tool_input}")
-                                         ai_message.content = "" # Clear content after successful parse
-                                    else:
-                                         print("Parsed JSON dict, but incorrect tool name type or tool_input is not a dict.")
-                                # Check structure for multiple tool calls (if model outputs a list)
-                                elif isinstance(data, list):
-                                    print("Attempting to parse list as multiple tool calls...")
-                                    parsed_list_ok = True
-                                    temp_tool_calls = []
-                                    for item in data:
-                                        if isinstance(item, dict) and "tool" in item and "tool_input" in item:
-                                            tool_name = item.get("tool")
-                                            tool_input = item.get("tool_input")
-                                            if isinstance(tool_name, str) and isinstance(tool_input, dict):
-                                                call_id = f"tool_{uuid.uuid4()}"
-                                                temp_tool_calls.append(ToolCall(name=tool_name, args=tool_input, id=call_id))
-                                                print(f"Manually parsed Multi-Tool Call item: ID={call_id}, Name={tool_name}, Args={tool_input}")
-                                            else:
-                                                parsed_list_ok = False
-                                                print("Parsed JSON list item, but incorrect tool name type or tool_input is not a dict.")
-                                                break
-                                        else:
-                                            parsed_list_ok = False
-                                            print("Parsed JSON list item, but not a valid tool call structure (missing 'tool' or 'tool_input').")
-                                            break
-                                    if parsed_list_ok and temp_tool_calls:
-                                        tool_calls.extend(temp_tool_calls)
-                                        ai_message.content = "" # Clear content if list successfully parsed
-                                else:
-                                    print("Parsed JSON, but incorrect structure (neither dict with tool/tool_input nor list of such dicts).")
-                            else:
-                                 print(f"Skipping manual parse: Cleaned JSON string ('{json_str_cleaned[:50]}...') does not start/end correctly with braces/brackets.")
-                        except json.JSONDecodeError as e:
-                            print(f"Manual JSON parsing failed: {e}. String was: '{json_str[:500]}...'") # Log the problematic string
-                        except Exception as e:
-                             print(f"Unexpected error during manual parsing: {e}")
-                             import traceback
-                             traceback.print_exc()
-                    else:
-                         print("Regex matched, but no JSON content found in capture groups.")
-                else:
-                    print("No JSON block found in content for manual parsing.")
-            else:
-                 print("AI Message content is not a string, skipping manual parse.")
-            # --- End Manual Parsing ---
-            # Attach manually parsed calls (if any) to the message
-            # This allows tools_condition to work correctly
-            if tool_calls and not ai_message.tool_calls:
-                 ai_message.tool_calls = tool_calls
-                 # Also clear invalid_tool_calls if we manually succeeded
-                 ai_message.invalid_tool_calls = [] # Use empty list instead of None
-            # Log final interpretation
-            if ai_message.tool_calls:
-                 print(f"AI Message contains tool calls (after manual check): {ai_message.tool_calls}")
-            elif ai_message.invalid_tool_calls:
-                 print(f"AI Message contains INVALID tool calls: {ai_message.invalid_tool_calls}")
-            else:
-                 print(f"AI Message Interpreted Content (no tool calls): {ai_message.pretty_repr()}")
             return {"messages": [ai_message]}
         # =======================================================
-        # 8. Define the Tool Node
         tool_node = ToolNode(self.tools)
-        # 9. Create the Graph
         print("Building agent graph...")
         graph_builder = StateGraph(AgentState)
         graph_builder.add_node("agent", agent_node)
         graph_builder.add_node("tools", tool_node)
         graph_builder.add_edge(START, "agent")
-        graph_builder.add_conditional_edges(
-            "agent",
-            tools_condition, # This condition checks ai_message.tool_calls
-            {
-                "tools": "tools",
-                "__end__": "__end__",
-            },
-        )
         graph_builder.add_edge("tools", "agent")
-        # 10. Compile the graph and store it
         self.graph = graph_builder.compile()
         print("✅ Graph compiled successfully.")
     def __call__(self, question: str) -> str:
         print(f"\n--- Starting Agent Run for Question ---")
         print(f"Agent received question (first 100 chars): {question[:100]}...")
@@ -602,8 +463,7 @@ You have access to the following tools to gather information and perform actions
             traceback.print_exc()
             return f"AGENT GRAPH ERROR: {e}"
-# --- (Original Template Code Starts Here - NO CHANGES NEEDED BELOW THIS LINE) ---
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
@@ -615,27 +475,19 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         username= f"{profile.username}"
         print(f"User logged in: {username}")
     else:
-        # For local testing without login, create a dummy username
-        username = "local_test_user"
-        print("User not logged in, using dummy username for testing.")
-        # return "Please Login to Hugging Face with the button.", None # Don't block local test
     api_url = DEFAULT_API_URL
-    # questions_url = f"{api_url}/questions" # Skip fetching
-    submit_url = f"{api_url}/submit" # Keep for context
     print("Instantiating agent...")
     try:
-        # This assumes the BasicAgent class is defined in the same scope
-        # when this function is actually run in the full app.py
-        agent = BasicAgent()
-        if not hasattr(agent, 'asr_pipeline') or agent.asr_pipeline is None:
-             print("⚠️ ASR Pipeline might not have loaded correctly. Audio questions could fail.")
-    except Exception as e:
-        print(f"Error instantiating agent: {e}")
-        import traceback; traceback.print_exc()
-        return f"Error initializing agent: {e}", None
     print("Agent instantiated successfully.")
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run"
@@ -644,20 +496,9 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     # ==================== MOCK QUESTIONS SECTION ====================
     print("--- USING MOCK QUESTIONS FOR TESTING ---")
     mock_questions_data = [
-        {
-            "task_id": "mock_task_search_001",
-            "question": "What is the capital of France?"
-        },
-        {
-            "task_id": "mock_task_code_001",
-            "question": "Calculate 15 factorial using python. Only output the final number."
-        },
-        # Add more mock questions here if needed, e.g., for file/audio/youtube
-        # {
-        #     "task_id": "mock_task_audio_001",
-        #     "question": "Transcribe the audio file 'sample.mp3'", # Make sure sample.mp3 exists
-        #     "file_path": "sample.mp3"
-        # },
     ]
     questions_data = mock_questions_data
     print(f"Using {len(questions_data)} mock questions.")
@@ -668,78 +509,81 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     total_questions = len(questions_data)
     print(f"Running agent on {total_questions} mock questions...")
-    questions_to_run = questions_data # Use the mock data
     for i, item in enumerate(questions_to_run):
         task_id = item.get("task_id")
         question_text = item.get("question")
-        if not task_id or question_text is None: print(f"Skipping item {i+1}: missing task_id or question: {item}"); continue
         print(f"\n--- Running Mock Task {i+1}/{len(questions_to_run)} (ID: {task_id}) ---")
         try:
             file_path = item.get("file_path")
             question_text_with_context = question_text
             if file_path:
-                 # Check existence relative to script dir first, then CWD
                  try: script_dir = os.path.dirname(os.path.realpath(__file__))
                  except NameError: script_dir = os.getcwd()
                  potential_script_path = os.path.join(script_dir, file_path)
-                 potential_cwd_path = os.path.join(os.getcwd(), file_path) # Check CWD too
                  if os.path.exists(potential_script_path): file_context = f"[Attached File (exists): {file_path}]"
                  elif os.path.exists(potential_cwd_path): file_context = f"[Attached File (exists in cwd): {file_path}]"
-                 else: file_context = f"[Attached File (path provided, NOT FOUND): {file_path}]" # Indicate if not found
                  question_text_with_context = f"{question_text}\n\n{file_context}"
                  print(f"Question includes file reference: {file_path}")
             submitted_answer = agent(question_text_with_context)
             submitted_answer_str = str(submitted_answer) if submitted_answer is not None else ""
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer_str})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer_str})
             print(f"--- Mock Task {task_id} Complete ---")
         except Exception as e:
-             print(f"FATAL ERROR running agent graph on mock task {task_id}: {e}")
-             import traceback; traceback.print_exc()
-             submitted_answer = f"AGENT CRASH ERROR: {e}"
              answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
              results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
-    if not answers_payload: print("Agent did not produce any answers."); return "Agent did not produce answers.", pd.DataFrame(results_log)
-    # 4. Prepare Submission Data (for display only)
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
-    status_update = f"Agent finished processing mock questions. Prepared {len(answers_payload)} answers for user '{username}'."
     print(status_update)
     print("--- MOCK RUN COMPLETE - SUBMISSION SKIPPED ---")
-    # 5. Skip Actual Submission for Mock Run
-    final_status = "--- MOCK RUN COMPLETE ---\n" + status_update + "\nSubmission to the scoring server was SKIPPED."
     results_df = pd.DataFrame(results_log)
-    # Add a column indicating mock status
     results_df['Correct'] = 'N/A (Mock)'
     return final_status, results_df
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
-    gr.Markdown("# GAIA Agent Evaluation Runner (LangGraph + Mistral)") # Updated title
-    gr.Markdown(
-        """
         **Instructions:**
-        1.  Log in to your Hugging Face account using the button below.
-        2.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run the agent, submit answers, and see the score.
         ---
         **Notes:**
-        * The full evaluation can take **several hours**. Use the logs tab to monitor progress.
-        * This agent uses `mistralai/Mistral-7B-Instruct-v0.2` and multiple tools.
-        * Make sure your `HUGGINGFACEHUB_API_TOKEN` secret is set correctly in Settings.
-        """
-    )
     gr.LoginButton()
-    run_button = gr.Button("Run Evaluation & Submit All Answers")
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-    results_table = gr.DataFrame(label="Questions, Agent Answers, and Results", wrap=True)
     run_button.click(
         fn=run_and_submit_all,
@@ -748,35 +592,19 @@ with gr.Blocks() as demo:
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)
-    # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
     space_id_startup = os.getenv("SPACE_ID")
-    if space_host_startup:
-        print(f"✅ SPACE_HOST found: {space_host_startup}")
-        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
-    else:
-        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
-    if space_id_startup:
-        print(f"✅ SPACE_ID found: {space_id_startup}")
-        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
-    else:
-        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
-    # Add detailed path info for debugging file access
-    print(f"Script directory (__file__): {os.path.dirname(os.path.realpath(__file__))}")
-    print(f"Current working directory (os.getcwd()): {os.getcwd()}")
-    # List files only if the directory exists
-    try:
-        print("Files in current working directory:", os.listdir("."))
-    except FileNotFoundError:
-        print("Warning: Could not list current working directory.")
     print("-"*(60 + len(" App Starting ")) + "\n")
-    print("Launching Gradio Interface for GAIA Agent Evaluation...")
-    # Set queue=True to handle multiple clicks better, though only one run should happen at a time.
     demo.queue().launch(debug=True, share=False)

 from langgraph.prebuilt import ToolNode
 from langgraph.graph import START, StateGraph
 from langgraph.prebuilt import tools_condition
 from langchain_community.tools import DuckDuckGoSearchRun
 from langchain_core.tools import tool, BaseTool
 # --- ADD GROQ IMPORT ---
 # (Keep Constants as is)
 # --- Constants ---
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" # This URL is currently not working
+# --- Initialize ASR Pipeline (Moved back to Global Scope) ---
+# The Whisper pipeline is built once at module import and shared by every
+# call to the audio transcription tool defined later in this file.
+asr_pipeline = None # Stays None if loading fails; the audio tool checks for that
+try:
+    print("Loading ASR (Whisper) pipeline globally...")
+    # Pick the device from CUDA availability; fall back to CPU otherwise
+    device = 0 if torch.cuda.is_available() else -1 # device=0 for GPU, -1 for CPU
+    device_name = "cuda:0" if device == 0 else "cpu"
+    print(f"Attempting to use device: {device_name} for ASR.")
+    asr_pipeline = pipeline(
+        "automatic-speech-recognition",
+        model="openai/whisper-base",
+        # float16 when the CUDA device was selected above, float32 on CPU
+        torch_dtype=torch.float16 if device == 0 else torch.float32,
+        device=device # Pass device index or -1
+    )
+    print("✅ ASR (Whisper) pipeline loaded successfully.")
+except Exception as e:
+    # Loading is best-effort: the app still starts, only the audio tool is disabled
+    print(f"⚠️ Warning: Could not load ASR pipeline globally. Audio tool will not work. Error: {e}")
+    import traceback
+    traceback.print_exc() # Print full traceback for ASR load error
+    asr_pipeline = None # Reset explicitly in case the failure left a partial value
+# ====================================================
+# --- Tool Definitions (Standalone Functions) ---
+@tool
+def search_tool(query: str) -> str:
+    """Calls DuckDuckGo search and returns the results. Use this for recent information or general web searches."""
+    # NOTE: the docstring above is the tool description exposed to the LLM.
+    print(f"--- Calling Search Tool with query: {query} ---")
+    # A new DuckDuckGo runner is built per call; any failure is reported back
+    # to the agent as text instead of being raised.
+    try:
+        return DuckDuckGoSearchRun().run(query)
+    except Exception as exc:
+        return f"Error running search: {exc}"
+@tool
+def code_interpreter(code: str) -> str:
+    """
+    Executes a string of Python code and returns its stdout, stderr, and any error.
+    Use this for calculations, data manipulation (including pandas on dataframes read from files), list operations, string manipulations, or any other Python operation.
+    The code runs in a sandboxed environment. 'pandas' (as pd) and 'openpyxl' are available.
+    Ensure the code is complete and executable. If printing, use print().
+    """
+    # NOTE: the docstring above is the tool description exposed to the LLM.
+    # SECURITY: exec() runs model-generated code inside this process with the
+    # app's full privileges. Nothing here isolates it, despite the wording of
+    # the tool description -- review before exposing this to untrusted input.
+    print(f"--- Calling Code Interpreter with code:\n{code}\n---")
+    output_stream = io.StringIO()
+    error_stream = io.StringIO()
+    # One dict acts as both globals and locals. With two separate dicts the
+    # snippet is executed as if it were a class body, so functions it defines
+    # cannot see its own top-level imports, variables or sibling functions
+    # (they fail with NameError when called).
+    namespace = {"pd": pd}
+    try:
+        # Capture everything the snippet prints or writes to stderr
+        with contextlib.redirect_stdout(output_stream), contextlib.redirect_stderr(error_stream):
+            exec(code, namespace)
+    except Exception as e:
+        # Report any exception raised by the snippet as text for the agent
+        return f"Execution failed with error: {str(e)}"
+    stdout = output_stream.getvalue()
+    stderr = error_stream.getvalue()
+    # Anything on stderr is reported as an error, with stdout attached
+    if stderr:
+        return f"Error: {stderr}\nStdout: {stdout}"
+    if stdout:
+        return f"Success:\n{stdout}"
+    return "Success: Code executed without error and produced no stdout."
+@tool
+def read_file(path: str) -> str:
+    """Reads the content of a file at the specified path. Use this to examine files provided in the question."""
+    # NOTE: the docstring above is the tool description exposed to the LLM.
+    print(f"--- Calling Read File Tool at path: {path} ---")
+    try:
+        # Candidate 1: the path joined onto the current working directory.
+        cwd = os.getcwd()
+        print(f"Base directory for reading: {cwd}")
+        relative_candidate = os.path.join(cwd, path)
+        print(f"Attempting to read relative path: {relative_candidate}")
+        resolved = relative_candidate if os.path.exists(relative_candidate) else None
+        # Candidate 2: the path exactly as given (it may be absolute).
+        if resolved is None:
+            print(f"Attempting to read direct/absolute path: {path}")
+            if os.path.exists(path):
+                resolved = path
+        # Candidate 3: only the file name, looked up in the working directory.
+        if resolved is None:
+            basename_candidate = os.path.join(os.getcwd(), os.path.basename(path))
+            print(f"Attempting to read basename path in CWD: {basename_candidate}")
+            if os.path.exists(basename_candidate):
+                resolved = basename_candidate
+        # Nothing matched: report every location tried plus a directory listing.
+        if resolved is None:
+            try:
+                cwd_listing = os.listdir(".")
+            except Exception as list_err:
+                cwd_listing = [f"Error listing CWD: {list_err}"]
+            return (f"Error: File not found.\n"
+                    f"Tried relative: '{relative_candidate}'\n"
+                    f"Tried direct/absolute: '{path}'\n"
+                    f"Tried basename in CWD: '{basename_candidate}'\n"
+                    f"Files in CWD (.): {cwd_listing}")
+        print(f"Reading file: {resolved}")
+        with open(resolved, 'r', encoding='utf-8') as handle:
+            return handle.read()
+    except Exception as exc:
+        return f"Error reading file {path}: {str(exc)}"
+@tool
+def write_file(path: str, content: str) -> str:
+    """Writes the given content to a file at the specified path relative to the app's current directory. Creates directories if they don't exist."""
+    # NOTE: the docstring above is the tool description exposed to the LLM.
+    print(f"--- Calling Write File Tool at path: {path} ---")
+    try:
+        # Anchor the target at the current working directory.
+        target = os.path.join(os.getcwd(), path)
+        print(f"Writing file to: {target}")
+        # Create any missing parent directories before opening the file.
+        parent_dir = os.path.dirname(target)
+        os.makedirs(parent_dir, exist_ok=True)
+        with open(target, 'w', encoding='utf-8') as out_file:
+            out_file.write(content)
+    except Exception as exc:
+        return f"Error writing to file {path}: {str(exc)}"
+    return f"Successfully wrote to file {path} (relative to CWD)."
+@tool
+def list_directory(path: str = ".") -> str:
+    """Lists the contents (files and directories) of a directory at the specified path relative to the app's current directory."""
+    # NOTE: the docstring above is the tool description exposed to the LLM.
+    print(f"--- Calling List Directory Tool at path: {path} ---")
+    try:
+        # Anchor the requested path at the current working directory.
+        target_dir = os.path.join(os.getcwd(), path)
+        print(f"Listing directory: {target_dir}")
+        if os.path.isdir(target_dir):
+            entries = os.listdir(target_dir)
+            if entries:
+                # One entry per line, in the order os.listdir returned them.
+                return "\n".join(entries)
+            return "Directory is empty."
+        return f"Error: '{path}' is not a valid directory relative to CWD."
+    except Exception as exc:
+        return f"Error listing directory {path}: {str(exc)}"
+@tool
+def audio_transcription_tool(file_path: str) -> str:
+    """
+    Transcribes an audio file (like .mp3 or .wav) using Whisper and returns the text content.
+    Use this for questions involving audio file analysis.
+    """
+    # NOTE: the docstring above is the tool description exposed to the LLM.
+    print(f"--- Calling Audio Transcription Tool at path: {file_path} ---")
+    # The pipeline is a module-level global; None means it failed to load.
+    if asr_pipeline is None:
+        return "Error: Audio transcription pipeline is not available or failed to load."
+    try:
+        # Candidate 1: the path joined onto the current working directory.
+        relative_candidate = os.path.join(os.getcwd(), file_path)
+        print(f"Attempting to transcribe relative path: {relative_candidate}")
+        audio_path = relative_candidate if os.path.exists(relative_candidate) else None
+        # Candidate 2: the path exactly as given (it may be absolute).
+        if audio_path is None:
+            print(f"Attempting to transcribe direct/absolute path: {file_path}")
+            if os.path.exists(file_path):
+                audio_path = file_path
+        # Candidate 3: only the file name, looked up in the working directory.
+        if audio_path is None:
+            basename_candidate = os.path.join(os.getcwd(), os.path.basename(file_path))
+            print(f"Attempting to transcribe basename path in CWD: {basename_candidate}")
+            if os.path.exists(basename_candidate):
+                audio_path = basename_candidate
+        # Nothing matched: report every location tried plus a directory listing.
+        if audio_path is None:
+            try:
+                cwd_listing = os.listdir(".")
+            except Exception as list_err:
+                cwd_listing = [f"Error listing CWD: {list_err}"]
+            return (f"Error: Audio file not found.\n"
+                    f"Tried relative: '{relative_candidate}'\n"
+                    f"Tried direct/absolute: '{file_path}'\n"
+                    f"Tried basename in CWD: '{basename_candidate}'\n"
+                    f"Files in CWD (.): {cwd_listing}")
+        print(f"Transcribing file: {audio_path}")
+        result = asr_pipeline(audio_path)
+        print("--- Transcription Complete ---")
+        # The pipeline result is read as a mapping with a "text" entry.
+        return result.get("text", "Error: Transcription failed.")
+    except Exception as exc:
+        import traceback; traceback.print_exc()
+        return f"Error during audio transcription: {str(exc)}"
+def _extract_youtube_video_id(video_url: str) -> str | None:
+    """Return the video ID found in a YouTube URL, or None when there is none."""
+    # Local import so this helper does not depend on the file's import block.
+    from urllib.parse import parse_qs, urlparse
+    parsed = urlparse(video_url.strip())
+    # Watch URLs carry the ID in the "v" query parameter, wherever it appears
+    # in the query string; urlparse has already split off any "#fragment".
+    query_ids = parse_qs(parsed.query).get("v")
+    if query_ids and query_ids[0]:
+        return query_ids[0]
+    # A URL given without a scheme leaves the host inside `path`, so host and
+    # path are scanned together as one list of segments.
+    segments = [part for part in f"{parsed.netloc}{parsed.path}".split("/") if part]
+    for index, part in enumerate(segments[:-1]):
+        # Short links (youtu.be/<id>) and /shorts/, /embed/, /live/, /v/ paths
+        # put the ID in the segment right after the marker.
+        if part.lower().endswith("youtu.be") or part in ("shorts", "embed", "live", "v"):
+            return segments[index + 1]
+    return None
+@tool
+def get_youtube_transcript(video_url: str) -> str:
+    """
+    Fetches the transcript for a given YouTube video URL. Use this for questions about YouTube video content.
+    """
+    # NOTE: the docstring above is the tool description exposed to the LLM.
+    print(f"--- Calling YouTube Transcript Tool for URL: {video_url} ---")
+    try:
+        video_id = _extract_youtube_video_id(video_url)
+        if not video_id:
+            return f"Error: Could not extract video ID from URL: {video_url}"
+        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
+        # Join the "text" field of every transcript entry into one string.
+        full_transcript = " ".join(item["text"] for item in transcript_list)
+        print("--- Transcript Fetched ---")
+        return full_transcript[:8000] # Limit context
+    except Exception as e:
+        # Any failure (bad ID, no transcript, network) is returned as text.
+        return f"Error fetching YouTube transcript: {str(e)}"
+@tool
+def scrape_web_page(url: str) -> str:
+    """
+    Fetches the primary text content of a given web page URL, removing navigation, footer, scripts, and styles.
+    Use this when you need the full content of a webpage found via search.
+    """
+    # NOTE: the docstring above is the tool description exposed to the LLM.
+    print(f"--- Calling Web Scraper Tool for URL: {url} ---")
+    try:
+        # The request is sent with a browser-like User-Agent and a 15 s timeout.
+        response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=15)
+        response.raise_for_status()
+        content_type = response.headers.get('Content-Type', '').lower()
+        if 'html' not in content_type:
+            return f"Error: URL {url} did not return HTML."
+        soup = BeautifulSoup(response.text, 'html.parser')
+        # Remove elements that are not page content before extracting text.
+        for unwanted in soup.find_all(["script", "style", "nav", "footer", "aside", "header", "form", "button", "input"]):
+            unwanted.extract()
+        # Use the first available container, falling back to the whole document.
+        main_content = (
+            soup.find('main')
+            or soup.find('article')
+            or soup.find('div', role='main')
+            or soup.body
+            or soup
+        )
+        raw_text = main_content.get_text(separator='\n', strip=True)
+        # Split each line on double spaces and keep only non-empty pieces.
+        cleaned_parts = []
+        for raw_line in raw_text.splitlines():
+            for phrase in raw_line.strip().split("  "):
+                phrase = phrase.strip()
+                if phrase:
+                    cleaned_parts.append(phrase)
+        text = '\n'.join(cleaned_parts)
+        print("--- Web Page Scraped ---")
+        return text[:8000] # Limit context
+    except requests.exceptions.RequestException as exc:
+        return f"Error fetching web page {url}: {str(exc)}"
+    except Exception as exc:
+        return f"Error scraping web page {url}: {str(exc)}"
+# --- End of Tool Definitions ---
+# List of standalone tool functions.
+# BasicAgent.__init__ reads this list as self.tools, uses it to build the tool
+# descriptions in the system prompt, and passes it to bind_tools and ToolNode.
+defined_tools = [
+    search_tool,
+    code_interpreter,
+    read_file,
+    write_file,
+    list_directory,
+    audio_transcription_tool,
+    get_youtube_transcript,
+    scrape_web_page
+]
+# --- LangGraph Agent State ---
+# State schema handed to StateGraph; it carries only the conversation messages.
+class AgentState(TypedDict):
+    # Annotated with add_messages -- presumably LangGraph's reducer that merges
+    # messages returned by a node into this list (confirm against LangGraph docs).
+    messages: Annotated[list[AnyMessage], add_messages]
+# --- Basic Agent Definition ---
+# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
+class BasicAgent:
     def __init__(self):
         print("BasicAgent (LangGraph) initializing...")
+        # 1. Get API Tokens from Space Secrets
         GROQ_API_KEY = os.getenv("GROQ_API_KEY")
         if not GROQ_API_KEY:
             raise ValueError("GROQ_API_KEY secret is not set! Please add it to your Space secrets.")
+        # Keep HF Token check just in case
+        HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
+        if not HUGGINGFACEHUB_API_TOKEN:
+            print("⚠️ Warning: HUGGINGFACEHUB_API_TOKEN secret not set.")
+        # 2. Use the globally defined tools list
+        self.tools = defined_tools # Use the list of functions
+        # 3. Define the Improved System Prompt
         tool_descriptions = "\n".join([f"- {tool.name}: {tool.description}" for tool in self.tools])
+        # Use placeholders for JSON markers
         self.system_prompt = f"""You are a highly intelligent and meticulous AI assistant built to answer questions from the GAIA benchmark.
 Your primary goal is to provide **only the concise, factual, and direct answer** to the user's question, exactly matching the format required by the benchmark (e.g., a name, a number, a specific string format, a comma-separated list).
 7.  Once you have derived the final, definitive answer that meets the question's requirements, output **ONLY** that answer and nothing else. Stop the process.
 """
+        # 4. Initialize the LLM (Using Groq and Llama 3.1 8B Instant)
         print("Initializing Groq LLM Endpoint...")
         try:
             chat_llm = ChatGroq(
                 temperature=0.01,
                 groq_api_key=GROQ_API_KEY,
+                model_name="llama-3.1-8b-instant" # Use Llama 3.1 8B Instant
             )
+            print("✅ Groq LLM Endpoint initialized with llama-3.1-8b-instant.")
         except Exception as e:
+            print(f"Error initializing Groq LLM: {e}"); raise
         # ===========================================================
+        # 5. Bind tools to the LLM
+        # bind_tools should work correctly with ChatGroq and standalone functions
         self.llm_with_tools = chat_llm.bind_tools(self.tools)
+        print("✅ Tools bound to LLM (using bind_tools).")
+        # 6. Define the Agent Node (Simplified)
         def agent_node(state: AgentState):
             print("--- Running Agent Node ---")
+            ai_message: AIMessage = self.llm_with_tools.invoke(state["messages"])
             print(f"AI Message Raw Content: {ai_message.content}")
+            if ai_message.tool_calls: print(f"AI Message tool calls via bind_tools: {ai_message.tool_calls}")
+            elif ai_message.invalid_tool_calls: print(f"AI Message INVALID tool calls via bind_tools: {ai_message.invalid_tool_calls}")
+            else: print(f"AI Message content (no calls): {ai_message.pretty_repr()}")
             return {"messages": [ai_message]}
         # =======================================================
+        # 7. Define the Tool Node
+        # Pass the list of standalone functions
         tool_node = ToolNode(self.tools)
+        # 8. Create the Graph
         print("Building agent graph...")
         graph_builder = StateGraph(AgentState)
         graph_builder.add_node("agent", agent_node)
         graph_builder.add_node("tools", tool_node)
         graph_builder.add_edge(START, "agent")
+        graph_builder.add_conditional_edges("agent", tools_condition, {"tools": "tools", "__end__": "__end__"})
         graph_builder.add_edge("tools", "agent")
+        # 9. Compile the graph and store it
         self.graph = graph_builder.compile()
         print("✅ Graph compiled successfully.")
+    # __call__ is defined directly below, so BasicAgent instances are callable.
     def __call__(self, question: str) -> str:
         print(f"\n--- Starting Agent Run for Question ---")
         print(f"Agent received question (first 100 chars): {question[:100]}...")
             traceback.print_exc()
             return f"AGENT GRAPH ERROR: {e}"
+# --- (Original Template Code Starts Here - Modified for Mock Questions) ---
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
         username= f"{profile.username}"
         print(f"User logged in: {username}")
     else:
+        username = "local_test_user"; print("User not logged in, using dummy username.")
     api_url = DEFAULT_API_URL
+    submit_url = f"{api_url}/submit"
     print("Instantiating agent...")
     try:
+        agent = BasicAgent() # Assumes BasicAgent class is defined above
+        # Check global asr_pipeline status
+        if asr_pipeline is None:
+             print("⚠️ ASR Pipeline might not have loaded correctly globally. Audio questions could fail.")
+    except Exception as e: print(f"Error instantiating agent: {e}"); import traceback; traceback.print_exc(); return f"Error initializing agent: {e}", None
     print("Agent instantiated successfully.")
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run"
     # ==================== MOCK QUESTIONS SECTION ====================
     print("--- USING MOCK QUESTIONS FOR TESTING ---")
     mock_questions_data = [
+        {"task_id": "mock_task_search_001", "question": "What is the capital of France?"},
+        {"task_id": "mock_task_code_001", "question": "Calculate 15 factorial using python. Only output the final number."},
+        # {"task_id": "mock_task_audio_001", "question": "Transcribe the audio file 'sample.mp3'", "file_path": "sample.mp3"}, # Make sure sample.mp3 exists
     ]
     questions_data = mock_questions_data
     print(f"Using {len(questions_data)} mock questions.")
     total_questions = len(questions_data)
     print(f"Running agent on {total_questions} mock questions...")
+    questions_to_run = questions_data
     for i, item in enumerate(questions_to_run):
         task_id = item.get("task_id")
         question_text = item.get("question")
+        if not task_id or question_text is None: print(f"Skipping item {i+1}: missing ID or question."); continue
         print(f"\n--- Running Mock Task {i+1}/{len(questions_to_run)} (ID: {task_id}) ---")
         try:
             file_path = item.get("file_path")
             question_text_with_context = question_text
             if file_path:
                  try: script_dir = os.path.dirname(os.path.realpath(__file__))
                  except NameError: script_dir = os.getcwd()
                  potential_script_path = os.path.join(script_dir, file_path)
+                 potential_cwd_path = os.path.join(os.getcwd(), file_path)
+                 file_context = f"[Attached File (path provided): {file_path}]"
                  if os.path.exists(potential_script_path): file_context = f"[Attached File (exists): {file_path}]"
                  elif os.path.exists(potential_cwd_path): file_context = f"[Attached File (exists in cwd): {file_path}]"
+                 else: file_context = f"[Attached File (path provided, NOT FOUND): {file_path}]"
                  question_text_with_context = f"{question_text}\n\n{file_context}"
                  print(f"Question includes file reference: {file_path}")
+            # Run the agent on the question (plus file context, if any) via BasicAgent.__call__.
             submitted_answer = agent(question_text_with_context)
+            # Normalize the result to a string; None becomes an empty string.
             submitted_answer_str = str(submitted_answer) if submitted_answer is not None else ""
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer_str})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer_str})
             print(f"--- Mock Task {task_id} Complete ---")
         except Exception as e:
+             # If the error is because __call__ is missing, catch it specifically
+             if isinstance(e, TypeError) and "'BasicAgent' object is not callable" in str(e):
+                 print(f"ERROR: agent object is not callable because __call__ method was removed.")
+                 submitted_answer = "ERROR: Agent has no __call__ method"
+             else:
+                 print(f"FATAL ERROR running agent graph on mock task {task_id}: {e}")
+                 import traceback; traceback.print_exc()
+                 submitted_answer = f"AGENT CRASH ERROR: {e}"
              answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
              results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+    if not answers_payload: print("Agent did not produce answers."); return "Agent did not produce answers.", pd.DataFrame(results_log)
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+    status_update = f"Agent finished mock questions. Prepared {len(answers_payload)} answers for '{username}'."
     print(status_update)
     print("--- MOCK RUN COMPLETE - SUBMISSION SKIPPED ---")
+    final_status = "--- MOCK RUN COMPLETE ---\n" + status_update + "\nSubmission SKIPPED."
     results_df = pd.DataFrame(results_log)
     results_df['Correct'] = 'N/A (Mock)'
     return final_status, results_df
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
+    gr.Markdown("# GAIA Agent Evaluation Runner (LangGraph + Groq Llama3.1 - MOCK TEST)") # Updated title
+    gr.Markdown( """
         **Instructions:**
+        1.  Login is optional for this MOCK test run.
+        2.  Click 'Run Mock Evaluation' to run the agent on a few hardcoded questions.
         ---
         **Notes:**
+        * This uses Groq for LLM inference (Llama 3.1 8B Instant). Ensure `GROQ_API_KEY` is set as an environment variable or Space secret.
+        * This version **DOES NOT** fetch questions from the official server and **DOES NOT** submit results. It only runs locally on mock questions to test the agent's logic.
+        * Check the terminal/logs to see tool calls and agent reasoning.
+        """)
     gr.LoginButton()
+    run_button = gr.Button("Run Mock Evaluation") # Changed button text
+    status_output = gr.Textbox(label="Run Status / Mock Result", lines=5, interactive=False) # Changed label
+    results_table = gr.DataFrame(label="Mock Questions, Agent Answers, and Results", wrap=True) # Changed label
     run_button.click(
         fn=run_and_submit_all,
 if __name__ == "__main__":
+    # Startup diagnostics: everything gathered below is only printed, then the UI is launched.
     print("\n" + "-"*30 + " App Starting " + "-"*30)
     space_host_startup = os.getenv("SPACE_HOST")
     space_id_startup = os.getenv("SPACE_ID")
+    # Report the Space host and repo identifiers when the environment provides them.
+    if space_host_startup: print(f"✅ SPACE_HOST: {space_host_startup}\n   Runtime URL: https://{space_host_startup}.hf.space")
+    else: print("ℹ️ SPACE_HOST env var not found (likely local run).")
+    if space_id_startup: print(f"✅ SPACE_ID: {space_id_startup}\n   Repo URL: https://huggingface.co/spaces/{space_id_startup}\n   Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+    else: print("ℹ️ SPACE_ID env var not found (likely local run).")
+    # __file__ can be undefined (NameError); fall back to the working directory then.
+    try: script_dir = os.path.dirname(os.path.realpath(__file__))
+    except NameError: script_dir = os.getcwd()
+    print(f"Script directory: {script_dir}")
+    print(f"CWD: {os.getcwd()}")
+    # Listing the working directory shows which files the file tools can reach.
+    try: print("Files in CWD:", os.listdir("."))
+    except FileNotFoundError: print("Warning: CWD listing failed.")
     print("-"*(60 + len(" App Starting ")) + "\n")
+    print("Launching Gradio Interface for GAIA Agent Mock Evaluation...")
     demo.queue().launch(debug=True, share=False)