pmeyhoefer committed on
Commit
d2d0f74
·
verified ·
1 Parent(s): bf09a7d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +151 -84
app.py CHANGED
@@ -4,11 +4,11 @@ import logging
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
7
- # We still need the openai library, even if we change the endpoint
8
  from openai import OpenAI
9
 
10
  from smolagents import CodeAgent, DuckDuckGoSearchTool, tool
11
- from smolagents.models import OpenAIServerModel # Assuming this can handle base_url
 
12
 
13
  # --- Logging ---
14
  logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
@@ -18,43 +18,49 @@ logger = logging.getLogger(__name__)
18
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" # Keep this for submission
19
 
20
  # --- GitHub Models Configuration ---
21
- # Use GITHUB_TOKEN environment variable for authentication
22
  GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
23
  if not GITHUB_TOKEN:
24
- # If running locally and GITHUB_TOKEN is not set, you might fall back
25
- # to another mechanism or raise an error. For HF Spaces, secrets are needed.
26
  raise RuntimeError("Please set GITHUB_TOKEN in your Space secrets.")
27
 
28
- # GitHub Models endpoint
29
  GITHUB_ENDPOINT = "https://models.github.ai/inference"
 
 
30
 
31
- # Specify the model ID compatible with the GitHub endpoint
32
- # Check GitHub Models documentation for available models. 'gpt-4.1' might not be the correct identifier.
33
- # Let's assume a common format like 'openai/gpt-4o' or similar, adjust as needed.
34
- # Using 'openai/gpt-4.1' as a placeholder based on your original code, VERIFY THIS with GitHub Models docs.
35
- MODEL_ID = os.getenv("MODEL_ID", "openai/gpt-4o-mini") # Renamed for clarity, adjust if needed
36
-
37
- # --- Configure OpenAI SDK (for tools if needed, now using GitHub endpoint) ---
38
- # This client might be used by tools OR potentially by OpenAIServerModel internally
39
- # depending on its implementation. Configuring it ensures consistency.
40
- # Note: If OpenAIServerModel directly instantiates its own client using the parameters
41
- # we provide later, this specific 'client' instance might not be used by the agent itself.
42
  try:
43
  client = OpenAI(
44
  base_url=GITHUB_ENDPOINT,
45
  api_key=GITHUB_TOKEN,
46
  )
47
- # Optional: Test connection or a simple call here if needed during setup
48
- # client.models.list() # Example call, might need adjustment for GitHub's API structure
49
  except Exception as e:
50
  logger.error(f"Failed to initialize OpenAI client for GitHub Models: {e}")
51
  # Decide how to handle this - raise error, log warning, etc.
52
- raise RuntimeError(f"OpenAI client initialization failed for GitHub Models: {e}") from e
 
53
 
54
 
55
  # --- Tools ---
56
- # Tools remain the same, assuming they don't directly depend on the *specific* OpenAI API endpoint
57
- # unless they internally use the globally configured 'client' (which we just updated).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  @tool
60
  def summarize_query(query: str) -> str:
@@ -65,8 +71,7 @@ def summarize_query(query: str) -> str:
65
  Returns:
66
  str: A concise, improved version.
67
  """
68
- # This tool currently doesn't use an LLM, so it's unaffected by the endpoint change.
69
- # If it *did* use the 'client' instance, it would now point to GitHub Models.
70
  return f"Summarize and reframe: {query}"
71
 
72
  @tool
@@ -74,110 +79,130 @@ def wikipedia_search(page: str) -> str:
74
  """
75
  Fetches the summary extract of an English Wikipedia page.
76
  Args:
77
- page (str): e.g. 'Mercedes_Sosa_discography'
78
  Returns:
79
- str: The page’s extract text.
80
  """
 
 
81
  try:
82
  url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{page}"
83
- r = requests.get(url, timeout=10)
84
- r.raise_for_status()
85
- return r.json().get("extract", "")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  except Exception as e:
87
- logger.exception("Wikipedia lookup failed")
88
  return f"Wikipedia error: {e}"
89
 
90
- search_tool = DuckDuckGoSearchTool()
91
- wiki_tool = wikipedia_search
92
- summarize_tool = summarize_query
93
 
94
  # --- ReACT Prompt ---
95
- # The prompt itself doesn't need to change as it describes the agent's *behavior*
96
  instruction_prompt = """
97
  You are a ReACT agent with three tools:
98
- DuckDuckGoSearchTool(query: str)
99
  • wikipedia_search(page: str)
100
  • summarize_query(query: str)
101
  Internally, for each question:
102
  1. Thought: decide which tool to call.
103
  2. Action: call the chosen tool.
104
  3. Observation: record the result.
105
- 4. If empty/irrelevant:
106
- Thought: retry with summarize_query + DuckDuckGoSearchTool.
 
107
  Record new Observation.
108
- 5. Thought: integrate observations.
109
  Finally, output your answer with the following template:
110
  FINAL ANSWER: [YOUR FINAL ANSWER].
111
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
112
  If you are asked for a number, don't use commas to write your number nor use units such as $ or percent sign unless specified otherwise.
113
  If you are asked for a string, don't use articles or abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
114
  If you are asked for a comma separated list, apply the above rules depending on whether the element to be put in the list is a number or a string.
 
115
  """
116
 
117
  # --- Build the Agent with OpenAIServerModel pointing to GitHub Models ---
118
-
119
- # *** Key Change Here ***
120
- # We configure OpenAIServerModel to use the GitHub endpoint and token.
121
- # We assume OpenAIServerModel accepts 'api_base' or 'base_url' and passes it
122
- # to the underlying OpenAI client it creates. 'base_url' is the modern parameter.
123
- # If this doesn't work, you might need to check the smolagents documentation
124
- # or source for how to specify a custom endpoint, or potentially subclass/modify it.
125
  try:
 
126
  model = OpenAIServerModel(
127
- model_id=MODEL_ID, # Use the model ID for GitHub
128
- api_key=GITHUB_TOKEN, # Use the GitHub token as the API key
129
- api_base=GITHUB_ENDPOINT # Specify the GitHub endpoint *** Use api_base or base_url ***
130
- # Try base_url if api_base doesn't work:
131
- # base_url=GITHUB_ENDPOINT
132
- # Add any other necessary parameters required by OpenAIServerModel or the underlying client
133
- # e.g., model_kwargs if needed
134
  )
135
- logger.info(f"Configured OpenAIServerModel with GitHub endpoint: {GITHUB_ENDPOINT} and model: {MODEL_ID}")
136
- except TypeError as te:
137
- logger.error(f"TypeError configuring OpenAIServerModel: {te}. Trying with 'base_url' instead of 'api_base'.")
138
- # Fallback attempt using base_url if api_base caused a TypeError
139
  try:
140
  model = OpenAIServerModel(
141
  model_id=MODEL_ID,
142
  api_key=GITHUB_TOKEN,
143
- base_url=GITHUB_ENDPOINT # Use base_url
144
  )
145
- logger.info(f"Successfully configured OpenAIServerModel with GitHub endpoint using 'base_url'.")
146
  except Exception as e:
147
- logger.error(f"Failed to configure OpenAIServerModel with both 'api_base' and 'base_url': {e}")
148
  raise RuntimeError(f"Could not configure SmolAgents model for GitHub endpoint: {e}") from e
149
  except Exception as e:
150
  logger.error(f"Failed to configure OpenAIServerModel: {e}")
151
  raise RuntimeError(f"Could not configure SmolAgents model for GitHub endpoint: {e}") from e
152
 
153
-
154
  smart_agent = CodeAgent(
155
- tools=[search_tool, wiki_tool, summarize_tool],
156
- model=model # Pass the configured model instance
 
 
 
157
  )
158
 
159
  # --- Gradio Wrapper ---
160
 
161
  class BasicAgent:
162
  def __init__(self):
163
- # Updated log message
164
  logger.info(f"Initialized SmolAgent with GitHub Model: {MODEL_ID} via {GITHUB_ENDPOINT}")
165
 
166
  def __call__(self, question: str) -> str:
167
  if not question.strip():
168
  return "AGENT ERROR: empty question"
 
169
  prompt = instruction_prompt.strip() + "\n\nQUESTION: " + question.strip()
 
170
  try:
171
  # The agent uses the 'model' instance we configured above
172
- return smart_agent.run(prompt)
 
 
 
 
 
 
 
173
  except Exception as e:
174
  logger.exception("Agent run error")
175
- # Provide more specific error if possible, e.g., AuthenticationError from OpenAI client
176
  return f"AGENT ERROR: {e}"
177
 
178
  # --- Submission Logic ---
179
- # This part remains largely the same, as it interacts with the external scoring service (DEFAULT_API_URL)
180
- # It just uses the 'agent' which now internally calls GitHub Models.
181
 
182
  def run_and_submit_all(profile: gr.OAuthProfile | None):
183
  if not profile:
@@ -185,60 +210,101 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
185
 
186
  username = profile.username
187
  space_id = os.getenv("SPACE_ID", "")
188
- # Link to the code, unchanged
189
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
190
- # Instantiate the agent wrapper, which now uses the GitHub-configured model
191
- agent = BasicAgent()
192
 
193
  # fetch questions (unchanged)
194
  try:
195
  resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
196
  resp.raise_for_status()
197
- questions = resp.json() or []
 
 
 
 
 
198
  except Exception as e:
199
  logger.exception("Failed fetch")
200
  return f"Error fetching questions: {e}", None
201
 
202
  logs, payload = [], []
203
  for item in questions:
 
 
 
204
  tid = item.get("task_id")
205
  q = item.get("question")
206
  if not tid or not q:
 
207
  continue
208
- # Run the agent (now using GitHub Models)
209
- ans = agent(q)
210
- logs.append({"Task ID": tid, "Question": q, "Submitted Answer": ans})
211
- payload.append({"task_id": tid, "submitted_answer": ans})
 
 
 
 
 
 
 
 
 
 
 
 
 
212
 
213
  if not payload:
 
214
  return "Agent did not produce any answers.", pd.DataFrame(logs)
215
 
216
- # submit answers (unchanged)
 
217
  try:
 
 
218
  post = requests.post(
219
  f"{DEFAULT_API_URL}/submit",
220
- json={"username": username, "agent_code": agent_code, "answers": payload},
221
  timeout=60
222
  )
223
  post.raise_for_status()
224
  result = post.json()
 
 
 
 
 
 
 
 
225
  status = (
226
  f"Submission Successful!\n"
227
  f"User: {result.get('username')}\n"
228
- f"Score: {result.get('score','N/A')}%\n"
229
  f"({result.get('correct_count','?')}/"
230
  f"{result.get('total_attempted','?')})\n"
231
  f"Message: {result.get('message','')}"
232
  )
233
  return status, pd.DataFrame(logs)
 
 
 
 
 
 
 
234
  except Exception as e:
235
  logger.exception("Submit failed")
236
- return f"Submission Failed: {e}", pd.DataFrame(logs)
 
237
 
238
  # --- Gradio App ---
 
239
 
240
  with gr.Blocks() as demo:
241
- gr.Markdown("# SmolAgent GAIA Runner (using GitHub Models) 🚀") # Updated title
242
  gr.Markdown("""
243
  **Instructions:**
244
  1. Clone this space.
@@ -246,17 +312,18 @@ with gr.Blocks() as demo:
246
  3. Optionally, set `MODEL_ID` if you want to use a model other than the default (e.g., `openai/gpt-4o`). Verify the correct model identifier for GitHub Models.
247
  4. Log in to Hugging Face.
248
  5. Click **Run Evaluation & Submit All Answers**.
249
- """) # Updated instructions
250
  gr.LoginButton()
251
  btn = gr.Button("Run Evaluation & Submit All Answers")
252
  out_status = gr.Textbox(label="Status", lines=5, interactive=False)
253
- out_table = gr.DataFrame(label="Questions & Answers", wrap=True)
254
  btn.click(run_and_submit_all, outputs=[out_status, out_table])
255
 
256
  if __name__ == "__main__":
257
- # Check GITHUB_TOKEN presence before launching
258
  if not GITHUB_TOKEN:
259
  logger.error("GITHUB_TOKEN environment variable not set. Cannot start.")
260
  else:
261
  logger.info("Launching Gradio App...")
262
- demo.launch(debug=True, share=False) # Set debug=False for production
 
 
 
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
 
7
  from openai import OpenAI
8
 
9
  from smolagents import CodeAgent, DuckDuckGoSearchTool, tool
10
+ # Assuming OpenAIServerModel correctly handles base_url/api_base
11
+ from smolagents.models import OpenAIServerModel
12
 
13
  # --- Logging ---
14
  logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
 
18
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" # Keep this for submission
19
 
20
  # --- GitHub Models Configuration ---
 
21
  GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
22
  if not GITHUB_TOKEN:
 
 
23
  raise RuntimeError("Please set GITHUB_TOKEN in your Space secrets.")
24
 
 
25
  GITHUB_ENDPOINT = "https://models.github.ai/inference"
26
+ # Verify this model ID with GitHub Models documentation. Using mini for potentially faster/cheaper tests.
27
+ MODEL_ID = os.getenv("MODEL_ID", "openai/gpt-4o-mini") # Changed to mini based on logs
28
 
29
+ # --- Configure OpenAI SDK (Optional - for tools if needed, points to GitHub) ---
30
+ # If tools don't use this client directly, this might be redundant,
31
+ # but it doesn't hurt to have it configured consistently.
 
 
 
 
 
 
 
 
32
  try:
33
  client = OpenAI(
34
  base_url=GITHUB_ENDPOINT,
35
  api_key=GITHUB_TOKEN,
36
  )
 
 
37
  except Exception as e:
38
  logger.error(f"Failed to initialize OpenAI client for GitHub Models: {e}")
39
  # Decide how to handle this - raise error, log warning, etc.
40
+ # For now, just log and proceed, as the agent itself uses OpenAIServerModel
41
+ pass
42
 
43
 
44
  # --- Tools ---
45
+
46
+ # Instantiate the search tool ONCE
47
+ search_tool_instance = DuckDuckGoSearchTool()
48
+
49
+ @tool
50
+ def duckduckgo_search(query: str) -> str:
51
+ """
52
+ Performs a DuckDuckGo search for the given query and returns the results.
53
+ Args:
54
+ query (str): The search query.
55
+ Returns:
56
+ str: The search results.
57
+ """
58
+ try:
59
+ # Call the instantiated search tool
60
+ return search_tool_instance(query=query)
61
+ except Exception as e:
62
+ logger.exception(f"DuckDuckGoSearchTool failed for query: {query}")
63
+ return f"Search Error: {e}"
64
 
65
  @tool
66
  def summarize_query(query: str) -> str:
 
71
  Returns:
72
  str: A concise, improved version.
73
  """
74
+ # Assuming this doesn't need an LLM call. If it did, it would use 'client'.
 
75
  return f"Summarize and reframe: {query}"
76
 
77
  @tool
 
79
  """
80
  Fetches the summary extract of an English Wikipedia page.
81
  Args:
82
+ page (str): e.g. 'Mercedes_Sosa_discography' or 'Mercedes_Sosa'
83
  Returns:
84
+ str: The page’s extract text or an error message.
85
  """
86
+ # Make page names URL-safe (replace spaces with underscores)
87
+ page = page.replace(" ", "_")
88
  try:
89
  url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{page}"
90
+ headers = {'User-Agent': 'SmolAgentGAIARunner/1.0 (https://huggingface.co/spaces/YOUR_SPACE_ID)'} # Good practice
91
+ r = requests.get(url, headers=headers, timeout=10)
92
+ r.raise_for_status() # Raises HTTPError for 4xx/5xx
93
+ data = r.json()
94
+ extract = data.get("extract", "")
95
+ if not extract and data.get("title") and data.get("type") == "disambiguation":
96
+ # Handle disambiguation pages better if needed, maybe return links?
97
+ return f"Wikipedia page '{page}' is a disambiguation page. Try a more specific query."
98
+ elif not extract:
99
+ return f"Wikipedia page '{page}' found, but has no summary extract."
100
+ return extract
101
+ except requests.exceptions.HTTPError as e:
102
+ if e.response.status_code == 404:
103
+ logger.warning(f"Wikipedia page not found: {page}")
104
+ return f"Wikipedia page '{page}' not found."
105
+ else:
106
+ logger.exception(f"Wikipedia lookup failed for page: {page}")
107
+ return f"Wikipedia HTTP error {e.response.status_code}: {e}"
108
  except Exception as e:
109
+ logger.exception(f"Wikipedia lookup failed for page: {page}")
110
  return f"Wikipedia error: {e}"
111
 
112
+ # No longer need separate variable names for the functions if they match the @tool name
113
+ # wiki_tool = wikipedia_search # Redundant if function name is clear
114
+ # summarize_tool = summarize_query # Redundant
115
 
116
  # --- ReACT Prompt ---
117
+ # *** IMPORTANT: Update the prompt to use the NEW function name 'duckduckgo_search' ***
118
  instruction_prompt = """
119
  You are a ReACT agent with three tools:
120
+ duckduckgo_search(query: str)
121
  • wikipedia_search(page: str)
122
  • summarize_query(query: str)
123
  Internally, for each question:
124
  1. Thought: decide which tool to call.
125
  2. Action: call the chosen tool.
126
  3. Observation: record the result.
127
+ 4. If empty/irrelevant (e.g., 'page not found', empty search results, or 404 error):
128
+ Thought: Re-evaluate. Should I try summarizing the query first with summarize_query and then searching with duckduckgo_search? Or try a different Wikipedia page name? Or maybe the information isn't available via these tools.
129
+ Action: Call the chosen alternative tool (or conclude if necessary).
130
  Record new Observation.
131
+ 5. Thought: integrate observations. If multiple searches were needed, synthesize the results.
132
  Finally, output your answer with the following template:
133
  FINAL ANSWER: [YOUR FINAL ANSWER].
134
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
135
  If you are asked for a number, don't use commas to write your number nor use units such as $ or percent sign unless specified otherwise.
136
  If you are asked for a string, don't use articles or abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
137
  If you are asked for a comma separated list, apply the above rules depending on whether the element to be put in the list is a number or a string.
138
+ Only output the FINAL ANSWER line once all thinking is done.
139
  """
140
 
141
  # --- Build the Agent with OpenAIServerModel pointing to GitHub Models ---
 
 
 
 
 
 
 
142
  try:
143
+ # Try with base_url first, as it's the modern OpenAI SDK parameter
144
  model = OpenAIServerModel(
145
+ model_id=MODEL_ID,
146
+ api_key=GITHUB_TOKEN,
147
+ base_url=GITHUB_ENDPOINT # Use base_url
148
+ # You might need to pass model_kwargs if specific settings are required
149
+ # model_kwargs={'temperature': 0.7} # Example
 
 
150
  )
151
+ logger.info(f"Configured OpenAIServerModel with GitHub endpoint using 'base_url'.")
152
+ except TypeError:
153
+ logger.warning("Configuring OpenAIServerModel with 'base_url' failed, trying 'api_base'.")
154
+ # Fallback attempt using api_base if base_url caused a TypeError
155
  try:
156
  model = OpenAIServerModel(
157
  model_id=MODEL_ID,
158
  api_key=GITHUB_TOKEN,
159
+ api_base=GITHUB_ENDPOINT # Use api_base
160
  )
161
+ logger.info(f"Successfully configured OpenAIServerModel with GitHub endpoint using 'api_base'.")
162
  except Exception as e:
163
+ logger.error(f"Failed to configure OpenAIServerModel with both 'base_url' and 'api_base': {e}")
164
  raise RuntimeError(f"Could not configure SmolAgents model for GitHub endpoint: {e}") from e
165
  except Exception as e:
166
  logger.error(f"Failed to configure OpenAIServerModel: {e}")
167
  raise RuntimeError(f"Could not configure SmolAgents model for GitHub endpoint: {e}") from e
168
 
169
+ # *** Pass the list of FUNCTION objects to the CodeAgent ***
170
  smart_agent = CodeAgent(
171
+ tools=[duckduckgo_search, wikipedia_search, summarize_query], # Use the function names directly
172
+ model=model
173
+ # Check smolagents docs if there's a way to pass globals/context for execution
174
+ # e.g., execution_globals={'duckduckgo_search': duckduckgo_search, ...} might be needed
175
+ # but often passing the functions in the 'tools' list is enough if they are decorated correctly.
176
  )
177
 
178
  # --- Gradio Wrapper ---
179
 
180
  class BasicAgent:
181
  def __init__(self):
 
182
  logger.info(f"Initialized SmolAgent with GitHub Model: {MODEL_ID} via {GITHUB_ENDPOINT}")
183
 
184
  def __call__(self, question: str) -> str:
185
  if not question.strip():
186
  return "AGENT ERROR: empty question"
187
+ # Ensure the prompt ends correctly before adding the question
188
  prompt = instruction_prompt.strip() + "\n\nQUESTION: " + question.strip()
189
+ logger.info(f"Running agent with prompt:\n-------\n{prompt}\n-------")
190
  try:
191
  # The agent uses the 'model' instance we configured above
192
+ result = smart_agent.run(prompt)
193
+ logger.info(f"Agent returned: {result}")
194
+ # Basic check if the agent failed to produce a final answer
195
+ if "FINAL ANSWER:" not in result:
196
+ logger.warning("Agent did not produce a 'FINAL ANSWER:' block.")
197
+ # You might return a generic error or the raw output
198
+ return f"AGENT WARNING: No 'FINAL ANSWER:' found. Raw output: {result}"
199
+ return result # Return the full output including FINAL ANSWER:
200
  except Exception as e:
201
  logger.exception("Agent run error")
 
202
  return f"AGENT ERROR: {e}"
203
 
204
  # --- Submission Logic ---
205
+ # (No changes needed here, it uses the BasicAgent instance)
 
206
 
207
  def run_and_submit_all(profile: gr.OAuthProfile | None):
208
  if not profile:
 
210
 
211
  username = profile.username
212
  space_id = os.getenv("SPACE_ID", "")
 
213
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
214
+ agent = BasicAgent() # Instantiates the agent with the corrected tool setup
 
215
 
216
  # fetch questions (unchanged)
217
  try:
218
  resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
219
  resp.raise_for_status()
220
+ questions_data = resp.json()
221
+ if not isinstance(questions_data, list):
222
+ logger.error(f"Fetched questions is not a list: {questions_data}")
223
+ return "Error: Fetched questions format is incorrect.", None
224
+ questions = questions_data or []
225
+ logger.info(f"Fetched {len(questions)} questions.")
226
  except Exception as e:
227
  logger.exception("Failed fetch")
228
  return f"Error fetching questions: {e}", None
229
 
230
  logs, payload = [], []
231
  for item in questions:
232
+ if not isinstance(item, dict):
233
+ logger.warning(f"Skipping invalid question item: {item}")
234
+ continue
235
  tid = item.get("task_id")
236
  q = item.get("question")
237
  if not tid or not q:
238
+ logger.warning(f"Skipping question with missing task_id or question: {item}")
239
  continue
240
+
241
+ logger.info(f"Processing Task ID: {tid}, Question: {q}")
242
+ ans_raw = agent(q) # Run the agent
243
+
244
+ # Extract only the final answer part for submission
245
+ final_ans_marker = "FINAL ANSWER:"
246
+ if final_ans_marker in ans_raw:
247
+ submitted_ans = ans_raw.split(final_ans_marker, 1)[1].strip()
248
+ elif "AGENT ERROR:" in ans_raw or "AGENT WARNING:" in ans_raw:
249
+ submitted_ans = f"ERROR ({ans_raw})" # Submit error message
250
+ else:
251
+ logger.warning(f"Could not extract final answer from raw output for Task ID {tid}. Raw: {ans_raw}")
252
+ submitted_ans = f"ERROR (Could not parse agent output)" # Fallback
253
+
254
+ logger.info(f"Task ID: {tid}, Submitted Answer: {submitted_ans}")
255
+ logs.append({"Task ID": tid, "Question": q, "Submitted Answer": submitted_ans, "Raw Output": ans_raw})
256
+ payload.append({"task_id": tid, "submitted_answer": submitted_ans})
257
 
258
  if not payload:
259
+ logger.warning("Agent did not produce any valid answers to submit.")
260
  return "Agent did not produce any answers.", pd.DataFrame(logs)
261
 
262
+ logger.info(f"Submitting {len(payload)} answers...")
263
+ # submit answers (unchanged, uses extracted answer)
264
  try:
265
+ submit_payload = {"username": username, "agent_code": agent_code, "answers": payload}
266
+ logger.debug(f"Submission Payload: {submit_payload}") # Log payload for debugging if needed
267
  post = requests.post(
268
  f"{DEFAULT_API_URL}/submit",
269
+ json=submit_payload,
270
  timeout=60
271
  )
272
  post.raise_for_status()
273
  result = post.json()
274
+ logger.info(f"Submission Result: {result}")
275
+ score_percent = result.get('score', 'N/A')
276
+ # Ensure score is formatted reasonably if it's a number
277
+ try:
278
+ score_percent = f"{float(score_percent):.2f}" if score_percent != 'N/A' else 'N/A'
279
+ except (ValueError, TypeError):
280
+ pass # Keep as 'N/A' or original string if conversion fails
281
+
282
  status = (
283
  f"Submission Successful!\n"
284
  f"User: {result.get('username')}\n"
285
+ f"Score: {score_percent}%\n"
286
  f"({result.get('correct_count','?')}/"
287
  f"{result.get('total_attempted','?')})\n"
288
  f"Message: {result.get('message','')}"
289
  )
290
  return status, pd.DataFrame(logs)
291
+ except requests.exceptions.RequestException as e:
292
+ logger.exception("Submit failed")
293
+ # Try to get more info from the response if possible
294
+ error_details = str(e)
295
+ if e.response is not None:
296
+ error_details += f" | Status Code: {e.response.status_code} | Response: {e.response.text[:500]}" # Limit response size
297
+ return f"Submission Failed: {error_details}", pd.DataFrame(logs)
298
  except Exception as e:
299
  logger.exception("Submit failed")
300
+ return f"Submission Failed with unexpected error: {e}", pd.DataFrame(logs)
301
+
302
 
303
  # --- Gradio App ---
304
+ # (No changes needed here)
305
 
306
  with gr.Blocks() as demo:
307
+ gr.Markdown("# SmolAgent GAIA Runner (using GitHub Models) 🚀")
308
  gr.Markdown("""
309
  **Instructions:**
310
  1. Clone this space.
 
312
  3. Optionally, set `MODEL_ID` if you want to use a model other than the default (e.g., `openai/gpt-4o`). Verify the correct model identifier for GitHub Models.
313
  4. Log in to Hugging Face.
314
  5. Click **Run Evaluation & Submit All Answers**.
315
+ """)
316
  gr.LoginButton()
317
  btn = gr.Button("Run Evaluation & Submit All Answers")
318
  out_status = gr.Textbox(label="Status", lines=5, interactive=False)
319
+ out_table = gr.DataFrame(label="Questions & Answers", wrap=True, height=400) # Increased height maybe
320
  btn.click(run_and_submit_all, outputs=[out_status, out_table])
321
 
322
  if __name__ == "__main__":
 
323
  if not GITHUB_TOKEN:
324
  logger.error("GITHUB_TOKEN environment variable not set. Cannot start.")
325
  else:
326
  logger.info("Launching Gradio App...")
327
+ # share=True needed for public link as mentioned in logs
328
+ # debug=True provides more verbose Gradio logging if needed
329
+ demo.launch(debug=True, share=True)