pmeyhoefer committed on
Commit
d2d0f74
·
verified ·
1 Parent(s): bf09a7d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +151 -84
app.py CHANGED
@@ -4,11 +4,11 @@ import logging
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
7
- # We still need the openai library, even if we change the endpoint
8
  from openai import OpenAI
9
 
10
  from smolagents import CodeAgent, DuckDuckGoSearchTool, tool
11
- from smolagents.models import OpenAIServerModel # Assuming this can handle base_url
 
12
 
13
  # --- Logging ---
14
  logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
@@ -18,43 +18,49 @@ logger = logging.getLogger(__name__)
18
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" # Keep this for submission
19
 
20
  # --- GitHub Models Configuration ---
21
- # Use GITHUB_TOKEN environment variable for authentication
22
  GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
23
  if not GITHUB_TOKEN:
24
- # If running locally and GITHUB_TOKEN is not set, you might fall back
25
- # to another mechanism or raise an error. For HF Spaces, secrets are needed.
26
  raise RuntimeError("Please set GITHUB_TOKEN in your Space secrets.")
27
 
28
- # GitHub Models endpoint
29
  GITHUB_ENDPOINT = "https://models.github.ai/inference"
 
 
30
 
31
- # Specify the model ID compatible with the GitHub endpoint
32
- # Check GitHub Models documentation for available models. 'gpt-4.1' might not be the correct identifier.
33
- # Let's assume a common format like 'openai/gpt-4o' or similar, adjust as needed.
34
- # Using 'openai/gpt-4.1' as a placeholder based on your original code, VERIFY THIS with GitHub Models docs.
35
- MODEL_ID = os.getenv("MODEL_ID", "openai/gpt-4o-mini") # Renamed for clarity, adjust if needed
36
-
37
- # --- Configure OpenAI SDK (for tools if needed, now using GitHub endpoint) ---
38
- # This client might be used by tools OR potentially by OpenAIServerModel internally
39
- # depending on its implementation. Configuring it ensures consistency.
40
- # Note: If OpenAIServerModel directly instantiates its own client using the parameters
41
- # we provide later, this specific 'client' instance might not be used by the agent itself.
42
  try:
43
  client = OpenAI(
44
  base_url=GITHUB_ENDPOINT,
45
  api_key=GITHUB_TOKEN,
46
  )
47
- # Optional: Test connection or a simple call here if needed during setup
48
- # client.models.list() # Example call, might need adjustment for GitHub's API structure
49
  except Exception as e:
50
  logger.error(f"Failed to initialize OpenAI client for GitHub Models: {e}")
51
  # Decide how to handle this - raise error, log warning, etc.
52
- raise RuntimeError(f"OpenAI client initialization failed for GitHub Models: {e}") from e
 
53
 
54
 
55
  # --- Tools ---
56
- # Tools remain the same, assuming they don't directly depend on the *specific* OpenAI API endpoint
57
- # unless they internally use the globally configured 'client' (which we just updated).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  @tool
60
  def summarize_query(query: str) -> str:
@@ -65,8 +71,7 @@ def summarize_query(query: str) -> str:
65
  Returns:
66
  str: A concise, improved version.
67
  """
68
- # This tool currently doesn't use an LLM, so it's unaffected by the endpoint change.
69
- # If it *did* use the 'client' instance, it would now point to GitHub Models.
70
  return f"Summarize and reframe: {query}"
71
 
72
  @tool
@@ -74,110 +79,130 @@ def wikipedia_search(page: str) -> str:
74
  """
75
  Fetches the summary extract of an English Wikipedia page.
76
  Args:
77
- page (str): e.g. 'Mercedes_Sosa_discography'
78
  Returns:
79
- str: The page’s extract text.
80
  """
 
 
81
  try:
82
  url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{page}"
83
- r = requests.get(url, timeout=10)
84
- r.raise_for_status()
85
- return r.json().get("extract", "")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  except Exception as e:
87
- logger.exception("Wikipedia lookup failed")
88
  return f"Wikipedia error: {e}"
89
 
90
- search_tool = DuckDuckGoSearchTool()
91
- wiki_tool = wikipedia_search
92
- summarize_tool = summarize_query
93
 
94
  # --- ReACT Prompt ---
95
- # The prompt itself doesn't need to change as it describes the agent's *behavior*
96
  instruction_prompt = """
97
  You are a ReACT agent with three tools:
98
- DuckDuckGoSearchTool(query: str)
99
  • wikipedia_search(page: str)
100
  • summarize_query(query: str)
101
  Internally, for each question:
102
  1. Thought: decide which tool to call.
103
  2. Action: call the chosen tool.
104
  3. Observation: record the result.
105
- 4. If empty/irrelevant:
106
- Thought: retry with summarize_query + DuckDuckGoSearchTool.
 
107
  Record new Observation.
108
- 5. Thought: integrate observations.
109
  Finally, output your answer with the following template:
110
  FINAL ANSWER: [YOUR FINAL ANSWER].
111
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
112
  If you are asked for a number, don't use commas to write your number nor use units such as $ or percent sign unless specified otherwise.
113
  If you are asked for a string, don't use articles or abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
114
  If you are asked for a comma separated list, apply the above rules depending on whether the element to be put in the list is a number or a string.
 
115
  """
116
 
117
  # --- Build the Agent with OpenAIServerModel pointing to GitHub Models ---
118
-
119
- # *** Key Change Here ***
120
- # We configure OpenAIServerModel to use the GitHub endpoint and token.
121
- # We assume OpenAIServerModel accepts 'api_base' or 'base_url' and passes it
122
- # to the underlying OpenAI client it creates. 'base_url' is the modern parameter.
123
- # If this doesn't work, you might need to check the smolagents documentation
124
- # or source for how to specify a custom endpoint, or potentially subclass/modify it.
125
  try:
 
126
  model = OpenAIServerModel(
127
- model_id=MODEL_ID, # Use the model ID for GitHub
128
- api_key=GITHUB_TOKEN, # Use the GitHub token as the API key
129
- api_base=GITHUB_ENDPOINT # Specify the GitHub endpoint *** Use api_base or base_url ***
130
- # Try base_url if api_base doesn't work:
131
- # base_url=GITHUB_ENDPOINT
132
- # Add any other necessary parameters required by OpenAIServerModel or the underlying client
133
- # e.g., model_kwargs if needed
134
  )
135
- logger.info(f"Configured OpenAIServerModel with GitHub endpoint: {GITHUB_ENDPOINT} and model: {MODEL_ID}")
136
- except TypeError as te:
137
- logger.error(f"TypeError configuring OpenAIServerModel: {te}. Trying with 'base_url' instead of 'api_base'.")
138
- # Fallback attempt using base_url if api_base caused a TypeError
139
  try:
140
  model = OpenAIServerModel(
141
  model_id=MODEL_ID,
142
  api_key=GITHUB_TOKEN,
143
- base_url=GITHUB_ENDPOINT # Use base_url
144
  )
145
- logger.info(f"Successfully configured OpenAIServerModel with GitHub endpoint using 'base_url'.")
146
  except Exception as e:
147
- logger.error(f"Failed to configure OpenAIServerModel with both 'api_base' and 'base_url': {e}")
148
  raise RuntimeError(f"Could not configure SmolAgents model for GitHub endpoint: {e}") from e
149
  except Exception as e:
150
  logger.error(f"Failed to configure OpenAIServerModel: {e}")
151
  raise RuntimeError(f"Could not configure SmolAgents model for GitHub endpoint: {e}") from e
152
 
153
-
154
  smart_agent = CodeAgent(
155
- tools=[search_tool, wiki_tool, summarize_tool],
156
- model=model # Pass the configured model instance
 
 
 
157
  )
158
 
159
  # --- Gradio Wrapper ---
160
 
161
  class BasicAgent:
162
  def __init__(self):
163
- # Updated log message
164
  logger.info(f"Initialized SmolAgent with GitHub Model: {MODEL_ID} via {GITHUB_ENDPOINT}")
165
 
166
  def __call__(self, question: str) -> str:
167
  if not question.strip():
168
  return "AGENT ERROR: empty question"
 
169
  prompt = instruction_prompt.strip() + "\n\nQUESTION: " + question.strip()
 
170
  try:
171
  # The agent uses the 'model' instance we configured above
172
- return smart_agent.run(prompt)
 
 
 
 
 
 
 
173
  except Exception as e:
174
  logger.exception("Agent run error")
175
- # Provide more specific error if possible, e.g., AuthenticationError from OpenAI client
176
  return f"AGENT ERROR: {e}"
177
 
178
  # --- Submission Logic ---
179
- # This part remains largely the same, as it interacts with the external scoring service (DEFAULT_API_URL)
180
- # It just uses the 'agent' which now internally calls GitHub Models.
181
 
182
  def run_and_submit_all(profile: gr.OAuthProfile | None):
183
  if not profile:
@@ -185,60 +210,101 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
185
 
186
  username = profile.username
187
  space_id = os.getenv("SPACE_ID", "")
188
- # Link to the code, unchanged
189
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
190
- # Instantiate the agent wrapper, which now uses the GitHub-configured model
191
- agent = BasicAgent()
192
 
193
  # fetch questions (unchanged)
194
  try:
195
  resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
196
  resp.raise_for_status()
197
- questions = resp.json() or []
 
 
 
 
 
198
  except Exception as e:
199
  logger.exception("Failed fetch")
200
  return f"Error fetching questions: {e}", None
201
 
202
  logs, payload = [], []
203
  for item in questions:
 
 
 
204
  tid = item.get("task_id")
205
  q = item.get("question")
206
  if not tid or not q:
 
207
  continue
208
- # Run the agent (now using GitHub Models)
209
- ans = agent(q)
210
- logs.append({"Task ID": tid, "Question": q, "Submitted Answer": ans})
211
- payload.append({"task_id": tid, "submitted_answer": ans})
 
 
 
 
 
 
 
 
 
 
 
 
 
212
 
213
  if not payload:
 
214
  return "Agent did not produce any answers.", pd.DataFrame(logs)
215
 
216
- # submit answers (unchanged)
 
217
  try:
 
 
218
  post = requests.post(
219
  f"{DEFAULT_API_URL}/submit",
220
- json={"username": username, "agent_code": agent_code, "answers": payload},
221
  timeout=60
222
  )
223
  post.raise_for_status()
224
  result = post.json()
 
 
 
 
 
 
 
 
225
  status = (
226
  f"Submission Successful!\n"
227
  f"User: {result.get('username')}\n"
228
- f"Score: {result.get('score','N/A')}%\n"
229
  f"({result.get('correct_count','?')}/"
230
  f"{result.get('total_attempted','?')})\n"
231
  f"Message: {result.get('message','')}"
232
  )
233
  return status, pd.DataFrame(logs)
 
 
 
 
 
 
 
234
  except Exception as e:
235
  logger.exception("Submit failed")
236
- return f"Submission Failed: {e}", pd.DataFrame(logs)
 
237
 
238
  # --- Gradio App ---
 
239
 
240
  with gr.Blocks() as demo:
241
- gr.Markdown("# SmolAgent GAIA Runner (using GitHub Models) 🚀") # Updated title
242
  gr.Markdown("""
243
  **Instructions:**
244
  1. Clone this space.
@@ -246,17 +312,18 @@ with gr.Blocks() as demo:
246
  3. Optionally, set `MODEL_ID` if you want to use a model other than the default (e.g., `openai/gpt-4o`). Verify the correct model identifier for GitHub Models.
247
  4. Log in to Hugging Face.
248
  5. Click **Run Evaluation & Submit All Answers**.
249
- """) # Updated instructions
250
  gr.LoginButton()
251
  btn = gr.Button("Run Evaluation & Submit All Answers")
252
  out_status = gr.Textbox(label="Status", lines=5, interactive=False)
253
- out_table = gr.DataFrame(label="Questions & Answers", wrap=True)
254
  btn.click(run_and_submit_all, outputs=[out_status, out_table])
255
 
256
  if __name__ == "__main__":
257
- # Check GITHUB_TOKEN presence before launching
258
  if not GITHUB_TOKEN:
259
  logger.error("GITHUB_TOKEN environment variable not set. Cannot start.")
260
  else:
261
  logger.info("Launching Gradio App...")
262
- demo.launch(debug=True, share=False) # Set debug=False for production
 
 
 
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
 
7
  from openai import OpenAI
8
 
9
  from smolagents import CodeAgent, DuckDuckGoSearchTool, tool
10
+ # Assuming OpenAIServerModel correctly handles base_url/api_base
11
+ from smolagents.models import OpenAIServerModel
12
 
13
  # --- Logging ---
14
  logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
 
18
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" # Keep this for submission
19
 
20
  # --- GitHub Models Configuration ---
 
21
  GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
22
  if not GITHUB_TOKEN:
 
 
23
  raise RuntimeError("Please set GITHUB_TOKEN in your Space secrets.")
24
 
 
25
  GITHUB_ENDPOINT = "https://models.github.ai/inference"
26
+ # Verify this model ID with GitHub Models documentation. Using mini for potentially faster/cheaper tests.
27
+ MODEL_ID = os.getenv("MODEL_ID", "openai/gpt-4o-mini") # Changed to mini based on logs
28
 
29
+ # --- Configure OpenAI SDK (Optional - for tools if needed, points to GitHub) ---
30
+ # If tools don't use this client directly, this might be redundant,
31
+ # but it doesn't hurt to have it configured consistently.
 
 
 
 
 
 
 
 
32
  try:
33
  client = OpenAI(
34
  base_url=GITHUB_ENDPOINT,
35
  api_key=GITHUB_TOKEN,
36
  )
 
 
37
  except Exception as e:
38
  logger.error(f"Failed to initialize OpenAI client for GitHub Models: {e}")
39
  # Decide how to handle this - raise error, log warning, etc.
40
+ # For now, just log and proceed, as the agent itself uses OpenAIServerModel
41
+ pass
42
 
43
 
44
  # --- Tools ---
45
+
46
+ # Instantiate the search tool ONCE
47
+ search_tool_instance = DuckDuckGoSearchTool()
48
+
49
+ @tool
50
+ def duckduckgo_search(query: str) -> str:
51
+ """
52
+ Performs a DuckDuckGo search for the given query and returns the results.
53
+ Args:
54
+ query (str): The search query.
55
+ Returns:
56
+ str: The search results.
57
+ """
58
+ try:
59
+ # Call the instantiated search tool
60
+ return search_tool_instance(query=query)
61
+ except Exception as e:
62
+ logger.exception(f"DuckDuckGoSearchTool failed for query: {query}")
63
+ return f"Search Error: {e}"
64
 
65
  @tool
66
  def summarize_query(query: str) -> str:
 
71
  Returns:
72
  str: A concise, improved version.
73
  """
74
+ # Assuming this doesn't need an LLM call. If it did, it would use 'client'.
 
75
  return f"Summarize and reframe: {query}"
76
 
77
  @tool
 
79
  """
80
  Fetches the summary extract of an English Wikipedia page.
81
  Args:
82
+ page (str): e.g. 'Mercedes_Sosa_discography' or 'Mercedes_Sosa'
83
  Returns:
84
+ str: The page’s extract text or an error message.
85
  """
86
+ # Make page names URL-safe (replace spaces with underscores)
87
+ page = page.replace(" ", "_")
88
  try:
89
  url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{page}"
90
+ headers = {'User-Agent': 'SmolAgentGAIARunner/1.0 (https://huggingface.co/spaces/YOUR_SPACE_ID)'} # Good practice
91
+ r = requests.get(url, headers=headers, timeout=10)
92
+ r.raise_for_status() # Raises HTTPError for 4xx/5xx
93
+ data = r.json()
94
+ extract = data.get("extract", "")
95
+ if not extract and data.get("title") and data.get("type") == "disambiguation":
96
+ # Handle disambiguation pages better if needed, maybe return links?
97
+ return f"Wikipedia page '{page}' is a disambiguation page. Try a more specific query."
98
+ elif not extract:
99
+ return f"Wikipedia page '{page}' found, but has no summary extract."
100
+ return extract
101
+ except requests.exceptions.HTTPError as e:
102
+ if e.response.status_code == 404:
103
+ logger.warning(f"Wikipedia page not found: {page}")
104
+ return f"Wikipedia page '{page}' not found."
105
+ else:
106
+ logger.exception(f"Wikipedia lookup failed for page: {page}")
107
+ return f"Wikipedia HTTP error {e.response.status_code}: {e}"
108
  except Exception as e:
109
+ logger.exception(f"Wikipedia lookup failed for page: {page}")
110
  return f"Wikipedia error: {e}"
111
 
112
+ # No longer need separate variable names for the functions if they match the @tool name
113
+ # wiki_tool = wikipedia_search # Redundant if function name is clear
114
+ # summarize_tool = summarize_query # Redundant
115
 
116
  # --- ReACT Prompt ---
117
+ # *** IMPORTANT: Update the prompt to use the NEW function name 'duckduckgo_search' ***
118
  instruction_prompt = """
119
  You are a ReACT agent with three tools:
120
+ duckduckgo_search(query: str)
121
  • wikipedia_search(page: str)
122
  • summarize_query(query: str)
123
  Internally, for each question:
124
  1. Thought: decide which tool to call.
125
  2. Action: call the chosen tool.
126
  3. Observation: record the result.
127
+ 4. If empty/irrelevant (e.g., 'page not found', empty search results, or 404 error):
128
+ Thought: Re-evaluate. Should I try summarizing the query first with summarize_query and then searching with duckduckgo_search? Or try a different Wikipedia page name? Or maybe the information isn't available via these tools.
129
+ Action: Call the chosen alternative tool (or conclude if necessary).
130
  Record new Observation.
131
+ 5. Thought: integrate observations. If multiple searches were needed, synthesize the results.
132
  Finally, output your answer with the following template:
133
  FINAL ANSWER: [YOUR FINAL ANSWER].
134
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
135
  If you are asked for a number, don't use commas to write your number nor use units such as $ or percent sign unless specified otherwise.
136
  If you are asked for a string, don't use articles or abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
137
  If you are asked for a comma separated list, apply the above rules depending on whether the element to be put in the list is a number or a string.
138
+ Only output the FINAL ANSWER line once all thinking is done.
139
  """
140
 
141
  # --- Build the Agent with OpenAIServerModel pointing to GitHub Models ---
 
 
 
 
 
 
 
142
  try:
143
+ # Try with base_url first, as it's the modern OpenAI SDK parameter
144
  model = OpenAIServerModel(
145
+ model_id=MODEL_ID,
146
+ api_key=GITHUB_TOKEN,
147
+ base_url=GITHUB_ENDPOINT # Use base_url
148
+ # You might need to pass model_kwargs if specific settings are required
149
+ # model_kwargs={'temperature': 0.7} # Example
 
 
150
  )
151
+ logger.info(f"Configured OpenAIServerModel with GitHub endpoint using 'base_url'.")
152
+ except TypeError:
153
+ logger.warning("Configuring OpenAIServerModel with 'base_url' failed, trying 'api_base'.")
154
+ # Fallback attempt using api_base if base_url caused a TypeError
155
  try:
156
  model = OpenAIServerModel(
157
  model_id=MODEL_ID,
158
  api_key=GITHUB_TOKEN,
159
+ api_base=GITHUB_ENDPOINT # Use api_base
160
  )
161
+ logger.info(f"Successfully configured OpenAIServerModel with GitHub endpoint using 'api_base'.")
162
  except Exception as e:
163
+ logger.error(f"Failed to configure OpenAIServerModel with both 'base_url' and 'api_base': {e}")
164
  raise RuntimeError(f"Could not configure SmolAgents model for GitHub endpoint: {e}") from e
165
  except Exception as e:
166
  logger.error(f"Failed to configure OpenAIServerModel: {e}")
167
  raise RuntimeError(f"Could not configure SmolAgents model for GitHub endpoint: {e}") from e
168
 
169
+ # *** Pass the list of FUNCTION objects to the CodeAgent ***
170
  smart_agent = CodeAgent(
171
+ tools=[duckduckgo_search, wikipedia_search, summarize_query], # Use the function names directly
172
+ model=model
173
+ # Check smolagents docs if there's a way to pass globals/context for execution
174
+ # e.g., execution_globals={'duckduckgo_search': duckduckgo_search, ...} might be needed
175
+ # but often passing the functions in the 'tools' list is enough if they are decorated correctly.
176
  )
177
 
178
  # --- Gradio Wrapper ---
179
 
180
  class BasicAgent:
181
  def __init__(self):
 
182
  logger.info(f"Initialized SmolAgent with GitHub Model: {MODEL_ID} via {GITHUB_ENDPOINT}")
183
 
184
  def __call__(self, question: str) -> str:
185
  if not question.strip():
186
  return "AGENT ERROR: empty question"
187
+ # Ensure the prompt ends correctly before adding the question
188
  prompt = instruction_prompt.strip() + "\n\nQUESTION: " + question.strip()
189
+ logger.info(f"Running agent with prompt:\n-------\n{prompt}\n-------")
190
  try:
191
  # The agent uses the 'model' instance we configured above
192
+ result = smart_agent.run(prompt)
193
+ logger.info(f"Agent returned: {result}")
194
+ # Basic check if the agent failed to produce a final answer
195
+ if "FINAL ANSWER:" not in result:
196
+ logger.warning("Agent did not produce a 'FINAL ANSWER:' block.")
197
+ # You might return a generic error or the raw output
198
+ return f"AGENT WARNING: No 'FINAL ANSWER:' found. Raw output: {result}"
199
+ return result # Return the full output including FINAL ANSWER:
200
  except Exception as e:
201
  logger.exception("Agent run error")
 
202
  return f"AGENT ERROR: {e}"
203
 
204
  # --- Submission Logic ---
205
+ # (No changes needed here, it uses the BasicAgent instance)
 
206
 
207
  def run_and_submit_all(profile: gr.OAuthProfile | None):
208
  if not profile:
 
210
 
211
  username = profile.username
212
  space_id = os.getenv("SPACE_ID", "")
 
213
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
214
+ agent = BasicAgent() # Instantiates the agent with the corrected tool setup
 
215
 
216
  # fetch questions (unchanged)
217
  try:
218
  resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
219
  resp.raise_for_status()
220
+ questions_data = resp.json()
221
+ if not isinstance(questions_data, list):
222
+ logger.error(f"Fetched questions is not a list: {questions_data}")
223
+ return "Error: Fetched questions format is incorrect.", None
224
+ questions = questions_data or []
225
+ logger.info(f"Fetched {len(questions)} questions.")
226
  except Exception as e:
227
  logger.exception("Failed fetch")
228
  return f"Error fetching questions: {e}", None
229
 
230
  logs, payload = [], []
231
  for item in questions:
232
+ if not isinstance(item, dict):
233
+ logger.warning(f"Skipping invalid question item: {item}")
234
+ continue
235
  tid = item.get("task_id")
236
  q = item.get("question")
237
  if not tid or not q:
238
+ logger.warning(f"Skipping question with missing task_id or question: {item}")
239
  continue
240
+
241
+ logger.info(f"Processing Task ID: {tid}, Question: {q}")
242
+ ans_raw = agent(q) # Run the agent
243
+
244
+ # Extract only the final answer part for submission
245
+ final_ans_marker = "FINAL ANSWER:"
246
+ if final_ans_marker in ans_raw:
247
+ submitted_ans = ans_raw.split(final_ans_marker, 1)[1].strip()
248
+ elif "AGENT ERROR:" in ans_raw or "AGENT WARNING:" in ans_raw:
249
+ submitted_ans = f"ERROR ({ans_raw})" # Submit error message
250
+ else:
251
+ logger.warning(f"Could not extract final answer from raw output for Task ID {tid}. Raw: {ans_raw}")
252
+ submitted_ans = f"ERROR (Could not parse agent output)" # Fallback
253
+
254
+ logger.info(f"Task ID: {tid}, Submitted Answer: {submitted_ans}")
255
+ logs.append({"Task ID": tid, "Question": q, "Submitted Answer": submitted_ans, "Raw Output": ans_raw})
256
+ payload.append({"task_id": tid, "submitted_answer": submitted_ans})
257
 
258
  if not payload:
259
+ logger.warning("Agent did not produce any valid answers to submit.")
260
  return "Agent did not produce any answers.", pd.DataFrame(logs)
261
 
262
+ logger.info(f"Submitting {len(payload)} answers...")
263
+ # submit answers (unchanged, uses extracted answer)
264
  try:
265
+ submit_payload = {"username": username, "agent_code": agent_code, "answers": payload}
266
+ logger.debug(f"Submission Payload: {submit_payload}") # Log payload for debugging if needed
267
  post = requests.post(
268
  f"{DEFAULT_API_URL}/submit",
269
+ json=submit_payload,
270
  timeout=60
271
  )
272
  post.raise_for_status()
273
  result = post.json()
274
+ logger.info(f"Submission Result: {result}")
275
+ score_percent = result.get('score', 'N/A')
276
+ # Ensure score is formatted reasonably if it's a number
277
+ try:
278
+ score_percent = f"{float(score_percent):.2f}" if score_percent != 'N/A' else 'N/A'
279
+ except (ValueError, TypeError):
280
+ pass # Keep as 'N/A' or original string if conversion fails
281
+
282
  status = (
283
  f"Submission Successful!\n"
284
  f"User: {result.get('username')}\n"
285
+ f"Score: {score_percent}%\n"
286
  f"({result.get('correct_count','?')}/"
287
  f"{result.get('total_attempted','?')})\n"
288
  f"Message: {result.get('message','')}"
289
  )
290
  return status, pd.DataFrame(logs)
291
+ except requests.exceptions.RequestException as e:
292
+ logger.exception("Submit failed")
293
+ # Try to get more info from the response if possible
294
+ error_details = str(e)
295
+ if e.response is not None:
296
+ error_details += f" | Status Code: {e.response.status_code} | Response: {e.response.text[:500]}" # Limit response size
297
+ return f"Submission Failed: {error_details}", pd.DataFrame(logs)
298
  except Exception as e:
299
  logger.exception("Submit failed")
300
+ return f"Submission Failed with unexpected error: {e}", pd.DataFrame(logs)
301
+
302
 
303
  # --- Gradio App ---
304
+ # (No changes needed here)
305
 
306
  with gr.Blocks() as demo:
307
+ gr.Markdown("# SmolAgent GAIA Runner (using GitHub Models) 🚀")
308
  gr.Markdown("""
309
  **Instructions:**
310
  1. Clone this space.
 
312
  3. Optionally, set `MODEL_ID` if you want to use a model other than the default (e.g., `openai/gpt-4o`). Verify the correct model identifier for GitHub Models.
313
  4. Log in to Hugging Face.
314
  5. Click **Run Evaluation & Submit All Answers**.
315
+ """)
316
  gr.LoginButton()
317
  btn = gr.Button("Run Evaluation & Submit All Answers")
318
  out_status = gr.Textbox(label="Status", lines=5, interactive=False)
319
+ out_table = gr.DataFrame(label="Questions & Answers", wrap=True, height=400) # Increased height maybe
320
  btn.click(run_and_submit_all, outputs=[out_status, out_table])
321
 
322
  if __name__ == "__main__":
 
323
  if not GITHUB_TOKEN:
324
  logger.error("GITHUB_TOKEN environment variable not set. Cannot start.")
325
  else:
326
  logger.info("Launching Gradio App...")
327
+ # share=True needed for public link as mentioned in logs
328
+ # debug=True provides more verbose Gradio logging if needed
329
+ demo.launch(debug=True, share=True)