pmeyhoefer committed on
Commit
81d72bd
·
verified ·
1 Parent(s): d2d0f74

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +191 -149
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import os
2
  import logging
 
3
 
4
  import gradio as gr
5
  import requests
@@ -7,7 +8,6 @@ import pandas as pd
7
  from openai import OpenAI
8
 
9
  from smolagents import CodeAgent, DuckDuckGoSearchTool, tool
10
- # Assuming OpenAIServerModel correctly handles base_url/api_base
11
  from smolagents.models import OpenAIServerModel
12
 
13
  # --- Logging ---
@@ -15,7 +15,7 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(mess
15
  logger = logging.getLogger(__name__)
16
 
17
  # --- Constants ---
18
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" # Keep this for submission
19
 
20
  # --- GitHub Models Configuration ---
21
  GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
@@ -23,24 +23,19 @@ if not GITHUB_TOKEN:
23
  raise RuntimeError("Please set GITHUB_TOKEN in your Space secrets.")
24
 
25
  GITHUB_ENDPOINT = "https://models.github.ai/inference"
26
- # Verify this model ID with GitHub Models documentation. Using mini for potentially faster/cheaper tests.
27
- MODEL_ID = os.getenv("MODEL_ID", "openai/gpt-4o-mini") # Changed to mini based on logs
28
 
29
- # --- Configure OpenAI SDK (Optional - for tools if needed, points to GitHub) ---
30
- # If tools don't use this client directly, this might be redundant,
31
- # but it doesn't hurt to have it configured consistently.
32
  try:
33
  client = OpenAI(
34
  base_url=GITHUB_ENDPOINT,
35
  api_key=GITHUB_TOKEN,
36
  )
37
  except Exception as e:
38
- logger.error(f"Failed to initialize OpenAI client for GitHub Models: {e}")
39
- # Decide how to handle this - raise error, log warning, etc.
40
- # For now, just log and proceed, as the agent itself uses OpenAIServerModel
41
  pass
42
 
43
-
44
  # --- Tools ---
45
 
46
  # Instantiate the search tool ONCE
@@ -50,14 +45,23 @@ search_tool_instance = DuckDuckGoSearchTool()
50
  def duckduckgo_search(query: str) -> str:
51
  """
52
  Performs a DuckDuckGo search for the given query and returns the results.
 
53
  Args:
54
  query (str): The search query.
55
  Returns:
56
- str: The search results.
57
  """
 
58
  try:
59
  # Call the instantiated search tool
60
- return search_tool_instance(query=query)
 
 
 
 
 
 
 
61
  except Exception as e:
62
  logger.exception(f"DuckDuckGoSearchTool failed for query: {query}")
63
  return f"Search Error: {e}"
@@ -65,172 +69,188 @@ def duckduckgo_search(query: str) -> str:
65
  @tool
66
  def summarize_query(query: str) -> str:
67
  """
68
- Reframes an unclear search query to improve relevance.
69
  Args:
70
  query (str): The original search query.
71
  Returns:
72
- str: A concise, improved version.
73
  """
74
- # Assuming this doesn't need an LLM call. If it did, it would use 'client'.
 
75
  return f"Summarize and reframe: {query}"
76
 
77
  @tool
78
  def wikipedia_search(page: str) -> str:
79
  """
80
- Fetches the summary extract of an English Wikipedia page.
81
  Args:
82
- page (str): e.g. 'Mercedes_Sosa_discography' or 'Mercedes_Sosa'
83
  Returns:
84
- str: The page’s extract text or an error message.
85
  """
86
- # Make page names URL-safe (replace spaces with underscores)
87
- page = page.replace(" ", "_")
88
  try:
89
- url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{page}"
90
- headers = {'User-Agent': 'SmolAgentGAIARunner/1.0 (https://huggingface.co/spaces/YOUR_SPACE_ID)'} # Good practice
91
- r = requests.get(url, headers=headers, timeout=10)
 
 
92
  r.raise_for_status() # Raises HTTPError for 4xx/5xx
93
  data = r.json()
94
  extract = data.get("extract", "")
95
- if not extract and data.get("title") and data.get("type") == "disambiguation":
96
- # Handle disambiguation pages better if needed, maybe return links?
97
- return f"Wikipedia page '{page}' is a disambiguation page. Try a more specific query."
98
- elif not extract:
99
- return f"Wikipedia page '{page}' found, but has no summary extract."
 
 
 
 
 
 
 
 
100
  return extract
101
  except requests.exceptions.HTTPError as e:
102
  if e.response.status_code == 404:
103
- logger.warning(f"Wikipedia page not found: {page}")
104
- return f"Wikipedia page '{page}' not found."
105
  else:
106
- logger.exception(f"Wikipedia lookup failed for page: {page}")
107
- return f"Wikipedia HTTP error {e.response.status_code}: {e}"
 
 
 
108
  except Exception as e:
109
- logger.exception(f"Wikipedia lookup failed for page: {page}")
110
- return f"Wikipedia error: {e}"
111
 
112
- # No longer need separate variable names for the functions if they match the @tool name
113
- # wiki_tool = wikipedia_search # Redundant if function name is clear
114
- # summarize_tool = summarize_query # Redundant
115
 
116
  # --- ReACT Prompt ---
117
- # *** IMPORTANT: Update the prompt to use the NEW function name 'duckduckgo_search' ***
118
  instruction_prompt = """
119
  You are a ReACT agent with three tools:
120
- • duckduckgo_search(query: str)
121
  • wikipedia_search(page: str)
122
  • summarize_query(query: str)
123
  Internally, for each question:
124
- 1. Thought: decide which tool to call.
125
- 2. Action: call the chosen tool.
126
- 3. Observation: record the result.
127
- 4. If empty/irrelevant (e.g., 'page not found', empty search results, or 404 error):
128
- Thought: Re-evaluate. Should I try summarizing the query first with summarize_query and then searching with duckduckgo_search? Or try a different Wikipedia page name? Or maybe the information isn't available via these tools.
129
- Action: Call the chosen alternative tool (or conclude if necessary).
130
- Record new Observation.
131
- 5. Thought: integrate observations. If multiple searches were needed, synthesize the results.
132
- Finally, output your answer with the following template:
133
  FINAL ANSWER: [YOUR FINAL ANSWER].
134
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
135
- If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
136
- If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
137
- If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
138
- Only output the FINAL ANSWER line once all thinking is done.
139
  """
140
 
141
  # --- Build the Agent with OpenAIServerModel pointing to GitHub Models ---
142
  try:
143
- # Try with base_url first, as it's the modern OpenAI SDK parameter
144
  model = OpenAIServerModel(
145
  model_id=MODEL_ID,
146
  api_key=GITHUB_TOKEN,
147
- base_url=GITHUB_ENDPOINT # Use base_url
148
- # You might need to pass model_kwargs if specific settings are required
149
- # model_kwargs={'temperature': 0.7} # Example
150
  )
151
- logger.info(f"Configured OpenAIServerModel with GitHub endpoint using 'base_url'.")
152
- except TypeError:
153
- logger.warning("Configuring OpenAIServerModel with 'base_url' failed, trying 'api_base'.")
154
- # Fallback attempt using api_base if base_url caused a TypeError
155
- try:
156
- model = OpenAIServerModel(
157
- model_id=MODEL_ID,
158
- api_key=GITHUB_TOKEN,
159
- api_base=GITHUB_ENDPOINT # Use api_base
160
- )
161
- logger.info(f"Successfully configured OpenAIServerModel with GitHub endpoint using 'api_base'.")
162
- except Exception as e:
163
- logger.error(f"Failed to configure OpenAIServerModel with both 'base_url' and 'api_base': {e}")
164
- raise RuntimeError(f"Could not configure SmolAgents model for GitHub endpoint: {e}") from e
165
  except Exception as e:
166
- logger.error(f"Failed to configure OpenAIServerModel: {e}")
167
  raise RuntimeError(f"Could not configure SmolAgents model for GitHub endpoint: {e}") from e
168
 
169
- # *** Pass the list of FUNCTION objects to the CodeAgent ***
170
  smart_agent = CodeAgent(
171
- tools=[duckduckgo_search, wikipedia_search, summarize_query], # Use the function names directly
172
  model=model
173
- # Check smolagents docs if there's a way to pass globals/context for execution
174
- # e.g., execution_globals={'duckduckgo_search': duckduckgo_search, ...} might be needed
175
- # but often passing the functions in the 'tools' list is enough if they are decorated correctly.
176
  )
 
177
 
178
  # --- Gradio Wrapper ---
179
 
180
  class BasicAgent:
181
  def __init__(self):
182
- logger.info(f"Initialized SmolAgent with GitHub Model: {MODEL_ID} via {GITHUB_ENDPOINT}")
183
 
184
  def __call__(self, question: str) -> str:
185
- if not question.strip():
 
 
186
  return "AGENT ERROR: empty question"
187
- # Ensure the prompt ends correctly before adding the question
188
- prompt = instruction_prompt.strip() + "\n\nQUESTION: " + question.strip()
189
- logger.info(f"Running agent with prompt:\n-------\n{prompt}\n-------")
 
 
 
190
  try:
191
- # The agent uses the 'model' instance we configured above
 
192
  result = smart_agent.run(prompt)
193
- logger.info(f"Agent returned: {result}")
194
- # Basic check if the agent failed to produce a final answer
 
 
 
195
  if "FINAL ANSWER:" not in result:
196
- logger.warning("Agent did not produce a 'FINAL ANSWER:' block.")
197
- # You might return a generic error or the raw output
198
- return f"AGENT WARNING: No 'FINAL ANSWER:' found. Raw output: {result}"
199
- return result # Return the full output including FINAL ANSWER:
 
 
 
200
  except Exception as e:
201
- logger.exception("Agent run error")
202
- return f"AGENT ERROR: {e}"
 
 
203
 
204
  # --- Submission Logic ---
205
- # (No changes needed here, it uses the BasicAgent instance)
206
 
207
  def run_and_submit_all(profile: gr.OAuthProfile | None):
208
  if not profile:
209
- return "Please log in to Hugging Face.", None
 
210
 
211
  username = profile.username
212
  space_id = os.getenv("SPACE_ID", "")
213
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
214
- agent = BasicAgent() # Instantiates the agent with the corrected tool setup
 
 
 
215
 
216
- # fetch questions (unchanged)
217
  try:
218
- resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
 
219
  resp.raise_for_status()
220
  questions_data = resp.json()
221
  if not isinstance(questions_data, list):
222
- logger.error(f"Fetched questions is not a list: {questions_data}")
223
- return "Error: Fetched questions format is incorrect.", None
224
  questions = questions_data or []
225
- logger.info(f"Fetched {len(questions)} questions.")
226
  except Exception as e:
227
- logger.exception("Failed fetch")
228
  return f"Error fetching questions: {e}", None
229
 
 
 
 
 
230
  logs, payload = [], []
231
- for item in questions:
 
232
  if not isinstance(item, dict):
233
- logger.warning(f"Skipping invalid question item: {item}")
234
  continue
235
  tid = item.get("task_id")
236
  q = item.get("question")
@@ -238,92 +258,114 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
238
  logger.warning(f"Skipping question with missing task_id or question: {item}")
239
  continue
240
 
241
- logger.info(f"Processing Task ID: {tid}, Question: {q}")
242
  ans_raw = agent(q) # Run the agent
243
 
244
  # Extract only the final answer part for submission
245
  final_ans_marker = "FINAL ANSWER:"
 
246
  if final_ans_marker in ans_raw:
 
247
  submitted_ans = ans_raw.split(final_ans_marker, 1)[1].strip()
248
- elif "AGENT ERROR:" in ans_raw or "AGENT WARNING:" in ans_raw:
249
- submitted_ans = f"ERROR ({ans_raw})" # Submit error message
 
 
 
 
 
250
  else:
251
- logger.warning(f"Could not extract final answer from raw output for Task ID {tid}. Raw: {ans_raw}")
252
- submitted_ans = f"ERROR (Could not parse agent output)" # Fallback
253
-
254
- logger.info(f"Task ID: {tid}, Submitted Answer: {submitted_ans}")
255
- logs.append({"Task ID": tid, "Question": q, "Submitted Answer": submitted_ans, "Raw Output": ans_raw})
 
 
 
 
 
256
  payload.append({"task_id": tid, "submitted_answer": submitted_ans})
257
 
258
  if not payload:
259
  logger.warning("Agent did not produce any valid answers to submit.")
260
- return "Agent did not produce any answers.", pd.DataFrame(logs)
 
 
 
 
261
 
262
- logger.info(f"Submitting {len(payload)} answers...")
263
- # submit answers (unchanged, uses extracted answer)
 
264
  try:
265
  submit_payload = {"username": username, "agent_code": agent_code, "answers": payload}
266
- logger.debug(f"Submission Payload: {submit_payload}") # Log payload for debugging if needed
267
  post = requests.post(
268
  f"{DEFAULT_API_URL}/submit",
269
  json=submit_payload,
270
- timeout=60
271
  )
272
- post.raise_for_status()
273
  result = post.json()
274
- logger.info(f"Submission Result: {result}")
 
275
  score_percent = result.get('score', 'N/A')
276
- # Ensure score is formatted reasonably if it's a number
277
- try:
278
- score_percent = f"{float(score_percent):.2f}" if score_percent != 'N/A' else 'N/A'
279
- except (ValueError, TypeError):
280
- pass # Keep as 'N/A' or original string if conversion fails
281
 
282
  status = (
283
  f"Submission Successful!\n"
284
- f"User: {result.get('username')}\n"
285
  f"Score: {score_percent}%\n"
286
- f"({result.get('correct_count','?')}/"
287
- f"{result.get('total_attempted','?')})\n"
288
- f"Message: {result.get('message','')}"
289
  )
290
- return status, pd.DataFrame(logs)
 
 
291
  except requests.exceptions.RequestException as e:
292
- logger.exception("Submit failed")
293
- # Try to get more info from the response if possible
294
  error_details = str(e)
295
  if e.response is not None:
296
- error_details += f" | Status Code: {e.response.status_code} | Response: {e.response.text[:500]}" # Limit response size
297
- return f"Submission Failed: {error_details}", pd.DataFrame(logs)
298
  except Exception as e:
299
- logger.exception("Submit failed")
300
- return f"Submission Failed with unexpected error: {e}", pd.DataFrame(logs)
301
 
302
 
303
  # --- Gradio App ---
304
- # (No changes needed here)
305
 
306
  with gr.Blocks() as demo:
307
  gr.Markdown("# SmolAgent GAIA Runner (using GitHub Models) 🚀")
308
  gr.Markdown("""
309
  **Instructions:**
310
- 1. Clone this space.
311
- 2. In Settings Secrets, add `GITHUB_TOKEN` (your GitHub access token with appropriate permissions for GitHub Models).
312
- 3. Optionally, set `MODEL_ID` if you want to use a model other than the default (e.g., `openai/gpt-4o`). Verify the correct model identifier for GitHub Models.
313
- 4. Log in to Hugging Face.
314
- 5. Click **Run Evaluation & Submit All Answers**.
315
  """)
316
  gr.LoginButton()
317
  btn = gr.Button("Run Evaluation & Submit All Answers")
318
- out_status = gr.Textbox(label="Status", lines=5, interactive=False)
319
- out_table = gr.DataFrame(label="Questions & Answers", wrap=True, height=400) # Increased height maybe
320
- btn.click(run_and_submit_all, outputs=[out_status, out_table])
 
 
 
 
 
 
 
321
 
322
  if __name__ == "__main__":
323
  if not GITHUB_TOKEN:
324
- logger.error("GITHUB_TOKEN environment variable not set. Cannot start.")
325
- else:
326
- logger.info("Launching Gradio App...")
327
- # share=True needed for public link as mentioned in logs
328
- # debug=True provides more verbose Gradio logging if needed
329
- demo.launch(debug=True, share=True)
 
 
1
  import os
2
  import logging
3
+ import traceback # Import traceback for better error logging
4
 
5
  import gradio as gr
6
  import requests
 
8
  from openai import OpenAI
9
 
10
  from smolagents import CodeAgent, DuckDuckGoSearchTool, tool
 
11
  from smolagents.models import OpenAIServerModel
12
 
13
  # --- Logging ---
 
15
  logger = logging.getLogger(__name__)
16
 
17
  # --- Constants ---
18
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
19
 
20
  # --- GitHub Models Configuration ---
21
  GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
 
23
  raise RuntimeError("Please set GITHUB_TOKEN in your Space secrets.")
24
 
25
  GITHUB_ENDPOINT = "https://models.github.ai/inference"
26
+ MODEL_ID = os.getenv("MODEL_ID", "openai/gpt-4o-mini") # Using mini as per logs
 
27
 
28
+ # --- Configure OpenAI SDK (Optional) ---
29
+ # Less critical if tools don't directly use it
 
# Best-effort OpenAI SDK client pointed at the GitHub Models endpoint.
# Construction failures are logged instead of raised because this client is
# optional; the agent itself is built separately via OpenAIServerModel.
try:
    client = OpenAI(
        base_url=GITHUB_ENDPOINT,
        api_key=GITHUB_TOKEN,
    )
except Exception as e:
    logger.error(f"Ignoring error during optional OpenAI client init for GitHub Models: {e}")
    # Bind the name anyway so a later reference sees None rather than
    # failing with NameError.
    client = None
38
 
 
39
  # --- Tools ---
40
 
41
  # Instantiate the search tool ONCE
 
def duckduckgo_search(query: str) -> str:
    """
    Performs a DuckDuckGo search for the given query and returns the results.
    Use this for general web searches.
    Args:
        query (str): The search query.
    Returns:
        str: The search results, or an error message.
    """
    logger.info(f"Executing duckduckgo_search with query: {query}")
    try:
        # Delegate to the module-level search tool instance.
        hits = search_tool_instance(query=query)
        logger.info(f"DuckDuckGo search returned {len(hits)} characters.")
    except Exception as e:
        # Any failure is reported back to the caller as text, not raised.
        logger.exception(f"DuckDuckGoSearchTool failed for query: {query}")
        return f"Search Error: {e}"
    return hits
 
@tool
def summarize_query(query: str) -> str:
    """
    Reframes an unclear search query to improve relevance. Often useful before calling duckduckgo_search if the initial query is vague.
    Args:
        query (str): The original search query.
    Returns:
        str: A concise, improved version prepended with 'Summarize and reframe:'.
    """
    logger.info(f"Executing summarize_query with query: {query}")
    # No model call happens here: the input is only tagged with a fixed prefix.
    reframed = f"Summarize and reframe: {query}"
    return reframed
81
 
@tool
def wikipedia_search(page: str) -> str:
    """
    Fetches the summary extract of an English Wikipedia page. Use specific page titles.
    Args:
        page (str): The exact Wikipedia page title (e.g., 'Mercedes_Sosa', 'List_of_Mercedes_Sosa_albums'). Spaces will be replaced by underscores.
    Returns:
        str: The page’s extract text or an error message (e.g., 'Wikipedia page '[page]' not found.').
    """
    # Imported here so the function does not rely on a new file-level import.
    from urllib.parse import quote

    page_safe = page.replace(" ", "_")
    logger.info(f"Executing wikipedia_search with page: {page} (URL-safe: {page_safe})")
    # Percent-encode the title as ONE path segment. Without this, characters
    # such as '/', '?', '#' or '%' in a title (e.g. 'AC/DC') change the URL's
    # path or query and the lookup hits the wrong resource.
    encoded_title = quote(page_safe, safe="")
    try:
        url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{encoded_title}"
        # Identify the caller; SPACE_ID is read from the environment when set.
        space_id = os.getenv("SPACE_ID", "unknown-space")
        headers = {'User-Agent': f'SmolAgentGAIARunner/1.1 ({space_id})'}
        r = requests.get(url, headers=headers, timeout=12)
        r.raise_for_status()  # Raises HTTPError for 4xx/5xx
        data = r.json()
        extract = data.get("extract", "")
        if not extract:
            # No summary text: tell the agent why so it can try another title.
            page_title = data.get("title", page)
            page_type = data.get("type", "standard")
            if page_type == "disambiguation":
                logger.warning(f"Wikipedia page '{page_title}' is a disambiguation page.")
                # The optional description is appended in parentheses so the
                # sentence stays grammatical whether or not it is present.
                description = data.get("description")
                detail = f" ({description})" if description else ""
                return (
                    f"Wikipedia page '{page_title}' is a disambiguation page{detail}. "
                    "Try a more specific page title."
                )
            logger.warning(f"Wikipedia page '{page_title}' found, but has no summary extract.")
            return f"Wikipedia page '{page_title}' found, but has no summary extract."
        logger.info(f"Wikipedia search for '{page}' returned {len(extract)} characters.")
        return extract
    except requests.exceptions.HTTPError as e:
        if e.response.status_code == 404:
            logger.warning(f"Wikipedia page not found: {page_safe}")
            return f"Wikipedia page '{page_safe}' not found."
        logger.exception(f"Wikipedia lookup failed for page: {page_safe} with status {e.response.status_code}")
        return f"Wikipedia HTTP error {e.response.status_code} for page '{page_safe}': {e}"
    except requests.exceptions.RequestException as e:
        # Connection errors, timeouts and similar transport failures.
        logger.exception(f"Wikipedia network request failed for page: {page_safe}")
        return f"Wikipedia network error for page '{page_safe}': {e}"
    except Exception as e:
        # Anything else (e.g. a body that is not the expected JSON object).
        logger.exception(f"Unexpected Wikipedia lookup error for page: {page_safe}")
        return f"Unexpected Wikipedia error for page '{page_safe}': {e}"
129
 
 
 
 
130
 
# --- ReACT Prompt ---
# The tool names listed below must match the @tool-decorated function names
# in this file. Keep developer notes OUT of the string: everything between the
# triple quotes is sent to the model verbatim.
instruction_prompt = """
You are a ReACT agent with three tools:
• duckduckgo_search(query: str)
• wikipedia_search(page: str)
• summarize_query(query: str)
Internally, for each question:
1. Thought: Decide which tool is most appropriate. If searching the web, use duckduckgo_search. If looking for encyclopedic info on a specific topic/entity, try wikipedia_search first with the most likely page title. If a search or lookup fails or returns irrelevant info, think about why and try reformulating the query or using a different tool. Maybe use summarize_query on a complex question before searching.
2. Action: Call the chosen tool with the correct arguments. For wikipedia_search, use page titles like 'Entity_Name' or 'List_of_Entity_Albums'.
3. Observation: Record the result returned by the tool. Note error messages like 'page not found' or 'Search Error'.
4. Thought: Analyze the observation. Was the information found? Is it relevant? If not, what should be the next step? Try duckduckgo_search if Wikipedia failed? Try a different Wikipedia page title (e.g., 'List_of_Mercedes_Sosa_albums' instead of 'Mercedes_Sosa_discography')? If search results are messy, maybe try summarize_query on the topic and search again?
5. Action: Execute the next action based on the thought.
6. Repeat steps 3-5 until the answer is found or you determine it cannot be found with the available tools.
7. Thought: Synthesize all observations into a final answer based *only* on the information gathered.
Finally, output your answer with the following template *exactly*:
FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
If you are asked for a number, output only the number (e.g., 42). No commas in numbers (e.g., 1000 not 1,000). No units ($ or %).
If you are asked for a string, use minimal words, no articles (a, an, the), no abbreviations (e.g., New York City not NYC). Write digits as words (e.g., seven not 7) unless the question implies numerical output.
If you are asked for a comma separated list, apply the above rules to each element. Example: red,blue,three.
"""
153
 
# --- Build the Agent with OpenAIServerModel pointing to GitHub Models ---
try:
    model = OpenAIServerModel(
        model_id=MODEL_ID,
        api_key=GITHUB_TOKEN,
        # smolagents names the endpoint parameter `api_base`. An unrecognised
        # `base_url` keyword is absorbed by **kwargs instead of configuring
        # the client, so requests would not go to GITHUB_ENDPOINT.
        # NOTE(review): confirm against the installed smolagents version.
        api_base=GITHUB_ENDPOINT,
    )
    logger.info(f"Configured OpenAIServerModel(id={MODEL_ID}, endpoint={GITHUB_ENDPOINT})")
except Exception as e:
    # Without a model the agent cannot be built, so fail at import time with
    # the original exception chained for debugging.
    logger.exception("Failed to configure OpenAIServerModel")
    raise RuntimeError(f"Could not configure SmolAgents model for GitHub endpoint: {e}") from e
167
 
# Pass the list of FUNCTION objects decorated with @tool
smart_agent = CodeAgent(
    tools=[duckduckgo_search, wikipedia_search, summarize_query],
    model=model
)
# Collect tool names for the startup log without assuming `__name__` exists:
# smolagents agents keep their tools in a name -> tool mapping (iterating it
# yields plain strings), and tool objects expose `.name`. Both container
# shapes are handled so this log line can never abort module import.
_registered_tools = smart_agent.tools
if isinstance(_registered_tools, dict):
    _tool_names = list(_registered_tools)
else:
    _tool_names = [getattr(t, "name", repr(t)) for t in _registered_tools]
logger.info(f"CodeAgent initialized with tools: {_tool_names}")
174
 
175
  # --- Gradio Wrapper ---
176
 
class BasicAgent:
    """Callable wrapper that sends one question at a time to the module-level `smart_agent`."""

    def __init__(self):
        logger.info(f"BasicAgent initialized, using SmolAgent with model {MODEL_ID}")

    def __call__(self, question: str) -> str:
        """
        Run the agent on a single question.

        Args:
            question (str): The question text; surrounding whitespace is ignored.
        Returns:
            str: The agent's raw output when it contains 'FINAL ANSWER:',
                otherwise a string starting with 'AGENT ERROR:'. Exceptions
                raised by the agent run are caught and reported the same way.
        """
        # Treat None like an empty question instead of raising AttributeError.
        question = (question or "").strip()
        if not question:
            logger.error("Agent called with empty question.")
            return "AGENT ERROR: empty question"

        # The instructions come first, then the question under a fixed label.
        prompt = instruction_prompt.strip() + "\n\nQUESTION: " + question

        try:
            logger.info(f"Running agent for question: '{question}'")
            # The run result is not guaranteed to be a string (an agent can
            # hand back a number, for instance); normalise it so the
            # substring check below cannot raise TypeError.
            result = str(smart_agent.run(prompt))
            logger.info(f"Agent finished run for question: '{question}'")

            if "FINAL ANSWER:" not in result:
                logger.warning(f"Agent output for question '{question}' did not contain 'FINAL ANSWER:'. Raw output: {result}")
                # A fixed error string lets the caller detect the failure.
                return "AGENT ERROR: Malformed response - No 'FINAL ANSWER:' block found."
            return result

        except Exception as e:
            logger.exception(f"Agent run failed for question '{question}'")
            # Include the traceback in the returned text for debugging.
            tb_str = traceback.format_exc()
            return f"AGENT ERROR: Exception during run: {e}\nTraceback:\n{tb_str}"
214
 
215
  # --- Submission Logic ---
 
216
 
217
  def run_and_submit_all(profile: gr.OAuthProfile | None):
218
  if not profile:
219
+ logger.warning("Submission attempt failed: User not logged in.")
220
+ return "Please log in to Hugging Face to submit.", None
221
 
222
  username = profile.username
223
  space_id = os.getenv("SPACE_ID", "")
224
+ if not space_id:
225
+ logger.warning("SPACE_ID environment variable not set. Agent code URL will be incomplete.")
226
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Agent code URL unavailable (SPACE_ID not set)"
227
+ logger.info(f"Starting evaluation run for user '{username}'")
228
+ agent = BasicAgent()
229
 
230
+ # Fetch questions
231
  try:
232
+ logger.info(f"Fetching questions from {DEFAULT_API_URL}/questions")
233
+ resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=20)
234
  resp.raise_for_status()
235
  questions_data = resp.json()
236
  if not isinstance(questions_data, list):
237
+ logger.error(f"Fetched questions is not a list: {type(questions_data)}")
238
+ return f"Error: Fetched questions format is incorrect (expected list, got {type(questions_data)}).", None
239
  questions = questions_data or []
240
+ logger.info(f"Fetched {len(questions)} questions successfully.")
241
  except Exception as e:
242
+ logger.exception("Failed to fetch questions")
243
  return f"Error fetching questions: {e}", None
244
 
245
+ if not questions:
246
+ logger.warning("No questions fetched or questions list is empty.")
247
+ return "No questions were fetched from the server.", None
248
+
249
  logs, payload = [], []
250
+ question_count = len(questions)
251
+ for i, item in enumerate(questions):
252
  if not isinstance(item, dict):
253
+ logger.warning(f"Skipping invalid question item (not a dict): {item}")
254
  continue
255
  tid = item.get("task_id")
256
  q = item.get("question")
 
258
  logger.warning(f"Skipping question with missing task_id or question: {item}")
259
  continue
260
 
261
+ logger.info(f"Processing question {i+1}/{question_count} - Task ID: {tid}")
262
  ans_raw = agent(q) # Run the agent
263
 
264
  # Extract only the final answer part for submission
265
  final_ans_marker = "FINAL ANSWER:"
266
+ submitted_ans = f"ERROR (Agent did not produce output with {final_ans_marker})" # Default if parsing fails
267
  if final_ans_marker in ans_raw:
268
+ # Split and take the part *after* the marker
269
  submitted_ans = ans_raw.split(final_ans_marker, 1)[1].strip()
270
+ # Optional: Basic validation/cleanup of the extracted answer?
271
+ # e.g., remove leading/trailing quotes if not needed
272
+ # submitted_ans = submitted_ans.strip(' "')
273
+ elif "AGENT ERROR:" in ans_raw:
274
+ # If agent returned an error string, submit that
275
+ submitted_ans = ans_raw # Keep the AGENT ERROR message
276
+ logger.warning(f"Agent returned an error for Task ID {tid}: {submitted_ans}")
277
  else:
278
+ logger.warning(f"Could not extract final answer from raw output for Task ID {tid}. Raw: {ans_raw[:500]}...") # Log snippet
279
+
280
+ logger.info(f"Task ID: {tid}, Question: '{q}', Submitted Answer: '{submitted_ans}'")
281
+ # Store more info for the Gradio table, including the raw output for debugging
282
+ logs.append({
283
+ "Task ID": tid,
284
+ "Question": q,
285
+ "Submitted Answer": submitted_ans,
286
+ "Agent Raw Output": ans_raw # Show the full thought process in the table
287
+ })
288
  payload.append({"task_id": tid, "submitted_answer": submitted_ans})
289
 
290
  if not payload:
291
  logger.warning("Agent did not produce any valid answers to submit.")
292
+ # Check if logs have entries to display potential errors
293
+ if logs:
294
+ return "Agent ran but did not produce any answers in the expected format.", pd.DataFrame(logs)
295
+ else:
296
+ return "Agent did not produce any answers.", None
297
 
298
+
299
+ logger.info(f"Submitting {len(payload)} answers for user '{username}'...")
300
+ # Submit answers
301
  try:
302
  submit_payload = {"username": username, "agent_code": agent_code, "answers": payload}
303
+ # logger.debug(f"Submission Payload: {submit_payload}") # Careful logging PII
304
  post = requests.post(
305
  f"{DEFAULT_API_URL}/submit",
306
  json=submit_payload,
307
+ timeout=90 # Increased timeout for submission
308
  )
309
+ post.raise_for_status() # Check for HTTP errors from submission endpoint
310
  result = post.json()
311
+ logger.info(f"Submission successful. Result: {result}")
312
+
313
  score_percent = result.get('score', 'N/A')
314
+ try: # Format score nicely
315
+ score_percent = f"{float(score_percent):.2f}" if isinstance(score_percent, (int, float)) else score_percent
316
+ except (ValueError, TypeError): pass
 
 
317
 
318
  status = (
319
  f"Submission Successful!\n"
320
+ f"User: {result.get('username', 'N/A')}\n"
321
  f"Score: {score_percent}%\n"
322
+ f"Correct: {result.get('correct_count','?')} / Attempted: {result.get('total_attempted','?')}\n"
323
+ f"Message: {result.get('message','(No message)')}"
 
324
  )
325
+ # Update logs DataFrame with final status if needed, though usually not necessary
326
+ return status, pd.DataFrame(logs) # Return status and the detailed logs
327
+
328
  except requests.exceptions.RequestException as e:
329
+ logger.exception("Submission request failed")
 
330
  error_details = str(e)
331
  if e.response is not None:
332
+ error_details += f" | Status Code: {e.response.status_code} | Response: {e.response.text[:500]}"
333
+ return f"Submission Failed: {error_details}", pd.DataFrame(logs) # Return error and logs
334
  except Exception as e:
335
+ logger.exception("Submission failed with unexpected error")
336
+ return f"Submission Failed with unexpected error: {e}", pd.DataFrame(logs) # Return error and logs
337
 
338
 
339
  # --- Gradio App ---
 
340
 
with gr.Blocks() as demo:
    # Page title and usage instructions.
    gr.Markdown("# SmolAgent GAIA Runner (using GitHub Models) 🚀")
    gr.Markdown("""
    **Instructions:**
    1. Ensure `GITHUB_TOKEN` secret is set. Optionally set `MODEL_ID`.
    2. Log in to Hugging Face below.
    3. Click **Run Evaluation & Submit All Answers**.
    4. Check the Status and the Questions & Answers table for results. The raw agent output includes the thinking process.
    """)
    gr.LoginButton()
    btn = gr.Button("Run Evaluation & Submit All Answers")
    # Read-only status box filled from the first value returned by the handler.
    out_status = gr.Textbox(interactive=False, lines=5, label="Submission Status")
    # Results table; the headers mirror the keys of the per-question log rows.
    out_table = gr.DataFrame(
        headers=["Task ID", "Question", "Submitted Answer", "Agent Raw Output"],
        column_widths=["10%", "30%", "20%", "40%"],
        wrap=True,
        label="Questions & Answers Log",
    )
    # The handler's two return values map onto the status box and the table.
    btn.click(fn=run_and_submit_all, outputs=[out_status, out_table], api_name="run_submit")
362
 
if __name__ == "__main__":
    # A missing token is only reported here; the launch below still runs.
    token_missing = not GITHUB_TOKEN
    if token_missing:
        logger.error("GITHUB_TOKEN environment variable not set. Cannot start effectively.")
    logger.info("Launching Gradio App...")
    # Request verbose Gradio logging and a share link.
    launch_options = {"debug": True, "share": True}
    demo.launch(**launch_options)
371
+