pmeyhoefer committed on
Commit
d7730f0
·
verified ·
1 Parent(s): a0349ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -97
app.py CHANGED
@@ -4,13 +4,14 @@ import traceback
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
7
- from openai import OpenAI
8
  from smolagents import CodeAgent, DuckDuckGoSearchTool, tool
9
  from smolagents.models import OpenAIServerModel
10
 
 
11
  logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
12
  logger = logging.getLogger(__name__)
13
 
 
14
  SUBMISSION_URL = "https://agents-course-unit4-scoring.hf.space"
15
  GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
16
  if not GITHUB_TOKEN:
@@ -18,71 +19,53 @@ if not GITHUB_TOKEN:
18
  GITHUB_ENDPOINT = "https://models.github.ai/inference"
19
  MODEL_ID = os.getenv("MODEL_ID", "openai/gpt-4o-mini")
20
 
 
21
  try:
22
  search_tool_instance = DuckDuckGoSearchTool()
23
  logger.info("DuckDuckGoSearchTool initialized successfully.")
24
  except Exception as e:
25
- logger.error(f"Failed to instantiate DuckDuckGoSearchTool: {e}. Web search will not work.")
26
  search_tool_instance = None
27
 
28
  @tool
29
  def web_search(query: str) -> str:
30
- """
31
- Performs a web search using DuckDuckGo. Use this for general questions or current info.
32
- Args:
33
- query (str): The search query string.
34
- """
35
- logger.info(f"Executing web_search with query: '{query[:100]}...'")
36
  if search_tool_instance is None:
37
  return "Search Error: Tool not initialized."
38
  try:
39
  result = search_tool_instance(query=query)
40
- logger.info(f"web_search returned {len(result)} chars.")
41
- max_len = 3000
42
- return result[:max_len] + "... (truncated)" if len(result) > max_len else result
43
  except Exception as e:
44
- logger.exception(f"web_search failed for query: {query}")
45
  return f"Search Error: {e}"
46
 
47
  @tool
48
  def wikipedia_lookup(page_title: str) -> str:
49
- """
50
- Fetches the summary intro text of an English Wikipedia page. Use exact titles.
51
- Args:
52
- page_title (str): The exact title of the Wikipedia page (e.g., 'Albert Einstein').
53
- """
54
  page_safe = page_title.replace(" ", "_")
55
- logger.info(f"Executing wikipedia_lookup for page: '{page_title}' (URL: {page_safe})")
56
  try:
57
  url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{page_safe}"
58
  headers = {'User-Agent': f'GAIAgent/1.2 ({os.getenv("SPACE_ID", "unknown")})'}
59
  r = requests.get(url, headers=headers, timeout=15)
60
  r.raise_for_status()
61
  data = r.json()
62
- extract = data.get("extract", "")
63
- if extract:
64
- logger.info(f"Wikipedia found summary ({len(extract)} chars) for '{page_title}'.")
65
  return extract
66
- else:
67
- page_type = data.get("type", "standard")
68
- title = data.get("title", page_title)
69
- if page_type == "disambiguation":
70
- logger.warning(f"Wikipedia page '{title}' is disambiguation.")
71
- return f"Wikipedia Error: '{title}' is a disambiguation page. Try a more specific title."
72
- else:
73
- logger.warning(f"Wikipedia page '{title}' found but has no summary.")
74
- return f"Wikipedia Error: Page '{title}' found but has no summary."
75
  except requests.exceptions.HTTPError as e:
76
- if e.response.status_code == 404:
77
- logger.warning(f"Wikipedia page not found: {page_safe}")
78
- return f"Wikipedia Error: Page '{page_safe}' not found."
79
- else:
80
- logger.error(f"Wikipedia HTTP error {e.response.status_code} for {page_safe}")
81
- return f"Wikipedia Error: HTTP {e.response.status_code} for page '{page_safe}'."
82
  except Exception as e:
83
- logger.exception(f"wikipedia_lookup failed for page: {page_safe}")
84
- return f"Wikipedia Error: Unexpected error: {e}"
85
 
 
86
  REACT_INSTRUCTION_PROMPT = """You are a helpful assistant using tools to answer questions.
87
  Available Tools:
88
  - web_search(query: str): Searches the web. Use for general info or current events.
@@ -102,108 +85,123 @@ Formatting Rules for FINAL ANSWER:
102
  Let's begin!
103
  """
104
 
105
- logger.info(f"Initializing LLM connection: {MODEL_ID} @ {GITHUB_ENDPOINT}")
 
106
  try:
107
- logger.info("Attempting to configure OpenAIServerModel with 'api_base' (and no request_timeout)...")
108
  llm_model = OpenAIServerModel(
109
  model_id=MODEL_ID,
110
  api_key=GITHUB_TOKEN,
111
  api_base=GITHUB_ENDPOINT
112
- # Removed request_timeout=60
113
  )
114
- logger.info("LLM connection configured using 'api_base'.")
115
- except Exception as e:
116
- logger.exception("CRITICAL: Failed to configure OpenAIServerModel (tried with api_base)")
117
- raise RuntimeError(f"Could not configure SmolAgents model using api_base: {e}") from e
118
-
119
- logger.info("Initializing CodeAgent...")
120
- try:
121
  agent = CodeAgent(
122
  tools=[web_search, wikipedia_lookup],
123
  model=llm_model
124
  )
125
- logger.info("CodeAgent initialized OK.")
126
  except Exception as e:
127
- logger.exception("CRITICAL: Failed to initialize CodeAgent")
128
- raise RuntimeError(f"Could not initialize CodeAgent: {e}") from e
129
 
130
  def run_agent_on_question(question: str) -> str:
 
131
  question = question.strip()
132
- if not question: return "AGENT_ERROR: Question cannot be empty."
133
-
134
- full_prompt = REACT_INSTRUCTION_PROMPT.strip() + "\n\nQUESTION: " + question
135
- logger.info(f"--- Running Agent for Question: '{question}' ---")
136
- logger.info(f"CRITICAL_DEBUG: Using prompt beginning:\n{full_prompt[:400]}\n...")
137
 
 
138
  try:
139
- raw_result = agent.run(full_prompt)
140
- logger.info(f"Agent run completed. Output length: {len(raw_result)}")
141
- return raw_result
142
  except Exception as e:
143
- logger.exception(f"Agent run failed for question '{question}'")
144
- return f"AGENT_ERROR: Exception during run: {e}\n{traceback.format_exc()}"
145
 
146
  def evaluate_and_submit():
147
- logger.info("๐Ÿš€ Starting evaluation run...")
 
148
  username = os.getenv("HF_USERNAME", "unknown_user")
149
- if username == "unknown_user": logger.warning("Could not get HF username reliably.")
150
- logger.info(f"Running as user (best effort): {username}")
151
-
152
  try:
153
- resp = requests.get(f"{SUBMISSION_URL}/questions", timeout=20)
154
- resp.raise_for_status()
155
- questions = resp.json()
156
- if not isinstance(questions, list): raise ValueError("Invalid format")
157
- logger.info(f"โœ… Fetched {len(questions)} questions.")
158
  except Exception as e:
159
  logger.exception("Failed to fetch questions")
160
  return f"โŒ Error fetching questions: {e}", pd.DataFrame()
161
 
162
- if not questions: return "โ„น๏ธ No questions fetched.", pd.DataFrame()
 
163
 
 
164
  results_log = []
165
  answers_payload = []
 
166
  for i, item in enumerate(questions):
167
- task_id = item.get("task_id"); question_text = item.get("question")
168
- if not task_id or not question_text: continue
169
- logger.info(f"Processing Q {i+1}/{len(questions)} (ID: {task_id})...")
170
- raw_agent_output = run_agent_on_question(question_text)
171
- final_answer = "AGENT_ERROR: No 'FINAL ANSWER:' marker."
172
- marker = "FINAL ANSWER:";
173
- if marker in raw_agent_output: final_answer = raw_agent_output.split(marker, 1)[1].strip()
174
- elif "AGENT_ERROR:" in raw_agent_output: final_answer = raw_agent_output
175
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": final_answer, "Full Output": raw_agent_output})
 
 
 
 
 
 
 
 
 
 
 
 
176
  answers_payload.append({"task_id": task_id, "submitted_answer": final_answer})
177
 
178
  results_df = pd.DataFrame(results_log)
179
- if not answers_payload: return "โš ๏ธ Agent ran but produced no answers.", results_df
 
180
 
 
181
  logger.info(f"Submitting {len(answers_payload)} answers...")
182
- space_id = os.getenv("SPACE_ID", "NA"); agent_code_url = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id != "NA" else "NA"
183
- submit_data = {"username": username, "agent_code": agent_code_url, "answers": answers_payload}
 
184
  try:
185
- response = requests.post(f"{SUBMISSION_URL}/submit", json=submit_data, timeout=90)
186
- response.raise_for_status(); result = response.json()
187
- logger.info(f"โœ… Submission successful! Response: {result}")
188
- score = result.get('score', 'N/A'); score_str = f"{float(score):.2f}%" if isinstance(score, (int, float)) else str(score)
189
- status = (f"โœ… Success! Score: {score_str} ({result.get('correct_count','?')}/{result.get('total_attempted','?')}). Msg: {result.get('message','')}")
190
- return status, results_df
 
 
 
 
 
191
  except Exception as e:
192
- logger.exception("Submission failed")
193
  err_msg = f"โŒ Submission Failed: {e}"
194
- if hasattr(e, 'response') and e.response is not None: err_msg += f" | Response: {e.response.text[:300]}"
 
195
  return err_msg, results_df
196
 
197
- logger.info("Setting up Gradio interface...")
198
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
199
  gr.Markdown("# ๐Ÿš€ Agent Evaluation Runner ๐Ÿš€\nEnsure `GITHUB_TOKEN` secret is set. Click Run to start.")
200
  run_button = gr.Button("โ–ถ๏ธ Run Evaluation & Submit All Answers", variant="primary")
201
- status_textbox = gr.Textbox(label="๐Ÿ“Š Status", lines=4, interactive=False)
202
- results_df_display = gr.DataFrame(label="๐Ÿ“‹ Detailed Log", headers=["Task ID", "Question", "Submitted Answer", "Full Output"], wrap=True, column_widths=["10%", "25%", "20%", "45%"])
203
- run_button.click(fn=evaluate_and_submit, inputs=None, outputs=[status_textbox, results_df_display])
204
- logger.info("Gradio interface setup complete.")
 
 
 
 
205
 
206
  if __name__ == "__main__":
207
  logger.info("Launching Gradio application...")
208
  demo.launch(debug=True, share=False)
209
- logger.info("Gradio application launched.") __name__ == "__main__": logger.info("Launching Gradio application...") demo.launch(debug=True, share=False) logger.info("Gradio application launched.")
 
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
 
7
  from smolagents import CodeAgent, DuckDuckGoSearchTool, tool
8
  from smolagents.models import OpenAIServerModel
9
 
10
+ # Setup logging
11
  logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
12
  logger = logging.getLogger(__name__)
13
 
14
+ # Constants
15
  SUBMISSION_URL = "https://agents-course-unit4-scoring.hf.space"
16
  GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
17
  if not GITHUB_TOKEN:
 
19
  GITHUB_ENDPOINT = "https://models.github.ai/inference"
20
  MODEL_ID = os.getenv("MODEL_ID", "openai/gpt-4o-mini")
21
 
22
+ # Initialize search tool
23
  try:
24
  search_tool_instance = DuckDuckGoSearchTool()
25
  logger.info("DuckDuckGoSearchTool initialized successfully.")
26
  except Exception as e:
27
+ logger.error(f"Failed to initialize DuckDuckGoSearchTool: {e}")
28
  search_tool_instance = None
29
 
30
  @tool
31
  def web_search(query: str) -> str:
32
+ """Performs a web search using DuckDuckGo."""
33
+ logger.info(f"Searching: '{query[:50]}...'")
 
 
 
 
34
  if search_tool_instance is None:
35
  return "Search Error: Tool not initialized."
36
  try:
37
  result = search_tool_instance(query=query)
38
+ return result[:3000] + "... (truncated)" if len(result) > 3000 else result
 
 
39
  except Exception as e:
40
+ logger.exception("Search failed")
41
  return f"Search Error: {e}"
42
 
43
  @tool
44
  def wikipedia_lookup(page_title: str) -> str:
45
+ """Fetches the summary intro text of an English Wikipedia page."""
 
 
 
 
46
  page_safe = page_title.replace(" ", "_")
47
+ logger.info(f"Wikipedia lookup: '{page_title}'")
48
  try:
49
  url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{page_safe}"
50
  headers = {'User-Agent': f'GAIAgent/1.2 ({os.getenv("SPACE_ID", "unknown")})'}
51
  r = requests.get(url, headers=headers, timeout=15)
52
  r.raise_for_status()
53
  data = r.json()
54
+
55
+ if extract := data.get("extract", ""):
 
56
  return extract
57
+
58
+ title = data.get("title", page_title)
59
+ if data.get("type") == "disambiguation":
60
+ return f"Wikipedia Error: '{title}' is a disambiguation page. Try a more specific title."
61
+ return f"Wikipedia Error: Page '{title}' found but has no summary."
 
 
 
 
62
  except requests.exceptions.HTTPError as e:
63
+ status_code = e.response.status_code
64
+ return f"Wikipedia Error: {'Page not found' if status_code == 404 else f'HTTP {status_code}'} for '{page_title}'."
 
 
 
 
65
  except Exception as e:
66
+ return f"Wikipedia Error: {e}"
 
67
 
68
+ # Agent prompt
69
  REACT_INSTRUCTION_PROMPT = """You are a helpful assistant using tools to answer questions.
70
  Available Tools:
71
  - web_search(query: str): Searches the web. Use for general info or current events.
 
85
  Let's begin!
86
  """
87
 
88
+ # Initialize LLM and agent
89
+ logger.info(f"Initializing LLM and agent: {MODEL_ID}")
90
  try:
 
91
  llm_model = OpenAIServerModel(
92
  model_id=MODEL_ID,
93
  api_key=GITHUB_TOKEN,
94
  api_base=GITHUB_ENDPOINT
 
95
  )
96
+
 
 
 
 
 
 
97
  agent = CodeAgent(
98
  tools=[web_search, wikipedia_lookup],
99
  model=llm_model
100
  )
101
+ logger.info("Agent initialization complete")
102
  except Exception as e:
103
+ logger.exception("CRITICAL: Agent initialization failed")
104
+ raise RuntimeError(f"Agent initialization failed: {e}") from e
105
 
106
  def run_agent_on_question(question: str) -> str:
107
+ """Run the agent on a question and return the result."""
108
  question = question.strip()
109
+ if not question:
110
+ return "AGENT_ERROR: Empty question"
 
 
 
111
 
112
+ logger.info(f"Running agent on: '{question}'")
113
  try:
114
+ return agent.run(f"{REACT_INSTRUCTION_PROMPT.strip()}\n\nQUESTION: {question}")
 
 
115
  except Exception as e:
116
+ logger.exception("Agent run failed")
117
+ return f"AGENT_ERROR: {e}\n{traceback.format_exc()}"
118
 
119
  def evaluate_and_submit():
120
+ """Evaluate all questions and submit answers."""
121
+ logger.info("๐Ÿš€ Starting evaluation...")
122
  username = os.getenv("HF_USERNAME", "unknown_user")
123
+
124
+ # Fetch questions
 
125
  try:
126
+ questions = requests.get(f"{SUBMISSION_URL}/questions", timeout=20).json()
127
+ if not isinstance(questions, list):
128
+ raise ValueError("Invalid response format")
129
+ logger.info(f"โœ… Fetched {len(questions)} questions")
 
130
  except Exception as e:
131
  logger.exception("Failed to fetch questions")
132
  return f"โŒ Error fetching questions: {e}", pd.DataFrame()
133
 
134
+ if not questions:
135
+ return "โ„น๏ธ No questions received", pd.DataFrame()
136
 
137
+ # Process questions
138
  results_log = []
139
  answers_payload = []
140
+
141
  for i, item in enumerate(questions):
142
+ task_id, question_text = item.get("task_id"), item.get("question")
143
+ if not task_id or not question_text:
144
+ continue
145
+
146
+ logger.info(f"Processing Q{i+1}/{len(questions)}: ID={task_id}")
147
+ raw_output = run_agent_on_question(question_text)
148
+
149
+ # Extract final answer
150
+ if "FINAL ANSWER:" in raw_output:
151
+ final_answer = raw_output.split("FINAL ANSWER:", 1)[1].strip()
152
+ elif "AGENT_ERROR:" in raw_output:
153
+ final_answer = raw_output
154
+ else:
155
+ final_answer = "AGENT_ERROR: No final answer found"
156
+
157
+ results_log.append({
158
+ "Task ID": task_id,
159
+ "Question": question_text,
160
+ "Submitted Answer": final_answer,
161
+ "Full Output": raw_output
162
+ })
163
  answers_payload.append({"task_id": task_id, "submitted_answer": final_answer})
164
 
165
  results_df = pd.DataFrame(results_log)
166
+ if not answers_payload:
167
+ return "โš ๏ธ No answers generated", results_df
168
 
169
+ # Submit answers
170
  logger.info(f"Submitting {len(answers_payload)} answers...")
171
+ space_id = os.getenv("SPACE_ID", "NA")
172
+ agent_code_url = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id != "NA" else "NA"
173
+
174
  try:
175
+ response = requests.post(
176
+ f"{SUBMISSION_URL}/submit",
177
+ json={"username": username, "agent_code": agent_code_url, "answers": answers_payload},
178
+ timeout=90
179
+ ).json()
180
+
181
+ score = response.get('score', 'N/A')
182
+ score_str = f"{float(score):.2f}%" if isinstance(score, (int, float)) else str(score)
183
+ return (f"โœ… Success! Score: {score_str} "
184
+ f"({response.get('correct_count','?')}/{response.get('total_attempted','?')}). "
185
+ f"Msg: {response.get('message','')}"), results_df
186
  except Exception as e:
 
187
  err_msg = f"โŒ Submission Failed: {e}"
188
+ if hasattr(e, 'response') and e.response:
189
+ err_msg += f" | Response: {e.response.text[:300]}"
190
  return err_msg, results_df
191
 
192
+ # Gradio interface
193
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
194
  gr.Markdown("# ๐Ÿš€ Agent Evaluation Runner ๐Ÿš€\nEnsure `GITHUB_TOKEN` secret is set. Click Run to start.")
195
  run_button = gr.Button("โ–ถ๏ธ Run Evaluation & Submit All Answers", variant="primary")
196
+ status_box = gr.Textbox(label="๐Ÿ“Š Status", lines=4, interactive=False)
197
+ results_display = gr.DataFrame(
198
+ label="๐Ÿ“‹ Detailed Log",
199
+ headers=["Task ID", "Question", "Submitted Answer", "Full Output"],
200
+ wrap=True,
201
+ column_widths=["10%", "25%", "20%", "45%"]
202
+ )
203
+ run_button.click(fn=evaluate_and_submit, outputs=[status_box, results_display])
204
 
205
  if __name__ == "__main__":
206
  logger.info("Launching Gradio application...")
207
  demo.launch(debug=True, share=False)