AnhLee0 committed on
Commit
54083d9
·
verified ·
1 Parent(s): a37ada2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +206 -331
app.py CHANGED
@@ -1,348 +1,223 @@
1
  import os
2
- import gradio as gr
 
3
  import requests
4
- import re
5
- import urllib.parse
6
- import json
7
- from bs4 import BeautifulSoup
8
  import pandas as pd
9
- import openpyxl
10
- import xlrd
11
- import io
12
 
13
  # --- Constants ---
14
# Base URL of the course scoring service (questions, files, submissions).
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# SECURITY: the API key must never be committed to source control. It is read
# from the environment (e.g. a Space secret); any key that was previously
# committed here should be considered leaked and revoked.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
# Gemini text-generation endpoint; the key is appended as a query parameter.
GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"
17
-
18
- # --- Basic Agent Definition ---
19
- class BasicAgent:
20
- def __init__(self):
21
- self.headers = {
22
- "Content-Type": "application/json"
23
- }
24
- print("BasicAgent initialized with Gemini API and Excel reading tools.")
25
-
26
- def query_gemini(self, prompt: str) -> str:
27
- """Gọi API Gemini để trả lời câu hỏi."""
28
- payload = {
29
- "contents": [{
30
- "parts": [{"text": prompt}]
31
- }],
32
- "safetySettings": [
33
- {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
34
- {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
35
- {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
36
- {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"}
37
- ]
38
- }
39
- try:
40
- url = f"{GEMINI_API_URL}?key={GEMINI_API_KEY}"
41
- response = requests.post(url, headers=self.headers, json=payload, timeout=15)
42
- response.raise_for_status()
43
- data = response.json()
44
- answer = data["candidates"][0]["content"]["parts"][0]["text"]
45
- return answer.strip()
46
- except Exception as e:
47
- print(f"Gemini API error: {e}")
48
- return "Error querying Gemini API."
49
-
50
- def search_wikipedia(self, query: str) -> str:
51
- """Tìm kiếm thông tin chi tiết bằng Wikipedia API."""
52
- try:
53
- url = f"https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch={urllib.parse.quote(query)}&format=json"
54
- response = requests.get(url, timeout=10)
55
- response.raise_for_status()
56
- data = response.json()
57
- if data["query"]["search"]:
58
- page_id = data["query"]["search"][0]["pageid"]
59
- page_url = f"https://en.wikipedia.org/wiki?curid={page_id}"
60
- page_response = requests.get(page_url, timeout=10)
61
- soup = BeautifulSoup(page_response.text, "html.parser")
62
- paragraphs = soup.find_all("p")
63
- return " ".join([p.get_text() for p in paragraphs[:2]])
64
- return "No results found."
65
- except Exception as e:
66
- print(f"Wikipedia search error: {e}")
67
- return ""
68
-
69
- def search_bing(self, query: str) -> str:
70
- """Tìm kiếm thông tin chung bằng Bing."""
71
- try:
72
- url = f"https://www.bing.com/search?q={urllib.parse.quote(query)}"
73
- headers = {"User-Agent": "Mozilla/5.0"}
74
- response = requests.get(url, headers=headers, timeout=10)
75
- response.raise_for_status()
76
- soup = BeautifulSoup(response.text, "html.parser")
77
- results = soup.find_all("li", class_="b_algo")
78
- result_text = " ".join([result.get_text() for result in results[:3]])
79
- return result_text
80
- except Exception as e:
81
- print(f"Bing search error: {e}")
82
- return ""
83
-
84
- def get_file(self, task_id: str) -> str:
85
- """Tải tệp đính kèm từ API."""
86
- try:
87
- file_url = f"{DEFAULT_API_URL}/files/{task_id}"
88
- response = requests.get(file_url, timeout=10)
89
- response.raise_for_status()
90
- return response.content
91
- except Exception as e:
92
- print(f"Error fetching file for task {task_id}: {e}")
93
- return ""
94
-
95
- def read_excel_with_pandas(self, file_content: bytes) -> str:
96
- """Đọc file Excel bằng Pandas."""
97
- try:
98
- df = pd.read_excel(io.BytesIO(file_content))
99
- return df.to_csv(index=False)
100
- except Exception as e:
101
- print(f"Pandas read_excel error: {e}")
102
- return ""
103
-
104
- def read_excel_with_openpyxl(self, file_content: bytes) -> str:
105
- """Đọc file Excel bằng Openpyxl."""
106
- try:
107
- workbook = openpyxl.load_workbook(io.BytesIO(file_content))
108
- sheet = workbook.active
109
- data = []
110
- for row in sheet.rows:
111
- row_data = [cell.value if cell.value is not None else "" for cell in row]
112
- data.append(row_data)
113
- # Chuyển thành CSV để dễ xử lý
114
- df = pd.DataFrame(data)
115
- return df.to_csv(index=False)
116
- except Exception as e:
117
- print(f"Openpyxl read_excel error: {e}")
118
- return ""
119
-
120
- def read_excel_with_xlrd(self, file_content: bytes) -> str:
121
- """Đọc file Excel bằng xlrd (hỗ trợ định dạng cũ .xls)."""
122
- try:
123
- workbook = xlrd.open_workbook(file_contents=file_content)
124
- sheet = workbook.sheet_by_index(0)
125
- data = []
126
- for row_idx in range(sheet.nrows):
127
- row_data = [sheet.cell_value(row_idx, col_idx) for col_idx in range(sheet.ncols)]
128
- data.append(row_data)
129
- df = pd.DataFrame(data)
130
- return df.to_csv(index=False)
131
- except Exception as e:
132
- print(f"xlrd read_excel error: {e}")
133
- return ""
134
-
135
- def read_excel_combined(self, file_content: bytes) -> str:
136
- """Kết hợp nhiều phương pháp để đọc file Excel."""
137
- # Thử đọc bằng Pandas
138
- data = self.read_excel_with_pandas(file_content)
139
- if data:
140
- return data
141
-
142
- # Thử đọc bằng Openpyxl nếu Pandas thất bại
143
- data = self.read_excel_with_openpyxl(file_content)
144
- if data:
145
- return data
146
-
147
- # Thử đọc bằng xlrd nếu cả hai phương pháp trên thất bại
148
- data = self.read_excel_with_xlrd(file_content)
149
- if data:
150
- return data
151
-
152
- return ""
153
-
154
- def clean_answer(self, answer: str) -> str:
155
- """Chuẩn hóa câu trả lời: loại bỏ khoảng trắng thừa, chuẩn hóa định dạng."""
156
- if answer.startswith("Error"):
157
- return "Unknown"
158
- if "," in answer:
159
- items = [item.strip() for item in answer.split(",")]
160
- return ",".join(items)
161
- # Loại bỏ ký tự đặc biệt hoặc từ không mong muốn
162
- answer = re.sub(r"[^a-zA-Z0-9,.+ ]", "", answer)
163
- return answer.strip()
164
-
165
- def extract_number(self, text: str) -> str:
166
- """Trích xuất số từ văn bản."""
167
- numbers = re.findall(r"\b\d+\b", text)
168
- return numbers[0] if numbers else "Unknown"
169
-
170
- def extract_name(self, text: str) -> str:
171
- """Trích xuất tên riêng hoặc từ khóa."""
172
- words = text.split()
173
- for word in words:
174
- if word[0].isupper() and 3 <= len(word) <= 15:
175
- return word
176
- return "Unknown"
177
-
178
- def __call__(self, task_id: str, question: str) -> str:
179
- print(f"Processing question (task {task_id}): {question[:50]}...")
180
- file_content = self.get_file(task_id)
181
- file_content_text = file_content.decode('utf-8', errors='ignore') if isinstance(file_content, bytes) else file_content
182
- question_lower = question.lower()
183
-
184
- # Kết hợp thông tin từ câu hỏi và tệp đính kèm
185
- prompt = f"Question: {question}\nFile content (if any): {file_content_text}\nAnswer concisely and accurately, following any specific format instructions in the question (e.g., comma-separated list, no extra spaces, single number, or name):"
186
-
187
- # Phân loại và xử lý câu hỏi
188
- if "how many" in question_lower or "number of" in question_lower:
189
- if "mercedes sosa" in question_lower and "2000 and 2009" in question_lower:
190
- search_result = self.search_wikipedia("Mercedes Sosa discography")
191
- search_bing = self.search_bing("Mercedes Sosa studio albums 2000-2009")
192
- prompt += f"\nAdditional info from Wikipedia: {search_result}\nAdditional info from Bing: {search_bing}\nHow many studio albums did Mercedes Sosa release between 2000 and 2009 (inclusive)? Answer with a single number."
193
- answer = self.query_gemini(prompt)
194
- return self.clean_answer(answer)
195
-
196
- if "bird species" in question_lower and "youtube.com" in question_lower:
197
- prompt += "\nThe video content is unavailable, but estimate the highest number of bird species that might appear simultaneously in a typical bird-watching video. Answer with a single number."
198
- return self.clean_answer(self.query_gemini(prompt))
199
-
200
- if "at bats" in question_lower and "yankee" in question_lower and "1977" in question_lower:
201
- search_result = self.search_wikipedia("Reggie Jackson 1977 season")
202
- prompt += f"\nAdditional info: {search_result}\nHow many at bats did the Yankee with the most walks in the 1977 regular season have? Answer with a single number."
203
- answer = self.query_gemini(prompt)
204
- return self.clean_answer(answer)
205
-
206
- elif "who" in question_lower or "name" in question_lower:
207
- if "featured article" in question_lower and "dinosaur" in question_lower:
208
- search_result = self.search_wikipedia("Featured Article dinosaur November 2016 Wikipedia nominator")
209
- prompt += f"\nAdditional info: {search_result}\nWho nominated the Featured Article on a dinosaur in November 2016? Answer with the name only."
210
- answer = self.query_gemini(prompt)
211
- return self.clean_answer(answer)
212
-
213
- if "teal'c" in question_lower and "isn't that hot" in question_lower:
214
- prompt += "\nIn Stargate SG-1, what does Teal'c typically say in response to a rhetorical question like 'Isn't that hot?' Answer with the phrase only."
215
- return self.clean_answer(self.query_gemini(prompt))
216
-
217
- if "equine veterinarian" in question_lower and "libretext" in question_lower:
218
- prompt += "\nWhat is the surname of the equine veterinarian mentioned in LibreText's Introductory Chemistry 1.E Exercises? Answer with the surname only."
219
- return self.clean_answer(self.query_gemini(prompt))
220
-
221
- if "everybody loves raymond" in question_lower and "magda m" in question_lower:
222
- prompt += "\nWho did the actor who played Ray in the Polish version of Everybody Loves Raymond play in Magda M.? Answer with the first name only."
223
- return self.clean_answer(self.query_gemini(prompt))
224
-
225
- if "malko competition" in question_lower and "country that no longer exists" in question_lower:
226
- search_result = self.search_wikipedia("Malko Competition winners")
227
- search_bing = self.search_bing("Malko Competition winners after 1977 country no longer exists")
228
- prompt += f"\nAdditional info from Wikipedia: {search_result}\nAdditional info from Bing: {search_bing}\nConsider that countries no longer existing might include USSR, Yugoslavia, or Czechoslovakia. What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality is a country that no longer exists? Answer with the first name only."
229
- answer = self.query_gemini(prompt)
230
- return self.clean_answer(answer)
231
-
232
- elif "prove" in question_lower or "commutative" in question_lower:
233
- prompt += "\nGiven a table defining an operation * on the set S = {a, b, c, d, e}, where a*b = b, b*a = c, etc., provide the subset of S involved in counter-examples proving * is not commutative. Answer as a comma-separated list in alphabetical order (e.g., a,b,c)."
234
- return self.clean_answer(self.query_gemini(prompt))
235
-
236
- elif "code" in question_lower or "python" in question_lower:
237
- if file_content_text:
238
- prompt += f"\nAnalyze this Python code and answer: {question}\nCode:\n{file_content_text}\nAnswer with the final numeric output only."
239
- return self.clean_answer(self.query_gemini(prompt))
240
- return "42"
241
-
242
- elif "grocery list" in question_lower and "fruits and vegetables" in question_lower:
243
- prompt += "\nFrom the list: milk, eggs, flour, whole bean coffee, Oreos, sweet potatoes, fresh basil, plums, green beans, rice, corn, bell pepper, whole allspice, acorns, broccoli, celery, zucchini, lettuce, peanuts, create a list of vegetables (botanically correct, excluding fruits like bell pepper, corn). Answer as a comma-separated list in alphabetical order (e.g., broccoli,celery)."
244
- return self.clean_answer(self.query_gemini(prompt))
245
-
246
- elif "strawberry pie.mp3" in question_lower:
247
- prompt += "\nList the ingredients for a strawberry pie filling (not the crust). Answer as a comma-separated list in alphabetical order (e.g., lemon juice,ripe strawberries,salt,sugar)."
248
- return self.clean_answer(self.query_gemini(prompt))
249
-
250
- elif ".rewsna eht sa" in question:
251
- prompt += "\nThe sentence is reversed. It asks for the opposite of the word 'left'. Answer with the opposite word only."
252
- return self.clean_answer(self.query_gemini(prompt))
253
-
254
- elif "chess position" in question_lower:
255
- prompt += "\nProvide a chess move in algebraic notation that guarantees a win for black (e.g., Qe8)."
256
- return self.clean_answer(self.query_gemini(prompt))
257
-
258
- elif "nasa award number" in question_lower:
259
- prompt += "\nWhat is the NASA award number for R. G. Arendt's work mentioned in a Universe Today article on June 6, 2023? Answer with the award number only (e.g., NNX17AJ88G)."
260
- return self.clean_answer(self.query_gemini(prompt))
261
-
262
- elif "vietnamese specimens" in question_lower:
263
- prompt += "\nWhere were the Vietnamese specimens described by Kuznetzov in Nedoshivina's 2010 paper deposited? Answer with the city name only (e.g., Hanoi)."
264
- return self.clean_answer(self.query_gemini(prompt))
265
-
266
- elif "1928 summer olympics" in question_lower:
267
- prompt += "\nWhat country had the least number of athletes at the 1928 Summer Olympics? If there's a tie, return the first in alphabetical order. Answer with the IOC country code (e.g., MON)."
268
- return self.clean_answer(self.query_gemini(prompt))
269
-
270
- elif "taishō tamai" in question_lower:
271
- search_result = self.search_wikipedia("Hokkaido Nippon-Ham Fighters")
272
- search_bing = self.search_bing("Hokkaido Nippon-Ham Fighters roster July 2023 pitchers")
273
- prompt += f"\nAdditional info from Wikipedia: {search_result}\nAdditional info from Bing: {search_bing}\nTaishō Tamai is a pitcher for the Hokkaido Nippon-Ham Fighters. Who are the pitchers with the number before and after Taishō Tamai as of July 2023? Answer as a comma-separated list of last names (e.g., Suzuki,Tanaka). If specific information is unavailable, provide a reasonable estimate based on typical team rosters."
274
- answer = self.query_gemini(prompt)
275
- return self.clean_answer(answer)
276
-
277
- elif "excel file" in question_lower and "total sales" in question_lower:
278
- if file_content and not file_content_text.startswith("Error"):
279
- # Kết hợp nhiều phương pháp đọc file Excel
280
- excel_data = self.read_excel_combined(file_content)
281
- if excel_data:
282
- prompt += f"\nGiven sales data in CSV format: {excel_data}\nWhat were the total sales from food (not including drinks)? Answer in USD with two decimal places (e.g., 1500.00)."
283
- answer = self.query_gemini(prompt)
284
- return self.clean_answer(answer)
285
- # Giả định dữ liệu nếu không đọc được file Excel
286
- prompt += "\nAssume a typical fast-food chain has sales data for menu items, with categories 'food' and 'drinks'. Food items might include burgers, fries, sandwiches, totaling $1200 in sales, while drinks total $300. What were the total sales from food (not including drinks)? Answer in USD with two decimal places (e.g., 1200.00)."
287
- return self.clean_answer(self.query_gemini(prompt))
288
-
289
- elif "homework.mp3" in question_lower:
290
- prompt += "\nList the page numbers recommended for a Calculus mid-term, in ascending order, as a comma-separated list (e.g., 10,15,20). If the file content is unavailable, provide a reasonable estimate based on typical Calculus textbooks, which often recommend key chapters like integration or differentiation, typically covering pages 10 to 20."
291
- return self.clean_answer(self.query_gemini(prompt))
292
-
293
- # Câu hỏi chung
294
- prompt += "\nAnswer concisely and accurately, following any specific format instructions in the question."
295
- return self.clean_answer(self.query_gemini(prompt))
296
-
297
- # --- Rest of the code remains unchanged ---
298
- def run_and_submit_all(profile: gr.OAuthProfile | None):
299
- space_id = os.getenv("SPACE_ID")
300
- if not profile:
301
- return "Please Login to Hugging Face.", None
302
- username = profile.username
303
-
304
- api_url = DEFAULT_API_URL
305
- questions_url = f"{api_url}/questions"
306
- submit_url = f"{api_url}/submit"
307
-
308
- agent = BasicAgent()
309
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
310
-
311
- response = requests.get(questions_url, timeout=15)
312
- questions_data = response.json()
313
-
314
- results_log = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
  answers_payload = []
 
316
  for item in questions_data:
317
- task_id = item.get("task_id")
318
- question = item.get("question")
319
- if not task_id or not question:
 
 
320
  continue
321
- answer = agent(task_id, question)
322
- answers_payload.append({"task_id": task_id, "submitted_answer": answer})
323
- results_log.append({"Task ID": task_id, "Question": question, "Answer": answer})
324
-
325
- submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
326
- response = requests.post(submit_url, json=submission_data, timeout=60)
327
- result_data = response.json()
328
-
329
- status = (
330
- f"Submission Successful!\n"
331
- f"User: {result_data.get('username')}\n"
332
- f"Score: {result_data.get('score', 'N/A')}% "
333
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')})"
334
- )
335
- return status, pd.DataFrame(results_log)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
 
337
  # --- Gradio Interface ---
338
  with gr.Blocks() as demo:
339
- gr.Markdown("# Improved Agent Evaluation Runner (Gemini)")
 
 
 
 
 
 
 
 
 
 
340
  gr.LoginButton()
341
- run_button = gr.Button("Run Evaluation & Submit")
342
- status_output = gr.Textbox(label="Status", lines=5, interactive=False)
343
- results_table = gr.DataFrame(label="Results", wrap=True)
 
 
344
  run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
345
 
 
346
  if __name__ == "__main__":
 
 
 
 
 
347
  print("Launching Improved Agent...")
348
- demo.launch(debug=True, share=False)
 
1
  import os
2
+ import time
3
+ from typing import List, Tuple
4
  import requests
5
+ import gradio as gr
 
 
 
6
  import pandas as pd
 
 
 
7
 
8
  # --- Constants ---
9
# All scoring-service endpoints live under the same host.
_SCORING_API_BASE = "https://agents-course-unit4-scoring.hf.space"
QUESTIONS_URL = f"{_SCORING_API_BASE}/questions"  # GET: list of evaluation questions
SUBMIT_URL = f"{_SCORING_API_BASE}/submit"  # POST: answers for scoring
FILES_URL = f"{_SCORING_API_BASE}/files"  # GET <FILES_URL>/<task_id>: attachment for a question
# Local directory where downloaded question attachments are stored.
FILES_DIR = "files"
# Prompt handed to the agent at construction time.
SYSTEM_PROMPT = "You are a helpful AI assistant tasked with answering questions accurately."
14
+
15
+ # --- AssistantAgent Implementation ---
16
+ class AssistantAgent:
17
+ def __init__(self, system_prompt: str):
18
+ self.system_prompt = system_prompt
19
+
20
+ def __call__(self, question: str, file_path: str = None) -> str:
21
+ # Triển khai đơn giản: trả về câu trả lời dựa trên câu hỏi
22
+ # Thay bằng logic AI thực tế (ví dụ: gọi API Hugging Face, Grok, hoặc hình khác)
23
+ if file_path:
24
+ try:
25
+ with open(file_path, 'r') as f:
26
+ file_content = f.read()
27
+ return f"Answer to '{question}' with file content: {file_content}"
28
+ except Exception as e:
29
+ return f"Error reading file: {e}"
30
+ return f"Answer to '{question}' (prompt: {self.system_prompt})"
31
+
32
+ # --- Functions ---
33
def run_and_submit_all(profile: gr.OAuthProfile | None) -> Tuple[str, pd.DataFrame | None]:
    """Fetch all questions, run the AssistantAgent on them and submit the answers.

    Args:
        profile: Hugging Face OAuth profile of the logged-in user, or ``None``
            when nobody is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a DataFrame with
        one row per processed question, or ``None`` when the run stopped before
        the agent was executed (no login, fetch failure, agent init failure).
    """
    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None
    username = f"{profile.username}"
    print(f"User logged in: {username}")

    # Link to this Space's code, sent along with the submission.
    space_id = os.getenv("SPACE_ID", "AnhLee0/Final_Assignment_Template")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(f"{agent_code = }")

    # Attachments are downloaded here; exist_ok avoids a check-then-create race.
    os.makedirs(FILES_DIR, exist_ok=True)

    # Fetch Questions
    print(f"Fetching questions from: '{QUESTIONS_URL}'")
    try:
        response = requests.get(QUESTIONS_URL, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except requests.exceptions.JSONDecodeError as e:
        # Must come before RequestException: requests' JSONDecodeError is a
        # subclass of it, so the reverse order makes this branch unreachable.
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    if not questions_data:
        print("Fetched questions list is empty.")
        return "Fetched questions list is empty or invalid format.", None
    print(f"Fetched {len(questions_data)} questions.")

    # Initialize Agent
    try:
        agent = AssistantAgent(SYSTEM_PROMPT)
    except Exception as e:
        print(f"Error initializing agent: {e}")
        return f"Error initializing agent: {e}", None

    # Run Agent
    print(f"Running agent on {len(questions_data)} questions...")
    answers_payload, results_log = run_agent(agent, questions_data)
    results_df = pd.DataFrame(results_log)
    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", results_df
    print("Agent finished.")

    # Submit Answers
    print(f"Submitting {len(answers_payload)} answers to: {SUBMIT_URL}")
    return submit_answers(username, agent_code, answers_payload, results_df)
95
+
96
def run_agent(
    agent: "AssistantAgent",
    questions_data: List[dict],
    delay_seconds: float = 3,
) -> Tuple[List[dict], List[dict]]:
    """Run *agent* on every question and collect answers plus a display log.

    Args:
        agent: Callable invoked as ``agent(question_text, file_path)`` that
            returns the answer string.
        questions_data: Items with ``task_id`` and ``question`` keys and an
            optional ``file_name`` key naming an attachment to download.
        delay_seconds: Pause after each successfully answered question, used to
            stay under rate limits. A value of 0 or less disables the pause.

    Returns:
        ``(answers_payload, results_log)``. ``answers_payload`` holds one
        ``{"task_id", "submitted_answer"}`` dict per successful answer;
        ``results_log`` holds one row per processed question, including rows
        for questions whose processing raised (marked ``AGENT ERROR``).
    """
    answers_payload = []
    results_log = []
    for item in questions_data:
        question_uuid = item.get("task_id")
        question_text = item.get("question")
        question_file = item.get("file_name")
        # An empty question string is still processed; only a missing one is skipped.
        if not question_uuid or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            file_dst = None
            if question_file:
                file_dst = download_question_file(question_uuid, question_file)
                # Tell the agent where the attachment lives on disk.
                question_text += f" (attached file saved as '{file_dst}')"

            submitted_answer = agent(question_text, file_dst)
            answers_payload.append(
                {"task_id": question_uuid, "submitted_answer": submitted_answer}
            )
            result_log = {
                "Task ID": question_uuid,
                "Question": question_text,
                "Submitted Answer": submitted_answer,
            }
            if delay_seconds > 0:
                print(
                    f"Waiting {delay_seconds} seconds before next request to avoid rate limit..."
                )
                time.sleep(delay_seconds)
        except Exception as e:
            # Failed questions are logged for display but not submitted.
            print(f"Error running agent on task {question_uuid}: {e}")
            result_log = {
                "Task ID": question_uuid,
                "Question": question_text,
                "Submitted Answer": f"AGENT ERROR: {e}",
            }
        results_log.append(result_log)
    return answers_payload, results_log
132
+
133
def download_question_file(question_uuid: str, question_file: str) -> str:
    """Download the attachment of a question into ``FILES_DIR`` and return its path.

    An already downloaded file is reused. The download is written to a
    temporary ``.part`` file and renamed only once complete, so an interrupted
    transfer is never mistaken for a cached file on the next call.

    Args:
        question_uuid: Task id used to build the download URL.
        question_file: File name reported by the server. Only its final path
            component is used, so the file cannot be written outside ``FILES_DIR``.

    Returns:
        Path of the local file, ``"<FILES_DIR>/<file name>"``.

    Raises:
        RuntimeError: If the file name is unusable or the download fails.
    """
    # The name comes from a remote service: drop any directory components.
    safe_name = os.path.basename(question_file)
    if not safe_name:
        raise RuntimeError(f"Error downloading file: invalid file name '{question_file}'")

    file_url = f"{FILES_URL}/{question_uuid}"
    file_dst = f"{FILES_DIR}/{safe_name}"
    if os.path.exists(file_dst):
        return file_dst

    tmp_dst = f"{file_dst}.part"
    print(f"Downloading file from: '{file_url}'")
    try:
        os.makedirs(FILES_DIR, exist_ok=True)
        # Without a timeout a stalled server would hang the whole run.
        with requests.get(file_url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(tmp_dst, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        file.write(chunk)
        os.replace(tmp_dst, file_dst)
    except requests.exceptions.RequestException as e:
        raise RuntimeError(f"Error downloading file: {e}") from e
    except Exception as e:
        raise RuntimeError(f"An unexpected error occurred downloading file: {e}") from e
    finally:
        # Best-effort cleanup of a partial download; after a successful
        # os.replace the temporary file no longer exists.
        if os.path.exists(tmp_dst):
            try:
                os.remove(tmp_dst)
            except OSError:
                pass
    print(f"Downloaded file '{file_dst}'.")
    return file_dst
155
+
156
def submit_answers(
    username: str, agent_code: str, answers_payload: List[dict], results_df: pd.DataFrame
) -> Tuple[str, pd.DataFrame]:
    """POST the collected answers to ``SUBMIT_URL`` and describe the outcome.

    Args:
        username: Hugging Face user name; surrounding whitespace is stripped.
        agent_code: URL of the code that produced the answers.
        answers_payload: ``{"task_id", "submitted_answer"}`` dicts to submit.
        results_df: Per-question results table, returned unchanged.

    Returns:
        ``(status_message, results_df)`` where the message is either the
        success summary built from the server reply or a failure description.
    """

    def _failed(message: str) -> Tuple[str, pd.DataFrame]:
        # Every failure path logs its message and hands back the results table.
        print(message)
        return message, results_df

    try:
        payload = {
            "username": username.strip(),
            "agent_code": agent_code,
            "answers": answers_payload,
        }
        reply = requests.post(SUBMIT_URL, json=payload, timeout=60)
        reply.raise_for_status()
        result = reply.json()
        report_lines = [
            "Submission Successful!",
            f"User: {result.get('username')}",
            f"Overall Score: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)",
            f"Message: {result.get('message', 'No message received.')}",
        ]
        final_status = "\n".join(report_lines)
        print("Submission successful.")
        return final_status, results_df
    except requests.exceptions.HTTPError as http_err:
        failed_reply = http_err.response
        detail = f"Server responded with status {failed_reply.status_code}."
        try:
            # Prefer the "detail" field of a JSON error body when there is one.
            body = failed_reply.json()
            detail += f" Detail: {body.get('detail', failed_reply.text)}"
        except requests.exceptions.JSONDecodeError:
            detail += f" Response: {failed_reply.text[:500]}"
        return _failed(f"Submission Failed: {detail}")
    except requests.exceptions.Timeout:
        return _failed("Submission Failed: The request timed out.")
    except requests.exceptions.RequestException as net_err:
        return _failed(f"Submission Failed: Network error - {net_err}")
    except Exception as err:
        return _failed(f"An unexpected error occurred during submission: {err}")
193
 
194
  # --- Gradio Interface ---
195
# Build the Gradio UI: a title, short instructions, a login button, and a
# single action button whose result is shown in a status box and a table.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Log in to your Hugging Face account using the button below.
        2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run the agent, and submit answers.
        ---
        **Note:** This is a basic setup for the Final Assignment Template. Update AssistantAgent logic for better performance.
        """
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")
    # Read-only outputs filled from the (status, DataFrame) tuple returned by
    # run_and_submit_all.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No explicit inputs are wired here; the handler's `profile` argument is
    # presumably injected by Gradio from the login session (TODO confirm).
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
214
 
215
+ # --- Main ---
216
if __name__ == "__main__":
    # Imported locally: only the startup banner needs it.
    from datetime import datetime, timezone

    # Report the real startup time; a hard-coded date in the banner is
    # misleading when reading logs.
    startup_time = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
    print("\n" + "=" * 30 + f" Application Startup at {startup_time} " + "=" * 30)
    # Log the Space identity (with fallbacks) to make deployments easy to trace.
    space_id = os.getenv("SPACE_ID", "AnhLee0/Final_Assignment_Template")
    space_host = os.getenv("SPACE_HOST", "unknown")
    print(f"SPACE_ID: {space_id}")
    print(f"SPACE_HOST: {space_host}")
    print("Launching Improved Agent...")
    # Listen on all interfaces on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)