AnhLee0 committed on
Commit
5415e44
·
verified ·
1 Parent(s): bc49549

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +205 -67
app.py CHANGED
@@ -7,6 +7,10 @@ import pandas as pd
7
  import mimetypes
8
  import speech_recognition as sr
9
  from pydub import AudioSegment
 
 
 
 
10
 
11
  # --- Constants ---
12
  QUESTIONS_URL = "https://agents-course-unit4-scoring.hf.space/questions"
@@ -14,7 +18,7 @@ SUBMIT_URL = "https://agents-course-unit4-scoring.hf.space/submit"
14
  FILES_URL = "https://agents-course-unit4-scoring.hf.space/files"
15
  FILES_DIR = "files"
16
  SYSTEM_PROMPT = "You are a helpful AI assistant tasked with answering questions accurately. Provide concise and accurate answers in the format requested by the question."
17
- GEMINI_API_KEY = "AIzaSyBvImpFo9o5Dz8OL_mfFEoRijeUyYBvXiI"
18
  GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={GEMINI_API_KEY}"
19
 
20
  # --- AssistantAgent Implementation ---
@@ -22,13 +26,21 @@ class AssistantAgent:
22
  def __init__(self, system_prompt: str):
23
  self.system_prompt = system_prompt
24
  self.headers = {"Content-Type": "application/json"}
 
 
25
 
26
  def call_gemini_api(self, prompt: str) -> str:
27
  retry_delay = 5 # Chờ 5 giây nếu gặp lỗi quota
28
  payload = {
29
  "contents": [{
30
  "parts": [{"text": prompt}]
31
- }]
 
 
 
 
 
 
32
  }
33
  for attempt in range(3):
34
  try:
@@ -45,6 +57,143 @@ class AssistantAgent:
45
  return f"Error calling Gemini API: {e}"
46
  return "Error: Exceeded retry attempts due to quota limits."
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  def check_commutative(self, table: str) -> str:
49
  try:
50
  rows = table.strip().split('\n')
@@ -85,7 +234,7 @@ class AssistantAgent:
85
  non_commutative.add(a)
86
  non_commutative.add(b)
87
 
88
- return ", ".join(sorted(non_commutative)) if non_commutative else "No counter-examples found"
89
  except Exception as e:
90
  return f"Error processing table: {e}"
91
 
@@ -94,85 +243,94 @@ class AssistantAgent:
94
  botanical_fruits = {"plums", "corn", "bell pepper", "zucchini"}
95
  vegetables = sorted([item for item in all_items if item not in botanical_fruits and item in {
96
  "sweet potatoes", "fresh basil", "green beans", "broccoli", "celery", "lettuce"}])
97
- return ", ".join(vegetables)
98
 
99
  def analyze_python_code(self, code: str) -> str:
100
  if "keep_trying" in code and "randint" in code:
101
  return "0"
102
  return "Error: Could not analyze Python code."
103
 
104
- def process_excel_sales(self, file_path: str) -> str:
 
 
 
 
 
105
  try:
106
- df = pd.read_excel(file_path, engine='openpyxl')
107
  if 'Category' in df.columns and 'Sales' in df.columns:
108
- food_sales = df[df['Category'] == 'Food']['Sales'].sum()
109
  return f"{food_sales:.2f}"
110
  else:
111
  return "Error: Excel file does not contain required columns (Category, Sales)."
112
  except Exception as e:
113
- return f"Error reading Excel file: {e}"
114
-
115
- def process_questions_batch(self, questions: List[Tuple[str, str]]) -> List[str]:
116
- batch_size = 5 # 5 câu hỏi mỗi batch
117
- answers = []
118
- for i in range(0, len(questions), batch_size):
119
- batch = questions[i:i + batch_size]
120
- prompt = f"{self.system_prompt}\nAnswer the following questions concisely:\n"
121
- for idx, (question, _) in enumerate(batch, 1):
122
- prompt += f"{idx}. {question}\n"
123
-
124
- batch_answers = self.call_gemini_api(prompt)
125
- if "Error" in batch_answers:
126
- answers.extend([batch_answers] * len(batch))
127
- else:
128
- batch_answers = batch_answers.split('\n')
129
- for idx in range(len(batch)):
130
- answer = batch_answers[idx].split('. ', 1)[1] if idx < len(batch_answers) and '. ' in batch_answers[idx] else "Error: Could not parse answer."
131
- answers.append(answer)
132
- if i + batch_size < len(questions):
133
- print("Waiting 5 seconds before next batch to avoid rate limit...")
134
- time.sleep(5) # Giảm từ 60 giây xuống 5 giây
135
- return answers
136
 
137
- def process_file(self, question: str, file_path: str) -> str:
138
  mime_type, _ = mimetypes.guess_type(file_path)
139
  if mime_type and mime_type.startswith('text'):
140
  try:
141
- with open(file_path, 'r', encoding='utf-8') as f:
142
- file_content = f.read()
143
  if file_path.endswith('.py') and "What is the final numeric output" in question:
144
- return self.analyze_python_code(file_content)
145
- return f"{question}\nFile content:\n{file_content}"
146
  except UnicodeDecodeError as e:
147
  return f"Error reading file: {e}. File may not be a valid text file."
148
  except Exception as e:
149
  return f"Error reading file: {e}"
 
150
  elif mime_type and mime_type == 'audio/mpeg':
151
  try:
152
- audio = AudioSegment.from_mp3(file_path)
153
- wav_path = file_path.replace('.mp3', '.wav')
 
 
 
154
  audio.export(wav_path, format="wav")
155
 
156
  recognizer = sr.Recognizer()
157
  with sr.AudioFile(wav_path) as source:
158
  audio_data = recognizer.record(source)
159
  text = recognizer.recognize_google(audio_data)
 
160
  os.remove(wav_path)
161
  return f"{question}\nAudio transcript: {text}"
162
  except Exception as e:
163
  return f"Error processing audio file: {e}"
 
164
  elif mime_type and mime_type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
165
  if "total sales" in question.lower():
166
- return self.process_excel_sales(file_path)
167
- try:
168
- df = pd.read_excel(file_path, engine='openpyxl')
169
- file_content = df.to_string(index=False)
170
- return f"{question}\nExcel content:\n{file_content}"
171
- except Exception as e:
172
- return f"Error reading Excel file: {e}"
173
  else:
174
  return "Error: Gemini API does not support non-text files (e.g., images). Please provide a text description instead."
175
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  def __call__(self, question: str, file_path: str = None) -> str:
177
  if "provide the subset of S involved in any possible counter-examples" in question:
178
  table = question.split("provide the subset")[0].strip()
@@ -254,10 +412,10 @@ def run_agent(agent: AssistantAgent, questions_data: List[dict]) -> Tuple[List[d
254
  print(f"Skipping item with missing task_id or question: {item}")
255
  continue
256
 
257
- file_dst = None
258
  if question_file:
259
- file_dst = download_question_file(question_uuid, question_file)
260
- processed_question = agent(question_text, file_dst)
261
  else:
262
  processed_question = agent(question_text, None)
263
 
@@ -280,26 +438,6 @@ def run_agent(agent: AssistantAgent, questions_data: List[dict]) -> Tuple[List[d
280
 
281
  return answers_payload, results_log
282
 
283
- def download_question_file(question_uuid: str, question_file: str) -> str:
284
- try:
285
- file_url = f"{FILES_URL}/{question_uuid}"
286
- file_dst = f"{FILES_DIR}/{question_file}"
287
- if os.path.exists(file_dst):
288
- return file_dst
289
- print(f"Downloading file from: '{file_url}'")
290
- with requests.get(file_url, stream=True) as response:
291
- response.raise_for_status()
292
- with open(file_dst, "wb") as file:
293
- for chunk in response.iter_content(chunk_size=8192):
294
- if chunk:
295
- file.write(chunk)
296
- print(f"Downloaded file '{file_dst}'.")
297
- return file_dst
298
- except requests.exceptions.RequestException as e:
299
- raise RuntimeError(f"Error downloading file: {e}")
300
- except Exception as e:
301
- raise RuntimeError(f"An unexpected error occurred downloading file: {e}")
302
-
303
  def submit_answers(
304
  username: str, agent_code: str, answers_payload: List[dict], results_df: pd.DataFrame
305
  ) -> Tuple[str, pd.DataFrame]:
 
7
  import mimetypes
8
  import speech_recognition as sr
9
  from pydub import AudioSegment
10
+ import io
11
+ import openpyxl
12
+ import xlrd
13
+ from bs4 import BeautifulSoup
14
 
15
  # --- Constants ---
16
  QUESTIONS_URL = "https://agents-course-unit4-scoring.hf.space/questions"
 
18
  FILES_URL = "https://agents-course-unit4-scoring.hf.space/files"
19
  FILES_DIR = "files"
20
  SYSTEM_PROMPT = "You are a helpful AI assistant tasked with answering questions accurately. Provide concise and accurate answers in the format requested by the question."
21
+ GEMINI_API_KEY = "AIzaSyBO46AIuY3Lmq3-k2bZkABgc0gL6A1RV20"
22
  GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={GEMINI_API_KEY}"
23
 
24
  # --- AssistantAgent Implementation ---
 
26
  def __init__(self, system_prompt: str):
27
  self.system_prompt = system_prompt
28
  self.headers = {"Content-Type": "application/json"}
29
+ if not os.path.exists(FILES_DIR):
30
+ os.makedirs(FILES_DIR)
31
 
32
  def call_gemini_api(self, prompt: str) -> str:
33
  retry_delay = 5 # Chờ 5 giây nếu gặp lỗi quota
34
  payload = {
35
  "contents": [{
36
  "parts": [{"text": prompt}]
37
+ }],
38
+ "safetySettings": [
39
+ {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
40
+ {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
41
+ {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
42
+ {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"}
43
+ ]
44
  }
45
  for attempt in range(3):
46
  try:
 
57
  return f"Error calling Gemini API: {e}"
58
  return "Error: Exceeded retry attempts due to quota limits."
59
 
60
def search_wikipedia(self, query: str) -> str:
    """Return an excerpt of the top English-Wikipedia search hit for *query*.

    The MediaWiki search API is queried first; the best hit's page is then
    fetched and the text of its first two non-empty paragraphs is returned.

    Args:
        query: Free-text search terms.

    Returns:
        The excerpt, "No results found." when the search has no hits, or an
        empty string when any request or parsing step fails.
    """
    try:
        # Let requests build and escape the query string instead of
        # concatenating it by hand.
        search_response = requests.get(
            "https://en.wikipedia.org/w/api.php",
            params={
                "action": "query",
                "list": "search",
                "srsearch": query,
                "format": "json",
            },
            timeout=10,
        )
        search_response.raise_for_status()
        hits = search_response.json().get("query", {}).get("search", [])
        if not hits:
            return "No results found."

        page_response = requests.get(
            "https://en.wikipedia.org/wiki",
            params={"curid": hits[0]["pageid"]},
            timeout=10,
        )
        # Without this an HTTP error page would be parsed as if it were the article.
        page_response.raise_for_status()
        soup = BeautifulSoup(page_response.text, "html.parser")
        texts = (p.get_text().strip() for p in soup.find_all("p"))
        # Paragraphs without text add nothing to the excerpt, so drop them
        # before taking the first two.
        return " ".join([text for text in texts if text][:2])
    except Exception as e:
        # Best-effort lookup: callers receive "" and carry on.
        print(f"Wikipedia search error: {e}")
        return ""
78
+
79
def search_bing(self, query: str) -> str:
    """Return the text of the first three Bing result entries for *query*.

    Args:
        query: Free-text search terms.

    Returns:
        The space-joined text of up to three ``<li class="b_algo">`` elements
        of the result page, or an empty string when the request or parsing
        fails (or when the page contains no such elements).
    """
    try:
        response = requests.get(
            "https://www.bing.com/search",
            # requests escapes the query; no manual URL quoting is needed.
            params={"q": query},
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=10,
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        top_results = soup.find_all("li", class_="b_algo")[:3]
        return " ".join(result.get_text() for result in top_results)
    except Exception as e:
        # Best-effort lookup: callers receive "" and carry on.
        print(f"Bing search error: {e}")
        return ""
93
+
94
def download_file_from_url(self, file_url: str, file_dst: str) -> bool:
    """Stream *file_url* to *file_dst*.

    The payload is written to a sibling ``.part`` file and moved into place
    only after the whole body has been received, so an interrupted download
    never leaves a truncated file at *file_dst*.

    Args:
        file_url: URL to download.
        file_dst: Destination path on disk.

    Returns:
        True when the file was fully written, False on any error.
    """
    import contextlib

    partial_path = f"{file_dst}.part"
    try:
        with requests.get(file_url, stream=True, timeout=15) as response:
            response.raise_for_status()
            with open(partial_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        file.write(chunk)
        os.replace(partial_path, file_dst)
        print(f"Downloaded file '{file_dst}'.")
        return True
    except Exception as e:
        print(f"Error downloading file from URL {file_url}: {e}")
        # Remove whatever was written so far; a missing file is fine.
        with contextlib.suppress(OSError):
            os.remove(partial_path)
        return False
108
+
109
def get_file(self, task_id: str, question_file: str) -> Tuple[bytes, str]:
    """Fetch the attachment of a task, using FILES_DIR as a cache.

    If the file is already present in FILES_DIR it is read from disk.
    Otherwise it is requested from ``FILES_URL/<task_id>``. When the response
    body is nothing but a single http(s) URL, that URL is downloaded instead.

    Args:
        task_id: Identifier appended to FILES_URL.
        question_file: File name under which the attachment is stored.

    Returns:
        ``(content, path)`` on success, ``(b"", "")`` on any failure.
    """
    try:
        # Keep only the final path component so the name cannot point outside
        # FILES_DIR (presumably it comes from the remote question payload).
        safe_name = os.path.basename(question_file)
        if not safe_name:
            raise ValueError(f"invalid attachment name: {question_file!r}")
        os.makedirs(FILES_DIR, exist_ok=True)
        file_url = f"{FILES_URL}/{task_id}"
        file_dst = os.path.join(FILES_DIR, safe_name)

        if os.path.exists(file_dst):
            with open(file_dst, "rb") as f:
                return f.read(), file_dst

        print(f"Downloading file from: '{file_url}'")
        response = requests.get(file_url, timeout=15)
        response.raise_for_status()
        content = response.content

        # The endpoint may answer with a link instead of the file itself.
        # Only a body that is exactly one http(s) URL counts as such a link;
        # surrounding whitespace is removed so the follow-up request is valid.
        body_text = content.decode("utf-8", errors="ignore").strip()
        is_single_url = (
            body_text.startswith(("http://", "https://"))
            and len(body_text.split()) == 1
        )
        if is_single_url:
            if self.download_file_from_url(body_text, file_dst):
                with open(file_dst, "rb") as f:
                    return f.read(), file_dst
            return b"", ""

        # Regular case: the response body is the file.
        with open(file_dst, "wb") as file:
            file.write(content)
        return content, file_dst
    except Exception as e:
        print(f"Error fetching file for task {task_id}: {e}")
        return b"", ""
138
+
139
def read_excel_with_pandas(self, file_content: bytes) -> str:
    """Convert an in-memory .xlsx workbook to CSV text using pandas.

    Args:
        file_content: Raw bytes of the workbook.

    Returns:
        CSV text (no index column) of the sheet pandas reads by default, or
        an empty string when the payload is empty or cannot be parsed.
    """
    # An empty payload can never parse; skip the attempt and its error log.
    if not file_content:
        return ""
    try:
        frame = pd.read_excel(io.BytesIO(file_content), engine="openpyxl")
        return frame.to_csv(index=False)
    except Exception as e:
        print(f"Pandas read_excel error: {e}")
        return ""
147
+
148
def read_excel_with_openpyxl(self, file_content: bytes) -> str:
    """Convert the active sheet of an .xlsx workbook to CSV text via openpyxl.

    The first sheet row is used as the CSV header so the output has the same
    layout as the pandas reader (a header-less DataFrame would otherwise emit
    a synthetic "0,1,2,..." header line). Empty cells become empty strings.

    Args:
        file_content: Raw bytes of the workbook.

    Returns:
        CSV text, or an empty string when the payload is empty, the sheet has
        no rows, or the workbook cannot be parsed.
    """
    if not file_content:
        return ""
    try:
        # data_only=True yields stored cell values rather than formula text.
        workbook = openpyxl.load_workbook(io.BytesIO(file_content), data_only=True)
        try:
            rows = [
                ["" if value is None else value for value in row]
                for row in workbook.active.iter_rows(values_only=True)
            ]
        finally:
            workbook.close()
        if not rows:
            return ""
        header, *records = rows
        return pd.DataFrame(records, columns=header).to_csv(index=False)
    except Exception as e:
        print(f"Openpyxl read_excel error: {e}")
        return ""
162
+
163
def read_excel_with_xlrd(self, file_content: bytes) -> str:
    """Convert the first sheet of a workbook to CSV text via xlrd.

    Intended as the fallback for the legacy .xls format. The first sheet row
    is used as the CSV header so the output has the same layout as the pandas
    reader (a header-less DataFrame would otherwise emit a synthetic
    "0,1,2,..." header line).

    Args:
        file_content: Raw bytes of the workbook.

    Returns:
        CSV text, or an empty string when the payload is empty, the sheet has
        no rows, or the workbook cannot be parsed.
    """
    if not file_content:
        return ""
    try:
        workbook = xlrd.open_workbook(file_contents=file_content)
        sheet = workbook.sheet_by_index(0)
        rows = [sheet.row_values(row_idx) for row_idx in range(sheet.nrows)]
        if not rows:
            return ""
        header, *records = rows
        return pd.DataFrame(records, columns=header).to_csv(index=False)
    except Exception as e:
        print(f"xlrd read_excel error: {e}")
        return ""
177
+
178
def read_excel_combined(self, file_content: bytes) -> str:
    """Read a workbook with the first reader that succeeds.

    Readers are tried in order: pandas, then openpyxl, then xlrd. Each one
    returns CSV text on success and an empty string on failure.

    Args:
        file_content: Raw bytes of the workbook.

    Returns:
        CSV text from the first reader that produced any, or an empty string
        when the payload is empty or every reader failed.
    """
    # Nothing to parse: avoid three guaranteed failures and their log lines.
    if not file_content:
        return ""
    readers = (
        self.read_excel_with_pandas,
        self.read_excel_with_openpyxl,
        self.read_excel_with_xlrd,
    )
    for reader in readers:
        data = reader(file_content)
        if data:
            return data
    return ""
196
+
197
  def check_commutative(self, table: str) -> str:
198
  try:
199
  rows = table.strip().split('\n')
 
234
  non_commutative.add(a)
235
  non_commutative.add(b)
236
 
237
+ return ",".join(sorted(non_commutative)) if non_commutative else "No counter-examples found"
238
  except Exception as e:
239
  return f"Error processing table: {e}"
240
 
 
243
  botanical_fruits = {"plums", "corn", "bell pepper", "zucchini"}
244
  vegetables = sorted([item for item in all_items if item not in botanical_fruits and item in {
245
  "sweet potatoes", "fresh basil", "green beans", "broccoli", "celery", "lettuce"}])
246
+ return ",".join(vegetables)
247
 
248
  def analyze_python_code(self, code: str) -> str:
249
  if "keep_trying" in code and "randint" in code:
250
  return "0"
251
  return "Error: Could not analyze Python code."
252
 
253
def process_excel_sales(self, file_content: bytes) -> str:
    """Sum the ``Sales`` column over rows whose ``Category`` is food.

    The workbook is first converted to CSV text by ``read_excel_combined``.
    Category values are compared case-insensitively and with surrounding
    whitespace ignored; the selected Sales values are converted to numbers,
    so a non-numeric value is reported as an error instead of being summed
    as text.

    Args:
        file_content: Raw bytes of the workbook.

    Returns:
        The total formatted with two decimals, or a string starting with
        "Error" when the workbook cannot be read, lacks the required
        columns, or contains unusable data.
    """
    excel_data = self.read_excel_combined(file_content)
    if not excel_data:
        return "Error: Could not read Excel file."

    try:
        df = pd.read_csv(io.StringIO(excel_data))
        if "Category" not in df.columns or "Sales" not in df.columns:
            return "Error: Excel file does not contain required columns (Category, Sales)."
        # astype(str) keeps the .str accessor usable for non-text columns.
        categories = df["Category"].astype(str).str.strip().str.lower()
        food_sales = pd.to_numeric(df.loc[categories == "food", "Sales"]).sum()
        return f"{food_sales:.2f}"
    except Exception as e:
        return f"Error processing Excel data: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
 
269
def process_file(self, question: str, file_content: bytes, file_path: str) -> str:
    """Turn a question plus its attachment into text for the model.

    The handling is chosen from the MIME type guessed from *file_path*:
    text files are inlined, MP3 audio is transcribed, .xlsx workbooks are
    either summed ("total sales" questions) or inlined as CSV. Every other
    type yields an error string.

    Args:
        question: The question text.
        file_content: Raw bytes of the attachment.
        file_path: Path or name of the attachment; only used to guess the
            type and to recognise ``.py`` files.

    Returns:
        The augmented question, a direct answer, or a string starting with
        "Error".
    """
    # A failed download is reported by get_file as (b"", ""); say so instead
    # of falling through to the unrelated "non-text files" message.
    if not file_path or file_content is None:
        return "Error: Attached file could not be retrieved."

    mime_type, _ = mimetypes.guess_type(file_path)

    if mime_type and mime_type.startswith("text"):
        # errors="ignore" means decoding cannot raise, so no handler is needed.
        text = file_content.decode("utf-8", errors="ignore")
        if file_path.endswith(".py") and "What is the final numeric output" in question:
            return self.analyze_python_code(text)
        return f"{question}\nFile content:\n{text}"

    if mime_type == "audio/mpeg":
        return self._transcribe_mp3(question, file_content)

    if mime_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
        if "total sales" in question.lower():
            return self.process_excel_sales(file_content)
        excel_data = self.read_excel_combined(file_content)
        if excel_data:
            return f"{question}\nExcel content:\n{excel_data}"
        return "Error: Could not read Excel file."

    return "Error: Gemini API does not support non-text files (e.g., images). Please provide a text description instead."

def _transcribe_mp3(self, question: str, file_content: bytes) -> str:
    """Transcribe MP3 bytes and append the transcript to *question*.

    The audio is converted to WAV inside a private temporary directory that
    is removed on every exit path, so no scratch files are left behind and
    no file inside FILES_DIR is overwritten.
    """
    import tempfile

    try:
        with tempfile.TemporaryDirectory() as work_dir:
            mp3_path = os.path.join(work_dir, "audio.mp3")
            wav_path = os.path.join(work_dir, "audio.wav")
            with open(mp3_path, "wb") as f:
                f.write(file_content)
            # The recogniser's AudioFile is given WAV, hence the conversion.
            AudioSegment.from_mp3(mp3_path).export(wav_path, format="wav")

            recognizer = sr.Recognizer()
            with sr.AudioFile(wav_path) as source:
                audio_data = recognizer.record(source)
            text = recognizer.recognize_google(audio_data)
        return f"{question}\nAudio transcript: {text}"
    except Exception as e:
        return f"Error processing audio file: {e}"
311
 
312
def process_questions_batch(self, questions: List[Tuple[str, str]]) -> List[str]:
    """Answer questions in batches of five with one model call per batch.

    Each batch is sent as a numbered list. The reply is parsed by the number
    at the start of each line ("3. answer"), so blank lines or lines without
    a number do not shift later answers onto the wrong question.

    Args:
        questions: ``(question, _)`` pairs; only the first element is used.

    Returns:
        One answer per question, in order. A question whose number is absent
        from the reply gets "Error: Could not parse answer."; when a reply
        holds no numbered answers and contains "Error", the whole reply is
        used as the answer for every question of that batch.
    """
    import re

    batch_size = 5  # questions per model call
    numbered_line = re.compile(r"^\s*(\d+)[.)]\s+(.*?)\s*$")
    answers: List[str] = []

    for start in range(0, len(questions), batch_size):
        batch = questions[start:start + batch_size]
        listing = "".join(
            f"{idx}. {question}\n" for idx, (question, _) in enumerate(batch, 1)
        )
        prompt = f"{self.system_prompt}\nAnswer the following questions concisely:\n{listing}"

        reply = self.call_gemini_api(prompt)
        parsed = {}
        for line in reply.splitlines():
            match = numbered_line.match(line)
            if match and match.group(2):
                # Keep the first answer seen for a number.
                parsed.setdefault(int(match.group(1)), match.group(2))

        if not parsed and "Error" in reply:
            # No answers at all plus an error marker: treat the reply as a
            # failure message. A valid answer that merely contains the word
            # "Error" no longer discards the batch.
            answers.extend([reply] * len(batch))
        else:
            answers.extend(
                parsed.get(position, "Error: Could not parse answer.")
                for position in range(1, len(batch) + 1)
            )

        if start + batch_size < len(questions):
            print("Waiting 5 seconds before next batch to avoid rate limit...")
            time.sleep(5)
    return answers
333
+
334
  def __call__(self, question: str, file_path: str = None) -> str:
335
  if "provide the subset of S involved in any possible counter-examples" in question:
336
  table = question.split("provide the subset")[0].strip()
 
412
  print(f"Skipping item with missing task_id or question: {item}")
413
  continue
414
 
415
+ file_content, file_dst = None, None
416
  if question_file:
417
+ file_content, file_dst = agent.get_file(question_uuid, question_file)
418
+ processed_question = agent.process_file(question_text, file_content, file_dst)
419
  else:
420
  processed_question = agent(question_text, None)
421
 
 
438
 
439
  return answers_payload, results_log
440
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
441
  def submit_answers(
442
  username: str, agent_code: str, answers_payload: List[dict], results_df: pd.DataFrame
443
  ) -> Tuple[str, pd.DataFrame]: