Final_Assignment_Template

Build error

App Files Files Community

AnhLee0 committed on May 2, 2025

Commit

5415e44

verified ·

1 Parent(s): bc49549

Update app.py

Browse files

Files changed (1) hide show

app.py +205 -67

app.py CHANGED Viewed

@@ -7,6 +7,10 @@ import pandas as pd
 import mimetypes
 import speech_recognition as sr
 from pydub import AudioSegment
 # --- Constants ---
 QUESTIONS_URL = "https://agents-course-unit4-scoring.hf.space/questions"
@@ -14,7 +18,7 @@ SUBMIT_URL = "https://agents-course-unit4-scoring.hf.space/submit"
 FILES_URL = "https://agents-course-unit4-scoring.hf.space/files"
 FILES_DIR = "files"
 SYSTEM_PROMPT = "You are a helpful AI assistant tasked with answering questions accurately. Provide concise and accurate answers in the format requested by the question."
-GEMINI_API_KEY = "AIzaSyBvImpFo9o5Dz8OL_mfFEoRijeUyYBvXiI"
 GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={GEMINI_API_KEY}"
 # --- AssistantAgent Implementation ---
@@ -22,13 +26,21 @@ class AssistantAgent:
     def __init__(self, system_prompt: str):
         self.system_prompt = system_prompt
         self.headers = {"Content-Type": "application/json"}
     def call_gemini_api(self, prompt: str) -> str:
         retry_delay = 5  # Chờ 5 giây nếu gặp lỗi quota
         payload = {
             "contents": [{
                 "parts": [{"text": prompt}]
-            }]
         }
         for attempt in range(3):
             try:
@@ -45,6 +57,143 @@ class AssistantAgent:
                     return f"Error calling Gemini API: {e}"
         return "Error: Exceeded retry attempts due to quota limits."
     def check_commutative(self, table: str) -> str:
         try:
             rows = table.strip().split('\n')
@@ -85,7 +234,7 @@ class AssistantAgent:
                         non_commutative.add(a)
                         non_commutative.add(b)
-            return ", ".join(sorted(non_commutative)) if non_commutative else "No counter-examples found"
         except Exception as e:
             return f"Error processing table: {e}"
@@ -94,85 +243,94 @@ class AssistantAgent:
         botanical_fruits = {"plums", "corn", "bell pepper", "zucchini"}
         vegetables = sorted([item for item in all_items if item not in botanical_fruits and item in {
             "sweet potatoes", "fresh basil", "green beans", "broccoli", "celery", "lettuce"}])
-        return ", ".join(vegetables)
     def analyze_python_code(self, code: str) -> str:
         if "keep_trying" in code and "randint" in code:
             return "0"
         return "Error: Could not analyze Python code."
-    def process_excel_sales(self, file_path: str) -> str:
         try:
-            df = pd.read_excel(file_path, engine='openpyxl')
             if 'Category' in df.columns and 'Sales' in df.columns:
-                food_sales = df[df['Category'] == 'Food']['Sales'].sum()
                 return f"{food_sales:.2f}"
             else:
                 return "Error: Excel file does not contain required columns (Category, Sales)."
         except Exception as e:
-            return f"Error reading Excel file: {e}"
-    def process_questions_batch(self, questions: List[Tuple[str, str]]) -> List[str]:
-        batch_size = 5  # 5 câu hỏi mỗi batch
-        answers = []
-        for i in range(0, len(questions), batch_size):
-            batch = questions[i:i + batch_size]
-            prompt = f"{self.system_prompt}\nAnswer the following questions concisely:\n"
-            for idx, (question, _) in enumerate(batch, 1):
-                prompt += f"{idx}. {question}\n"
-            batch_answers = self.call_gemini_api(prompt)
-            if "Error" in batch_answers:
-                answers.extend([batch_answers] * len(batch))
-            else:
-                batch_answers = batch_answers.split('\n')
-                for idx in range(len(batch)):
-                    answer = batch_answers[idx].split('. ', 1)[1] if idx < len(batch_answers) and '. ' in batch_answers[idx] else "Error: Could not parse answer."
-                    answers.append(answer)
-            if i + batch_size < len(questions):
-                print("Waiting 5 seconds before next batch to avoid rate limit...")
-                time.sleep(5)  # Giảm từ 60 giây xuống 5 giây
-        return answers
-    def process_file(self, question: str, file_path: str) -> str:
         mime_type, _ = mimetypes.guess_type(file_path)
         if mime_type and mime_type.startswith('text'):
             try:
-                with open(file_path, 'r', encoding='utf-8') as f:
-                    file_content = f.read()
                 if file_path.endswith('.py') and "What is the final numeric output" in question:
-                    return self.analyze_python_code(file_content)
-                return f"{question}\nFile content:\n{file_content}"
             except UnicodeDecodeError as e:
                 return f"Error reading file: {e}. File may not be a valid text file."
             except Exception as e:
                 return f"Error reading file: {e}"
         elif mime_type and mime_type == 'audio/mpeg':
             try:
-                audio = AudioSegment.from_mp3(file_path)
-                wav_path = file_path.replace('.mp3', '.wav')
                 audio.export(wav_path, format="wav")
                 recognizer = sr.Recognizer()
                 with sr.AudioFile(wav_path) as source:
                     audio_data = recognizer.record(source)
                     text = recognizer.recognize_google(audio_data)
                 os.remove(wav_path)
                 return f"{question}\nAudio transcript: {text}"
             except Exception as e:
                 return f"Error processing audio file: {e}"
         elif mime_type and mime_type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
             if "total sales" in question.lower():
-                return self.process_excel_sales(file_path)
-            try:
-                df = pd.read_excel(file_path, engine='openpyxl')
-                file_content = df.to_string(index=False)
-                return f"{question}\nExcel content:\n{file_content}"
-            except Exception as e:
-                return f"Error reading Excel file: {e}"
         else:
             return "Error: Gemini API does not support non-text files (e.g., images). Please provide a text description instead."
     def __call__(self, question: str, file_path: str = None) -> str:
         if "provide the subset of S involved in any possible counter-examples" in question:
             table = question.split("provide the subset")[0].strip()
@@ -254,10 +412,10 @@ def run_agent(agent: AssistantAgent, questions_data: List[dict]) -> Tuple[List[d
             print(f"Skipping item with missing task_id or question: {item}")
             continue
-        file_dst = None
         if question_file:
-            file_dst = download_question_file(question_uuid, question_file)
-            processed_question = agent(question_text, file_dst)
         else:
             processed_question = agent(question_text, None)
@@ -280,26 +438,6 @@ def run_agent(agent: AssistantAgent, questions_data: List[dict]) -> Tuple[List[d
     return answers_payload, results_log
-def download_question_file(question_uuid: str, question_file: str) -> str:
-    try:
-        file_url = f"{FILES_URL}/{question_uuid}"
-        file_dst = f"{FILES_DIR}/{question_file}"
-        if os.path.exists(file_dst):
-            return file_dst
-        print(f"Downloading file from: '{file_url}'")
-        with requests.get(file_url, stream=True) as response:
-            response.raise_for_status()
-            with open(file_dst, "wb") as file:
-                for chunk in response.iter_content(chunk_size=8192):
-                    if chunk:
-                        file.write(chunk)
-        print(f"Downloaded file '{file_dst}'.")
-        return file_dst
-    except requests.exceptions.RequestException as e:
-        raise RuntimeError(f"Error downloading file: {e}")
-    except Exception as e:
-        raise RuntimeError(f"An unexpected error occurred downloading file: {e}")
 def submit_answers(
     username: str, agent_code: str, answers_payload: List[dict], results_df: pd.DataFrame
 ) -> Tuple[str, pd.DataFrame]:

 import mimetypes
 import speech_recognition as sr
 from pydub import AudioSegment
+import io
+import openpyxl
+import xlrd
+from bs4 import BeautifulSoup
 # --- Constants ---
 QUESTIONS_URL = "https://agents-course-unit4-scoring.hf.space/questions"
 FILES_URL = "https://agents-course-unit4-scoring.hf.space/files"
 FILES_DIR = "files"
 SYSTEM_PROMPT = "You are a helpful AI assistant tasked with answering questions accurately. Provide concise and accurate answers in the format requested by the question."
+GEMINI_API_KEY = "AIzaSyBO46AIuY3Lmq3-k2bZkABgc0gL6A1RV20"
 GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={GEMINI_API_KEY}"
 # --- AssistantAgent Implementation ---
     def __init__(self, system_prompt: str):
         self.system_prompt = system_prompt
         self.headers = {"Content-Type": "application/json"}
+        if not os.path.exists(FILES_DIR):
+            os.makedirs(FILES_DIR)
     def call_gemini_api(self, prompt: str) -> str:
         retry_delay = 5  # Chờ 5 giây nếu gặp lỗi quota
         payload = {
             "contents": [{
                 "parts": [{"text": prompt}]
+            }],
+            "safetySettings": [
+                {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
+                {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
+                {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
+                {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"}
+            ]
         }
         for attempt in range(3):
             try:
                     return f"Error calling Gemini API: {e}"
         return "Error: Exceeded retry attempts due to quota limits."
+    def search_wikipedia(self, query: str) -> str:
+        """Tìm kiếm thông tin chi tiết bằng Wikipedia API."""
+        try:
+            url = f"https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch={urllib.parse.quote(query)}&format=json"
+            response = requests.get(url, timeout=10)
+            response.raise_for_status()
+            data = response.json()
+            if data["query"]["search"]:
+                page_id = data["query"]["search"][0]["pageid"]
+                page_url = f"https://en.wikipedia.org/wiki?curid={page_id}"
+                page_response = requests.get(page_url, timeout=10)
+                soup = BeautifulSoup(page_response.text, "html.parser")
+                paragraphs = soup.find_all("p")
+                return " ".join([p.get_text() for p in paragraphs[:2]])
+            return "No results found."
+        except Exception as e:
+            print(f"Wikipedia search error: {e}")
+            return ""
+    def search_bing(self, query: str) -> str:
+        """Tìm kiếm thông tin chung bằng Bing."""
+        try:
+            url = f"https://www.bing.com/search?q={urllib.parse.quote(query)}"
+            headers = {"User-Agent": "Mozilla/5.0"}
+            response = requests.get(url, headers=headers, timeout=10)
+            response.raise_for_status()
+            soup = BeautifulSoup(response.text, "html.parser")
+            results = soup.find_all("li", class_="b_algo")
+            result_text = " ".join([result.get_text() for result in results[:3]])
+            return result_text
+        except Exception as e:
+            print(f"Bing search error: {e}")
+            return ""
+    def download_file_from_url(self, file_url: str, file_dst: str) -> bool:
+        """Tải file từ URL và lưu vào đích."""
+        try:
+            with requests.get(file_url, stream=True, timeout=15) as response:
+                response.raise_for_status()
+                with open(file_dst, "wb") as file:
+                    for chunk in response.iter_content(chunk_size=8192):
+                        if chunk:
+                            file.write(chunk)
+            print(f"Downloaded file '{file_dst}'.")
+            return True
+        except Exception as e:
+            print(f"Error downloading file from URL {file_url}: {e}")
+            return False
+    def get_file(self, task_id: str, question_file: str) -> Tuple[bytes, str]:
+        """Tải tệp đính kèm từ API và kiểm tra nếu là URL thì tải tiếp."""
+        try:
+            file_url = f"{FILES_URL}/{task_id}"
+            file_dst = os.path.join(FILES_DIR, question_file)
+            if os.path.exists(file_dst):
+                with open(file_dst, "rb") as f:
+                    return f.read(), file_dst
+            print(f"Downloading file from: '{file_url}'")
+            response = requests.get(file_url, timeout=15)
+            response.raise_for_status()
+            content = response.content
+            # Kiểm tra nếu nội dung trả về là URL
+            content_str = content.decode('utf-8', errors='ignore')
+            if content_str.startswith('http'):
+                if self.download_file_from_url(content_str, file_dst):
+                    with open(file_dst, "rb") as f:
+                        return f.read(), file_dst
+                return b"", ""
+            # Lưu file vào đích
+            with open(file_dst, "wb") as file:
+                file.write(content)
+            return content, file_dst
+        except Exception as e:
+            print(f"Error fetching file for task {task_id}: {e}")
+            return b"", ""
+    def read_excel_with_pandas(self, file_content: bytes) -> str:
+        """Đọc file Excel bằng Pandas."""
+        try:
+            df = pd.read_excel(io.BytesIO(file_content), engine='openpyxl')
+            return df.to_csv(index=False)
+        except Exception as e:
+            print(f"Pandas read_excel error: {e}")
+            return ""
+    def read_excel_with_openpyxl(self, file_content: bytes) -> str:
+        """Đọc file Excel bằng Openpyxl."""
+        try:
+            workbook = openpyxl.load_workbook(io.BytesIO(file_content))
+            sheet = workbook.active
+            data = []
+            for row in sheet.rows:
+                row_data = [cell.value if cell.value is not None else "" for cell in row]
+                data.append(row_data)
+            df = pd.DataFrame(data)
+            return df.to_csv(index=False)
+        except Exception as e:
+            print(f"Openpyxl read_excel error: {e}")
+            return ""
+    def read_excel_with_xlrd(self, file_content: bytes) -> str:
+        """Đọc file Excel bằng xlrd (hỗ trợ định dạng cũ .xls)."""
+        try:
+            workbook = xlrd.open_workbook(file_contents=file_content)
+            sheet = workbook.sheet_by_index(0)
+            data = []
+            for row_idx in range(sheet.nrows):
+                row_data = [sheet.cell_value(row_idx, col_idx) for col_idx in range(sheet.ncols)]
+                data.append(row_data)
+            df = pd.DataFrame(data)
+            return df.to_csv(index=False)
+        except Exception as e:
+            print(f"xlrd read_excel error: {e}")
+            return ""
+    def read_excel_combined(self, file_content: bytes) -> str:
+        """Kết hợp nhiều phương pháp để đọc file Excel."""
+        # Thử đọc bằng Pandas
+        data = self.read_excel_with_pandas(file_content)
+        if data:
+            return data
+        # Thử đọc bằng Openpyxl nếu Pandas thất bại
+        data = self.read_excel_with_openpyxl(file_content)
+        if data:
+            return data
+        # Thử đọc bằng xlrd nếu cả hai phương pháp trên thất bại
+        data = self.read_excel_with_xlrd(file_content)
+        if data:
+            return data
+        return ""
     def check_commutative(self, table: str) -> str:
         try:
             rows = table.strip().split('\n')
                         non_commutative.add(a)
                         non_commutative.add(b)
+            return ",".join(sorted(non_commutative)) if non_commutative else "No counter-examples found"
         except Exception as e:
             return f"Error processing table: {e}"
         botanical_fruits = {"plums", "corn", "bell pepper", "zucchini"}
         vegetables = sorted([item for item in all_items if item not in botanical_fruits and item in {
             "sweet potatoes", "fresh basil", "green beans", "broccoli", "celery", "lettuce"}])
+        return ",".join(vegetables)
     def analyze_python_code(self, code: str) -> str:
         """Return "0" when ``code`` mentions both ``keep_trying`` and ``randint``.

         This is a substring check only; the code is never executed. Any other
         input yields an error string.
         """
         required_markers = ("keep_trying", "randint")
         if all(marker in code for marker in required_markers):
             return "0"
         return "Error: Could not analyze Python code."
+    def process_excel_sales(self, file_content: bytes) -> str:
+        """Xử lý dữ liệu Excel để tính tổng doanh thu từ thực phẩm."""
+        excel_data = self.read_excel_combined(file_content)
+        if not excel_data:
+            return "Error: Could not read Excel file."
         try:
+            df = pd.read_csv(io.StringIO(excel_data))
             if 'Category' in df.columns and 'Sales' in df.columns:
+                food_sales = df[df['Category'].str.lower() == 'food']['Sales'].sum()
                 return f"{food_sales:.2f}"
             else:
                 return "Error: Excel file does not contain required columns (Category, Sales)."
         except Exception as e:
+            return f"Error processing Excel data: {e}"
+    def process_file(self, question: str, file_content: bytes, file_path: str) -> str:
         mime_type, _ = mimetypes.guess_type(file_path)
         if mime_type and mime_type.startswith('text'):
             try:
+                file_content_text = file_content.decode('utf-8', errors='ignore')
                 if file_path.endswith('.py') and "What is the final numeric output" in question:
+                    return self.analyze_python_code(file_content_text)
+                return f"{question}\nFile content:\n{file_content_text}"
             except UnicodeDecodeError as e:
                 return f"Error reading file: {e}. File may not be a valid text file."
             except Exception as e:
                 return f"Error reading file: {e}"
         elif mime_type and mime_type == 'audio/mpeg':
             try:
+                file_dst = os.path.join(FILES_DIR, "temp_audio.mp3")
+                with open(file_dst, "wb") as f:
+                    f.write(file_content)
+                audio = AudioSegment.from_mp3(file_dst)
+                wav_path = file_dst.replace('.mp3', '.wav')
                 audio.export(wav_path, format="wav")
                 recognizer = sr.Recognizer()
                 with sr.AudioFile(wav_path) as source:
                     audio_data = recognizer.record(source)
                     text = recognizer.recognize_google(audio_data)
+                os.remove(file_dst)
                 os.remove(wav_path)
                 return f"{question}\nAudio transcript: {text}"
             except Exception as e:
                 return f"Error processing audio file: {e}"
         elif mime_type and mime_type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
             if "total sales" in question.lower():
+                return self.process_excel_sales(file_content)
+            excel_data = self.read_excel_combined(file_content)
+            if excel_data:
+                return f"{question}\nExcel content:\n{excel_data}"
+            return "Error: Could not read Excel file."
         else:
             return "Error: Gemini API does not support non-text files (e.g., images). Please provide a text description instead."
+    def process_questions_batch(self, questions: List[Tuple[str, str]]) -> List[str]:
+        batch_size = 5  # 5 câu hỏi mỗi batch
+        answers = []
+        for i in range(0, len(questions), batch_size):
+            batch = questions[i:i + batch_size]
+            prompt = f"{self.system_prompt}\nAnswer the following questions concisely:\n"
+            for idx, (question, _) in enumerate(batch, 1):
+                prompt += f"{idx}. {question}\n"
+            batch_answers = self.call_gemini_api(prompt)
+            if "Error" in batch_answers:
+                answers.extend([batch_answers] * len(batch))
+            else:
+                batch_answers = batch_answers.split('\n')
+                for idx in range(len(batch)):
+                    answer = batch_answers[idx].split('. ', 1)[1] if idx < len(batch_answers) and '. ' in batch_answers[idx] else "Error: Could not parse answer."
+                    answers.append(answer)
+            if i + batch_size < len(questions):
+                print("Waiting 5 seconds before next batch to avoid rate limit...")
+                time.sleep(5)
+        return answers
     def __call__(self, question: str, file_path: str = None) -> str:
         if "provide the subset of S involved in any possible counter-examples" in question:
             table = question.split("provide the subset")[0].strip()
             print(f"Skipping item with missing task_id or question: {item}")
             continue
+        file_content, file_dst = None, None
         if question_file:
+            file_content, file_dst = agent.get_file(question_uuid, question_file)
+            processed_question = agent.process_file(question_text, file_content, file_dst)
         else:
             processed_question = agent(question_text, None)
     return answers_payload, results_log
 def submit_answers(
     username: str, agent_code: str, answers_payload: List[dict], results_df: pd.DataFrame
 ) -> Tuple[str, pd.DataFrame]: