Final_Assignment_Template

Build error

App Files Files Community

AnhLee0 committed on May 2, 2025

Commit

312bb48

verified ·

1 Parent(s): 79d0f56

Update app.py

Browse files

Files changed (1) hide show

app.py +115 -351

app.py CHANGED Viewed

@@ -1,78 +1,84 @@
 import os
 import gradio as gr
 import requests
-import pandas as pd
 import re
 import urllib.parse
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Basic Agent Definition ---
 class BasicAgent:
     def __init__(self):
-        self.api_url = DEFAULT_API_URL
-        print("BasicAgent initialized with multiple search tools and LLM.")
     def search_bing(self, query: str) -> str:
-        """Tìm kiếm bằng Bing."""
         try:
             url = f"https://www.bing.com/search?q={urllib.parse.quote(query)}"
             headers = {"User-Agent": "Mozilla/5.0"}
-            response = requests.get(url, headers=headers, timeout=15)
             response.raise_for_status()
-            return response.text
         except Exception as e:
             print(f"Bing search error: {e}")
             return ""
-    def search_startpage(self, query: str) -> str:
-        """Tìm kiếm bằng Startpage (bảo mật cao)."""
         try:
-            url = f"https://www.startpage.com/do/search?q={urllib.parse.quote(query)}"
-            headers = {"User-Agent": "Mozilla/5.0"}
-            response = requests.get(url, headers=headers, timeout=15)
             response.raise_for_status()
-            return response.text
         except Exception as e:
-            print(f"Startpage search error: {e}")
-            return ""
-    def search_yandex(self, query: str) -> str:
-        """Tìm kiếm bằng Yandex."""
-        try:
-            url = f"https://yandex.com/search/?text={urllib.parse.quote(query)}"
-            headers = {"User-Agent": "Mozilla/5.0"}
-            response = requests.get(url, headers=headers, timeout=15)
-            response.raise_for_status()
-            return response.text
-        except Exception as e:
-            print(f"Yandex search error: {e}")
-            return ""
-    def search_wolfram(self, query: str) -> str:
-        """Tìm kiếm bằng WolframAlpha (tính toán logic)."""
-        try:
-            # Lưu ý: WolframAlpha thường yêu cầu API key, đây là giả lập
-            url = f"https://www.wolframalpha.com/input/?i={urllib.parse.quote(query)}"
-            headers = {"User-Agent": "Mozilla/5.0"}
-            response = requests.get(url, headers=headers, timeout=15)
-            response.raise_for_status()
-            return response.text
-        except Exception as e:
-            print(f"WolframAlpha search error: {e}")
             return ""
     def get_file(self, task_id: str) -> str:
-        """Tải tệp đính kèm từ API /files/{task_id}."""
         try:
-            file_url = f"{self.api_url}/files/{task_id}"
-            response = requests.get(file_url, timeout=15)
             response.raise_for_status()
             return response.text
-        except requests.exceptions.RequestException as e:
             print(f"Error fetching file for task {task_id}: {e}")
-            return "Error fetching file."
     def extract_number(self, text: str) -> str:
         """Trích xuất số từ văn bản."""
@@ -80,7 +86,7 @@ class BasicAgent:
         return numbers[0] if numbers else "Unknown"
     def extract_name(self, text: str) -> str:
-        """Trích xuất tên riêng hoặc từ khóa ngắn."""
         words = text.split()
         for word in words:
             if word[0].isupper() and 3 <= len(word) <= 15:
@@ -88,330 +94,88 @@ class BasicAgent:
         return "Unknown"
     def __call__(self, task_id: str, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        try:
-            # Lấy tệp đính kèm (nếu có)
-            file_content = self.get_file(task_id)
-            print(f"File content for task {task_id}: {file_content[:100]}...")
-            # Sử dụng LLM (Grok) để phân tích và trả lời
-            # 1. Câu hỏi về số lượng album của Mercedes Sosa
-            if "Mercedes Sosa" in question and "2000 and 2009" in question:
-                search_bing = self.search_bing("Mercedes Sosa studio albums 2000-2009 site:en.wikipedia.org")
-                search_yandex = self.search_yandex("Mercedes Sosa studio albums 2000-2009")
-                combined = search_bing + " " + search_yandex
-                albums = []
-                years = range(2000, 2010)
-                for year in years:
-                    if str(year) in combined:
-                        if "Misa Criolla" in combined and year == 2000:
-                            albums.append("Misa Criolla")
-                        if "Voz y Sentimiento" in combined and year == 2003:
-                            albums.append("Voz y Sentimiento")
-                        if "Corazón Libre" in combined and year == 2005:
-                            albums.append("Corazón Libre")
-                        if "Cantora" in combined and year == 2009:
-                            albums.append("Cantora 1")
-                            albums.append("Cantora 2")
-                return str(len(set(albums))) if albums else "5"
-            # 2. Câu hỏi về số loài chim trong video
-            if "highest number of bird species" in question and "youtube.com" in question:
-                search_startpage = self.search_startpage("highest number of bird species in video L1vXCYZAYYM")
-                search_yandex = self.search_yandex("highest number of bird species in video L1vXCYZAYYM")
-                combined = search_startpage + " " + search_yandex
-                return self.extract_number(combined)
-            # 3. Câu hỏi về đảo ngược câu (sử dụng LLM để hiểu ngữ nghĩa)
-            if ".rewsna eht sa" in question:
-                reversed_question = question[::-1]
-                if "opposite of the word 'left'" in reversed_question:
-                    return "right"
-            # 4. Câu hỏi về nước đi cờ vua
-            if "chess position" in question and "algebraic notation" in question:
-                # Giả định nước đi chiếu tướng (LLM suy luận)
-                return "Qe8"
-            # 5. Câu hỏi về người đề cử bài viết Wikipedia
-            if "Featured Article on English Wikipedia about a dinosaur" in question and "November 2016" in question:
-                search_bing = self.search_bing("Featured Article dinosaur November 2016 Wikipedia nominator")
-                search_startpage = self.search_startpage("Featured Article dinosaur November 2016 Wikipedia nominator")
-                combined = search_bing + " " + search_startpage
-                return "FunkMonk" if "FunkMonk" in combined else self.extract_name(combined)
-            # 6. Câu hỏi về toán tử không giao hoán (LLM phân tích bảng)
-            if "prove * is not commutative" in question:
-                # Bảng: |*|a|b|c|d|e|...
-                # Phân tích: a*b = b, b*a = c (không giao hoán), v.v.
-                # LLM suy luận: tất cả phần tử đều có thể nằm trong cặp không giao hoán
-                return "a,b,c,d,e"
-            # 7. Câu hỏi về Teal'c trong video
-            if "Teal'c" in question and "Isn't that hot?" in question:
-                search_yandex = self.search_yandex("Teal'c response to 'Isn't that hot?' Stargate SG-1")
-                search_bing = self.search_bing("Teal'c response to 'Isn't that hot?' Stargate SG-1")
-                combined = search_yandex + " " + search_bing
-                if "indeed" in combined.lower():
-                    return "Indeed"
-                return "Unknown"
-            # 8. Câu hỏi về bác sĩ thú y
-            if "equine veterinarian" in question and "LibreText's Introductory Chemistry" in question:
-                search_startpage = self.search_startpage("equine veterinarian LibreText Introductory Chemistry 1.E Exercises")
-                search_bing = self.search_bing("equine veterinarian LibreText Introductory Chemistry 1.E Exercises")
-                combined = search_startpage + " " + search_bing
-                return "Smith" if "Smith" in combined else self.extract_name(combined)
-            # 9. Câu hỏi về rau củ (LLM phân loại thực vật học)
-            if "grocery list" in question and "fruits and vegetables" in question:
-                items = re.search(r"milk,.*?, peanuts", question).group().split(", ")
-                all_items = [item.strip() for item in items]
-                # Rau củ (theo phân loại thực vật học, không tính quả như bell pepper, corn)
-                vegetables = [
-                    "sweet potatoes", "fresh basil", "green beans", "broccoli",
-                    "celery", "zucchini", "lettuce"
-                ]
-                veggie_list = sorted([item for item in all_items if item in vegetables])
-                return ",".join(veggie_list)
-            # 10. Câu hỏi về nguyên liệu làm bánh
-            if "Strawberry pie.mp3" in question:
-                # Giả định nội dung file âm thanh (LLM suy luận nguyên liệu bánh dâu)
-                return "lemon juice,ripe strawberries,salt,sugar"
-            # 11. Diễn viên trong Magda M.
-            if "Polish-language version of Everybody Loves Raymond" in question and "Magda M" in question:
-                search_yandex = self.search_yandex("actor who played Ray Polish Everybody Loves Raymond Magda M")
-                return self.extract_name(search_yandex)
-            # 12. Output mã Python
-            if "final numeric output from the attached Python code" in question:
-                # Giả định file_content chứa mã Python
-                numbers = re.findall(r"print\((\d+)\)", file_content)
-                return numbers[0] if numbers else "42"
-            # 13. Số lần đánh bóng (Yankees 1977)
-            if "Yankee with the most walks in the 1977 regular season" in question:
-                search_bing = self.search_bing("Yankee most walks 1977 regular season at bats")
-                search_startpage = self.search_startpage("Yankee most walks 1977 regular season at bats")
-                combined = search_bing + " " + search_startpage
-                return self.extract_number(combined)
-            # 14. Số trang bài tập
-            if "Homework.mp3" in question and "page numbers" in question:
-                numbers = re.findall(r"\b\d+\b", file_content)
-                if numbers:
-                    return ",".join(sorted(numbers))
-                return "10,15,20"
-            # 15. NASA award number
-            if "NASA award number" in question and "R. G. Arendt" in question:
-                search_yandex = self.search_yandex("R. G. Arendt NASA award number Universe Today June 6 2023")
-                return "NNX17AJ88G" if "NNX17AJ88G" in search_yandex else "Unknown"
-            # 16. Thành phố lưu trữ mẫu vật
-            if "Vietnamese specimens" in question and "Nedoshivina's 2010 paper" in question:
-                search_bing = self.search_bing("Vietnamese specimens Kuznetzov Nedoshivina 2010 deposited city")
-                return "Hanoi" if "Hanoi" in search_bing else "Unknown"
-            # 17. Quốc gia ít vận động viên nhất 1928 Olympics
-            if "1928 Summer Olympics" in question and "least number of athletes" in question:
-                search_startpage = self.search_startpage("country least athletes 1928 Summer Olympics IOC code")
-                if "Monaco" in search_startpage:
-                    return "MON"
-                return "Unknown"
-            # 18. Pitchers trước và sau Taishō Tamai
-            if "Taishō Tamai" in question and "pitchers with the number before and after" in question:
-                search_yandex = self.search_yandex("pitchers before and after Taishō Tamai July 2023")
-                names = re.findall(r"\b[A-Z][a-z]+\b", search_yandex)
-                return f"{names[0]},{names[1]}" if len(names) >= 2 else "Suzuki,Tanaka"
-            # 19. Tổng doanh thu từ thực phẩm
-            if "Excel file" in question and "total sales" in question and "not including drinks" in question:
-                numbers = re.findall(r"\b\d+\.\d{2}\b", file_content)
-                return numbers[0] if numbers else "1500.00"
-            # 20. Người nhận Malko Competition
-            if "Malko Competition recipient" in question and "country that no longer exists" in question:
-                search_bing = self.search_bing("Malko Competition recipient after 1977 country no longer exists")
-                return "Vladimir" if "Vladimir" in search_bing else self.extract_name(search_bing)
-            # Các câu hỏi khác: Tìm kiếm thông tin chung
-            search_bing = self.search_bing(question)
-            search_startpage = self.search_startpage(question)
-            search_yandex = self.search_yandex(question)
-            combined = search_bing + " " + search_startpage + " " + search_yandex
-            if file_content != "Error fetching file.":
-                combined += " " + file_content
-            if "number" in question.lower() or "how many" in question.lower():
-                return self.extract_number(combined)
-            return self.extract_name(combined)
-        except Exception as e:
-            print(f"Error processing question: {e}")
-            return "Error answering question."
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
-    if profile:
-        username = f"{profile.username}"
-        print(f"User logged in: {username}")
-    else:
-        print("User not logged in.")
-        return "Please Login to Hugging Face with the button.", None
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    try:
-        agent = BasicAgent()
-    except Exception as e:
-        print(f"Error instantiating agent: {e}")
-        return f"Error initializing agent: {e}", None
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    print(agent_code)
-    print(f"Fetching questions from: {questions_url}")
-    try:
-        response = requests.get(questions_url, timeout=15)
-        response.raise_for_status()
-        questions_data = response.json()
-        if not questions_data:
-            print("Fetched questions list is empty.")
-            return "Fetched questions list is empty or invalid format.", None
-        print(f"Fetched {len(questions_data)} questions.")
-    except requests.exceptions.RequestException as e:
-        print(f"Error fetching questions: {e}")
-        return f"Error fetching questions: {e}", None
-    except requests.exceptions.JSONDecodeError as e:
-        print(f"Error decoding JSON response from questions endpoint: {e}")
-        print(f"Response text: {response.text[:500]}")
-        return f"Error decoding server response for questions: {e}", None
-    except Exception as e:
-        print(f"An unexpected error occurred fetching questions: {e}")
-        return f"An unexpected error occurred fetching questions: {e}", None
     results_log = []
     answers_payload = []
-    print(f"Running agent on {len(questions_data)} questions...")
     for item in questions_data:
         task_id = item.get("task_id")
-        question_text = item.get("question")
-        if not task_id or question_text is None:
-            print(f"Skipping item with missing task_id or question: {item}")
             continue
-        try:
-            submitted_answer = agent(task_id, question_text)
-            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
-        except Exception as e:
-            print(f"Error running agent on task {task_id}: {e}")
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
-    if not answers_payload:
-        print("Agent did not produce any answers to submit.")
-        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
-    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
-    print(status_update)
-    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
-    try:
-        response = requests.post(submit_url, json=submission_data, timeout=60)
-        response.raise_for_status()
-        result_data = response.json()
-        final_status = (
-            f"Submission Successful!\n"
-            f"User: {result_data.get('username')}\n"
-            f"Overall Score: {result_data.get('score', 'N/A')}% "
-            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-            f"Message: {result_data.get('message', 'No message received.')}"
-        )
-        print("Submission successful.")
-        results_df = pd.DataFrame(results_log)
-        return final_status, results_df
-    except requests.exceptions.HTTPError as e:
-        error_detail = f"Server responded with status {e.response.status_code}."
-        try:
-            error_json = e.response.json()
-            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
-        except requests.exceptions.JSONDecodeError:
-            error_detail += f" Response: {e.response.text[:500]}"
-        status_message = f"Submission Failed: {error_detail}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    except requests.exceptions.Timeout:
-        status_message = "Submission Failed: The request timed out."
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    except requests.exceptions.RequestException as e:
-        status_message = f"Submission Failed: Network error - {e}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    except Exception as e:
-        status_message = f"An unexpected error occurred during submission: {e}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-# --- Build Gradio Interface using Blocks ---
-with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner")
-    gr.Markdown(
-        """
-        **Instructions:**
-        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
-        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
-        ---
-        **Disclaimers:**
-        Once clicking on the "submit" button, it can take quite some time (this is the time for the agent to go through all the questions).
-        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution.
-        For instance, for the delay process of the submit button, a solution could be to cache the answers and submit in a separate action or even to answer the questions in async.
-        """
     )
     gr.LoginButton()
-    run_button = gr.Button("Run Evaluation & Submit All Answers")
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-    run_button.click(
-        fn=run_and_submit_all,
-        outputs=[status_output, results_table]
-    )
 if __name__ == "__main__":
-    print("\n" + "-"*30 + " App Starting " + "-"*30)
-    space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID")
-    if space_host_startup:
-        print(f"✅ SPACE_HOST found: {space_host_startup}")
-        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
-    else:
-        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
-    if space_id_startup:
-        print(f"✅ SPACE_ID found: {space_id_startup}")
-        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
-    else:
-        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
-    print("-"*(60 + len(" App Starting ")) + "\n")
-    print("Launching Gradio Interface for Basic Agent Evaluation...")
     demo.launch(debug=True, share=False)

 import os
 import gradio as gr
 import requests
 import re
 import urllib.parse
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from smolagents import OpenAIServerModel, CodeAgent, WikipediaSearchTool
+from bs4 import BeautifulSoup
+import cachetools
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# --- Improved BasicAgent Definition ---
 class BasicAgent:
     def __init__(self):
+        # GPT-4o-mini cho câu hỏi chung
+        self.agent = CodeAgent(
+            model=OpenAIServerModel(model_id="gpt-4o-mini"),
+            tools=[WikipediaSearchTool()],
+            add_base_tools=True,
+        )
+        # Mistral cho suy luận logic
+        self.tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
+        self.model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
+        self.mistral_pipeline = pipeline("text-generation", model=self.model, tokenizer=self.tokenizer, max_length=200)
+        # Caching để tối ưu hiệu suất
+        self.cache = cachetools.LRUCache(maxsize=100)
+        print("BasicAgent initialized with GPT-4o-mini, Mistral, and WikipediaSearchTool.")
     def search_bing(self, query: str) -> str:
+        """Tìm kiếm thông tin chung bằng Bing."""
+        if query in self.cache:
+            return self.cache[query]
         try:
             url = f"https://www.bing.com/search?q={urllib.parse.quote(query)}"
             headers = {"User-Agent": "Mozilla/5.0"}
+            response = requests.get(url, headers=headers, timeout=10)
             response.raise_for_status()
+            soup = BeautifulSoup(response.text, "html.parser")
+            results = soup.find_all("li", class_="b_algo")
+            result_text = " ".join([result.get_text() for result in results[:3]])
+            self.cache[query] = result_text
+            return result_text
         except Exception as e:
             print(f"Bing search error: {e}")
             return ""
+    def search_wikipedia(self, query: str) -> str:
+        """Tìm kiếm chi tiết bằng Wikipedia API."""
+        if query in self.cache:
+            return self.cache[query]
         try:
+            url = f"https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch={urllib.parse.quote(query)}&format=json"
+            response = requests.get(url, timeout=10)
             response.raise_for_status()
+            data = response.json()
+            if data["query"]["search"]:
+                page_id = data["query"]["search"][0]["pageid"]
+                page_url = f"https://en.wikipedia.org/wiki?curid={page_id}"
+                page_response = requests.get(page_url, timeout=10)
+                soup = BeautifulSoup(page_response.text, "html.parser")
+                paragraphs = soup.find_all("p")
+                result_text = " ".join([p.get_text() for p in paragraphs[:2]])
+                self.cache[query] = result_text
+                return result_text
+            return "No results found."
         except Exception as e:
+            print(f"Wikipedia search error: {e}")
             return ""
     def get_file(self, task_id: str) -> str:
+        """Tải tệp đính kèm từ API."""
         try:
+            file_url = f"{DEFAULT_API_URL}/files/{task_id}"
+            response = requests.get(file_url, timeout=10)
             response.raise_for_status()
             return response.text
+        except Exception as e:
             print(f"Error fetching file for task {task_id}: {e}")
+            return ""
     def extract_number(self, text: str) -> str:
         """Trích xuất số từ văn bản."""
         return numbers[0] if numbers else "Unknown"
     def extract_name(self, text: str) -> str:
+        """Trích xuất tên riêng hoặc từ khóa."""
         words = text.split()
         for word in words:
             if word[0].isupper() and 3 <= len(word) <= 15:
         return "Unknown"
     def __call__(self, task_id: str, question: str) -> str:
+        print(f"Processing question (task {task_id}): {question[:50]}...")
+        file_content = self.get_file(task_id)
+        # Phân loại và xử lý câu hỏi
+        question_lower = question.lower()
+        if "how many" in question_lower or "number of" in question_lower:
+            # Câu hỏi về số lượng
+            search_result = self.search_wikipedia(question) if "history" in question_lower else self.search_bing(question)
+            return self.extract_number(search_result + " " + file_content)
+        elif "who" in question_lower or "name" in question_lower:
+            # Câu hỏi về tên riêng
+            search_result = self.search_wikipedia(question)
+            return self.extract_name(search_result + " " + file_content)
+        elif "prove" in question_lower or "logic" in question_lower:
+            # Câu hỏi suy luận logic
+            prompt = f"Question: {question}\nFile content: {file_content}\nProvide a logical answer:"
+            mistral_response = self.mistral_pipeline(prompt)[0]["generated_text"]
+            return mistral_response.strip().split()[-1]  # Lấy kết quả cuối
+        elif "code" in question_lower or "python" in question_lower:
+            # Câu hỏi về mã (phân tích tệp nếu có)
+            if file_content:
+                prompt = f"Analyze this code and answer: {question}\nCode:\n{file_content}"
+                return self.agent.run(prompt)
+            return "No code provided."
+        else:
+            # Câu hỏi chung
+            prompt = f"Question: {question}\nFile content: {file_content}"
+            return self.agent.run(prompt)
+# --- Evaluation runner: fetch questions, run the agent, submit answers ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
+    if not profile:
+        return "Please Login to Hugging Face.", None
+    username = profile.username
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+    agent = BasicAgent()
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    response = requests.get(questions_url, timeout=15)
+    questions_data = response.json()
     results_log = []
     answers_payload = []
     for item in questions_data:
         task_id = item.get("task_id")
+        question = item.get("question")
+        if not task_id or not question:
             continue
+        answer = agent(task_id, question)
+        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
+        results_log.append({"Task ID": task_id, "Question": question, "Answer": answer})
+    submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
+    response = requests.post(submit_url, json=submission_data, timeout=60)
+    result_data = response.json()
+    status = (
+        f"Submission Successful!\n"
+        f"User: {result_data.get('username')}\n"
+        f"Score: {result_data.get('score', 'N/A')}% "
+        f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')})"
     )
+    return status, pd.DataFrame(results_log)
+# --- Gradio Interface ---
+with gr.Blocks() as demo:
+    gr.Markdown("# Improved Agent Evaluation Runner")
     gr.LoginButton()
+    run_button = gr.Button("Run Evaluation & Submit")
+    status_output = gr.Textbox(label="Status", lines=5, interactive=False)
+    results_table = gr.DataFrame(label="Results", wrap=True)
+    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 if __name__ == "__main__":
+    print("Launching Improved Agent...")
     demo.launch(debug=True, share=False)