Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| import requests | |
| import inspect | |
| import pandas as pd | |
| from src.agent import BasicAgent | |
| from datasets import load_dataset | |
| from huggingface_hub import snapshot_download | |
| from docx import Document | |
# --- Constants ---
# Base URL of the scoring service; the "/submit" endpoint is built from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Agent instance created once at import time.
# NOTE(review): run_and_submit_all assigns its own local `agent`, so this
# module-level instance does not appear to be used there - confirm before
# relying on or removing it.
agent = BasicAgent()
# IDs of the GAIA validation tasks evaluated by a run. A frozenset keeps the
# membership test inside the dataset filter O(1).
_TARGET_TASK_IDS = frozenset({
    "8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
    "a1e91b78-d3d8-4675-bb8d-62741b4b68a6",
    "2d83110e-a098-4ebb-9987-066c06fa42d0",
    "cca530fc-4052-43b2-b130-b30968d8aa44",
    "4fc2f1ae-8625-45b5-ab34-ad4433bc21f8",
    "6f37996b-2ac7-44b0-8e68-6d28256631b4",
    "9d191bce-651d-4746-be2d-7ef8ecadb9c2",
    "cabe07ed-9eca-40ea-8ead-410ef5e83f91",
    "3cef3a44-215e-4aed-8e3b-b1e3f08063b7",
    "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
    "305ac316-eef6-4446-960a-92d80d542f82",
    "f918266a-b3e0-4914-865d-4faa564f1aef",
    "3f57289b-8c60-48be-bd80-01f8099ca449",
    "1f975693-876d-457b-a649-393859e79bf3",
    "840bfca7-4f7b-481a-8794-c560c340185d",
    "bda648d7-d618-4883-88f4-3466eabd860e",
    "cf106601-ab4f-4af9-b045-5295fe67b37d",
    "a0c07678-e491-4bbc-8f0b-07405144218f",
    "7bd855d8-463d-4ed5-93ca-5fe35145f733",
    "5a0c1adf-205e-4841-a666-7c3ef95def9d",
})

# Extensions decoded as UTF-8 text. ".docx" is handled separately and every
# other extension is read as raw bytes.
_TEXT_EXTENSIONS = (".txt", ".py", ".csv", ".json")


def _index_task_files(dataset, data_dir: str) -> dict:
    """Map task_id -> attachment path for examples whose file exists on disk."""
    id_to_path = {}
    for ex in dataset:
        # Only examples that declare both a path and a name have an attachment.
        if ex.get("file_path") and ex.get("file_name"):
            full_path = os.path.join(data_dir, ex["file_path"])
            if os.path.exists(full_path):
                id_to_path[ex["task_id"]] = full_path
    return id_to_path


def _read_task_file(file_name: str, file_path: str) -> "str | bytes | None":
    """Load a task attachment; return None (after logging) if it cannot be read.

    Text-like files are returned as ``str``, ``.docx`` files as the paragraphs
    joined with newlines, and anything else as ``bytes``.
    """
    # Compare case-insensitively so "DATA.TXT" is treated like "data.txt".
    lowered = file_name.lower()
    if lowered.endswith(_TEXT_EXTENSIONS):
        kind = "text"
    elif lowered.endswith(".docx"):
        kind = "docx"
    else:
        kind = "binary"  # images, audio, video, spreadsheets, ...

    try:
        if kind == "text":
            with open(file_path, "r", encoding="utf-8") as handle:
                content = handle.read()
            # Log the size only; dumping whole files floods the Space logs.
            print(f"Text file loaded ({len(content)} characters), {file_path}")
        elif kind == "docx":
            content = "\n".join(p.text for p in Document(file_path).paragraphs)
            print(f"Docx content loaded, {file_path}")
        else:
            with open(file_path, "rb") as handle:
                content = handle.read()
            print(f"Binary file loaded, {file_path}")
    except Exception as e:
        # Best effort: an unreadable attachment must not abort the whole run.
        print(f"Error reading {kind} file {file_path}: {e}")
        return None
    return content


def _submit_answers(submit_url: str, submission_data: dict) -> str:
    """POST the answers to the scoring endpoint and return a status message."""
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful!")
        return final_status
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
        except ValueError:
            # Every JSON decode error raised by requests derives from
            # ValueError, including on versions without
            # requests.exceptions.JSONDecodeError.
            error_detail += f" Response: {e.response.text[:500]}"
        else:
            # The error body is not guaranteed to be a JSON object.
            if isinstance(error_json, dict):
                detail = error_json.get("detail", e.response.text)
            else:
                detail = e.response.text
            error_detail += f" Detail: {detail}"
        status_message = f"Submission Failed: {error_detail}"
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
    print(status_message)
    return status_message


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Run the BasicAgent on the selected GAIA tasks, submit all answers,
    and report the results.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody is
            logged in. Its ``username`` is sent with the submission.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a pandas
        DataFrame with one row per attempted task, or None when the run stops
        before any task is attempted (not logged in, agent failed to build).

    Raises:
        ValueError: if the HF_TOKEN environment variable is not set.
    """
    # SPACE_ID is only used to build the public link to this Space's code.
    space_id = os.getenv("SPACE_ID")

    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None
    username = f"{profile.username}"
    print(f"User logged in: {username}")

    # 1. Instantiate Agent ( modify this part to create your agent)
    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    # 2. Download the dataset and index its attachments.
    # The token itself is not passed anywhere in this function.
    # NOTE(review): presumably huggingface_hub reads HF_TOKEN from the
    # environment to access the dataset - confirm.
    if not os.getenv("HF_TOKEN"):
        raise ValueError("HF_TOKEN environment variable is not set. Please set it in Space Secrets.")
    print("Key is good")

    data_dir = snapshot_download(
        repo_id="gaia-benchmark/GAIA",
        repo_type="dataset",
    )
    dataset = load_dataset(data_dir, "2023_level1", split="validation", cache_dir=data_dir)
    print("Dataset", dataset)
    print("Length is ", len(dataset))
    print(type(dataset))

    id_to_path = _index_task_files(dataset, data_dir)
    print(f"Mapped {len(id_to_path)} {id_to_path} question IDs to resource files.")

    # 3. Run your Agent on the targeted tasks only.
    subset = dataset.filter(lambda example: example["task_id"] in _TARGET_TASK_IDS)
    subset = subset.to_list()
    print(subset)

    results_log = []
    answers_payload = []
    for item in subset:
        print(f"ITEMS {item}")
        task_id = item.get("task_id")
        print(f"Task ID is {task_id}")
        question_text = item.get("Question")
        print(f"question_text is {question_text}")

        # Validate before touching the disk so skipped items cost no file I/O.
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue

        file_name = item.get("file_name")
        print(f"File Name {file_name}")
        file_path = id_to_path.get(task_id)
        print(f"File path {file_path}")

        file_content = None
        if file_name and file_path:
            exists = os.path.exists(file_path)
            print("Checking file path")
            print(f"Task ID: {task_id}, File Name: {file_name}, Exists: {exists}, Calculated Path: {file_path}")
            print(f"Attempting to load file at: {file_path} (Exists: {exists})")
            if exists:
                file_content = _read_task_file(file_name, file_path)

        try:
            if file_content:
                answer = agent(question_text, file_content=file_content, file_path=file_path)
            else:
                answer = agent(question_text)
        except Exception as e:
            # One failing task is logged in the results table but not submitted.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Answer": f"AGENT ERROR: {e}"})
        else:
            if not answer:
                answer = "I am unable to answer"
            answers_payload.append({"task_id": task_id, "submitted_answer": answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Answer": answer})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # In the case of an app running as a Hugging Face Space, this link points
    # toward your codebase (useful for others, so please keep it public).
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)
    submit_url = f"{DEFAULT_API_URL}/submit"

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    status_message = _submit_answers(submit_url, submission_data)
    return status_message, pd.DataFrame(results_log)
# --- Build Gradio Interface using Blocks ---
# Layout: title, instructions, login button, run button, then the two outputs
# (status text and results table) filled in by run_and_submit_all.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
        ---
        **Disclaimers:**
        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
        """
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    # Removed max_rows=10 from DataFrame constructor
    results_table = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
    )

    # The handler takes no input components; it returns (status, table).
    run_button.click(
        run_and_submit_all,
        inputs=None,
        outputs=[status_output, results_table],
    )
if __name__ == "__main__":
    # Startup banner plus informational checks of the Space environment,
    # then launch the Gradio app.
    banner = " App Starting "
    print("\n" + "-" * 30 + banner + "-" * 30)

    # Check for SPACE_HOST and SPACE_ID at startup for information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # Strip an existing ".hf.space" suffix first so the printed URL never
        # ends in ".hf.space.hf.space".
        host_label = space_host_startup.removesuffix(".hf.space")
        print(f" Runtime URL should be: https://{host_label}.hf.space")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:  # Print repo URLs if SPACE_ID is found
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    # Closing rule is as wide as the opening banner line.
    print("-" * (60 + len(banner)) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)