Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| import requests | |
| import pandas as pd | |
| from langchain_community.llms import HuggingFaceHub | |
| # from dotenv import load_dotenv # Uncomment for local testing with a .env file | |
| # For local testing, you might want to load environment variables from a .env file | |
| # (ensure .env is in .gitignore and HUGGINGFACEHUB_API_TOKEN is defined in it) | |
| # if os.path.exists(".env"): | |
| # load_dotenv() | |
| # --- Constants --- | |
| DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" | |
| # --- Basic Agent Definition -- (HuggingFaceHub Agent Activated) --- | |
| class BasicAgent: | |
| def __init__(self, hf_api_token: str | None = None): | |
| print("BasicAgent initializing with HuggingFaceHub...") | |
| token_to_use = hf_api_token or os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN") | |
| if not token_to_use: | |
| raise ValueError( | |
| "Hugging Face API token not found. Please set HUGGINGFACEHUB_API_TOKEN or HF_TOKEN " | |
| "as a secret in your Hugging Face Space. This token is required for the LLM." | |
| ) | |
| self.llm_repo_id = "mistralai/Mistral-7B-Instruct-v0.1" | |
| # Other options: | |
| # self.llm_repo_id = "HuggingFaceH4/zephyr-7b-beta" | |
| # self.llm_repo_id = "google/gemma-7b-it" # Ensure you have access/agreed to terms | |
| try: | |
| self.llm = HuggingFaceHub( | |
| repo_id=self.llm_repo_id, | |
| task="text-generation", # Explicitly set the task for instruct models | |
| model_kwargs={ | |
| "temperature": 0.1, | |
| "max_new_tokens": 1024 # Increased slightly for potentially longer reasoning or verbosity | |
| }, | |
| huggingfacehub_api_token=token_to_use | |
| ) | |
| print(f"BasicAgent initialized with LLM: {self.llm_repo_id}") | |
| except Exception as e: | |
| print(f"Error initializing HuggingFaceHub: {e}") | |
| # Added more detail to the error message | |
| raise ValueError( | |
| f"Failed to initialize LLM ({self.llm_repo_id}): {e}. " | |
| "Check token, model repo_id, and ensure 'huggingface_hub>=0.20.2' is in requirements.txt." | |
| ) | |
| def __call__(self, question: str, task_id: str | None = None) -> str: | |
| print(f"Agent (HF) received question (Task ID: {task_id}, first 80 chars): {question[:80]}...") | |
| current_prompt = f"""You are a diligent and highly intelligent AI assistant. Your goal is to answer the given `Question` accurately and concisely. | |
| If the question requires multiple steps or information from tools, think step-by-step. | |
| **Available Tools (Conceptual - for your reasoning process, actual tool calls are not implemented in this version):** | |
| 1. **`GAIAFileLookup(filename: str) -> str`**: Retrieves file content. | |
| 2. **`Calculator(expression: str) -> str`**: Performs calculations. | |
| 3. **`LLM_Query(sub_question: str) -> str`**: For general knowledge. | |
| **Output Format Expectation:** | |
| While you might reason using a "Thought:", "Action:", "Observation:" cycle internally, for this specific task, your final output should be ONLY the direct answer to the question. | |
| Example: If asked "What is 2+2?", your output should be "4". | |
| **Key Guidelines for GAIA Submission:** | |
| 1. **Conciseness:** The final answer must be precise and directly address the question. | |
| 2. **No "FINAL ANSWER" Prefix in Submission:** Do NOT include "FINAL ANSWER:" or "The answer is:" in your actual response. Just the answer value. | |
| --- | |
| Now, please answer the following question: | |
| Question: {question} | |
| Answer:""" | |
| try: | |
| print(f"Sending to LLM (HF Hub) (first 200 chars of prompt): {current_prompt[:200]}...") | |
| response_text = self.llm.invoke(current_prompt) | |
| answer = response_text.strip() | |
| # Clean the answer | |
| # If the model includes the "Answer:" prompt in its response | |
| if "Answer:" in answer: | |
| answer = answer.split("Answer:")[-1].strip() | |
| common_prefixes_to_remove = [ | |
| "The answer is", "My answer is", "Based on the information", "The final answer is", | |
| "Here is the answer", "I found that", "It seems that" | |
| ] | |
| for prefix in common_prefixes_to_remove: | |
| if answer.lower().startswith(prefix.lower()): | |
| answer = answer[len(prefix):].strip() | |
| if answer.startswith(":") or answer.startswith("."): | |
| answer = answer[1:].strip() | |
| break | |
| # Remove "Final Answer:" if present (as per GAIA guidelines for submission) | |
| if "Final Answer:" in answer: | |
| answer = answer.split("Final Answer:")[-1].strip() | |
| print(f"Agent (HF) LLM raw response (first 80 chars): {response_text[:80]}...") | |
| print(f"Agent (HF) cleaned answer (first 80 chars): {answer[:80]}...") | |
| if not answer: | |
| print("Warning: Agent (HF) produced an empty answer after cleaning.") | |
| return "AGENT_ERROR: LLM produced an empty answer." | |
| return answer | |
| except Exception as e: | |
| print(f"Error during LLM call for question '{question[:50]}...': {e}") | |
| # Check if the error is the specific AttributeError again | |
| if isinstance(e, AttributeError) and "'InferenceClient' object has no attribute 'post'" in str(e): | |
| return (f"AGENT_ERROR: LLM call failed. ({type(e).__name__}: {str(e)}). " | |
| "This often indicates an issue with the 'huggingface_hub' library version. " | |
| "Please ensure 'huggingface-hub>=0.20.2' is in your requirements.txt.") | |
| return f"AGENT_ERROR: LLM call failed. ({type(e).__name__}: {str(e)})" | |
# --- Evaluation runner: fetch the questions, run BasicAgent on each, submit the answers ---
# NOTE: run_and_submit_all relies on the BasicAgent class defined above in this file.
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the BasicAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody is
            logged in. Only ``profile.username`` is read.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a pandas
        DataFrame with one row per processed question ("Task ID",
        "Question", "Submitted Answer"), or None when the run stops before
        any question is processed (no login, agent init failure, fetch
        failure). Errors are reported through ``status_message``.
    """
    space_id = os.getenv("SPACE_ID")
    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        # If running locally without login for testing, you can set a default username
        # For submission to the leaderboard, login is required.
        # username = "local-test-user"
        print("User not logged in.")
        return "Please Login to Hugging Face with the button to submit.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent
    try:
        # No explicit token is passed; the agent reads it from the environment.
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        # Return the more detailed error from agent init if it fails
        return f"Error initializing agent: {str(e)}", None
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run_no_space_id"
    print(f"Agent code link: {agent_code}")

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=20)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.JSONDecodeError as e:
        # Must come before RequestException: requests' JSONDecodeError is a
        # subclass of it, so the generic handler would otherwise shadow this one.
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")  # Log part of the response
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:  # Catch any other unexpected errors
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for i, item in enumerate(questions_data):
        # A malformed (non-dict) entry is skipped instead of crashing the run.
        if not isinstance(item, dict):
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        print(f"\nProcessing question {i+1}/{len(questions_data)}, Task ID: {task_id}")
        try:
            submitted_answer = agent(question_text, task_id=task_id)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:  # Catch errors from the agent call itself
            print(f"Error running agent on task {task_id}: {e}")
            error_answer = f"AGENT_RUNTIME_ERROR: {type(e).__name__}"
            answers_payload.append({"task_id": task_id, "submitted_answer": error_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    # Every remaining return path reports the same table, so build it once.
    results_df = pd.DataFrame(results_log)
    if not answers_payload:  # Handle case where no answers were generated
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", results_df

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        # Longer timeout than the fetch: the server might be busy scoring.
        response = requests.post(submit_url, json=submission_data, timeout=120)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()  # Try to get JSON error detail
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:  # If response is not JSON
            error_detail += f" Response: {e.response.text[:500]}"  # Log first 500 chars
        status_message = f"Submission Failed: {error_detail}"
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
    except requests.exceptions.RequestException as e:  # Catch other requests errors
        status_message = f"Submission Failed: Network error - {e}"
    except Exception as e:  # Catch any other unexpected errors during submission
        status_message = f"An unexpected error occurred during submission: {e}"
    # Shared tail for every failure branch above.
    print(status_message)
    return status_message, results_df
| # --- Build Gradio Interface using Blocks --- | |
# Page layout: components are created top to bottom inside the Blocks context,
# so statement order here is the on-screen order.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    # Static usage instructions and disclaimers shown above the controls.
    gr.Markdown(
        """
**Instructions:**
1. This Space uses a `BasicAgent` with an LLM from HuggingFace Hub. Ensure you have set your `HUGGINGFACEHUB_API_TOKEN` or `HF_TOKEN` in the Space secrets for the LLM to work.
2. **Crucial:** Ensure your `requirements.txt` file includes `huggingface-hub>=0.20.2` to prevent common LLM call errors.
3. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
4. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
---
**Disclaimers:**
Once clicking on the "submit" button, it can take quite some time (this is the time for the agent to go through all the questions using an LLM).
This space provides a basic setup. For better GAIA scores, you might need to:
- Choose a more powerful LLM (e.g., from the `llm_repo_id` options in `BasicAgent` or others).
- Implement a proper ReAct loop with tool parsing and execution.
- Implement actual tool usage (e.g., fetching files via `/files/{task_id}`, using a calculator, web search, vision models). The current agent is purely LLM-based and cannot use external tools or files.
"""
    )
    # The login button is created for its UI effect only; the variable is not used again.
    login_button = gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    # Receives the first element of run_and_submit_all's return tuple (status text).
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    # Receives the second element (per-question results table, or None).
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    # No `inputs=` is given: the callback's only parameter is the OAuth profile.
    run_button.click(
        fn=run_and_submit_all,
        # Gradio automatically passes gr.OAuthProfile if type-hinted and user is logged in
        outputs=[status_output, results_table]
    )
if __name__ == "__main__":
    # Startup diagnostics: report the Space environment, token presence and
    # huggingface_hub version, then launch the Gradio app.
    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if not space_host_startup:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
    else:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")

    if not space_id_startup:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
    else:
        repo_url = f"https://huggingface.co/spaces/{space_id_startup}"
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: {repo_url}")
        print(f" Repo Tree URL: {repo_url}/tree/main")

    # Either variable is accepted as the LLM token; only presence is checked here.
    if os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN"):
        print("✅ HUGGINGFACEHUB_API_TOKEN or HF_TOKEN found (or assumed to be set).")
    else:
        print("⚠️ WARNING: HUGGINGFACEHUB_API_TOKEN or HF_TOKEN environment variable not found.")
        print(" The LLM agent will likely fail to initialize. Please set this token in your Space secrets.")

    # Informative version check only; the real pin belongs in requirements.txt.
    try:
        import huggingface_hub
        print(f"✅ Found huggingface_hub version: {huggingface_hub.__version__}")
        # Compare the first three dotted components numerically; a component
        # that is not a plain integer lands in the generic handler below.
        version_parts = tuple(int(part) for part in huggingface_hub.__version__.split('.')[:3])
        if version_parts < (0, 20, 2):
            print("⚠️ WARNING: Your huggingface_hub version is older than 0.20.2. "
                  "This might lead to errors. Please update it in requirements.txt to 'huggingface-hub>=0.20.2'.")
    except ImportError:
        print("⚠️ WARNING: huggingface_hub library not found. Please add it to requirements.txt.")
    except Exception as e:
        print(f"ℹ️ Could not determine huggingface_hub version: {e}")

    # Closing rule has the same width as the opening banner line.
    print("-" * (60 + len(" App Starting ")) + "\n")
    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)  # debug=True can be helpful for local dev