import inspect
import json
import os
import re
from typing import Any, Dict, List, Optional

import gradio as gr
import pandas as pd
import requests

# (Keep Constants as is)
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Maximum number of characters shown per cell in the on-screen preview rows.
PREVIEW_CHARS = 100


# --- Enhanced Agent Definition ---
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
class GIAIAAgent:
    """Agent designed to answer GIAIA questions.

    Modify this class to implement your own logic for answering questions.
    Instances are callable: ``agent(question)`` returns the answer string.
    """

    def __init__(self):
        """Initialize your agent with any necessary tools, models, or resources."""
        print("GIAIA Agent initialized.")
        # TODO: Initialize your tools, models, or APIs here
        # Example:
        # self.model = load_your_model()
        # self.tools = load_your_tools()

        # You can store a cache of answers if needed
        self.answer_cache = {}

    def __call__(self, question: str) -> str:
        """Process a question and return an answer.

        Args:
            question: The question text to answer.

        Returns:
            The answer as a string. If answer generation raises, the error is
            logged and an ``"Error generating answer: ..."`` string is
            returned instead of propagating the exception.
        """
        print(f"Processing question (first 100 chars): {question[:100]}...")

        # TODO: Implement your actual question-answering logic here.
        # You might want to:
        # 1. Parse the question
        # 2. Use tools to gather information
        # 3. Process with a model
        # 4. Format the answer
        try:
            answer = self._generate_answer(question)
            print(f"Generated answer: {answer[:50]}...")
            return answer
        except Exception as e:
            print(f"Error processing question: {e}")
            return f"Error generating answer: {str(e)}"

    @staticmethod
    def _strip_phrase(question: str, phrase: str) -> str:
        """Return *question* with every case-insensitive *phrase* removed."""
        return re.sub(re.escape(phrase), "", question, flags=re.IGNORECASE).strip()

    def _generate_answer(self, question: str) -> str:
        """Generate an answer for *question* (placeholder implementation).

        Replace this with your actual implementation. Some ideas:
        - Use a language model via API
        - Use retrieval augmented generation
        - Use web search tools
        - Use a knowledge base
        - Implement specific logic for each type of question

        Args:
            question: The question text.

        Returns:
            A templated placeholder answer chosen by keyword.
        """
        # TODO: IMPLEMENT YOUR ACTUAL ANSWER GENERATION LOGIC HERE
        # This is just a simple example - REPLACE WITH REAL LOGIC!
        question_lower = question.lower()

        # Keywords are detected case-insensitively, so they must also be
        # stripped case-insensitively; otherwise "what is X" keeps its keyword.
        if "what is" in question_lower:
            topic = self._strip_phrase(question, "what is")
            return f"Based on the context, {topic} refers to a concept in the field."
        if "how to" in question_lower:
            task = self._strip_phrase(question, "how to")
            return f"To {task}, you should follow these steps: [Your solution here]"
        if "explain" in question_lower:
            subject = self._strip_phrase(question, "explain")
            return f"Here's an explanation of {subject}: [Your explanation here]"
        if "difference between" in question_lower:
            return "The main differences are: [Your comparison here]"

        # For questions without clear keywords, use a default approach.
        return f"Answer: [Your answer for: {question[:50]}...]"

    def batch_answer(self, questions: List[str]) -> List[str]:
        """Optional: process multiple questions at once.

        Args:
            questions: List of question strings.

        Returns:
            List of answer strings, in the same order as *questions*.
        """
        return [self(question) for question in questions]


def _preview(text: str, limit: int = PREVIEW_CHARS) -> str:
    """Return *text* cut to *limit* characters, with "..." appended if cut."""
    return text[:limit] + "..." if len(text) > limit else text


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, run the GIAIAAgent on them, and submit the answers.

    Args:
        profile: The logged-in user's OAuth profile, or ``None`` when the
            user is not logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` of questions and answers, or ``None`` when the
        run stopped before any question was processed.
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code

    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent (modify this part to create your agent)
    try:
        agent = GIAIAAgent()
        print("Agent instantiated successfully")
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    # In the case of an app running as a Hugging Face space, this link points
    # toward your codebase.
    agent_code = (
        f"https://huggingface.co/spaces/{space_id}/tree/main"
        if space_id
        else "Local development"
    )
    print(f"Agent code URL: {agent_code}")

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")

        # Optional: display the first few questions to see what we're dealing with.
        print("\n--- First 3 questions (preview) ---")
        for i, item in enumerate(questions_data[:3], start=1):
            # `or` also covers an explicit null question, not just a missing key.
            preview_text = item.get("question") or "No question"
            print(f"Q{i}: {preview_text[:100]}...")
        print("--- End preview ---\n")
    # JSONDecodeError is a subclass of RequestException, so it must be caught
    # first or its handler is unreachable.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent on all questions
    results_log = []  # truncated rows, shown when submission fails
    full_results_log = []  # untruncated rows, shown after a successful submission
    answers_payload = []
    total = len(questions_data)
    print(f"\nRunning GIAIA agent on {total} questions...")
    print("This may take a while depending on your implementation...")

    # Process questions one by one (or in batches if you implement batch_answer)
    for i, item in enumerate(questions_data, start=1):
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue

        print(f"Processing question {i}/{total} (Task ID: {task_id})")
        try:
            submitted_answer = agent(question_text)
            answers_payload.append(
                {"task_id": task_id, "submitted_answer": submitted_answer}
            )
            results_log.append(
                {
                    "Task ID": task_id,
                    "Question": _preview(question_text),
                    "Submitted Answer": _preview(submitted_answer),
                }
            )
            # Recorded here, per task, so each answer stays paired with its own
            # question even when other items are skipped or fail.
            full_results_log.append(
                {
                    "Task ID": task_id,
                    "Question": question_text,
                    "Submitted Answer": submitted_answer,
                }
            )
            print(f"✓ Question {i} answered")
        except Exception as e:
            print(f"✗ Error running agent on task {task_id}: {e}")
            error_answer = f"AGENT ERROR: {str(e)}"
            results_log.append(
                {
                    "Task ID": task_id,
                    "Question": _preview(question_text),
                    "Submitted Answer": error_answer,
                }
            )
            full_results_log.append(
                {
                    "Task ID": task_id,
                    "Question": question_text,
                    "Submitted Answer": error_answer,
                }
            )

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    status_update = (
        f"Agent finished. Submitting {len(answers_payload)} answers "
        f"for user '{username}'..."
    )
    print(status_update)

    # 5. Submit answers to scoring server
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/"
            f"{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        print(f"Score: {result_data.get('score', 'N/A')}%")
        return final_status, pd.DataFrame(full_results_log)
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            # The error body is not guaranteed to be a JSON object.
            detail = (
                error_json.get("detail", e.response.text)
                if isinstance(error_json, dict)
                else e.response.text
            )
            error_detail += f" Detail: {detail}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
        return status_message, pd.DataFrame(results_log)
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
        return status_message, pd.DataFrame(results_log)
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
        return status_message, pd.DataFrame(results_log)
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        return status_message, pd.DataFrame(results_log)


# --- Build Gradio Interface using Blocks ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# GIAIA Agent Evaluation Runner")
    gr.Markdown(
        """
        **Welcome to the GIAIA Agent Evaluation!**

        This space evaluates your agent on 20 GIAIA questions.

        **Instructions:**
        1. **Fork/Clone** this space to your own account
        2. **Modify the `GIAIAAgent` class** in `app.py` to implement your agent's logic
        3. Add any required **dependencies** to `requirements.txt`
        4. Log in with your Hugging Face account below
        5. Click 'Run Evaluation' to test your agent on all 20 questions
        6. View your score and detailed results

        **Tips for Implementation:**
        - The agent will be called once for each question
        - You can add tools, use APIs, or implement any logic you want
        - Consider performance - all 20 questions will be processed sequentially
        - You can implement caching if needed

        **Disclaimers:**
        - This evaluation may take some time depending on your implementation
        - Make sure to keep your space public so others can see your solution
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            gr.LoginButton()
        with gr.Column(scale=2):
            run_button = gr.Button(
                "🚀 Run Evaluation on 20 Questions", variant="primary", size="lg"
            )

    with gr.Row():
        with gr.Column():
            status_output = gr.Textbox(
                label="Run Status / Submission Result",
                lines=6,
                interactive=False,
                placeholder="Status will appear here...",
            )

    with gr.Row():
        with gr.Column():
            results_table = gr.DataFrame(
                label="Questions and Agent Answers (Preview)",
                wrap=True,
                height=400,
            )

    with gr.Row():
        with gr.Column():
            gr.Markdown(
                """
                ---
                **Need Help?**
                - Check the [documentation](https://huggingface.co/docs)
                - Modify the `GIAIAAgent._generate_answer` method with your logic
                - Add any required packages to `requirements.txt`
                """
            )

    # No explicit inputs: run_and_submit_all takes only the OAuth profile.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )


if __name__ == "__main__":
    print("\n" + "=" * 70)
    print(" GIAIA Agent Evaluation App Starting")
    print("=" * 70)

    # Check for SPACE_HOST and SPACE_ID at startup
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL: https://{space_host_startup}.hf.space")
    else:
        print("ℹ️ SPACE_HOST not found (running locally)")

    if space_id_startup:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
    else:
        print("ℹ️ SPACE_ID not found (running locally)")

    print("=" * 70 + "\n")
    print("Launching Gradio Interface...")
    print("NOTE: The agent in this template uses placeholder logic.")
    print("You MUST modify the GIAIAAgent class to implement actual answers!")
    print("-" * 70 + "\n")

    demo.launch(debug=True, share=False)