Final_Assignment_Template

Configuration error

App Files Files Community

Deltacorvi committed on Jun 7, 2025

Commit

ec342d2

verified ·

1 Parent(s): aa12172

Upload 3 files

Browse files

Files changed (3) hide show

agent_utilities.py +79 -0
app.py +313 -0
requirements.txt +16 -0

agent_utilities.py ADDED Viewed

	@@ -0,0 +1,79 @@

+from smolagents import PythonInterpreterTool, tool
+import requests
+import json
+@tool
+def TextInverterTool(input_string: str) -> str:
+    """
+    Inverts the order of characters in a given text string.
+    Args:
+        input_string: Text string to be inverted
+    Returns:
+        str: Character-reversed version of the input text
+    """
+    return input_string[::-1]
+@tool
+def PythonScriptExecutor(script_location: str) -> str:
+    """
+    Loads and executes Python code from a specified file path using interpreter tools.
+    Args:
+        script_location: Complete file system path to the Python script (.py extension)
+    Returns:
+        str: Execution results or error description if the operation fails
+    """
+    try:
+        # Read the Python file content
+        with open(script_location, "r", encoding='utf-8') as file_handle:
+            python_code = file_handle.read()
+        # Initialize interpreter and execute
+        code_interpreter = PythonInterpreterTool()
+        execution_result = code_interpreter.run({"code": python_code})
+        return execution_result.get("output", "Execution completed without output.")
+    except FileNotFoundError:
+        return f"File not found: {script_location}"
+    except Exception as error:
+        return f"Script execution error: {str(error)}"
+@tool
+def WebFileDownloader(source_url: str, destination_path: str) -> str:
+    """
+    Retrieves a file from a web URL and stores it locally at the specified path.
+    Args:
+        source_url: Web address of the file to download
+        destination_path: Local filesystem path for saving the downloaded content
+    Returns:
+        str: Status message describing the download operation result
+    """
+    try:
+        # Configure request with headers and timeout
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+        }
+        web_response = requests.get(source_url, headers=headers, timeout=45)
+        web_response.raise_for_status()
+        # Save file content to destination
+        with open(destination_path, "wb") as output_file:
+            output_file.write(web_response.content)
+        file_size = len(web_response.content)
+        return f"Successfully downloaded {file_size} bytes to {destination_path}"
+    except requests.exceptions.RequestException as req_error:
+        return f"Download request failed: {str(req_error)}"
+    except IOError as io_error:
+        return f"File save operation failed: {str(io_error)}"
+    except Exception as general_error:
+        return f"Unexpected download error: {str(general_error)}"

app.py ADDED Viewed

	@@ -0,0 +1,313 @@

+import os
+import sys
+import gradio as gr
+import requests
+import pandas as pd
+import logging
+from datetime import datetime
+from typing import Optional, Dict, List, Any
+from smolagents import LiteLLMModel, CodeAgent, DuckDuckGoSearchTool
+from agent_utilities import TextInverterTool, PythonScriptExecutor, WebFileDownloader
+# Configure logging
+# Root logging setup: INFO level, each record rendered as "<time> - <level> - <message>".
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# Module-level logger used by the agent wrapper, the evaluation workflow and main().
+logger = logging.getLogger(__name__)
+# Enhanced system prompt with detailed instructions
+# Passed to LiteLLMModel as `system_prompt` in EnhancedAIAgent._initialize_model.
+# The tool names it mentions (PythonScriptExecutor, TextInverterTool, WebFileDownloader)
+# are the ones imported from agent_utilities; keep them in sync if a tool is renamed.
+AGENT_SYSTEM_INSTRUCTIONS = """You are an advanced AI assistant designed to solve complex problems systematically.
+When presented with a question, analyze it thoroughly and provide a comprehensive response.
+Your final answer should be concise and direct - provide just the essential information requested.
+- For numerical answers: provide only the number without currency symbols, percentages, or formatting unless explicitly required
+- For text answers: use minimal words, avoid articles, write numbers as digits unless instructed otherwise
+- For lists: use comma-separated format without additional formatting
+Strategic Tool Usage:
+1. **Exclusive Tool Usage**: Only use the tools provided in your toolkit - no external tools or libraries
+2. **Sequential Processing**: Execute one tool operation per step for clear reasoning
+3. **Python Execution Priority**: When questions involve .py files or Python scripts, use PythonScriptExecutor immediately
+4. **Text Decoding**: If input appears reversed or encoded (begins with punctuation, reads backwards), apply TextInverterTool first
+5. **File Operations**: For downloading requirements, always use WebFileDownloader with appropriate paths
+6. **Logical Problem Solving**: Handle puzzles and logic problems directly unless they require text reversal
+7. **Persistent Problem Solving**: If initial approaches fail, iterate with alternative strategies using available tools
+8. **Search Optimization**: Keep web searches focused and concise due to context limitations
+Remember: Every problem has a solution - explore different approaches if needed.
+"""
+# Configuration constants
+# Base URL of the scoring service; execute_evaluation_workflow appends
+# "/questions" and "/submit" to it.
+API_ENDPOINT_BASE = "https://agents-course-unit4-scoring.hf.space"
+# Model identifier handed to LiteLLMModel as `model_id`.
+GEMINI_MODEL_ID = "gemini/gemini-2.0-flash-lite"
+class EnhancedAIAgent:
+    """Enhanced AI agent wrapper with improved error handling and logging"""
+    def __init__(self):
+        self._initialize_model()
+        self._setup_agent()
+        logger.info("Enhanced AI Agent initialized successfully")
+    def _initialize_model(self):
+        """Initialize the LiteLLM model with Gemini configuration"""
+        gemini_key = os.getenv("GEMINI_API_KEY")
+        if not gemini_key:
+            error_msg = "GEMINI_API_KEY environment variable is required but not found"
+            logger.error(error_msg)
+            raise EnvironmentError(error_msg)
+        try:
+            self.llm_model = LiteLLMModel(
+                model_id=GEMINI_MODEL_ID,
+                api_key=gemini_key,
+                system_prompt=AGENT_SYSTEM_INSTRUCTIONS
+            )
+            logger.info(f"LiteLLM model configured with {GEMINI_MODEL_ID}")
+        except Exception as e:
+            logger.error(f"Model initialization failed: {str(e)}")
+            raise
+    def _setup_agent(self):
+        """Configure the code agent with available tools"""
+        tool_collection = [
+            DuckDuckGoSearchTool(),
+            TextInverterTool,
+            PythonScriptExecutor,
+            WebFileDownloader
+        ]
+        try:
+            self.ai_agent = CodeAgent(
+                tools=tool_collection,
+                model=self.llm_model,
+                add_base_tools=True,
+            )
+            logger.info(f"Code agent configured with {len(tool_collection)} custom tools")
+        except Exception as e:
+            logger.error(f"Agent setup failed: {str(e)}")
+            raise
+    def process_query(self, query_text: str) -> str:
+        """Process a query and return the agent's response"""
+        try:
+            logger.info(f"Processing query: {query_text[:100]}...")
+            response = self.ai_agent.run(query_text)
+            logger.info("Query processed successfully")
+            return response
+        except Exception as e:
+            error_response = f"Query processing error: {str(e)}"
+            logger.error(error_response)
+            return error_response
+def execute_evaluation_workflow(user_profile: Optional[gr.OAuthProfile]) -> tuple[str, Optional[pd.DataFrame]]:
+    """Main evaluation workflow function"""
+    # Verify user authentication
+    if not user_profile:
+        logger.warning("Evaluation attempted without user authentication")
+        return "Authentication required - please log in to Hugging Face first.", None
+    username = user_profile.username
+    space_identifier = os.getenv("SPACE_ID")
+    logger.info(f"Starting evaluation workflow for user: {username}")
+    # API endpoint configuration
+    questions_endpoint = f"{API_ENDPOINT_BASE}/questions"
+    submission_endpoint = f"{API_ENDPOINT_BASE}/submit"
+    # Initialize AI agent
+    try:
+        ai_agent = EnhancedAIAgent()
+        logger.info("AI agent initialized for evaluation")
+    except Exception as initialization_error:
+        error_message = f"Agent initialization error: {str(initialization_error)}"
+        logger.error(error_message)
+        return error_message, None
+    # Retrieve evaluation questions
+    try:
+        logger.info("Fetching evaluation questions...")
+        questions_response = requests.get(questions_endpoint, timeout=20)
+        questions_response.raise_for_status()
+        questions_dataset = questions_response.json()
+        logger.info(f"Retrieved {len(questions_dataset)} evaluation questions")
+    except Exception as fetch_error:
+        error_message = f"Questions retrieval error: {str(fetch_error)}"
+        logger.error(error_message)
+        return error_message, None
+    # Process each question
+    evaluation_log = []
+    submission_answers = []
+    for idx, question_item in enumerate(questions_dataset, 1):
+        task_identifier = question_item.get("task_id")
+        question_content = question_item.get("question")
+        if not task_identifier or question_content is None:
+            logger.warning(f"Skipping invalid question item at index {idx}")
+            continue
+        logger.info(f"Processing question {idx}/{len(questions_dataset)}: {task_identifier}")
+        try:
+            agent_response = ai_agent.process_query(question_content)
+            # Store results
+            submission_answers.append({
+                "task_id": task_identifier,
+                "submitted_answer": agent_response
+            })
+            evaluation_log.append({
+                "Task ID": task_identifier,
+                "Question": question_content,
+                "Agent Response": agent_response,
+                "Status": "Success"
+            })
+            logger.info(f"Question {task_identifier} processed successfully")
+        except Exception as processing_error:
+            error_response = f"PROCESSING_ERROR: {str(processing_error)}"
+            evaluation_log.append({
+                "Task ID": task_identifier,
+                "Question": question_content,
+                "Agent Response": error_response,
+                "Status": "Failed"
+            })
+            logger.error(f"Failed to process question {task_identifier}: {str(processing_error)}")
+    # Validate submission data
+    if not submission_answers:
+        logger.warning("No valid answers generated for submission")
+        return "No answers were generated by the agent.", pd.DataFrame(evaluation_log)
+    # Prepare submission payload
+    submission_payload = {
+        "username": username.strip(),
+        "agent_code": f"https://huggingface.co/spaces/{space_identifier}/tree/main",
+        "answers": submission_answers
+    }
+    # Submit answers for evaluation
+    try:
+        logger.info("Submitting answers for evaluation...")
+        submission_response = requests.post(
+            submission_endpoint,
+            json=submission_payload,
+            timeout=90
+        )
+        submission_response.raise_for_status()
+        result_data = submission_response.json()
+        # Format success response
+        success_message = (
+            f"🎉 Evaluation Completed Successfully!\n"
+            f"👤 User: {result_data.get('username', 'Unknown')}\n"
+            f"📊 Final Score: {result_data.get('score', 'N/A')}% "
+            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+            f"💬 System Message: {result_data.get('message', 'No additional information.')}\n"
+            f"⏰ Completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
+        )
+        logger.info(f"Submission successful - Score: {result_data.get('score', 'N/A')}%")
+        return success_message, pd.DataFrame(evaluation_log)
+    except Exception as submission_error:
+        error_message = f"Answer submission failed: {str(submission_error)}"
+        logger.error(error_message)
+        return error_message, pd.DataFrame(evaluation_log)
+# Gradio interface configuration
+def create_gradio_interface():
+    """Create and configure the Gradio web interface"""
+    interface_theme = gr.themes.Soft(
+        primary_hue="blue",
+        secondary_hue="slate",
+    )
+    with gr.Blocks(theme=interface_theme, title="AI Agent Evaluation Platform") as interface:
+        # Header section
+        gr.Markdown("""
+        # 🤖 Advanced AI Agent Evaluation Platform
+        **Welcome to the comprehensive AI agent testing environment!**
+        ### Getting Started:
+        1. 🔑 **Setup**: Clone this space and configure your Gemini API key in the environment
+        2. 🔐 **Authentication**: Log in using your Hugging Face account credentials
+        3. 🚀 **Execute**: Run the complete evaluation suite and submit your results
+        4. 📈 **Review**: Analyze performance metrics and detailed response logs
+        """)
+        # Authentication section
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.Markdown("### 🔐 Authentication")
+                auth_button = gr.LoginButton(value="Connect to Hugging Face")
+            with gr.Column(scale=2):
+                gr.Markdown("### 📋 Evaluation Status")
+                status_display = gr.Textbox(
+                    label="Current Status",
+                    lines=6,
+                    interactive=False,
+                    placeholder="Ready to begin evaluation..."
+                )
+        # Control section
+        gr.Markdown("### 🎯 Evaluation Controls")
+        with gr.Row():
+            execute_button = gr.Button(
+                "🚀 Start Complete Evaluation",
+                variant="primary",
+                size="lg"
+            )
+        # Results section
+        gr.Markdown("### 📊 Detailed Results")
+        results_dataframe = gr.DataFrame(
+            label="Evaluation Results",
+            wrap=True
+        )
+        # Footer
+        gr.Markdown("""
+        ---
+        **Note**: This platform uses Gemini 2.0 Flash Lite for AI processing.
+        Ensure your API key has sufficient quota for evaluation tasks.
+        """)
+        # Event handlers
+        execute_button.click(
+            fn=execute_evaluation_workflow,
+            inputs=[],
+            outputs=[status_display, results_dataframe]
+        )
+    return interface
+# Application entry point
+def main():
+    """Main application entry point"""
+    print("🚀 Initializing Advanced AI Agent Evaluation Platform...")
+    print(f"⏰ Startup Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    try:
+        interface = create_gradio_interface()
+        print("✅ Interface created successfully")
+        interface.launch(
+            debug=True,
+            share=False,
+            show_error=True
+        )
+    except Exception as e:
+        logger.error(f"Application startup failed: {str(e)}")
+        sys.exit(1)
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,16 @@

+gradio
+smolagents
+requests
+pandas
+litellm
+duckduckgo-search
+typing-extensions
+python-dotenv
+numpy
+matplotlib
+seaborn
+plotly
+openpyxl
+xlsxwriter