Deltacorvi committed on
Commit
ec342d2
·
verified ·
1 Parent(s): aa12172

Upload 3 files

Browse files
Files changed (3) hide show
  1. agent_utilities.py +79 -0
  2. app.py +313 -0
  3. requirements.txt +16 -0
agent_utilities.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import PythonInterpreterTool, tool
2
+ import requests
3
+ import json
4
+
5
@tool
def TextInverterTool(input_string: str) -> str:
    """
    Inverts the order of characters in a given text string.

    Args:
        input_string: Text string to be inverted

    Returns:
        str: Character-reversed version of the input text
    """
    # NOTE(review): the docstring above is presumably read by the @tool
    # decorator to describe this tool to the model, so its wording is kept.
    # A full-range slice with step -1 walks the text from its last character
    # to its first.
    back_to_front = slice(None, None, -1)
    return input_string[back_to_front]
17
+
18
+
19
@tool
def PythonScriptExecutor(script_location: str) -> str:
    """
    Loads and executes Python code from a specified file path using interpreter tools.

    Args:
        script_location: Complete file system path to the Python script (.py extension)

    Returns:
        str: Execution results or error description if the operation fails
    """
    # Read the script first so a missing file is reported separately from
    # failures that happen while the code is running.
    try:
        with open(script_location, "r", encoding="utf-8") as file_handle:
            python_code = file_handle.read()
    except FileNotFoundError:
        return f"File not found: {script_location}"
    except Exception as error:
        return f"Script execution error: {str(error)}"

    try:
        code_interpreter = PythonInterpreterTool()
        # smolagents tools are invoked by calling the instance, which forwards
        # the keyword arguments to the tool's forward() method. The earlier
        # `.run({"code": ...})` call targeted a method the tool does not
        # expose, so every invocation ended in the generic error branch.
        execution_result = code_interpreter(code=python_code)
    except Exception as error:
        return f"Script execution error: {str(error)}"

    # NOTE(review): the interpreter is expected to return text; the dict branch
    # only keeps the previous "output" key contract working if a mapping ever
    # comes back -- confirm against the installed smolagents version.
    if isinstance(execution_result, dict):
        execution_result = execution_result.get("output")

    if execution_result is None or execution_result == "":
        return "Execution completed without output."
    return str(execution_result)
44
+
45
+
46
@tool
def WebFileDownloader(source_url: str, destination_path: str) -> str:
    """
    Retrieves a file from a web URL and stores it locally at the specified path.

    Args:
        source_url: Web address of the file to download
        destination_path: Local filesystem path for saving the downloaded content

    Returns:
        str: Status message describing the download operation result
    """
    # Browser-like User-Agent sent with the request
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }

    try:
        reply = requests.get(source_url, headers=browser_headers, timeout=45)
        reply.raise_for_status()

        # The whole body is held in memory and written out in one call
        payload = reply.content
        with open(destination_path, "wb") as sink:
            sink.write(payload)

        return f"Successfully downloaded {len(payload)} bytes to {destination_path}"

    # RequestException must be matched before IOError: it derives from it,
    # so the reverse order would mislabel network failures as file errors.
    except requests.exceptions.RequestException as req_error:
        return f"Download request failed: {str(req_error)}"
    except IOError as io_error:
        return f"File save operation failed: {str(io_error)}"
    except Exception as general_error:
        return f"Unexpected download error: {str(general_error)}"
app.py ADDED
@@ -0,0 +1,313 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import gradio as gr
4
+ import requests
5
+ import pandas as pd
6
+ import logging
7
+ from datetime import datetime
8
+ from typing import Optional, Dict, List, Any
9
+
10
+ from smolagents import LiteLLMModel, CodeAgent, DuckDuckGoSearchTool
11
+ from agent_utilities import TextInverterTool, PythonScriptExecutor, WebFileDownloader
12
+
13
# Root logging setup: INFO and above, each record prefixed with time and level
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-scoped logger used by every function in this file
logger = logging.getLogger(__name__)
16
+
17
+ # Enhanced system prompt with detailed instructions
18
+ AGENT_SYSTEM_INSTRUCTIONS = """You are an advanced AI assistant designed to solve complex problems systematically.
19
+ When presented with a question, analyze it thoroughly and provide a comprehensive response.
20
+
21
+ Your final answer should be concise and direct - provide just the essential information requested.
22
+ - For numerical answers: provide only the number without currency symbols, percentages, or formatting unless explicitly required
23
+ - For text answers: use minimal words, avoid articles, write numbers as digits unless instructed otherwise
24
+ - For lists: use comma-separated format without additional formatting
25
+
26
+ Strategic Tool Usage:
27
+ 1. **Exclusive Tool Usage**: Only use the tools provided in your toolkit - no external tools or libraries
28
+ 2. **Sequential Processing**: Execute one tool operation per step for clear reasoning
29
+ 3. **Python Execution Priority**: When questions involve .py files or Python scripts, use PythonScriptExecutor immediately
30
+ 4. **Text Decoding**: If input appears reversed or encoded (begins with punctuation, reads backwards), apply TextInverterTool first
31
+ 5. **File Operations**: For downloading requirements, always use WebFileDownloader with appropriate paths
32
+ 6. **Logical Problem Solving**: Handle puzzles and logic problems directly unless they require text reversal
33
+ 7. **Persistent Problem Solving**: If initial approaches fail, iterate with alternative strategies using available tools
34
+ 8. **Search Optimization**: Keep web searches focused and concise due to context limitations
35
+
36
+ Remember: Every problem has a solution - explore different approaches if needed.
37
+ """
38
+
39
# Remote service and model selection
# Base URL of the scoring service; endpoint paths are appended to it
API_ENDPOINT_BASE = "https://agents-course-unit4-scoring.hf.space"
# Model identifier handed to the LiteLLM model wrapper
GEMINI_MODEL_ID = "gemini/gemini-2.0-flash-lite"
42
+
43
class EnhancedAIAgent:
    """Wrap a smolagents CodeAgent driven by a Gemini model served through LiteLLM.

    Construction reads GEMINI_API_KEY from the environment, builds the model,
    and registers the search, text-reversal, script-execution and download
    tools. Any failure during construction is logged and re-raised.
    """

    def __init__(self):
        self._initialize_model()
        self._setup_agent()
        logger.info("Enhanced AI Agent initialized successfully")

    def _initialize_model(self):
        """Build the LiteLLM model and store it on ``self.llm_model``.

        Raises:
            EnvironmentError: if GEMINI_API_KEY is unset or empty.
            Exception: whatever LiteLLMModel raises, after logging it.
        """
        gemini_key = os.getenv("GEMINI_API_KEY")
        if not gemini_key:
            error_msg = "GEMINI_API_KEY environment variable is required but not found"
            logger.error(error_msg)
            raise EnvironmentError(error_msg)

        try:
            # NOTE(review): system_prompt is passed as an extra keyword; confirm
            # the installed LiteLLMModel actually honours it instead of
            # forwarding it to the provider as an unknown parameter.
            self.llm_model = LiteLLMModel(
                model_id=GEMINI_MODEL_ID,
                api_key=gemini_key,
                system_prompt=AGENT_SYSTEM_INSTRUCTIONS
            )
            logger.info(f"LiteLLM model configured with {GEMINI_MODEL_ID}")
        except Exception as e:
            logger.error(f"Model initialization failed: {str(e)}")
            raise

    def _setup_agent(self):
        """Create the CodeAgent with the custom tools and store it on ``self.ai_agent``.

        Raises:
            Exception: whatever CodeAgent raises, after logging it.
        """
        tool_collection = [
            DuckDuckGoSearchTool(),
            TextInverterTool,
            PythonScriptExecutor,
            WebFileDownloader
        ]

        try:
            self.ai_agent = CodeAgent(
                tools=tool_collection,
                model=self.llm_model,
                add_base_tools=True,
            )
            logger.info(f"Code agent configured with {len(tool_collection)} custom tools")
        except Exception as e:
            logger.error(f"Agent setup failed: {str(e)}")
            raise

    def process_query(self, query_text: str) -> str:
        """Run the agent on one question and return its answer as text.

        Args:
            query_text: The question passed to the agent.

        Returns:
            The agent's answer converted to ``str``, or a
            "Query processing error: ..." message if the agent raised.
            This method itself never raises for agent failures.
        """
        try:
            logger.info(f"Processing query: {query_text[:100]}...")
            response = self.ai_agent.run(query_text)
        except Exception as e:
            error_response = f"Query processing error: {str(e)}"
            logger.error(error_response)
            return error_response

        logger.info("Query processed successfully")
        # The agent may hand back any Python object (number, list, ...). The
        # answer is later JSON-encoded for submission, so a single
        # non-serializable value would abort the entire upload; normalising to
        # text here also makes the declared return type true.
        if isinstance(response, str):
            return response
        return str(response)
101
+
102
def execute_evaluation_workflow(user_profile: Optional[gr.OAuthProfile]) -> tuple[str, Optional[pd.DataFrame]]:
    """Fetch the evaluation questions, answer them with the agent, and submit the answers.

    Args:
        user_profile: Profile of the logged-in Hugging Face user, or None when
            nobody is logged in.

    Returns:
        A ``(status_message, results_table)`` pair. The table is None when the
        run stops before any question is processed (no login, agent
        initialization failure, or question retrieval failure); otherwise it
        holds one row per processed question.
    """

    # Verify user authentication
    if not user_profile:
        logger.warning("Evaluation attempted without user authentication")
        return "Authentication required - please log in to Hugging Face first.", None

    username = user_profile.username
    space_identifier = os.getenv("SPACE_ID")
    if not space_identifier:
        # The code link below is still built so the payload shape is unchanged,
        # but it will not point at a real Space.
        logger.warning("SPACE_ID is not set; the submitted agent_code link will be invalid")
    logger.info(f"Starting evaluation workflow for user: {username}")

    # API endpoint configuration
    questions_endpoint = f"{API_ENDPOINT_BASE}/questions"
    submission_endpoint = f"{API_ENDPOINT_BASE}/submit"

    # Initialize AI agent
    try:
        ai_agent = EnhancedAIAgent()
        logger.info("AI agent initialized for evaluation")
    except Exception as initialization_error:
        error_message = f"Agent initialization error: {str(initialization_error)}"
        logger.error(error_message)
        return error_message, None

    # Retrieve evaluation questions
    try:
        logger.info("Fetching evaluation questions...")
        questions_response = requests.get(questions_endpoint, timeout=20)
        questions_response.raise_for_status()
        questions_dataset = questions_response.json()
    except Exception as fetch_error:
        error_message = f"Questions retrieval error: {str(fetch_error)}"
        logger.error(error_message)
        return error_message, None

    # Anything other than a non-empty list cannot be iterated as questions;
    # without this guard a JSON object would be walked key by key and crash
    # on the first .get() call.
    if not isinstance(questions_dataset, list) or not questions_dataset:
        error_message = "Questions retrieval error: endpoint returned an empty or malformed question list"
        logger.error(error_message)
        return error_message, None
    logger.info(f"Retrieved {len(questions_dataset)} evaluation questions")

    # Process each question
    evaluation_log = []
    submission_answers = []

    for idx, question_item in enumerate(questions_dataset, 1):
        if not isinstance(question_item, dict):
            logger.warning(f"Skipping invalid question item at index {idx}")
            continue

        task_identifier = question_item.get("task_id")
        question_content = question_item.get("question")

        if not task_identifier or question_content is None:
            logger.warning(f"Skipping invalid question item at index {idx}")
            continue

        logger.info(f"Processing question {idx}/{len(questions_dataset)}: {task_identifier}")

        try:
            agent_response = ai_agent.process_query(question_content)

            # Store results
            submission_answers.append({
                "task_id": task_identifier,
                "submitted_answer": agent_response
            })

            evaluation_log.append({
                "Task ID": task_identifier,
                "Question": question_content,
                "Agent Response": agent_response,
                "Status": "Success"
            })

            logger.info(f"Question {task_identifier} processed successfully")

        except Exception as processing_error:
            # Failed questions are shown in the table but not submitted
            error_response = f"PROCESSING_ERROR: {str(processing_error)}"
            evaluation_log.append({
                "Task ID": task_identifier,
                "Question": question_content,
                "Agent Response": error_response,
                "Status": "Failed"
            })
            logger.error(f"Failed to process question {task_identifier}: {str(processing_error)}")

    # Validate submission data
    if not submission_answers:
        logger.warning("No valid answers generated for submission")
        return "No answers were generated by the agent.", pd.DataFrame(evaluation_log)

    # Prepare submission payload
    submission_payload = {
        "username": username.strip(),
        "agent_code": f"https://huggingface.co/spaces/{space_identifier}/tree/main",
        "answers": submission_answers
    }

    # Submit answers for evaluation
    try:
        logger.info("Submitting answers for evaluation...")
        submission_response = requests.post(
            submission_endpoint,
            json=submission_payload,
            timeout=90
        )
        submission_response.raise_for_status()
        result_data = submission_response.json()

        # Format success response
        success_message = (
            f"🎉 Evaluation Completed Successfully!\n"
            f"👤 User: {result_data.get('username', 'Unknown')}\n"
            f"📊 Final Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"💬 System Message: {result_data.get('message', 'No additional information.')}\n"
            f"⏰ Completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        )

        logger.info(f"Submission successful - Score: {result_data.get('score', 'N/A')}%")
        return success_message, pd.DataFrame(evaluation_log)

    except Exception as submission_error:
        error_message = f"Answer submission failed: {str(submission_error)}"
        logger.error(error_message)
        return error_message, pd.DataFrame(evaluation_log)
221
+
222
# Gradio interface configuration
def create_gradio_interface():
    """Build the Gradio Blocks UI and wire the start button to the evaluation workflow.

    Returns:
        The configured ``gr.Blocks`` instance, not yet launched.
    """

    interface_theme = gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="slate",
    )

    with gr.Blocks(theme=interface_theme, title="AI Agent Evaluation Platform") as interface:

        # Header section
        gr.Markdown("""
        # 🤖 Advanced AI Agent Evaluation Platform

        **Welcome to the comprehensive AI agent testing environment!**

        ### Getting Started:
        1. 🔑 **Setup**: Clone this space and configure your Gemini API key in the environment
        2. 🔐 **Authentication**: Log in using your Hugging Face account credentials
        3. 🚀 **Execute**: Run the complete evaluation suite and submit your results
        4. 📈 **Review**: Analyze performance metrics and detailed response logs
        """)

        # Authentication section
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 🔐 Authentication")
                # The button only needs to exist in the layout; nothing
                # references it afterwards, so it is not bound to a name.
                gr.LoginButton(value="Connect to Hugging Face")

            with gr.Column(scale=2):
                gr.Markdown("### 📋 Evaluation Status")
                status_display = gr.Textbox(
                    label="Current Status",
                    lines=6,
                    interactive=False,
                    placeholder="Ready to begin evaluation..."
                )

        # Control section
        gr.Markdown("### 🎯 Evaluation Controls")
        with gr.Row():
            execute_button = gr.Button(
                "🚀 Start Complete Evaluation",
                variant="primary",
                size="lg"
            )

        # Results section
        gr.Markdown("### 📊 Detailed Results")
        results_dataframe = gr.DataFrame(
            label="Evaluation Results",
            wrap=True
        )

        # Footer
        gr.Markdown("""
        ---
        **Note**: This platform uses Gemini 2.0 Flash Lite for AI processing.
        Ensure your API key has sufficient quota for evaluation tasks.
        """)

        # Event handlers
        # NOTE(review): no explicit inputs are listed; the handler's
        # OAuthProfile parameter is presumably injected by Gradio from its
        # type annotation -- confirm against the Gradio version in use.
        execute_button.click(
            fn=execute_evaluation_workflow,
            inputs=[],
            outputs=[status_display, results_dataframe]
        )

    return interface
292
+
293
# Application entry point
def main():
    """Build the Gradio interface and launch it.

    Any exception raised while building or launching is logged with its
    traceback and the process exits with status 1.
    """
    print("🚀 Initializing Advanced AI Agent Evaluation Platform...")
    print(f"⏰ Startup Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    try:
        interface = create_gradio_interface()
        print("✅ Interface created successfully")

        interface.launch(
            debug=True,
            share=False,
            show_error=True
        )
    except Exception as e:
        # logger.exception keeps the traceback, which a bare error message
        # would drop right before the process exits.
        logger.exception(f"Application startup failed: {str(e)}")
        sys.exit(1)


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ smolagents
3
+ requests
4
+ pandas
5
+ litellm
6
+ duckduckgo-search
7
+ typing-extensions
8
+ python-dotenv
9
+ numpy
10
+ matplotlib
11
+ seaborn
12
+ plotly
13
+ openpyxl
14
+ xlsxwriter
15
+
16
+