"""Test script for the OpenEnv Data Cleaning Grader."""
import pandas as pd
import numpy as np
from grader import Grader
from tasks import get_task_config


def _people_columns() -> dict:
    """Return the id/name/age/email/salary columns shared by the ``*_001`` tasks.

    A fresh dict (with fresh lists) is built on every call so that callers can
    add columns without affecting each other.
    """
    return {
        "id": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        "name": ["Alice", "Bob", "Charlie", "David", "Eve",
                 "Frank", "Grace", "Henry", "Ivy", "Jack"],
        "age": [25, 30, 35, 40, 45, 50, 55, 60, 65, 70],
        "email": ["a@example.com", "b@example.com", "c@example.com",
                  "d@example.com", "e@example.com", "f@example.com",
                  "g@example.com", "h@example.com", "i@example.com",
                  "j@example.com"],
        "salary": [50000, 60000, 70000, 80000, 90000,
                   100000, 110000, 120000, 130000, 140000],
    }


def _build_easy_dataset() -> pd.DataFrame:
    """Build the ``easy_001`` frame: two nulls plus one duplicated row."""
    df = pd.DataFrame(_people_columns())

    # Cast before inserting NaN: assigning NaN into an int64 column relies on
    # silent upcasting, which pandas deprecated (PDEP-6).
    df = df.astype({"age": "float64"})

    # Add some nulls
    df.loc[2, "age"] = np.nan
    df.loc[5, "email"] = np.nan

    # Add a duplicate (copy of the first row appended at the end)
    df = pd.concat([df, df.iloc[[0]]]).reset_index(drop=True)
    return df


def _build_medium_dataset() -> pd.DataFrame:
    """Build the ``medium_001`` frame: nulls, invalid emails and a salary outlier."""
    columns = _people_columns()
    columns["department"] = ["HR", "IT", "Finance", "IT", "HR",
                             "Finance", "IT", "HR", "Finance", "IT"]
    df = pd.DataFrame(columns)

    # Explicit cast so the NaN assignment below does not depend on upcasting.
    df = df.astype({"age": "float64"})

    # Add some nulls
    df.loc[3, "age"] = np.nan
    df.loc[7, "email"] = np.nan

    # Add invalid emails
    df.loc[1, "email"] = "invalid-email"
    df.loc[4, "email"] = "another-invalid"

    # Add outliers
    df.loc[9, "salary"] = 500000
    return df


def _build_hard_dataset() -> pd.DataFrame:
    """Build the ``hard_001`` frame: nulls, invalid email, outlier and mixed types."""
    columns = _people_columns()
    columns["department"] = ["HR", "IT", "Finance", "IT", "HR",
                             "Finance", "IT", "HR", "Finance", "IT"]
    columns["join_date"] = pd.to_datetime([
        "2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01",
        "2020-06-01", "2020-07-01", "2020-08-01", "2020-09-01", "2020-10-01",
    ])
    columns["score"] = [85, 90, 78, 92, 88, 76, 95, 89, 84, 91]
    df = pd.DataFrame(columns)

    # Numeric columns that receive NaN must be float first (no silent upcast).
    df = df.astype({"age": "float64", "salary": "float64"})

    # Add various issues
    df.loc[2, "age"] = np.nan
    df.loc[5, "email"] = np.nan
    df.loc[8, "salary"] = np.nan

    # Add invalid emails
    df.loc[1, "email"] = "invalid-email"

    # Add outliers
    df.loc[9, "salary"] = 500000

    # Add type issues. Strings cannot live in a numeric column, so convert to
    # object explicitly. "age" is converted only after the NaN was inserted so
    # its remaining values stay floats, exactly as implicit upcasting produced.
    df = df.astype({"id": object, "age": object})
    df.loc[3, "id"] = "three"
    df.loc[6, "age"] = "fifty-five"
    return df


def _build_employee_dataset() -> pd.DataFrame:
    """Build the ``employee_demo`` frame: nulls, a duplicate, casing issues, an outlier."""
    data = {
        "Education": ["Bachelor", "Master", "PhD", "Bachelor", "Master",
                      "PhD", "Bachelor", "Master", "PhD", "Bachelor"],
        "JoiningYear": [2018, 2019, 2020, 2018, 2019, 2020, 2018, 2019, 2020, 2018],
        "City": ["New York", "Los Angeles", "Chicago", "Houston", "Phoenix",
                 "Philadelphia", "San Antonio", "San Diego", "Dallas", "San Jose"],
        "PaymentTier": ["Gold", "Silver", "Bronze", "Gold", "Silver",
                        "Bronze", "Gold", "Silver", "Bronze", "Gold"],
        "Age": [25, 30, 35, 40, 45, 50, 55, 60, 65, 70],
        "Gender": ["Male", "Female", "Male", "Female", "Male",
                   "Female", "Male", "Female", "Male", "Female"],
        "EverBenched": [0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
        "ExperienceInCurrentDomain": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
        "LeaveOrNot": [0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
    }
    df = pd.DataFrame(data)

    # Explicit cast so the NaN assignment below does not depend on upcasting.
    df = df.astype({"Age": "float64"})

    # Add some nulls
    df.loc[2, "Age"] = np.nan
    df.loc[5, "Gender"] = np.nan

    # Add duplicates (copy of the first row appended at the end)
    df = pd.concat([df, df.iloc[[0]]]).reset_index(drop=True)

    # Add formatting issues
    df.loc[1, "Education"] = "bachelor"
    df.loc[4, "Gender"] = "female"
    df.loc[7, "City"] = "los angeles"

    # Add outliers
    df.loc[9, "ExperienceInCurrentDomain"] = 50
    return df


# Maps each supported task id to the function that builds its dirty dataset.
_DATASET_BUILDERS = {
    "easy_001": _build_easy_dataset,
    "medium_001": _build_medium_dataset,
    "hard_001": _build_hard_dataset,
    "employee_demo": _build_employee_dataset,
}


def create_test_dataset(task_id: str) -> pd.DataFrame:
    """Create a test dataset for the given task.

    Args:
        task_id: One of "easy_001", "medium_001", "hard_001" or
            "employee_demo".

    Returns:
        A newly built DataFrame seeded with the data-quality problems of the
        requested task (nulls, duplicates, invalid emails, outliers, mixed
        types, inconsistent casing, depending on the task).

    Raises:
        ValueError: If ``task_id`` is not one of the supported ids.
    """
    # NOTE(review): the returned config is not used here. The call is kept
    # because it may validate task_id or have other effects - confirm against
    # tasks.get_task_config before removing it.
    get_task_config(task_id)

    builder = _DATASET_BUILDERS.get(task_id)
    if builder is None:
        raise ValueError(f"Unknown task: {task_id}")
    return builder()


def test_grader():
    """Test the grader with all tasks"""
    grader = Grader()
    tasks = ["easy_001", "medium_001", "hard_001", "employee_demo"]
    for task_id in tasks:
        print(f"\n{'='*60}")
        print(f"Testing task: {task_id}")
        print(f"{'='*60}")

        # Create test dataset
        original_dataset = create_test_dataset(task_id)
        current_dataset = original_dataset.copy()

        # Simulate some cleaning actions
        action_history = []

        # Grade the solution
        grader.setup(task_id, original_dataset, current_dataset, action_history)
        result = grader.grade()

        # Print results
        print(f"\nTask: {task_id}")
        print(f"Final Score: {result.final_score}")
        print(f"\nScore Breakdown:")
        for criterion, score in result.breakdown.items():
            print(f" {criterion}: {score}")
        print(f"\nFeedback:")
        print(result.feedback)

        # Verify score is strictly between 0 and 1