Spaces:
Sleeping
Sleeping
| """ | |
| Test script for the OpenEnv Data Cleaning Grader | |
| """ | |
| import pandas as pd | |
| import numpy as np | |
| from grader import Grader | |
| from tasks import get_task_config | |
| def create_test_dataset(task_id: str) -> pd.DataFrame: | |
| """Create a test dataset for the given task""" | |
| config = get_task_config(task_id) | |
| if task_id == "easy_001": | |
| # Create dataset with some nulls and duplicates | |
| data = { | |
| "id": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], | |
| "name": ["Alice", "Bob", "Charlie", "David", "Eve", "Frank", "Grace", "Henry", "Ivy", "Jack"], | |
| "age": [25, 30, 35, 40, 45, 50, 55, 60, 65, 70], | |
| "email": ["a@example.com", "b@example.com", "c@example.com", "d@example.com", "e@example.com", | |
| "f@example.com", "g@example.com", "h@example.com", "i@example.com", "j@example.com"], | |
| "salary": [50000, 60000, 70000, 80000, 90000, 100000, 110000, 120000, 130000, 140000] | |
| } | |
| df = pd.DataFrame(data) | |
| # Add some nulls | |
| df.loc[2, 'age'] = np.nan | |
| df.loc[5, 'email'] = np.nan | |
| # Add a duplicate | |
| df = pd.concat([df, df.iloc[[0]]]).reset_index(drop=True) | |
| return df | |
| elif task_id == "medium_001": | |
| # Create dataset with various issues | |
| data = { | |
| "id": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], | |
| "name": ["Alice", "Bob", "Charlie", "David", "Eve", "Frank", "Grace", "Henry", "Ivy", "Jack"], | |
| "age": [25, 30, 35, 40, 45, 50, 55, 60, 65, 70], | |
| "email": ["a@example.com", "b@example.com", "c@example.com", "d@example.com", "e@example.com", | |
| "f@example.com", "g@example.com", "h@example.com", "i@example.com", "j@example.com"], | |
| "salary": [50000, 60000, 70000, 80000, 90000, 100000, 110000, 120000, 130000, 140000], | |
| "department": ["HR", "IT", "Finance", "IT", "HR", "Finance", "IT", "HR", "Finance", "IT"] | |
| } | |
| df = pd.DataFrame(data) | |
| # Add some nulls | |
| df.loc[3, 'age'] = np.nan | |
| df.loc[7, 'email'] = np.nan | |
| # Add invalid emails | |
| df.loc[1, 'email'] = "invalid-email" | |
| df.loc[4, 'email'] = "another-invalid" | |
| # Add outliers | |
| df.loc[9, 'salary'] = 500000 | |
| return df | |
| elif task_id == "hard_001": | |
| # Create dataset with advanced issues | |
| data = { | |
| "id": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], | |
| "name": ["Alice", "Bob", "Charlie", "David", "Eve", "Frank", "Grace", "Henry", "Ivy", "Jack"], | |
| "age": [25, 30, 35, 40, 45, 50, 55, 60, 65, 70], | |
| "email": ["a@example.com", "b@example.com", "c@example.com", "d@example.com", "e@example.com", | |
| "f@example.com", "g@example.com", "h@example.com", "i@example.com", "j@example.com"], | |
| "salary": [50000, 60000, 70000, 80000, 90000, 100000, 110000, 120000, 130000, 140000], | |
| "department": ["HR", "IT", "Finance", "IT", "HR", "Finance", "IT", "HR", "Finance", "IT"], | |
| "join_date": pd.to_datetime(["2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01", | |
| "2020-06-01", "2020-07-01", "2020-08-01", "2020-09-01", "2020-10-01"]), | |
| "score": [85, 90, 78, 92, 88, 76, 95, 89, 84, 91] | |
| } | |
| df = pd.DataFrame(data) | |
| # Add various issues | |
| df.loc[2, 'age'] = np.nan | |
| df.loc[5, 'email'] = np.nan | |
| df.loc[8, 'salary'] = np.nan | |
| # Add invalid emails | |
| df.loc[1, 'email'] = "invalid-email" | |
| # Add outliers | |
| df.loc[9, 'salary'] = 500000 | |
| # Add type issues | |
| df.loc[3, 'id'] = "three" | |
| df.loc[6, 'age'] = "fifty-five" | |
| return df | |
| elif task_id == "employee_demo": | |
| # Create employee dataset with various issues | |
| data = { | |
| "Education": ["Bachelor", "Master", "PhD", "Bachelor", "Master", "PhD", "Bachelor", "Master", "PhD", "Bachelor"], | |
| "JoiningYear": [2018, 2019, 2020, 2018, 2019, 2020, 2018, 2019, 2020, 2018], | |
| "City": ["New York", "Los Angeles", "Chicago", "Houston", "Phoenix", "Philadelphia", "San Antonio", "San Diego", "Dallas", "San Jose"], | |
| "PaymentTier": ["Gold", "Silver", "Bronze", "Gold", "Silver", "Bronze", "Gold", "Silver", "Bronze", "Gold"], | |
| "Age": [25, 30, 35, 40, 45, 50, 55, 60, 65, 70], | |
| "Gender": ["Male", "Female", "Male", "Female", "Male", "Female", "Male", "Female", "Male", "Female"], | |
| "EverBenched": [0, 1, 0, 1, 0, 1, 0, 1, 0, 1], | |
| "ExperienceInCurrentDomain": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11], | |
| "LeaveOrNot": [0, 1, 0, 1, 0, 1, 0, 1, 0, 1] | |
| } | |
| df = pd.DataFrame(data) | |
| # Add some nulls | |
| df.loc[2, 'Age'] = np.nan | |
| df.loc[5, 'Gender'] = np.nan | |
| # Add duplicates | |
| df = pd.concat([df, df.iloc[[0]]]).reset_index(drop=True) | |
| # Add formatting issues | |
| df.loc[1, 'Education'] = 'bachelor' | |
| df.loc[4, 'Gender'] = 'female' | |
| df.loc[7, 'City'] = 'los angeles' | |
| # Add outliers | |
| df.loc[9, 'ExperienceInCurrentDomain'] = 50 | |
| return df | |
| else: | |
| raise ValueError(f"Unknown task: {task_id}") | |
| def test_grader(): | |
| """Test the grader with all tasks""" | |
| grader = Grader() | |
| tasks = ["easy_001", "medium_001", "hard_001", "employee_demo"] | |
| for task_id in tasks: | |
| print(f"\n{'='*60}") | |
| print(f"Testing task: {task_id}") | |
| print(f"{'='*60}") | |
| # Create test dataset | |
| original_dataset = create_test_dataset(task_id) | |
| current_dataset = original_dataset.copy() | |
| # Simulate some cleaning actions | |
| action_history = [] | |
| # Grade the solution | |
| grader.setup(task_id, original_dataset, current_dataset, action_history) | |
| result = grader.grade() | |
| # Print results | |
| print(f"\nTask: {task_id}") | |
| print(f"Final Score: {result.final_score}") | |
| print(f"\nScore Breakdown:") | |
| for criterion, score in result.breakdown.items(): | |
| print(f" {criterion}: {score}") | |
| print(f"\nFeedback:") | |
| print(result.feedback) | |
| # Verify score is strictly between 0 and 1 | |