# env/test_grader.py
# Repository page header (not part of the program):
#   sairaj2 - "Upload folder using huggingface_hub" - commit 9b57afb (verified)
"""
Test script for the OpenEnv Data Cleaning Grader
"""
import pandas as pd
import numpy as np
from grader import Grader
from tasks import get_task_config
def _base_people_columns() -> dict:
    """Return fresh copies of the five columns shared by the easy/medium/hard fixtures."""
    return {
        "id": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        "name": ["Alice", "Bob", "Charlie", "David", "Eve",
                 "Frank", "Grace", "Henry", "Ivy", "Jack"],
        "age": [25, 30, 35, 40, 45, 50, 55, 60, 65, 70],
        "email": ["a@example.com", "b@example.com", "c@example.com", "d@example.com", "e@example.com",
                  "f@example.com", "g@example.com", "h@example.com", "i@example.com", "j@example.com"],
        "salary": [50000, 60000, 70000, 80000, 90000, 100000, 110000, 120000, 130000, 140000],
    }


def _build_easy_dataset() -> pd.DataFrame:
    """Build the ``easy_001`` fixture: two nulls and one duplicated row."""
    df = pd.DataFrame(_base_people_columns())
    # NaN cannot be stored in an int64 column. Cast explicitly instead of
    # relying on the silent upcast that pandas deprecated in 2.1 (PDEP-6).
    df["age"] = df["age"].astype("float64")
    # Add some nulls
    df.loc[2, "age"] = np.nan
    df.loc[5, "email"] = np.nan
    # Add a duplicate: append a copy of the first row
    return pd.concat([df, df.iloc[[0]]]).reset_index(drop=True)


def _build_medium_dataset() -> pd.DataFrame:
    """Build the ``medium_001`` fixture: nulls, invalid emails and a salary outlier."""
    columns = _base_people_columns()
    columns["department"] = ["HR", "IT", "Finance", "IT", "HR", "Finance", "IT", "HR", "Finance", "IT"]
    df = pd.DataFrame(columns)
    # Explicit float cast so the NaN below does not trigger a dtype upcast.
    df["age"] = df["age"].astype("float64")
    # Add some nulls
    df.loc[3, "age"] = np.nan
    df.loc[7, "email"] = np.nan
    # Add invalid emails
    df.loc[1, "email"] = "invalid-email"
    df.loc[4, "email"] = "another-invalid"
    # Add outliers
    df.loc[9, "salary"] = 500000
    return df


def _build_hard_dataset() -> pd.DataFrame:
    """Build the ``hard_001`` fixture: nulls, an invalid email, an outlier and mixed-type columns."""
    columns = _base_people_columns()
    columns["department"] = ["HR", "IT", "Finance", "IT", "HR", "Finance", "IT", "HR", "Finance", "IT"]
    columns["join_date"] = pd.to_datetime(["2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01",
                                           "2020-06-01", "2020-07-01", "2020-08-01", "2020-09-01", "2020-10-01"])
    columns["score"] = [85, 90, 78, 92, 88, 76, 95, 89, 84, 91]
    df = pd.DataFrame(columns)

    # Give every column that receives an incompatible value its final dtype up
    # front: salary holds NaN (float64); id and age hold strings (object).
    # age goes through float64 first so its numeric entries are floats, which
    # is what the NaN-then-string injection sequence yields.
    df["salary"] = df["salary"].astype("float64")
    df["id"] = df["id"].astype(object)
    df["age"] = df["age"].astype("float64").astype(object)

    # Add various issues (nulls)
    df.loc[2, "age"] = np.nan
    df.loc[5, "email"] = np.nan
    df.loc[8, "salary"] = np.nan
    # Add invalid emails
    df.loc[1, "email"] = "invalid-email"
    # Add outliers
    df.loc[9, "salary"] = 500000
    # Add type issues: strings inside otherwise numeric columns
    df.loc[3, "id"] = "three"
    df.loc[6, "age"] = "fifty-five"
    return df


def _build_employee_dataset() -> pd.DataFrame:
    """Build the ``employee_demo`` fixture: nulls, a duplicate, casing issues and an outlier."""
    data = {
        "Education": ["Bachelor", "Master", "PhD", "Bachelor", "Master", "PhD", "Bachelor", "Master", "PhD", "Bachelor"],
        "JoiningYear": [2018, 2019, 2020, 2018, 2019, 2020, 2018, 2019, 2020, 2018],
        "City": ["New York", "Los Angeles", "Chicago", "Houston", "Phoenix", "Philadelphia", "San Antonio", "San Diego", "Dallas", "San Jose"],
        "PaymentTier": ["Gold", "Silver", "Bronze", "Gold", "Silver", "Bronze", "Gold", "Silver", "Bronze", "Gold"],
        "Age": [25, 30, 35, 40, 45, 50, 55, 60, 65, 70],
        "Gender": ["Male", "Female", "Male", "Female", "Male", "Female", "Male", "Female", "Male", "Female"],
        "EverBenched": [0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
        "ExperienceInCurrentDomain": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
        "LeaveOrNot": [0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
    }
    df = pd.DataFrame(data)
    # Explicit float cast so the NaN below does not trigger a dtype upcast.
    df["Age"] = df["Age"].astype("float64")
    # Add some nulls
    df.loc[2, "Age"] = np.nan
    df.loc[5, "Gender"] = np.nan
    # Add duplicates: append a copy of the first row
    df = pd.concat([df, df.iloc[[0]]]).reset_index(drop=True)
    # Add formatting issues (lower-cased labels). Note these overwrite the
    # row's previous value, e.g. row 1 was "Master" and becomes "bachelor".
    df.loc[1, "Education"] = 'bachelor'
    df.loc[4, "Gender"] = 'female'
    df.loc[7, "City"] = 'los angeles'
    # Add outliers
    df.loc[9, "ExperienceInCurrentDomain"] = 50
    return df


# Maps each supported task id to the function that builds its fixture.
_DATASET_BUILDERS = {
    "easy_001": _build_easy_dataset,
    "medium_001": _build_medium_dataset,
    "hard_001": _build_hard_dataset,
    "employee_demo": _build_employee_dataset,
}


def create_test_dataset(task_id: str) -> pd.DataFrame:
    """Create a deliberately dirty test dataset for the given task.

    Args:
        task_id: One of ``"easy_001"``, ``"medium_001"``, ``"hard_001"`` or
            ``"employee_demo"``.

    Returns:
        A freshly built DataFrame containing the data-quality problems
        (nulls, duplicates, invalid emails, outliers, mixed types) that are
        injected for that task.

    Raises:
        ValueError: If ``task_id`` is not one of the supported ids.
    """
    try:
        builder = _DATASET_BUILDERS[task_id]
    except (KeyError, TypeError):
        raise ValueError(f"Unknown task: {task_id}") from None
    return builder()
def test_grader():
"""Test the grader with all tasks"""
grader = Grader()
tasks = ["easy_001", "medium_001", "hard_001", "employee_demo"]
for task_id in tasks:
print(f"\n{'='*60}")
print(f"Testing task: {task_id}")
print(f"{'='*60}")
# Create test dataset
original_dataset = create_test_dataset(task_id)
current_dataset = original_dataset.copy()
# Simulate some cleaning actions
action_history = []
# Grade the solution
grader.setup(task_id, original_dataset, current_dataset, action_history)
result = grader.grade()
# Print results
print(f"\nTask: {task_id}")
print(f"Final Score: {result.final_score}")
print(f"\nScore Breakdown:")
for criterion, score in result.breakdown.items():
print(f" {criterion}: {score}")
print(f"\nFeedback:")
print(result.feedback)
# Verify score is strictly between 0 and 1