"""Tests for the grading system."""

from __future__ import annotations

import inspect

import numpy as np
import pandas as pd
import pytest

from data_cleaning_env.grader import (
    compute_quality_score,
    grade_episode,
    precompute_baselines,
)


class TestComputeQualityScore:
    """Checks on the score mapping returned by ``compute_quality_score``."""

    def test_perfect_score(self, clean_df: pd.DataFrame) -> None:
        """Scoring the clean frame against itself yields a composite of 1.0."""
        score = compute_quality_score(clean_df, clean_df)
        # Tolerate floating-point rounding instead of requiring bit-exact
        # equality on a computed score.
        assert score["composite"] == pytest.approx(1.0)

    def test_empty_df_scores_zero(self, clean_df: pd.DataFrame) -> None:
        """An empty frame scored against the clean frame has composite 0.0."""
        empty = pd.DataFrame()
        score = compute_quality_score(empty, clean_df)
        assert score["composite"] == pytest.approx(0.0)

    def test_deterministic(
        self, clean_df: pd.DataFrame, dirty_df: pd.DataFrame
    ) -> None:
        """Two calls with the same inputs return equal score mappings."""
        s1 = compute_quality_score(dirty_df, clean_df)
        s2 = compute_quality_score(dirty_df, clean_df)
        # Exact equality is intentional here: identical inputs must give
        # identical results, not merely close ones.
        assert s1 == s2, "Score is not deterministic"

    def test_dirty_scores_less_than_clean(
        self, clean_df: pd.DataFrame, dirty_df: pd.DataFrame
    ) -> None:
        """The dirty frame's composite is strictly below the clean frame's."""
        dirty_score = compute_quality_score(dirty_df, clean_df)
        clean_score = compute_quality_score(clean_df, clean_df)
        assert dirty_score["composite"] < clean_score["composite"]

    def test_all_components_present(self, clean_df: pd.DataFrame) -> None:
        """The score mapping exposes every expected component key."""
        score = compute_quality_score(clean_df, clean_df)
        for key in ("completeness", "consistency", "accuracy", "format", "composite"):
            assert key in score, f"Missing key: {key}"

    def test_all_nan_completeness_low(self, clean_df: pd.DataFrame) -> None:
        """A frame whose cells are all missing scores below 0.1 on completeness."""
        # Cast every column to object so each one can hold a missing value,
        # then blank out every cell.
        all_nan = clean_df.astype(object).copy()
        all_nan[:] = None
        score = compute_quality_score(all_nan, clean_df)
        assert score["completeness"] < 0.1

    def test_row_drop_penalizes_accuracy(self, clean_df: pd.DataFrame) -> None:
        """Dropping rows should reduce accuracy since the grader uses n_clean as the denominator."""
        # Keep only the first two rows of the clean frame.
        partial = clean_df.iloc[:2].copy()
        score = compute_quality_score(partial, clean_df)
        perfect = compute_quality_score(clean_df, clean_df)
        assert score["accuracy"] < perfect["accuracy"]

    def test_no_sklearn_in_grader(self) -> None:
        """The source text of ``compute_quality_score`` never mentions sklearn."""
        # Only this one function's source is inspected, not the whole module.
        src = inspect.getsource(compute_quality_score)
        assert "sklearn" not in src


class TestGradeEpisode:
    """Checks on the result mapping returned by ``grade_episode``."""

    def test_perfect_episode_scores_one(self, clean_df: pd.DataFrame) -> None:
        """A perfect result scores 1.0 and the result includes a breakdown."""
        precompute_baselines("_test", clean_df, clean_df.copy())
        # When dirty baseline == clean, grade_episode returns 1.0 for perfect data
        result = grade_episode(clean_df, "_test", clean_df)
        assert result["score"] == pytest.approx(1.0)
        assert "breakdown" in result

    def test_per_episode_baseline_overrides_cache(
        self, clean_df: pd.DataFrame, dirty_df: pd.DataFrame
    ) -> None:
        """A perfect result scores 1.0 when ``dirty_baseline`` is passed explicitly."""
        precompute_baselines("_test2", clean_df, dirty_df)
        dirty_quality = compute_quality_score(dirty_df, clean_df)["composite"]
        # NOTE(review): dirty_quality is derived from the same dirty_df/clean_df
        # pair given to precompute_baselines above, so it presumably equals the
        # cached baseline rather than differing from it. As written this checks
        # that an explicit dirty_baseline is accepted, not that it wins over a
        # different cached value - confirm against the grader and strengthen.
        result = grade_episode(
            clean_df, "_test2", clean_df, dirty_baseline=dirty_quality
        )
        assert result["score"] == pytest.approx(1.0)

    def test_no_improvement_scores_zero(
        self, clean_df: pd.DataFrame, dirty_df: pd.DataFrame
    ) -> None:
        """Submitting the dirty frame unchanged scores 0.0 against its own baseline."""
        dirty_quality = compute_quality_score(dirty_df, clean_df)["composite"]
        # No precompute_baselines call is made for "x" in this test; the
        # baseline is supplied through the dirty_baseline argument.
        result = grade_episode(dirty_df, "x", clean_df, dirty_baseline=dirty_quality)
        assert result["score"] == pytest.approx(0.0)