Spaces:
Running
Running
import numpy as np
import pandas as pd
import pytest
import statsmodels.api as sm

from auto_causal.methods.linear_regression.diagnostics import run_lr_diagnostics
# Reuse the sample data fixture from estimator tests
@pytest.fixture
def sample_data() -> pd.DataFrame:
    """Generate a small, reproducible synthetic dataset for LR tests.

    The outcome is simulated as
    ``1.0 + 2.0 * treatment + 0.5 * covariate1 - 1.5 * covariate2 + noise``
    with standard-normal noise, so the treatment effect built into the data
    is 2.0.

    Returns:
        A 100-row DataFrame with the columns ``outcome``, ``treatment``
        (binary 0/1), ``covariate1`` and ``covariate2``.
    """
    # A dedicated generator seeded with 42 produces the same draws as
    # ``np.random.seed(42)`` followed by ``np.random.*`` calls, but does not
    # reseed NumPy's global RNG as a side effect of requesting the fixture.
    rng = np.random.RandomState(42)
    n_samples = 100
    treatment_effect = 2.0

    # The draw order (X1, X2, treatment, error) determines the generated
    # values, so it must stay fixed.
    X1 = rng.normal(0, 1, n_samples)
    X2 = rng.normal(5, 2, n_samples)
    treatment = rng.binomial(1, 0.5, n_samples)
    error = rng.normal(0, 1, n_samples)

    outcome = 1.0 + treatment_effect * treatment + 0.5 * X1 - 1.5 * X2 + error
    return pd.DataFrame({
        'outcome': outcome,
        'treatment': treatment,
        'covariate1': X1,
        'covariate2': X2,
    })
def test_run_lr_diagnostics_implementation(sample_data):
    """Verify the diagnostics report built from a genuinely fitted OLS model."""
    # Fit outcome ~ const + treatment + covariates to get a real results object.
    clean = sample_data.dropna()
    regressors = ['treatment', 'covariate1', 'covariate2']
    X = sm.add_constant(clean[regressors])
    results = sm.OLS(clean['outcome'], X).fit()

    # Run diagnostics on the fitted model and its design matrix.
    diagnostics = run_lr_diagnostics(results, X)

    assert isinstance(diagnostics, dict)
    assert diagnostics["status"] == "Success"
    assert "details" in diagnostics
    details = diagnostics["details"]

    # Core fit-summary metrics.
    fit_keys = (
        "r_squared",
        "adj_r_squared",
        "f_statistic",
        "f_p_value",
        "n_observations",
        "degrees_of_freedom_resid",
    )
    for key in fit_keys:
        assert key in details

    # Residual-normality results.
    normality_keys = (
        "residuals_normality_jb_stat",
        "residuals_normality_jb_p_value",
        "residuals_skewness",
        "residuals_kurtosis",
        "residuals_normality_status",
    )
    for key in normality_keys:
        assert key in details
    assert isinstance(details["residuals_normality_status"], str)

    # Homoscedasticity results.
    homoscedasticity_keys = (
        "homoscedasticity_bp_lm_stat",
        "homoscedasticity_bp_lm_p_value",
        "homoscedasticity_bp_f_stat",
        "homoscedasticity_bp_f_p_value",
        "homoscedasticity_status",
    )
    for key in homoscedasticity_keys:
        assert key in details
    assert isinstance(details["homoscedasticity_status"], str)

    # Checks that are reported as fixed placeholder messages.
    placeholder_messages = {
        "linearity_check": "Requires visual inspection (e.g., residual vs fitted plot)",
        "multicollinearity_check": "Not Implemented (Requires VIF)",
    }
    for key in placeholder_messages:
        assert key in details
    for key, message in placeholder_messages.items():
        assert details[key] == message

    # Basic type checks on a few representative values.
    expected_types = (
        ("r_squared", float),
        ("f_p_value", float),
        ("n_observations", int),
    )
    for key, expected_type in expected_types:
        assert isinstance(details[key], expected_type)
def test_run_lr_diagnostics_failure():
    """A non-results input must be reported as a failure with an error entry."""
    # A plain string stands in for the fitted-results object.
    bogus_results = "not a results object"
    design_matrix = pd.DataFrame({'const': [1]})

    diagnostics = run_lr_diagnostics(bogus_results, design_matrix)

    assert diagnostics["status"] == "Failed"
    assert "error" in diagnostics