Spaces:
Running
Running
| """ | |
| Basic descriptive statistics for Difference in Means. | |
| """ | |
| from typing import Dict, Any | |
| import pandas as pd | |
| import numpy as np | |
| import logging | |
logger = logging.getLogger(__name__)


def _summarize_group(stats: pd.DataFrame, level: int) -> Dict[str, Any]:
    """Return the mean/std/count of one treatment level, or a placeholder if it is absent."""
    if level not in stats.index:
        return {'mean': np.nan, 'std': np.nan, 'count': 0}
    summary = stats.loc[level].to_dict()
    # The aggregated row is upcast to float64, so the count comes back as a
    # float (e.g. 3.0); normalise it to int to match the placeholder above.
    summary['count'] = int(summary['count'])
    return summary


def run_dim_diagnostics(df: pd.DataFrame, treatment: str, outcome: str) -> Dict[str, Any]:
    """
    Calculates basic descriptive statistics for treatment and control groups.

    The treatment column is expected to be coded 0 (control) / 1 (treated).
    A level that does not appear in the data is reported with NaN mean/std and
    a count of 0. This function never raises: every failure is reported through
    the returned ``status`` / ``error`` fields.

    Args:
        df: Input DataFrame (should already be filtered for NaNs in treatment/outcome).
        treatment: Name of the binary treatment variable column.
        outcome: Name of the outcome variable column.

    Returns:
        Dictionary with a ``status`` string and a ``details`` dictionary.
        ``details`` holds ``control_group_stats`` and ``treated_group_stats``
        (each with ``mean``, ``std`` and integer ``count``) and, when both
        groups are non-empty, ``variance_homogeneity_status`` plus, if both
        standard deviations are positive, ``variance_ratio_control_div_treated``.
        On failure the result has ``status == "Failed"`` and an ``error`` message.
    """
    details: Dict[str, Any] = {}
    try:
        # Check the columns explicitly: a missing column would otherwise surface
        # as a bare KeyError that says nothing useful about the real problem.
        missing = [col for col in (treatment, outcome) if col not in df.columns]
        if missing:
            message = f"Column(s) not found in DataFrame: {missing}"
            logger.error(message)
            return {"status": "Failed", "error": message, "details": details}

        stats = df.groupby(treatment)[outcome].agg(['mean', 'std', 'count'])

        control_stats = _summarize_group(stats, 0)
        treated_stats = _summarize_group(stats, 1)
        details['control_group_stats'] = control_stats
        details['treated_group_stats'] = treated_stats

        if control_stats['count'] == 0 or treated_stats['count'] == 0:
            logger.warning("One or both treatment groups have zero observations.")
            return {"status": "Warning - Empty Group(s)", "details": details}

        # Simple check for variance difference (Levene's test could be added)
        control_std = control_stats['std']
        treated_std = treated_stats['std']
        if pd.isna(control_std) or pd.isna(treated_std):
            # The sample std (ddof=1) is NaN for a single observation; that is
            # an undefined variance, not a zero one, so report it separately.
            details['variance_homogeneity_status'] = "Could not calculate (undefined variance in a group)"
        elif control_std > 0 and treated_std > 0:
            ratio = (control_std ** 2) / (treated_std ** 2)
            details['variance_ratio_control_div_treated'] = ratio
            if ratio > 4 or ratio < 0.25:  # Rule of thumb
                details['variance_homogeneity_status'] = "Potentially Unequal (ratio > 4 or < 0.25)"
            else:
                details['variance_homogeneity_status'] = "Likely Similar"
        else:
            details['variance_homogeneity_status'] = "Could not calculate (zero variance in a group)"

        return {"status": "Success", "details": details}
    except Exception as e:
        # Boundary handler: diagnostics are best-effort and must not crash the caller.
        logger.error("Error running Difference in Means diagnostics: %s", e)
        return {"status": "Failed", "error": str(e), "details": details}