# Spaces: abrahamcbe / myspace-ooty-analytics (Sleeping)
# File size: 4,008 Bytes
# 04b129a

"""
Statistical analysis and insights generation
"""

import pandas as pd
import numpy as np
from scipy import stats
from typing import Dict, Tuple, Union


def calculate_descriptive_stats(df: pd.DataFrame, column: str) -> Dict:
    """
    Summarise one column of a DataFrame with common descriptive statistics

    Args:
        df: Input DataFrame
        column: Name of the column to summarise

    Returns:
        Dictionary keyed by "count", "mean", "median", "std", "min",
        "25%", "75%", "max", "skewness" and "kurtosis"
    """
    # Select the column once and reuse it for every statistic.
    series = df[column]
    lower_quartile, upper_quartile = (series.quantile(q) for q in (0.25, 0.75))

    summary = {}
    summary["count"] = series.count()
    summary["mean"] = series.mean()
    summary["median"] = series.median()
    summary["std"] = series.std()
    summary["min"] = series.min()
    summary["25%"] = lower_quartile
    summary["75%"] = upper_quartile
    summary["max"] = series.max()
    summary["skewness"] = series.skew()
    summary["kurtosis"] = series.kurtosis()
    return summary


def correlation_analysis(df: pd.DataFrame, method: str = "pearson") -> pd.DataFrame:
    """
    Compute the pairwise correlation matrix of the numeric columns

    Args:
        df: Input DataFrame; non-numeric columns are ignored
        method: 'pearson', 'spearman', or 'kendall'

    Returns:
        Correlation matrix over the numeric columns
    """
    # Restrict to numeric dtypes first so text columns never reach corr().
    return df.select_dtypes(include=[np.number]).corr(method=method)


def hypothesis_testing(group1: pd.Series, group2: pd.Series,
                       test_type: str = "ttest") -> Dict:
    """
    Perform hypothesis testing between two groups

    Missing values are dropped from each group independently before the
    test is run.

    Args:
        group1: First group data
        group2: Second group data
        test_type: 'ttest' (also accepted as 't-test') for an independent
            two-sample t-test, or 'mannwhitneyu' for a Mann-Whitney U test.
            A chi-square test is not handled here; chi_square_test in this
            module takes a contingency table instead.

    Returns:
        Dictionary with "test", "statistic", "p_value" and "significant"
        (True when p_value < 0.05). An empty dictionary is returned when
        test_type is not one of the supported names.
    """
    # 't-test' is the spelling the documentation has always advertised, so
    # it is accepted alongside the original 'ttest' key.
    if test_type in ("ttest", "t-test"):
        test_name = "Independent t-test"
        statistic, p_value = stats.ttest_ind(group1.dropna(), group2.dropna())
    elif test_type == "mannwhitneyu":
        test_name = "Mann-Whitney U Test"
        statistic, p_value = stats.mannwhitneyu(group1.dropna(), group2.dropna())
    else:
        # Unknown test names keep the historical behaviour of yielding an
        # empty result rather than raising.
        return {}

    return {
        "test": test_name,
        "statistic": statistic,
        "p_value": p_value,
        "significant": p_value < 0.05,
    }


def anova_test(groups: list) -> Dict:
    """
    Run a one-way ANOVA across several groups

    Args:
        groups: List of group data Series; missing values in each Series
            are dropped before the test

    Returns:
        Dictionary with "test", "f_statistic", "p_value" and "significant"
        (True when p_value < 0.05)
    """
    samples = []
    for series in groups:
        samples.append(series.dropna())

    anova_result = stats.f_oneway(*samples)
    f_value, prob = anova_result

    report = {"test": "ANOVA"}
    report["f_statistic"] = f_value
    report["p_value"] = prob
    report["significant"] = prob < 0.05
    return report


def chi_square_test(contingency_table: pd.DataFrame) -> Dict:
    """
    Run a Chi-square test of independence on a contingency table

    Args:
        contingency_table: Contingency table (DataFrame)

    Returns:
        Dictionary with "test", "statistic", "p_value",
        "degrees_of_freedom" and "significant" (True when p_value < 0.05)
    """
    # The expected-frequency table is returned by scipy but not reported.
    chi2_value, prob, freedom, _expected = stats.chi2_contingency(contingency_table)

    report = {"test": "Chi-square"}
    report["statistic"] = chi2_value
    report["p_value"] = prob
    report["degrees_of_freedom"] = freedom
    report["significant"] = prob < 0.05
    return report


def trend_analysis(df: pd.DataFrame, time_col: str, value_col: str) -> Dict:
    """
    Perform simple trend analysis

    Rows are ordered by time_col and a straight line is fitted to the
    values against their position in that ordering (0, 1, 2, ...), not
    against the time values themselves. Rows whose value is missing are
    left out of the fit but keep their position, so the spacing of the
    remaining points is unchanged.

    Args:
        df: Input DataFrame
        time_col: Column name for time/date, used only for ordering
        value_col: Column name for values (numeric)

    Returns:
        Dictionary with "slope", "intercept", "r_squared", "p_value",
        "trend" and "significant" (True when p_value < 0.05). "trend" is
        "upward" for a positive slope, "downward" for a negative slope,
        "flat" for a slope of exactly zero and "undefined" when the slope
        is NaN.
    """
    df_sorted = df.sort_values(time_col)
    positions = np.arange(len(df_sorted))
    values = df_sorted[value_col].to_numpy(dtype=float, na_value=np.nan)

    # A single NaN would otherwise turn every regression output into NaN.
    usable = ~np.isnan(values)
    slope, intercept, r_value, p_value, _std_err = stats.linregress(
        positions[usable], values[usable]
    )

    # Distinguish "no trend" and "could not be computed" from a real decline;
    # comparisons with NaN are all False, so NaN falls through to the last case.
    if slope > 0:
        trend = "upward"
    elif slope < 0:
        trend = "downward"
    elif slope == 0:
        trend = "flat"
    else:
        trend = "undefined"

    return {
        "slope": slope,
        "intercept": intercept,
        "r_squared": r_value**2,
        "p_value": p_value,
        "trend": trend,
        "significant": p_value < 0.05,
    }