Spaces:

abrahamcbe
/

myspace-ooty-analytics

Sleeping

myspace-ooty-analytics / src /analysis.py

abraham9486937737

Deploy MySpace Ooty Analytics to Hugging Face - with KPI styling updates

04b129a 20 days ago

4.01 kB

	"""
	Statistical analysis and insights generation
	"""

	import pandas as pd
	import numpy as np
	from scipy import stats
	from typing import Dict, Tuple, Union


	def calculate_descriptive_stats(df: pd.DataFrame, column: str) -> Dict:
	    """
	    Summarise one column of a DataFrame with common descriptive statistics.

	    Args:
	        df: Input DataFrame
	        column: Name of the column to summarise

	    Returns:
	        Dictionary keyed by "count", "mean", "median", "std", "min",
	        "25%", "75%", "max", "skewness" and "kurtosis" (in that order),
	        each computed with the matching pandas method on ``df[column]``.
	    """
	    # Select the column once and reuse it for every statistic.
	    series = df[column]
	    lower_quartile, upper_quartile = (series.quantile(q) for q in (0.25, 0.75))

	    summary = {}
	    summary["count"] = series.count()
	    summary["mean"] = series.mean()
	    summary["median"] = series.median()
	    summary["std"] = series.std()
	    summary["min"] = series.min()
	    summary["25%"] = lower_quartile
	    summary["75%"] = upper_quartile
	    summary["max"] = series.max()
	    summary["skewness"] = series.skew()
	    summary["kurtosis"] = series.kurtosis()
	    return summary


	def correlation_analysis(df: pd.DataFrame, method: str = "pearson") -> pd.DataFrame:
	    """
	    Compute the pairwise correlation matrix of the numeric columns.

	    Args:
	        df: Input DataFrame; non-numeric columns are ignored
	        method: Correlation method handed to ``DataFrame.corr``
	            ('pearson', 'spearman', or 'kendall')

	    Returns:
	        Correlation matrix over the numeric columns of ``df``
	    """
	    # Keep only numeric dtypes, then let pandas build the matrix.
	    return df.select_dtypes(include=[np.number]).corr(method=method)


	def hypothesis_testing(group1: pd.Series, group2: pd.Series,
	                       test_type: str = "ttest") -> Dict:
	    """
	    Perform a two-sample hypothesis test between two groups

	    Missing values are dropped from each group independently before
	    the test is run.

	    Args:
	        group1: First group data
	        group2: Second group data
	        test_type: 'ttest' (or its alias 't-test') for an independent
	            two-sample t-test, or 'mannwhitneyu' for the Mann-Whitney U
	            test. A chi-square test is not handled here; it needs a
	            contingency table rather than two samples.

	    Returns:
	        Dictionary with keys "test", "statistic", "p_value" and
	        "significant" (True when p_value < 0.05). An empty dictionary
	        is returned for any unsupported ``test_type``.
	    """
	    # "t-test" is the spelling the documentation has always advertised,
	    # so it is accepted alongside the default "ttest".
	    if test_type in ("ttest", "t-test"):
	        label, run_test = "Independent t-test", stats.ttest_ind
	    elif test_type == "mannwhitneyu":
	        label, run_test = "Mann-Whitney U Test", stats.mannwhitneyu
	    else:
	        # Unsupported test types keep yielding an empty result so that
	        # existing callers which check for an empty dict still work.
	        return {}

	    statistic, p_value = run_test(group1.dropna(), group2.dropna())
	    return {
	        "test": label,
	        "statistic": statistic,
	        "p_value": p_value,
	        "significant": p_value < 0.05,
	    }


	def anova_test(groups: list) -> Dict:
	    """
	    Run a one-way ANOVA across several groups.

	    Args:
	        groups: List of group data Series; missing values are dropped
	            from each Series before testing

	    Returns:
	        Dictionary with keys "test", "f_statistic", "p_value" and
	        "significant" (True when p_value < 0.05)
	    """
	    # Strip NaNs per group and feed the cleaned samples to scipy.
	    f_stat, p_value = stats.f_oneway(*(sample.dropna() for sample in groups))

	    outcome = {"test": "ANOVA"}
	    outcome["f_statistic"] = f_stat
	    outcome["p_value"] = p_value
	    outcome["significant"] = p_value < 0.05
	    return outcome


	def chi_square_test(contingency_table: pd.DataFrame) -> Dict:
	    """
	    Run a Chi-square test of independence on a contingency table.

	    Args:
	        contingency_table: Contingency table (DataFrame) of observed counts

	    Returns:
	        Dictionary with keys "test", "statistic", "p_value",
	        "degrees_of_freedom" and "significant" (True when p_value < 0.05)
	    """
	    # scipy also returns the expected-frequency table as a fourth item;
	    # it is not part of the reported result, so only the first three
	    # values are kept.
	    chi2, p_value, dof = stats.chi2_contingency(contingency_table)[:3]

	    report = {"test": "Chi-square"}
	    report["statistic"] = chi2
	    report["p_value"] = p_value
	    report["degrees_of_freedom"] = dof
	    report["significant"] = p_value < 0.05
	    return report


	def trend_analysis(df: pd.DataFrame, time_col: str, value_col: str) -> Dict:
	    """
	    Perform simple trend analysis

	    Rows are ordered by ``time_col`` and the values are regressed on
	    their position (0, 1, 2, ...) in that ordering, so the slope is the
	    change in value per row rather than per unit of time.

	    Args:
	        df: Input DataFrame
	        time_col: Column name for time/date
	        value_col: Column name for values (numeric)

	    Returns:
	        Dictionary with keys "slope", "intercept", "r_squared",
	        "p_value", "trend" ("upward", "downward", or "flat" when the
	        slope is exactly zero) and "significant" (True when
	        p_value < 0.05)

	    Note:
	        ``stats.linregress`` is expected to reject input with fewer than
	        two usable points -- confirm against the installed scipy version.
	    """
	    # sort_values already returns a new DataFrame, so no copy is needed.
	    df_sorted = df.sort_values(time_col)
	    y = df_sorted[value_col].to_numpy(dtype=float, na_value=np.nan)

	    # Missing values would turn every regression output into NaN. Drop
	    # them, but keep each remaining point at its original position so
	    # gaps in the series do not compress the x axis.
	    present = ~np.isnan(y)
	    x = np.arange(len(y))[present]
	    y = y[present]

	    slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)

	    # A slope of exactly zero is neither upward nor downward.
	    if slope > 0:
	        trend = "upward"
	    elif slope < 0:
	        trend = "downward"
	    else:
	        trend = "flat"

	    return {
	        "slope": slope,
	        "intercept": intercept,
	        "r_squared": r_value**2,
	        "p_value": p_value,
	        "trend": trend,
	        "significant": p_value < 0.05,
	    }