Spaces:

Beam2513
/

again

Sleeping

App Files Files Community

again / controllers /hypothesis_controller.py

Beam2513

Upload 127 files

798602c verified about 1 month ago

raw

history blame contribute delete

6.61 kB

	from __future__ import annotations

	from typing import Iterable, Tuple

	import numpy as np
	import pandas as pd

	from core.hypothesis_tests import (
	one_sample_ttest,
	two_sample_ttest,
	variance_test,
	one_way_anova,
	)

	# Default number of decimal places applied to numeric columns of result tables.
	ROUND = 4


	def _round_table(
	    table: pd.DataFrame | None, decimals: int = ROUND
	) -> pd.DataFrame | None:
	    """Return a copy of *table* with only its numeric columns rounded.

	    Args:
	        table: Result table to round. ``None`` is passed through unchanged.
	        decimals: Number of decimal places to keep (defaults to ``ROUND``).

	    Returns:
	        A new DataFrame whose numeric columns are rounded to ``decimals``
	        places, or ``None`` when ``table`` is ``None``. The input table is
	        never modified.
	    """
	    if table is None:
	        return None

	    # Work on a copy so the caller's table keeps its full precision.
	    rounded = table.copy()
	    numeric_cols = rounded.select_dtypes(include="number").columns
	    if len(numeric_cols) > 0:
	        rounded[numeric_cols] = rounded[numeric_cols].round(decimals)
	    return rounded

	def _ensure_numeric_series(df: pd.DataFrame | None, column: str) -> np.ndarray:
	    """Return the non-missing values of a numeric column as a NumPy array.

	    Args:
	        df: Loaded dataset, or ``None`` when nothing has been loaded.
	        column: Name of the column to extract.

	    Returns:
	        The values of ``df[column]`` with missing entries dropped.

	    Raises:
	        ValueError: If no dataset is loaded, the column does not exist, the
	            column has no non-missing values, or its dtype is not numeric.
	    """
	    if df is None:
	        raise ValueError("No dataset loaded.")
	    if column not in df.columns:
	        raise ValueError(f"Column '{column}' not found in the dataset.")

	    series = df[column].dropna()
	    if series.empty:
	        raise ValueError("No valid data in the selected column.")

	    # Checked last so every pre-existing error path keeps its message; this
	    # only rejects non-empty columns whose dtype is not numeric, which would
	    # otherwise reach the statistical routines as non-numeric arrays.
	    if not pd.api.types.is_numeric_dtype(series):
	        raise ValueError(f"Column '{column}' is not numeric.")
	    return series.to_numpy()


	def _materialize_group(
	    df: pd.DataFrame,
	    numeric_col: str,
	    cat_col: str | None,
	    cat_vals: Iterable[str] | None,
	) -> np.ndarray:
	    """Extract the numeric values of the rows belonging to selected categories.

	    Args:
	        df: Dataset containing both ``numeric_col`` and ``cat_col``.
	        numeric_col: Column whose values are returned.
	        cat_col: Categorical column used to filter rows.
	        cat_vals: Category values to keep; ``None`` is treated as empty.

	    Returns:
	        Non-missing values of ``numeric_col`` for rows whose ``cat_col``
	        value is among ``cat_vals``.

	    Raises:
	        ValueError: If no categorical column is given, the column is not in
	            the dataset, no categories are selected, or the filtered group
	            contains no non-missing values.
	    """
	    if cat_col is None:
	        raise ValueError("No categorical column selected.")
	    if cat_col not in df.columns:
	        raise ValueError(f"Categorical column '{cat_col}' not found in the dataset.")

	    # Materialize once: cat_vals may be a one-shot iterable (e.g. a generator).
	    values = [] if cat_vals is None else list(cat_vals)
	    if not values:
	        raise ValueError(f"No categories selected for column '{cat_col}'.")

	    # Cast the selected values to the column's actual dtype so that `isin`
	    # compares like with like.
	    cat_series = pd.Series(values).astype(df[cat_col].dtype)
	    mask = df[cat_col].isin(cat_series)
	    series = df.loc[mask, numeric_col].dropna()

	    if series.empty:
	        raise ValueError("One or more groups are empty after filtering.")
	    return series.to_numpy()


	def _parse_mu0(mu0_text: str | None) -> float:
	    """Convert the μ₀ input of the one-sample t-test to a float.

	    Raises:
	        ValueError: If the input is missing/blank or cannot be parsed as a
	            number.
	    """
	    # str() keeps this working when the UI hands over a number instead of text.
	    if mu0_text is None or not str(mu0_text).strip():
	        raise ValueError("μ₀ must be specified for the one-sample t-test.")
	    try:
	        return float(mu0_text)
	    except (TypeError, ValueError):
	        # The parsing traceback adds nothing for the user; suppress it.
	        raise ValueError("μ₀ must be a numeric value.") from None


	def run_hypothesis_testing(
	    *,
	    df: pd.DataFrame | None,
	    numeric_col: str,
	    hypo_test: str,
	    mu0_text: str,
	    alternative: str,
	    include_graph: bool,
	    bootstrap_samples: int,
	    cat_col1: str | None,
	    cat_vals1: list[str],
	    name_group1: str,
	    cat_col2: str | None,
	    cat_vals2: list[str],
	    name_group2: str,
	    cat_col3: str | None,
	    cat_vals3: list[str],
	    plot_type: str,
	    correction: bool,
	    test_type: str,
	) -> Tuple[pd.DataFrame, object | None]:
	    """
	    High-level dispatcher used by the Hypothesis Testing tab.

	    All arguments are keyword-only. ``hypo_test`` selects the branch and must
	    be one of ``"One sample Student's t-test"``,
	    ``"Two samples Student's t-test"``,
	    ``"Equal variance between two groups"`` or ``"One-way ANOVA"``.

	    Args:
	        df: Loaded dataset, or ``None`` when nothing is loaded.
	        numeric_col: Column holding the values under test.
	        hypo_test: Name of the test to run (see above).
	        mu0_text: Text of the hypothesised mean (one-sample t-test only).
	        alternative: Forwarded to the one- and two-sample t-tests.
	        include_graph: Forwarded to the t-tests and the variance test.
	        bootstrap_samples: Forwarded to the t-tests and the variance test.
	        cat_col1, cat_vals1, name_group1: Filter column, selected categories
	            and label of the first group (two-group tests). An empty label
	            falls back to ``"Group 1"``.
	        cat_col2, cat_vals2, name_group2: Same for the second group; the
	            label falls back to ``"Group 2"``.
	        cat_col3, cat_vals3: Grouping column and selected categories for
	            the one-way ANOVA.
	        plot_type: Forwarded to the two-sample t-test.
	        correction: Forwarded to the two-sample t-test.
	        test_type: Forwarded to the variance test.

	    Returns:
	        (result_table, figure_or_none) as produced by the selected routine,
	        with the table's numeric columns rounded.

	    Raises:
	        ValueError: If no dataset is loaded, the numeric column fails
	            validation, a test-specific input is missing or invalid, or
	            ``hypo_test`` is not a known test name.
	    """
	    if df is None:
	        raise ValueError("No dataset loaded.")

	    # Common numeric data check; the validated values double as the
	    # one-sample test's sample, so they are not recomputed below.
	    sample = _ensure_numeric_series(df, numeric_col)

	    # ------------------------------------------------------------
	    # One-sample t-test
	    # ------------------------------------------------------------
	    if hypo_test == "One sample Student's t-test":
	        mu0 = _parse_mu0(mu0_text)

	        table, fig = one_sample_ttest(
	            sample=sample,
	            mu0=mu0,
	            alternative=alternative,
	            numeric_col=numeric_col,
	            bootstrap_samples=bootstrap_samples,
	            include_graph=include_graph,
	        )
	        return _round_table(table), fig

	    # ------------------------------------------------------------
	    # Two-sample t-test
	    # ------------------------------------------------------------
	    if hypo_test == "Two samples Student's t-test":
	        group1 = _materialize_group(df, numeric_col, cat_col1, cat_vals1)
	        group2 = _materialize_group(df, numeric_col, cat_col2, cat_vals2)

	        # If names are empty, fall back to defaults
	        name1 = name_group1 or "Group 1"
	        name2 = name_group2 or "Group 2"

	        table, fig = two_sample_ttest(
	            group1=group1,
	            group2=group2,
	            numeric_col=numeric_col,
	            name_group1=name1,
	            name_group2=name2,
	            alternative=alternative,
	            correction=correction,
	            plot_type=plot_type,
	            bootstrap_samples=bootstrap_samples,
	            include_graph=include_graph,
	        )
	        return _round_table(table), fig

	    # ------------------------------------------------------------
	    # Equal variance between two groups
	    # ------------------------------------------------------------
	    if hypo_test == "Equal variance between two groups":
	        group1 = _materialize_group(df, numeric_col, cat_col1, cat_vals1)
	        group2 = _materialize_group(df, numeric_col, cat_col2, cat_vals2)

	        name1 = name_group1 or "Group 1"
	        name2 = name_group2 or "Group 2"

	        table, fig = variance_test(
	            group1=group1,
	            group2=group2,
	            name_group1=name1,
	            name_group2=name2,
	            test_type=test_type,
	            include_graph=include_graph,
	            bootstrap_samples=bootstrap_samples,
	        )
	        return _round_table(table), fig

	    # ------------------------------------------------------------
	    # One-way ANOVA
	    # ------------------------------------------------------------
	    if hypo_test == "One-way ANOVA":
	        if cat_col3 is None:
	            raise ValueError("A categorical column must be selected for ANOVA.")

	        if cat_col3 not in df.columns:
	            raise ValueError(
	                f"Categorical column '{cat_col3}' not found in the dataset."
	            )

	        if not cat_vals3:
	            raise ValueError("At least one category must be selected for ANOVA.")

	        # Cast the selection to the column's dtype before matching rows, then
	        # keep only complete (value, category) pairs.
	        cat_series = pd.Series(cat_vals3).astype(df[cat_col3].dtype)
	        data_group = df[df[cat_col3].isin(cat_series)][[numeric_col, cat_col3]].dropna()

	        table, fig = one_way_anova(
	            data_group=data_group,
	            numeric_col=numeric_col,
	            cat_col=cat_col3,
	        )
	        return _round_table(table), fig

	    # ------------------------------------------------------------
	    # Fallback
	    # ------------------------------------------------------------
	    raise ValueError(f"Unknown hypothesis test: {hypo_test}")