again / ui /tabs /hypothesis_testing_tab.py
Beam2513's picture
Upload 127 files
798602c verified
import pandas as pd
import gradio as gr
from controllers.hypothesis_controller import run_hypothesis_testing
from controllers.utils.downloads import dataframe_to_csv, figure_to_png
def build(state):
gr.Markdown("## πŸ§ͺ Hypothesis Testing")
with gr.Row(elem_id="row_centered"):
refresh_columns_button = gr.Button("πŸ”„ Refresh Numeric Columns")
numeric_column_dropdown = gr.Dropdown(
label="Select Numeric Column",
choices=[],
interactive=True,
elem_classes=["data_related"],
elem_id="custom_dropdown",
)
hypo_test_dropdown = gr.Dropdown(
label="Type of Hypothesis",
choices=[
"One sample Student's t-test",
"Equal variance between two groups",
"Two samples Student's t-test",
"One-way ANOVA",
],
value="One sample Student's t-test",
interactive=True,
)
mu0_input = gr.Textbox(
label="ΞΌβ‚€ (Null Hypothesis Mean)",
value="",
visible=True,
)
alternative_radio = gr.Radio(
label="Alternative hypothesis",
choices=["two-sided", "greater", "less"],
value="two-sided",
interactive=True,
visible=True,
)
ttest_correction_check = gr.Checkbox(
label="Correct for unequal variances (Welch's t-test)",
value=True,
visible=False,
)
equal_var_dropdown = gr.Dropdown(
label="Select Variance Test",
choices=["Bartlett", "Levene"],
value="Levene",
visible=False,
)
with gr.Row() as ttest_graph_option:
ttest_graph_check = gr.Checkbox(
label="Include graph",
value=True,
interactive=True,
)
ttest_plot_type = gr.Dropdown(
label="Select Graph",
choices=["Sample Histogram", "Mean Density"],
value="Mean Density",
visible=False,
)
ttest_boots_sample = gr.Slider(
minimum=100,
maximum=5000,
value=1000,
step=100,
label="Bootstrap Samples",
)
# ------------------------------------------------------------
# Categorical selection
# ------------------------------------------------------------
with gr.Group(visible=False) as category_group:
refresh_categorical_button = gr.Button(
"πŸ”„ Refresh Categorical Columns",
elem_id="run_button",
)
with gr.Row() as group1:
cat_column_dropdown_1 = gr.Dropdown(
label="Categorical Column 1",
choices=[],
elem_classes=["data_related"],
elem_id="custom_dropdown",
)
cat_values_dropdown_1 = gr.Dropdown(
label="Categories for Column 1",
multiselect=True,
choices=[],
interactive=True,
elem_classes=["data_related"],
elem_id="custom_dropdown",
)
name_group1 = gr.Textbox(
label="Name of Group 1",
value="Group 1",
visible=True,
interactive=True,
)
with gr.Row() as group2:
cat_column_dropdown_2 = gr.Dropdown(
label="Categorical Column 2",
choices=[],
elem_classes=["data_related"],
elem_id="custom_dropdown",
)
cat_values_dropdown_2 = gr.Dropdown(
label="Categories for Column 2",
multiselect=True,
choices=[],
interactive=True,
elem_classes=["data_related"],
elem_id="custom_dropdown",
)
name_group2 = gr.Textbox(
label="Name of Group 2",
value="Group 2",
visible=True,
interactive=True,
)
with gr.Row() as group_anova:
cat_column_dropdown_3 = gr.Dropdown(
label="Categorical Column",
choices=[],
elem_classes=["data_related"],
elem_id="custom_dropdown",
)
cat_values_dropdown_3 = gr.Dropdown(
label="Categories for Column",
multiselect=True,
choices=[],
interactive=True,
elem_classes=["data_related"],
elem_id="custom_dropdown",
)
with gr.Column(elem_id="column_centered"):
run_hypo_test_button = gr.Button(
value="πŸš€ Run Hypothesis Testing",
elem_id="run_button",
)
# ============================================================
# Results + Downloads
# ============================================================
with gr.Row(visible=False) as table_download_row:
table_filename = gr.Textbox(
label="Filename (without extension)",
placeholder="e.g. hypothesis_results",
)
table_download_button = gr.Button("πŸ’Ύ Download Table (CSV)")
table_file = gr.File(
label="Download link will appear here",
interactive=False,
)
output_table = gr.Dataframe(
interactive=False,
visible=False,
)
with gr.Row(visible=False) as figure_download_row:
figure_filename = gr.Textbox(
label="Filename (without extension)",
placeholder="e.g. hypothesis_figure",
)
figure_download_button = gr.Button("πŸ–ΌοΈ Download Figure (PNG)")
figure_file = gr.File(
label="Download link will appear here",
interactive=False,
)
output_plot = gr.Plot(
visible=False,
)
# ============================================================
# Helpers
# ============================================================
def refresh_numeric_columns():
numeric_cols = state.numeric_cols or []
return gr.update(choices=numeric_cols)
def refresh_categorical_columns():
cat_cols = state.categorical_cols or []
if not cat_cols:
return [gr.update(choices=[], value=None)] * 3 + [
gr.update(choices=[], value=[]),
gr.update(choices=[], value=[]),
gr.update(choices=[], value=[]),
]
return [
gr.update(choices=cat_cols, value=None), # cat_column_dropdown_1
gr.update(choices=cat_cols, value=None), # cat_column_dropdown_2
gr.update(choices=cat_cols, value=None), # cat_column_dropdown_3
gr.update(choices=[], value=[]), # cat_values_dropdown_1
gr.update(choices=[], value=[]), # cat_values_dropdown_2
gr.update(choices=[], value=[]), # cat_values_dropdown_3
]
def update_category_options(col: str | None):
df = state.filtered_df if state.filtered_df is not None else state.df
if df is None or not col or col not in df.columns:
return gr.update(choices=[], value=[])
values = sorted(df[col].dropna().unique())
values_str = [str(v) for v in values]
return gr.update(choices=values_str, value=[])
def update_group_name(cat_vals: list[str], default_label: str):
if cat_vals:
return gr.update(value=cat_vals[0])
return gr.update(value=default_label)
def toggle_hypo_test(sel: str):
if sel == "One sample Student's t-test":
return [
gr.update(visible=True), # mu0_input
gr.update(visible=True), # alternative_radio
gr.update(visible=True), # ttest_graph_option
gr.update(visible=False), # ttest_correction_check
gr.update(visible=False), # equal_var_dropdown
gr.update(visible=False), # category_group
gr.update(visible=False), # group1
gr.update(visible=False), # group2
gr.update(visible=False), # group_anova
]
elif sel == "Equal variance between two groups":
return [
gr.update(visible=False),
gr.update(visible=False),
gr.update(visible=True),
gr.update(visible=False),
gr.update(visible=True),
gr.update(visible=True),
gr.update(visible=True),
gr.update(visible=True),
gr.update(visible=False),
]
elif sel == "Two samples Student's t-test":
return [
gr.update(visible=False),
gr.update(visible=True),
gr.update(visible=True),
gr.update(visible=True),
gr.update(visible=False),
gr.update(visible=True),
gr.update(visible=True),
gr.update(visible=True),
gr.update(visible=False),
]
elif sel == "One-way ANOVA":
return [
gr.update(visible=False),
gr.update(visible=False),
gr.update(visible=False),
gr.update(visible=False),
gr.update(visible=False),
gr.update(visible=True),
gr.update(visible=False),
gr.update(visible=False),
gr.update(visible=True),
]
else:
# Fallback: hide everything
return [
gr.update(visible=False),
gr.update(visible=False),
gr.update(visible=False),
gr.update(visible=False),
gr.update(visible=False),
gr.update(visible=False),
gr.update(visible=False),
gr.update(visible=False),
gr.update(visible=False),
]
def toggle_ttest_plot_type(include_graph: bool, sel: str):
if include_graph and sel == "Two samples Student's t-test":
return gr.update(visible=True)
return gr.update(visible=False)
def on_run(
numeric_col: str | None,
hypo_test: str,
mu0_text: str,
alternative: str,
include_graph: bool,
bootstrap_samples: int,
cat_col1: str | None,
cat_vals1: list[str],
name_g1: str,
cat_col2: str | None,
cat_vals2: list[str],
name_g2: str,
cat_col3: str | None,
cat_vals3: list[str],
plot_type: str,
correction_flag: bool,
variance_test_type: str,
):
df = state.filtered_df if state.filtered_df is not None else state.df
def _error_result(message: str):
err = pd.DataFrame([[message]], columns=["Error"])
state.export_table = err
state.export_figure = None
# (table, figure, table_download_row, figure_download_row)
return (
gr.update(value=err, visible=True),
gr.update(value=None, visible=False),
gr.update(visible=True),
gr.update(visible=False),
)
if df is None:
return _error_result("No dataset loaded.")
if not numeric_col:
return _error_result("No numeric column selected.")
try:
table, fig = run_hypothesis_testing(
df=df,
numeric_col=numeric_col,
hypo_test=hypo_test,
mu0_text=mu0_text,
alternative=alternative,
include_graph=include_graph,
bootstrap_samples=int(bootstrap_samples),
cat_col1=cat_col1,
cat_vals1=cat_vals1 or [],
name_group1=name_g1,
cat_col2=cat_col2,
cat_vals2=cat_vals2 or [],
name_group2=name_g2,
cat_col3=cat_col3,
cat_vals3=cat_vals3 or [],
plot_type=plot_type,
correction=bool(correction_flag),
test_type=variance_test_type,
)
except Exception as e:
return _error_result(f"❌ Error: {e}")
state.export_table = table
state.export_figure = fig
return (
gr.update(value=table, visible=True),
gr.update(value=fig, visible=fig is not None),
gr.update(visible=True),
gr.update(visible=fig is not None),
)
def on_download_table(filename: str | None):
return dataframe_to_csv(state.export_table, filename or "hypothesis_test")
def on_download_figure(filename: str | None):
return figure_to_png(state.export_figure, filename or "hypothesis_test_plot")
# ============================================================
# Events
# ============================================================
refresh_columns_button.click(
fn=refresh_numeric_columns,
inputs=[],
outputs=numeric_column_dropdown,
)
hypo_test_dropdown.change(
fn=toggle_hypo_test,
inputs=[hypo_test_dropdown],
outputs=[
mu0_input,
alternative_radio,
ttest_graph_option,
ttest_correction_check,
equal_var_dropdown,
category_group,
group1,
group2,
group_anova,
],
)
hypo_test_dropdown.change(
fn=toggle_ttest_plot_type,
inputs=[ttest_graph_check, hypo_test_dropdown],
outputs=[ttest_plot_type],
)
ttest_graph_check.change(
fn=lambda check: gr.update(visible=check),
inputs=[ttest_graph_check],
outputs=[ttest_boots_sample],
)
ttest_graph_check.change(
fn=toggle_ttest_plot_type,
inputs=[ttest_graph_check, hypo_test_dropdown],
outputs=[ttest_plot_type],
)
refresh_categorical_button.click(
fn=refresh_categorical_columns,
inputs=[],
outputs=[
cat_column_dropdown_1,
cat_column_dropdown_2,
cat_column_dropdown_3,
cat_values_dropdown_1,
cat_values_dropdown_2,
cat_values_dropdown_3,
],
)
cat_column_dropdown_1.change(
fn=update_category_options,
inputs=[cat_column_dropdown_1],
outputs=[cat_values_dropdown_1],
)
cat_column_dropdown_2.change(
fn=update_category_options,
inputs=[cat_column_dropdown_2],
outputs=[cat_values_dropdown_2],
)
cat_column_dropdown_3.change(
fn=update_category_options,
inputs=[cat_column_dropdown_3],
outputs=[cat_values_dropdown_3],
)
cat_values_dropdown_1.change(
fn=update_group_name,
inputs=[cat_values_dropdown_1, name_group1],
outputs=name_group1,
)
cat_values_dropdown_2.change(
fn=update_group_name,
inputs=[cat_values_dropdown_2, name_group2],
outputs=name_group2,
)
run_hypo_test_button.click(
fn=on_run,
inputs=[
numeric_column_dropdown,
hypo_test_dropdown,
mu0_input,
alternative_radio,
ttest_graph_check,
ttest_boots_sample,
cat_column_dropdown_1,
cat_values_dropdown_1,
name_group1,
cat_column_dropdown_2,
cat_values_dropdown_2,
name_group2,
cat_column_dropdown_3,
cat_values_dropdown_3,
ttest_plot_type,
ttest_correction_check,
equal_var_dropdown,
],
outputs=[
output_table,
output_plot,
table_download_row,
figure_download_row,
],
)
table_download_button.click(
fn=on_download_table,
inputs=table_filename,
outputs=table_file,
)
figure_download_button.click(
fn=on_download_figure,
inputs=figure_filename,
outputs=figure_file,
)