| from __future__ import annotations
|
|
|
| from typing import List, Optional, Sequence, Tuple
|
|
|
| from matplotlib.figure import Figure
|
| import numpy as np
|
| import pandas as pd
|
|
|
| from core.linear_regression import run_linear_regression as _run_linear_regression
|
|
|
|
|
def _select_working_dataframe(
    df: Optional[pd.DataFrame],
    filtered_df: Optional[pd.DataFrame],
) -> pd.DataFrame:
    """
    Choose the dataframe an analysis should operate on.

    A filtered dataframe that actually contains data takes precedence. When
    it is missing or empty, the original dataframe is used instead. This
    mirrors the selection behaviour used in other tabs.

    Parameters
    ----------
    df
        The originally loaded dataset, or None if nothing has been loaded.
    filtered_df
        The filtered view of the dataset, or None if no filter is active.

    Returns
    -------
    pd.DataFrame
        ``filtered_df`` when it is non-empty, otherwise ``df``.

    Raises
    ------
    ValueError
        If ``df`` is None (checked first, regardless of ``filtered_df``), or
        if the fallback ``df`` is empty.
    """
    # A loaded dataset is mandatory even when a usable filter result exists.
    if df is None:
        raise ValueError("No dataset loaded.")

    filter_has_data = not (filtered_df is None or filtered_df.empty)
    if filter_has_data:
        return filtered_df

    # Only the fallback path needs the emptiness check on the original data.
    if df.empty:
        raise ValueError("The dataset is empty.")
    return df
|
|
|
|
|
def _parse_confidence_level(text: str) -> float:
    """
    Convert a confidence level such as '0.95' into a significance level.

    Parameters
    ----------
    text
        Raw user input. It is converted with ``str()`` and stripped of
        surrounding whitespace before parsing.

    Returns
    -------
    alpha : float
        ``1.0 - level`` (e.g. roughly 0.05 for a 95% confidence level).

    Raises
    ------
    ValueError
        If the input is blank, is not parseable as a float, or does not lie
        strictly between 0 and 1.
    """
    cleaned = str(text).strip()
    if cleaned == "":
        raise ValueError("Confidence level is required (e.g. 0.95).")

    try:
        confidence = float(cleaned)
    except ValueError as exc:
        raise ValueError("Confidence level must be a numeric value between 0 and 1.") from exc

    # The chained comparison is False for NaN, so "nan" is rejected below
    # together with every out-of-range value.
    if 0 < confidence < 1:
        return 1.0 - confidence

    raise ValueError("Confidence level must be between 0 and 1 (e.g. 0.95).")
|
|
|
|
|
def _parse_range(text: str) -> Optional[np.ndarray]:
    """
    Parse a range string like '0, 10' into a numpy array suitable for predictions.

    Parameters
    ----------
    text
        Raw user input of the form 'min, max'. It is converted with ``str()``
        and stripped of surrounding whitespace before parsing.

    Returns
    -------
    np.ndarray or None
        If the string is empty or only whitespace, returns None.
        Otherwise returns a 1-D array of 100 evenly spaced values between
        the parsed minimum and maximum (both endpoints included).

    Raises
    ------
    ValueError
        If the text does not contain exactly two comma-separated parts, if
        either part is not numeric, if either value is NaN or infinite, or
        if the minimum is not strictly less than the maximum.
    """
    s = str(text).strip()
    if not s:
        return None

    parts = s.split(",")
    if len(parts) != 2:
        raise ValueError("Range must have the form 'min, max'.")

    try:
        lo = float(parts[0].strip())
        hi = float(parts[1].strip())
    except ValueError as exc:
        raise ValueError("Range values must be numeric (e.g. '0, 10').") from exc

    # float() happily parses "nan", "inf" and "-inf". A NaN makes the
    # ordering check below evaluate to False (so it would slip through), and
    # infinite bounds cannot produce a usable evenly spaced grid, so reject
    # non-finite values explicitly.
    if not (np.isfinite(lo) and np.isfinite(hi)):
        raise ValueError("Range values must be finite numbers (e.g. '0, 10').")

    if lo >= hi:
        raise ValueError("Range minimum must be strictly less than the maximum.")

    return np.linspace(lo, hi, 100)
|
|
|
|
|
def run_linear_regression(
    *,
    df: Optional[pd.DataFrame],
    filtered_df: Optional[pd.DataFrame],
    formula_check: bool,
    formula_text: str,
    formula_latex: str,
    dependent_var: Optional[str],
    independent_vars: List[str],
    alpha_input: str,
    intercept: bool,
    graph_check: bool,
    graph_type: str,
    show_ci: bool,
    show_pi: bool,
    fit_to_obs: bool,
    x_range_text: str,
    round_digits: int = 4,
) -> Tuple[str, pd.DataFrame, Optional[Figure]]:
    """
    Controller entry point for the Linear Regression tab.

    Validates and parses the raw UI input, delegates the computation to
    ``core.linear_regression.run_linear_regression`` and returns

        (summary_html, params_df_rounded, figure)

    where ``params_df_rounded`` is the parameter table returned by the stats
    layer, rounded to ``round_digits`` decimals.

    Validation happens in this order, and the first failure raises
    ``ValueError``:

    1. a usable dataset exists (filtered data preferred over the original),
    2. a dependent variable is selected,
    3. at least one independent variable is selected,
    4. the 'Simple Regression' graph is requested with exactly one
       independent variable,
    5. ``alpha_input`` is a confidence level strictly between 0 and 1,
    6. ``x_range_text`` is a valid 'min, max' range. It is only parsed when
       a 'Simple Regression' graph is requested and ``fit_to_obs`` is off;
       blank text yields no explicit x vector.

    Exceptions are not handled here; they should be caught in the tab layer
    and turned into user-facing error messages.
    """
    data = _select_working_dataframe(df, filtered_df)

    if dependent_var is None or dependent_var == "":
        raise ValueError("Please select a dependent variable.")
    if not independent_vars:
        raise ValueError("Please select at least one independent variable.")

    # Both the predictor-count check and the custom x-range only apply when
    # the simple-regression plot has been requested.
    simple_plot_requested = graph_check and graph_type == "Simple Regression"

    if simple_plot_requested and len(independent_vars) != 1:
        raise ValueError(
            "The 'Simple Regression' graph is only available when exactly one "
            "independent variable is selected."
        )

    significance = _parse_confidence_level(alpha_input)

    # The range text is deliberately left unparsed (and unvalidated) when the
    # plot follows the observations or no simple plot is drawn.
    use_custom_range = simple_plot_requested and not fit_to_obs
    prediction_x = _parse_range(x_range_text) if use_custom_range else None

    summary_html, coefficients, figure = _run_linear_regression(
        df=data,
        formula_check=formula_check,
        formula_text=formula_text,
        formula_latex=formula_latex,
        dependent_var=dependent_var,
        independent_vars=independent_vars,
        alpha=significance,
        intercept=intercept,
        create_graph=graph_check,
        graph_type=graph_type,
        show_ci=show_ci,
        show_pi=show_pi,
        fit_to_obs=fit_to_obs,
        x_vector=prediction_x,
    )

    return summary_html, coefficients.round(round_digits), figure
|
|
|