Further cleaning of app.py
Browse files
app.py
CHANGED
|
@@ -102,6 +102,13 @@ _VIEW_SPECS = [
|
|
| 102 |
_VIEW_LABELS = [label for label, _ in _VIEW_SPECS]
|
| 103 |
_VIEW_SLUG_BY_LABEL = dict(_VIEW_SPECS)
|
| 104 |
_VIEW_LABEL_BY_SLUG = {slug: label for label, slug in _VIEW_SPECS}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
|
| 107 |
# ---------------------------------------------------------------------------
|
|
@@ -147,6 +154,26 @@ def _sync_view_query_param() -> None:
|
|
| 147 |
st.query_params["view"] = _VIEW_SLUG_BY_LABEL[active]
|
| 148 |
|
| 149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
@st.cache_data(show_spinner=False)
|
| 151 |
def _clean_pipeline(_raw_hash, raw_df, date_col, y_cols, dup_action, missing_action):
|
| 152 |
cleaned, report = clean_dataframe(raw_df, date_col, list(y_cols),
|
|
@@ -675,10 +702,13 @@ for key in [
|
|
| 675 |
"raw_df", "raw_df_original", "cleaned_df", "cleaning_report", "freq_info",
|
| 676 |
"date_col", "y_cols", "qc", "qc_hash",
|
| 677 |
"_upload_id", "_upload_delim", "_clean_key",
|
| 678 |
-
"_prev_data_format", "_prev_pivot_key",
|
|
|
|
| 679 |
]:
|
| 680 |
if key not in st.session_state:
|
| 681 |
st.session_state[key] = None
|
|
|
|
|
|
|
| 682 |
|
| 683 |
# ---------------------------------------------------------------------------
|
| 684 |
# Sidebar — Data input
|
|
@@ -697,8 +727,8 @@ with st.sidebar:
|
|
| 697 |
""",
|
| 698 |
unsafe_allow_html=True,
|
| 699 |
)
|
| 700 |
-
st.divider()
|
| 701 |
-
st.subheader("Vibe-Coded
|
| 702 |
st.markdown(
|
| 703 |
"""
|
| 704 |
<div class="dev-card">
|
|
@@ -764,11 +794,24 @@ with st.sidebar:
|
|
| 764 |
"""Store new dataset and clear stale format/pivot keys."""
|
| 765 |
st.session_state.raw_df_original = df
|
| 766 |
st.session_state.raw_df = df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 767 |
# Clear format-related keys so auto-detection runs fresh
|
| 768 |
for _k in ("sidebar_data_format", "sidebar_group_col",
|
| 769 |
"sidebar_value_col", "sidebar_y_cols",
|
| 770 |
-
"_prev_data_format", "_prev_pivot_key"
|
|
|
|
| 771 |
st.session_state.pop(_k, None)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 772 |
|
| 773 |
if uploaded is not None:
|
| 774 |
file_id = (uploaded.name, uploaded.size)
|
|
@@ -790,182 +833,210 @@ with st.sidebar:
|
|
| 790 |
|
| 791 |
if raw_df_orig is not None:
|
| 792 |
st.divider()
|
| 793 |
-
st.subheader("Column
|
|
|
|
| 794 |
|
| 795 |
-
# Auto-suggest on the *original* (possibly long) DataFrame
|
| 796 |
date_suggestions = suggest_date_columns(raw_df_orig)
|
| 797 |
-
|
| 798 |
all_cols = list(raw_df_orig.columns)
|
| 799 |
-
default_date_idx = (
|
| 800 |
-
all_cols.index(date_suggestions[0]) if date_suggestions else 0
|
| 801 |
-
)
|
| 802 |
|
| 803 |
if "sidebar_date_col" not in st.session_state:
|
| 804 |
st.session_state["sidebar_date_col"] = all_cols[default_date_idx]
|
| 805 |
-
|
| 806 |
-
|
| 807 |
-
|
| 808 |
-
|
| 809 |
-
|
| 810 |
-
|
| 811 |
-
|
| 812 |
-
|
| 813 |
-
|
| 814 |
-
|
| 815 |
-
|
| 816 |
-
|
| 817 |
-
|
| 818 |
-
|
| 819 |
-
|
| 820 |
-
|
| 821 |
-
|
| 822 |
-
|
| 823 |
-
|
| 824 |
-
|
| 825 |
-
# ---- Build effective (wide) DataFrame --------------------------------
|
| 826 |
-
if data_format == "Long":
|
| 827 |
-
# Columns eligible for group (string/object) and value (numeric)
|
| 828 |
-
other_cols = [c for c in all_cols if c != date_col]
|
| 829 |
-
string_cols = [
|
| 830 |
-
c for c in other_cols
|
| 831 |
-
if raw_df_orig[c].dtype == object
|
| 832 |
-
or pd.api.types.is_string_dtype(raw_df_orig[c])
|
| 833 |
-
]
|
| 834 |
-
numeric_cols = [
|
| 835 |
-
c for c in other_cols
|
| 836 |
-
if pd.api.types.is_numeric_dtype(raw_df_orig[c])
|
| 837 |
-
]
|
| 838 |
-
|
| 839 |
-
if "sidebar_group_col" not in st.session_state:
|
| 840 |
-
st.session_state["sidebar_group_col"] = (
|
| 841 |
-
auto_group if auto_group and auto_group in string_cols
|
| 842 |
-
else (string_cols[0] if string_cols else None)
|
| 843 |
-
)
|
| 844 |
-
group_col = st.selectbox(
|
| 845 |
-
"Group column", string_cols, key="sidebar_group_col",
|
| 846 |
)
|
| 847 |
|
| 848 |
-
|
| 849 |
-
if "sidebar_value_col" not in st.session_state:
|
| 850 |
-
st.session_state["sidebar_value_col"] = (
|
| 851 |
-
auto_value if auto_value and auto_value in value_options
|
| 852 |
-
else (value_options[0] if value_options else None)
|
| 853 |
-
)
|
| 854 |
-
value_col_sel = st.selectbox(
|
| 855 |
-
"Value column", value_options, key="sidebar_value_col",
|
| 856 |
-
)
|
| 857 |
-
|
| 858 |
-
# Clear y_cols when group/value changes
|
| 859 |
-
pivot_key = (group_col, value_col_sel)
|
| 860 |
-
if st.session_state.get("_prev_pivot_key") != pivot_key:
|
| 861 |
st.session_state.pop("sidebar_y_cols", None)
|
| 862 |
-
st.session_state["
|
| 863 |
-
|
| 864 |
-
|
| 865 |
-
|
| 866 |
-
|
| 867 |
-
|
| 868 |
-
|
| 869 |
-
|
| 870 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 871 |
else:
|
| 872 |
effective_df = raw_df_orig
|
| 873 |
-
|
| 874 |
-
|
| 875 |
-
|
| 876 |
-
|
| 877 |
-
|
| 878 |
-
|
| 879 |
-
|
| 880 |
-
|
| 881 |
-
|
| 882 |
-
|
| 883 |
-
|
| 884 |
-
|
| 885 |
-
|
| 886 |
-
|
| 887 |
-
|
| 888 |
-
|
| 889 |
-
available_y[:4] if available_y else []
|
| 890 |
)
|
| 891 |
-
|
| 892 |
-
|
| 893 |
-
|
| 894 |
-
|
| 895 |
-
|
| 896 |
-
|
| 897 |
-
|
| 898 |
-
|
| 899 |
-
|
| 900 |
-
|
| 901 |
-
|
| 902 |
-
|
| 903 |
-
|
| 904 |
-
|
| 905 |
-
|
| 906 |
-
|
| 907 |
-
|
| 908 |
-
|
| 909 |
-
|
| 910 |
-
|
| 911 |
-
|
| 912 |
-
|
| 913 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 914 |
|
| 915 |
-
|
| 916 |
-
if y_cols:
|
| 917 |
-
_key = (date_col, tuple(y_cols), dup_action, missing_action,
|
| 918 |
-
st.session_state._upload_id)
|
| 919 |
-
if st.session_state.get("_clean_key") != _key:
|
| 920 |
cleaned_df, report, freq_info = _clean_pipeline(
|
| 921 |
-
_df_hash(
|
| 922 |
dup_action, missing_action,
|
| 923 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 924 |
st.session_state.cleaned_df = cleaned_df
|
| 925 |
st.session_state.cleaning_report = report
|
| 926 |
st.session_state.freq_info = freq_info
|
| 927 |
-
st.session_state._clean_key =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 928 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 929 |
cleaned_df = st.session_state.cleaned_df
|
|
|
|
|
|
|
| 930 |
freq_info = st.session_state.freq_info
|
| 931 |
-
st.caption(f"Frequency: **{freq_info.label}** "
|
| 932 |
-
f"({'regular' if freq_info.is_regular else 'irregular'})")
|
| 933 |
|
| 934 |
-
|
| 935 |
-
|
| 936 |
-
"
|
| 937 |
-
|
| 938 |
-
key="sidebar_freq_override",
|
| 939 |
-
)
|
| 940 |
-
if freq_override.strip():
|
| 941 |
-
st.session_state.freq_info = FrequencyInfo(
|
| 942 |
-
label=freq_override.strip(),
|
| 943 |
-
median_delta=freq_info.median_delta,
|
| 944 |
-
is_regular=freq_info.is_regular,
|
| 945 |
-
)
|
| 946 |
-
freq_info = st.session_state.freq_info
|
| 947 |
|
| 948 |
-
# ------ QueryChat ------
|
| 949 |
if check_querychat_available():
|
| 950 |
st.divider()
|
| 951 |
st.subheader("QueryChat")
|
| 952 |
-
|
| 953 |
-
|
| 954 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 955 |
else:
|
| 956 |
st.divider()
|
| 957 |
st.info(
|
| 958 |
"Set `OPENAI_API_KEY` to enable QueryChat "
|
| 959 |
"(natural-language data filtering)."
|
| 960 |
)
|
| 961 |
-
|
| 962 |
-
st.
|
| 963 |
-
|
| 964 |
-
|
| 965 |
-
|
| 966 |
-
|
| 967 |
-
|
| 968 |
-
)
|
| 969 |
|
| 970 |
# ---------------------------------------------------------------------------
|
| 971 |
# Main area — guard
|
|
@@ -1072,16 +1143,19 @@ _data_quality_fragment(report)
|
|
| 1072 |
# ---------------------------------------------------------------------------
|
| 1073 |
if "active_view" not in st.session_state:
|
| 1074 |
st.session_state["active_view"] = _initial_view_label()
|
|
|
|
|
|
|
| 1075 |
|
|
|
|
|
|
|
| 1076 |
view_col, reset_col = st.columns([6, 1])
|
| 1077 |
with view_col:
|
| 1078 |
active_view = st.radio(
|
| 1079 |
-
"
|
| 1080 |
_VIEW_LABELS,
|
| 1081 |
key="active_view",
|
| 1082 |
horizontal=True,
|
| 1083 |
-
|
| 1084 |
-
on_change=_sync_view_query_param,
|
| 1085 |
)
|
| 1086 |
with reset_col:
|
| 1087 |
if st.button("Reset all", key="reset_main", use_container_width=True):
|
|
|
|
| 102 |
_VIEW_LABELS = [label for label, _ in _VIEW_SPECS]
|
| 103 |
_VIEW_SLUG_BY_LABEL = dict(_VIEW_SPECS)
|
| 104 |
_VIEW_LABEL_BY_SLUG = {slug: label for label, slug in _VIEW_SPECS}
|
| 105 |
+
_ANALYSIS_STATE_KEYS = [
|
| 106 |
+
"tab_a_y", "dr_mode", "dr_n", "dr_custom",
|
| 107 |
+
"chart_type_a", "pal_a", "color_by_a", "period_a", "window_a", "lag_a", "decomp_a",
|
| 108 |
+
"_single_df_plot", "_single_fig", "_single_active_y", "_single_chart_type",
|
| 109 |
+
"panel_cols", "panel_chart", "panel_shared", "pal_b", "_panel_fig",
|
| 110 |
+
"spag_cols", "spag_alpha", "spag_topn", "spag_highlight", "spag_median", "pal_c", "_spag_fig",
|
| 111 |
+
]
|
| 112 |
|
| 113 |
|
| 114 |
# ---------------------------------------------------------------------------
|
|
|
|
| 154 |
st.query_params["view"] = _VIEW_SLUG_BY_LABEL[active]
|
| 155 |
|
| 156 |
|
| 157 |
+
def _clear_analysis_state(reset_querychat: bool = False) -> None:
    """Remove every key in ``_ANALYSIS_STATE_KEYS`` from the session state.

    Keys that are absent are ignored. When ``reset_querychat`` is true, the
    ``qc`` and ``qc_hash`` entries are additionally set to ``None`` and
    ``enable_querychat`` is set to ``False``.
    """
    state = st.session_state
    for state_key in _ANALYSIS_STATE_KEYS:
        state.pop(state_key, None)

    if not reset_querychat:
        return

    # Same three writes, in the same order, as separate assignments would do.
    for name, value in (("qc", None), ("qc_hash", None), ("enable_querychat", False)):
        state[name] = value
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
def _on_view_change() -> None:
    """Handle a change of the ``active_view`` session-state value.

    If a previous view was recorded and it differs from the current one, the
    analysis state (including QueryChat) is cleared. The current view is then
    stored as ``_prev_active_view`` and the ``view`` query parameter is
    re-synchronised.
    """
    state = st.session_state
    current = state.get("active_view")
    previous = state.get("_prev_active_view")

    # No recorded previous view (None/empty) means there is nothing to reset.
    view_switched = bool(previous) and previous != current
    if view_switched:
        _clear_analysis_state(reset_querychat=True)

    state["_prev_active_view"] = current
    _sync_view_query_param()
|
| 175 |
+
|
| 176 |
+
|
| 177 |
@st.cache_data(show_spinner=False)
|
| 178 |
def _clean_pipeline(_raw_hash, raw_df, date_col, y_cols, dup_action, missing_action):
|
| 179 |
cleaned, report = clean_dataframe(raw_df, date_col, list(y_cols),
|
|
|
|
| 702 |
"raw_df", "raw_df_original", "cleaned_df", "cleaning_report", "freq_info",
|
| 703 |
"date_col", "y_cols", "qc", "qc_hash",
|
| 704 |
"_upload_id", "_upload_delim", "_clean_key",
|
| 705 |
+
"_prev_data_format", "_prev_pivot_key", "_prev_active_view",
|
| 706 |
+
"setup_applied", "_last_applied_settings_key",
|
| 707 |
]:
|
| 708 |
if key not in st.session_state:
|
| 709 |
st.session_state[key] = None
|
| 710 |
+
if st.session_state["setup_applied"] is None:
|
| 711 |
+
st.session_state["setup_applied"] = False
|
| 712 |
|
| 713 |
# ---------------------------------------------------------------------------
|
| 714 |
# Sidebar — Data input
|
|
|
|
| 727 |
""",
|
| 728 |
unsafe_allow_html=True,
|
| 729 |
)
|
| 730 |
+
# st.divider()
|
| 731 |
+
st.subheader("Vibe-Coded By")
|
| 732 |
st.markdown(
|
| 733 |
"""
|
| 734 |
<div class="dev-card">
|
|
|
|
| 794 |
"""Store new dataset and clear stale format/pivot keys."""
|
| 795 |
st.session_state.raw_df_original = df
|
| 796 |
st.session_state.raw_df = df
|
| 797 |
+
st.session_state.cleaned_df = None
|
| 798 |
+
st.session_state.cleaning_report = None
|
| 799 |
+
st.session_state.freq_info = None
|
| 800 |
+
st.session_state.date_col = None
|
| 801 |
+
st.session_state.y_cols = None
|
| 802 |
+
st.session_state._clean_key = None
|
| 803 |
+
st.session_state["setup_applied"] = False
|
| 804 |
+
st.session_state["_last_applied_settings_key"] = None
|
| 805 |
# Clear format-related keys so auto-detection runs fresh
|
| 806 |
for _k in ("sidebar_data_format", "sidebar_group_col",
|
| 807 |
"sidebar_value_col", "sidebar_y_cols",
|
| 808 |
+
"_prev_data_format", "_prev_pivot_key",
|
| 809 |
+
"sidebar_dup_action", "sidebar_missing_action", "sidebar_freq_override"):
|
| 810 |
st.session_state.pop(_k, None)
|
| 811 |
+
_clear_analysis_state(reset_querychat=True)
|
| 812 |
+
st.session_state["active_view"] = _VIEW_LABELS[0]
|
| 813 |
+
st.session_state["_prev_active_view"] = st.session_state["active_view"]
|
| 814 |
+
_sync_view_query_param()
|
| 815 |
|
| 816 |
if uploaded is not None:
|
| 817 |
file_id = (uploaded.name, uploaded.size)
|
|
|
|
| 833 |
|
| 834 |
if raw_df_orig is not None:
|
| 835 |
st.divider()
|
| 836 |
+
st.subheader("Column and Cleaning Setup")
|
| 837 |
+
st.caption("Batch changes below, then click `Apply setup`.")
|
| 838 |
|
|
|
|
| 839 |
date_suggestions = suggest_date_columns(raw_df_orig)
|
|
|
|
| 840 |
all_cols = list(raw_df_orig.columns)
|
| 841 |
+
default_date_idx = all_cols.index(date_suggestions[0]) if date_suggestions else 0
|
|
|
|
|
|
|
| 842 |
|
| 843 |
if "sidebar_date_col" not in st.session_state:
|
| 844 |
st.session_state["sidebar_date_col"] = all_cols[default_date_idx]
|
| 845 |
+
if "sidebar_dup_action" not in st.session_state:
|
| 846 |
+
st.session_state["sidebar_dup_action"] = "keep_last"
|
| 847 |
+
if "sidebar_missing_action" not in st.session_state:
|
| 848 |
+
st.session_state["sidebar_missing_action"] = "interpolate"
|
| 849 |
+
if "sidebar_freq_override" not in st.session_state:
|
| 850 |
+
st.session_state["sidebar_freq_override"] = ""
|
| 851 |
+
|
| 852 |
+
with st.form("sidebar_setup_form", border=False):
|
| 853 |
+
date_col = st.selectbox("Date column", all_cols, key="sidebar_date_col")
|
| 854 |
+
is_long, auto_group, auto_value = detect_long_format(raw_df_orig, date_col)
|
| 855 |
+
|
| 856 |
+
if "sidebar_data_format" not in st.session_state:
|
| 857 |
+
st.session_state["sidebar_data_format"] = "Long" if is_long else "Wide"
|
| 858 |
+
|
| 859 |
+
data_format = st.radio(
|
| 860 |
+
"Data format",
|
| 861 |
+
["Wide", "Long"],
|
| 862 |
+
key="sidebar_data_format",
|
| 863 |
+
horizontal=True,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 864 |
)
|
| 865 |
|
| 866 |
+
if st.session_state.get("_prev_data_format") != data_format:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 867 |
st.session_state.pop("sidebar_y_cols", None)
|
| 868 |
+
st.session_state["_prev_data_format"] = data_format
|
| 869 |
+
|
| 870 |
+
group_col = None
|
| 871 |
+
value_col_sel = None
|
| 872 |
+
if data_format == "Long":
|
| 873 |
+
other_cols = [c for c in all_cols if c != date_col]
|
| 874 |
+
string_cols = [
|
| 875 |
+
c for c in other_cols
|
| 876 |
+
if raw_df_orig[c].dtype == object
|
| 877 |
+
or pd.api.types.is_string_dtype(raw_df_orig[c])
|
| 878 |
+
]
|
| 879 |
+
numeric_cols = [
|
| 880 |
+
c for c in other_cols
|
| 881 |
+
if pd.api.types.is_numeric_dtype(raw_df_orig[c])
|
| 882 |
+
]
|
| 883 |
+
|
| 884 |
+
if string_cols:
|
| 885 |
+
if "sidebar_group_col" not in st.session_state:
|
| 886 |
+
st.session_state["sidebar_group_col"] = (
|
| 887 |
+
auto_group if auto_group and auto_group in string_cols
|
| 888 |
+
else string_cols[0]
|
| 889 |
+
)
|
| 890 |
+
group_col = st.selectbox("Group column", string_cols, key="sidebar_group_col")
|
| 891 |
+
else:
|
| 892 |
+
st.warning("No categorical columns available for long-format grouping.")
|
| 893 |
+
|
| 894 |
+
value_options = [c for c in numeric_cols if c != group_col] if group_col else numeric_cols
|
| 895 |
+
|
| 896 |
+
if value_options:
|
| 897 |
+
if "sidebar_value_col" not in st.session_state:
|
| 898 |
+
st.session_state["sidebar_value_col"] = (
|
| 899 |
+
auto_value if auto_value and auto_value in value_options
|
| 900 |
+
else value_options[0]
|
| 901 |
+
)
|
| 902 |
+
value_col_sel = st.selectbox("Value column", value_options, key="sidebar_value_col")
|
| 903 |
+
else:
|
| 904 |
+
st.warning("No numeric value column available for long-format pivoting.")
|
| 905 |
+
|
| 906 |
+
pivot_key = (group_col, value_col_sel)
|
| 907 |
+
if st.session_state.get("_prev_pivot_key") != pivot_key:
|
| 908 |
+
st.session_state.pop("sidebar_y_cols", None)
|
| 909 |
+
st.session_state["_prev_pivot_key"] = pivot_key
|
| 910 |
+
|
| 911 |
+
if group_col and value_col_sel:
|
| 912 |
+
effective_df = pivot_long_to_wide(
|
| 913 |
+
raw_df_orig, date_col, group_col, value_col_sel,
|
| 914 |
+
)
|
| 915 |
+
n_groups = raw_df_orig[group_col].nunique()
|
| 916 |
+
st.caption(f"Pivot preview: **{n_groups}** groups from `{group_col}`")
|
| 917 |
+
available_y = [c for c in effective_df.columns if c != date_col]
|
| 918 |
+
else:
|
| 919 |
+
effective_df = raw_df_orig
|
| 920 |
+
available_y = []
|
| 921 |
else:
|
| 922 |
effective_df = raw_df_orig
|
| 923 |
+
numeric_suggestions = suggest_numeric_columns(raw_df_orig)
|
| 924 |
+
available_y = [c for c in numeric_suggestions if c != date_col]
|
| 925 |
+
|
| 926 |
+
if "sidebar_y_cols" in st.session_state:
|
| 927 |
+
st.session_state["sidebar_y_cols"] = [
|
| 928 |
+
c for c in st.session_state["sidebar_y_cols"] if c in available_y
|
| 929 |
+
]
|
| 930 |
+
if "sidebar_y_cols" not in st.session_state:
|
| 931 |
+
st.session_state["sidebar_y_cols"] = available_y[:4] if available_y else []
|
| 932 |
+
y_cols = st.multiselect("Value column(s)", available_y, key="sidebar_y_cols")
|
| 933 |
+
|
| 934 |
+
st.markdown("##### Cleaning Options")
|
| 935 |
+
dup_action = st.selectbox(
|
| 936 |
+
"Duplicate dates",
|
| 937 |
+
["keep_last", "keep_first", "drop_all"],
|
| 938 |
+
key="sidebar_dup_action",
|
|
|
|
| 939 |
)
|
| 940 |
+
missing_action = st.selectbox(
|
| 941 |
+
"Missing values",
|
| 942 |
+
["interpolate", "ffill", "drop"],
|
| 943 |
+
key="sidebar_missing_action",
|
| 944 |
+
)
|
| 945 |
+
freq_override = st.text_input(
|
| 946 |
+
"Override frequency label (optional)",
|
| 947 |
+
help="e.g. Daily, Weekly, Monthly, Quarterly, Yearly",
|
| 948 |
+
key="sidebar_freq_override",
|
| 949 |
+
)
|
| 950 |
+
apply_setup = st.form_submit_button("Apply setup", use_container_width=True)
|
| 951 |
+
|
| 952 |
+
if apply_setup:
|
| 953 |
+
st.session_state.raw_df = effective_df
|
| 954 |
+
st.session_state.date_col = date_col
|
| 955 |
+
st.session_state.y_cols = y_cols
|
| 956 |
+
|
| 957 |
+
settings_key = (
|
| 958 |
+
st.session_state._upload_id,
|
| 959 |
+
date_col,
|
| 960 |
+
data_format,
|
| 961 |
+
st.session_state.get("sidebar_group_col"),
|
| 962 |
+
st.session_state.get("sidebar_value_col"),
|
| 963 |
+
tuple(y_cols),
|
| 964 |
+
dup_action,
|
| 965 |
+
missing_action,
|
| 966 |
+
freq_override.strip(),
|
| 967 |
+
)
|
| 968 |
+
if st.session_state.get("_last_applied_settings_key") != settings_key:
|
| 969 |
+
_clear_analysis_state(reset_querychat=True)
|
| 970 |
+
st.session_state["_last_applied_settings_key"] = settings_key
|
| 971 |
+
st.session_state["setup_applied"] = True
|
| 972 |
|
| 973 |
+
if y_cols:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 974 |
cleaned_df, report, freq_info = _clean_pipeline(
|
| 975 |
+
_df_hash(effective_df), effective_df, date_col, tuple(y_cols),
|
| 976 |
dup_action, missing_action,
|
| 977 |
)
|
| 978 |
+
if freq_override.strip():
|
| 979 |
+
freq_info = FrequencyInfo(
|
| 980 |
+
label=freq_override.strip(),
|
| 981 |
+
median_delta=freq_info.median_delta,
|
| 982 |
+
is_regular=freq_info.is_regular,
|
| 983 |
+
)
|
| 984 |
+
|
| 985 |
st.session_state.cleaned_df = cleaned_df
|
| 986 |
st.session_state.cleaning_report = report
|
| 987 |
st.session_state.freq_info = freq_info
|
| 988 |
+
st.session_state._clean_key = (
|
| 989 |
+
date_col, tuple(y_cols), dup_action, missing_action,
|
| 990 |
+
st.session_state._upload_id,
|
| 991 |
+
)
|
| 992 |
+
else:
|
| 993 |
+
st.session_state.cleaned_df = None
|
| 994 |
+
st.session_state.cleaning_report = None
|
| 995 |
+
st.session_state.freq_info = None
|
| 996 |
+
st.session_state._clean_key = None
|
| 997 |
+
st.session_state.qc = None
|
| 998 |
+
st.session_state.qc_hash = None
|
| 999 |
|
| 1000 |
+
if not st.session_state.get("setup_applied"):
|
| 1001 |
+
st.info("Configure columns and cleaning options, then click `Apply setup`.")
|
| 1002 |
+
|
| 1003 |
+
if st.session_state.get("setup_applied") and st.session_state.get("y_cols"):
|
| 1004 |
cleaned_df = st.session_state.cleaned_df
|
| 1005 |
+
date_col = st.session_state.date_col
|
| 1006 |
+
y_cols = st.session_state.y_cols
|
| 1007 |
freq_info = st.session_state.freq_info
|
|
|
|
|
|
|
| 1008 |
|
| 1009 |
+
st.success("Setup applied. Continue in the main panel to choose an analysis view.")
|
| 1010 |
+
if freq_info is not None:
|
| 1011 |
+
st.caption(f"Frequency: **{freq_info.label}** "
|
| 1012 |
+
f"({'regular' if freq_info.is_regular else 'irregular'})")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1013 |
|
|
|
|
| 1014 |
if check_querychat_available():
|
| 1015 |
st.divider()
|
| 1016 |
st.subheader("QueryChat")
|
| 1017 |
+
enable_qc = st.toggle(
|
| 1018 |
+
"Enable QueryChat filtering",
|
| 1019 |
+
key="enable_querychat",
|
| 1020 |
+
help="Use natural-language prompts to filter the dataset (e.g., 'last 5 years'); chart views then use the filtered data.",
|
| 1021 |
+
)
|
| 1022 |
+
if enable_qc and cleaned_df is not None and freq_info is not None:
|
| 1023 |
+
_querychat_fragment(cleaned_df, date_col, y_cols, freq_info.label)
|
| 1024 |
+
else:
|
| 1025 |
+
st.session_state.qc = None
|
| 1026 |
+
st.session_state.qc_hash = None
|
| 1027 |
else:
|
| 1028 |
st.divider()
|
| 1029 |
st.info(
|
| 1030 |
"Set `OPENAI_API_KEY` to enable QueryChat "
|
| 1031 |
"(natural-language data filtering)."
|
| 1032 |
)
|
| 1033 |
+
# st.divider()
|
| 1034 |
+
# st.caption(
|
| 1035 |
+
# "**Privacy:** All processing is in-memory. "
|
| 1036 |
+
# "If you click **Interpret Chart with AI**, the chart image is sent to OpenAI — "
|
| 1037 |
+
# "do not include sensitive data in your charts. "
|
| 1038 |
+
# "QueryChat protects your privacy by only passing metadata (not your data) to OpenAI."
|
| 1039 |
+
# )
|
|
|
|
| 1040 |
|
| 1041 |
# ---------------------------------------------------------------------------
|
| 1042 |
# Main area — guard
|
|
|
|
| 1143 |
# ---------------------------------------------------------------------------
|
| 1144 |
if "active_view" not in st.session_state:
|
| 1145 |
st.session_state["active_view"] = _initial_view_label()
|
| 1146 |
+
if st.session_state.get("_prev_active_view") is None:
|
| 1147 |
+
st.session_state["_prev_active_view"] = st.session_state["active_view"]
|
| 1148 |
|
| 1149 |
+
st.subheader("Explore: Choose Analysis View")
|
| 1150 |
+
st.caption("Switching views resets chart controls and filtered data for a clean start.")
|
| 1151 |
view_col, reset_col = st.columns([6, 1])
|
| 1152 |
with view_col:
|
| 1153 |
active_view = st.radio(
|
| 1154 |
+
"Analysis view",
|
| 1155 |
_VIEW_LABELS,
|
| 1156 |
key="active_view",
|
| 1157 |
horizontal=True,
|
| 1158 |
+
on_change=_on_view_change,
|
|
|
|
| 1159 |
)
|
| 1160 |
with reset_col:
|
| 1161 |
if st.button("Reset all", key="reset_main", use_container_width=True):
|