fmegahed committed on
Commit
781ac00
·
1 Parent(s): 22ced1b

Refactor app UX flow: move setup to main canvas, add mode/chart gating, fix date/value column selection, and add optional type casting

Browse files
Files changed (2) hide show
  1. app.py +810 -253
  2. src/cleaning.py +11 -8
app.py CHANGED
@@ -98,6 +98,10 @@ _CHART_TYPES = [
98
 
99
  _PALETTE_NAMES = ["Set2", "Dark2", "Set1", "Paired", "Pastel1", "Pastel2", "Accent"]
100
  _STYLE_DICT = get_miami_mpl_style()
 
 
 
 
101
 
102
  # ---------------------------------------------------------------------------
103
  # State helpers
@@ -116,6 +120,8 @@ def _make_empty_state() -> dict:
116
  "panel_png": None,
117
  "spag_png": None,
118
  "qc": None,
 
 
119
  }
120
 
121
 
@@ -183,6 +189,222 @@ def _format_multi_summary_md(summary_df: pd.DataFrame) -> str:
183
  return "\n".join(lines)
184
 
185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  # ---------------------------------------------------------------------------
187
  # Data helpers
188
  # ---------------------------------------------------------------------------
@@ -353,23 +575,33 @@ _WELCOME_MD = """
353
 
354
  ---
355
 
356
- ### Get Started in 3 Steps
357
 
358
- <div style="display:grid; grid-template-columns:repeat(3, 1fr); gap:1rem; margin:1rem 0;">
359
  <div class="step-card">
360
  <div class="step-number">1</div>
361
  <div class="step-title">Load Data</div>
362
- <div class="step-desc">Upload a CSV from the sidebar or pick one of the built-in demo datasets.</div>
363
  </div>
364
  <div class="step-card">
365
  <div class="step-number">2</div>
366
- <div class="step-title">Pick Columns</div>
367
- <div class="step-desc">Select a date column and one or more numeric value columns. The app auto-detects sensible defaults.</div>
368
  </div>
369
  <div class="step-card">
370
  <div class="step-number">3</div>
371
- <div class="step-title">Explore</div>
372
- <div class="step-desc">Choose from 9+ chart types, view summary statistics, and get AI-powered chart interpretation.</div>
 
 
 
 
 
 
 
 
 
 
373
  </div>
374
  </div>
375
 
@@ -410,57 +642,69 @@ def _process_new_data(df: pd.DataFrame, delim: str | None = None):
410
  state["raw_df_original"] = df
411
 
412
  all_cols = list(df.columns)
413
- date_suggestions = suggest_date_columns(df)
414
- default_date = date_suggestions[0] if date_suggestions else all_cols[0]
 
 
 
 
 
 
 
 
 
 
 
415
 
416
- is_long, auto_group, auto_value = detect_long_format(df, default_date)
417
  fmt = "Long" if is_long else "Wide"
418
-
419
- other_cols = [c for c in all_cols if c != default_date]
420
- string_cols = [
421
- c for c in other_cols
422
- if df[c].dtype == object or pd.api.types.is_string_dtype(df[c])
423
- ]
424
- numeric_cols = [
425
- c for c in other_cols if pd.api.types.is_numeric_dtype(df[c])
426
- ]
427
-
428
- group_default = (
429
- auto_group if auto_group and auto_group in string_cols
430
- else (string_cols[0] if string_cols else None)
431
- )
432
- value_options = [c for c in numeric_cols if c != group_default] if group_default else numeric_cols
433
- value_default = (
434
- auto_value if auto_value and auto_value in value_options
435
- else (value_options[0] if value_options else None)
436
  )
437
-
438
- # Compute initial y_cols
439
- if is_long and group_default and value_default:
440
- try:
441
- effective = pivot_long_to_wide(df, default_date, group_default, value_default)
442
- available_y = [c for c in effective.columns if c != default_date]
443
- except Exception:
444
- available_y = list(numeric_cols)
445
- else:
446
- numeric_suggest = suggest_numeric_columns(df)
447
- available_y = [c for c in numeric_suggest if c != default_date]
448
-
449
- default_y = available_y[:4] if available_y else []
450
  delim_text = f"Detected delimiter: `{repr(delim)}`" if delim else ""
 
 
 
 
 
 
 
451
 
452
  return (
453
  state, # app_state
454
  gr.Column(visible=True), # setup_col
455
- gr.Dropdown(choices=all_cols, value=default_date), # date_col_dd
456
  gr.Radio(value=fmt), # format_radio
457
  gr.Column(visible=is_long), # long_col
458
- gr.Dropdown(choices=string_cols, value=group_default), # group_col_dd
459
- gr.Dropdown(choices=value_options, value=value_default), # value_col_dd
460
  gr.CheckboxGroup(choices=available_y, value=default_y), # y_cols_cbg
461
  delim_text, # delim_md
462
- gr.Column(visible=True), # welcome_col
 
 
 
 
 
463
  gr.Column(visible=False), # analysis_col
 
 
 
 
 
 
 
 
464
  )
465
 
466
 
@@ -472,7 +716,18 @@ def on_file_upload(file_obj, state):
472
  gr.Column(visible=False), gr.Dropdown(), gr.Radio(),
473
  gr.Column(visible=False), gr.Dropdown(), gr.Dropdown(),
474
  gr.CheckboxGroup(choices=[], value=[]), "",
 
 
 
 
 
475
  gr.Column(visible=True), gr.Column(visible=False),
 
 
 
 
 
 
476
  )
477
  path = file_obj if isinstance(file_obj, str) else str(file_obj)
478
  df, delim = _read_file_to_df(path)
@@ -486,7 +741,11 @@ def on_demo_select(choice, state):
486
  gr.Column(), gr.Dropdown(), gr.Radio(),
487
  gr.Column(), gr.Dropdown(), gr.Dropdown(),
488
  gr.CheckboxGroup(), "",
 
489
  gr.Column(), gr.Column(),
 
 
 
490
  )
491
  demo_path = _DEMO_FILES[choice]
492
  df = pd.read_csv(demo_path)
@@ -497,6 +756,41 @@ def on_format_change(fmt):
497
  return gr.Column(visible=(fmt == "Long"))
498
 
499
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
500
  def on_long_cols_change(date_col, group_col, value_col, state):
501
  raw_df = state.get("raw_df_original")
502
  if raw_df is None or not group_col or not value_col:
@@ -509,31 +803,141 @@ def on_long_cols_change(date_col, group_col, value_col, state):
509
  return gr.CheckboxGroup(choices=[], value=[])
510
 
511
 
512
- def on_apply_setup(state, date_col, data_format, group_col, value_col,
513
- y_cols, dup_action, missing_action, freq_override):
514
- if not y_cols:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
515
  return (
516
  state,
517
- gr.Column(visible=True), gr.Column(visible=False),
518
- "*Select at least one value column.*", "",
519
- gr.Dropdown(), gr.Dropdown(),
520
- None, "", "",
521
- gr.CheckboxGroup(), None, "", "",
522
- gr.CheckboxGroup(), gr.Dropdown(), None, "", "",
 
 
 
 
523
  )
524
 
525
- raw_df = state.get("raw_df_original")
526
- if raw_df is None:
 
 
 
 
 
 
 
527
  return (
528
  state,
529
- gr.Column(visible=True), gr.Column(visible=False),
530
- "*No data loaded.*", "",
531
- gr.Dropdown(), gr.Dropdown(),
532
- None, "", "",
533
- gr.CheckboxGroup(), None, "", "",
534
- gr.CheckboxGroup(), gr.Dropdown(), None, "", "",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
535
  )
536
 
 
 
 
 
 
 
 
537
  # Pivot if long format
538
  if data_format == "Long" and group_col and value_col:
539
  effective_df = pivot_long_to_wide(raw_df, date_col, group_col, value_col)
@@ -589,6 +993,23 @@ def on_apply_setup(state, date_col, data_format, group_col, value_col,
589
  y_list = list(y_cols)
590
  panel_default = y_list[:4] if len(y_list) >= 2 else y_list
591
  highlight_choices = ["(none)"] + y_list
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
592
 
593
  return (
594
  state, # 0 app_state
@@ -596,24 +1017,30 @@ def on_apply_setup(state, date_col, data_format, group_col, value_col,
596
  gr.Column(visible=True), # 2 analysis_col
597
  quality_md, # 3 quality_md
598
  freq_text, # 4 freq_info_md
599
- # Single series tab
600
  gr.Dropdown(choices=y_list, value=y_list[0]), # 5 single_y_dd
601
  gr.Dropdown(choices=color_by_choices,
602
  value=color_by_choices[0] if color_by_choices else None),# 6 color_by_dd
603
  None, # 7 single_plot
604
  "", # 8 single_stats_md
605
  "", # 9 single_interp_md
606
- # Panel tab
607
  gr.CheckboxGroup(choices=y_list, value=panel_default), # 10 panel_cols_cbg
608
  None, # 11 panel_plot
609
  "", # 12 panel_summary_md
610
  "", # 13 panel_interp_md
611
- # Spaghetti tab
612
  gr.CheckboxGroup(choices=y_list, value=y_list), # 14 spag_cols_cbg
613
- gr.Dropdown(choices=highlight_choices, value="(none)"), # 15 spag_highlight_dd
614
  None, # 16 spag_plot
615
  "", # 17 spag_summary_md
616
  "", # 18 spag_interp_md
 
 
 
 
 
 
 
 
 
617
  )
618
 
619
 
@@ -626,6 +1053,27 @@ def on_dr_mode_change(mode):
626
  )
627
 
628
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
629
  def on_chart_type_change(chart_type):
630
  return (
631
  gr.Column(visible=("Colored Markers" in chart_type)),
@@ -659,6 +1107,10 @@ def on_single_update(state, y_col, dr_mode, dr_n, dr_start, dr_end,
659
  if df_plot.empty:
660
  return state, None, "*No data in selected range.*"
661
 
 
 
 
 
662
  fig, err = _generate_single_chart(
663
  df_plot, date_col, y_col, chart_type, palette_colors,
664
  color_by, period, window, lag, decomp_model, freq_info,
@@ -785,6 +1237,30 @@ def on_spag_interpret(state):
785
  return render_interpretation_markdown(interp)
786
 
787
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
788
  # ---------------------------------------------------------------------------
789
  # Build the Gradio app
790
  # ---------------------------------------------------------------------------
@@ -805,11 +1281,6 @@ with gr.Blocks(
805
  '<span class="subtitle-text">ISA 444 &middot; Miami University</span>'
806
  '</div>'
807
  )
808
- gr.Markdown("**Vibe-Coded By**")
809
- gr.HTML(_DEVELOPER_CARD)
810
- gr.Markdown("v0.2.0 &middot; Last updated Feb 2026", elem_classes=["caption"])
811
-
812
- gr.Markdown("---")
813
  gr.Markdown("### Data Input")
814
 
815
  file_upload = gr.File(
@@ -824,44 +1295,12 @@ with gr.Blocks(
824
  )
825
  reset_btn = gr.Button("Reset all", variant="secondary", size="sm")
826
  delim_md = gr.Markdown("")
 
827
 
828
- # ---- Setup controls (hidden until data loaded) ----
829
- with gr.Column(visible=False) as setup_col:
830
- gr.Markdown("---")
831
- gr.Markdown("### Column & Cleaning Setup")
832
- gr.Markdown("*Configure below, then click **Apply setup**.*")
833
-
834
- date_col_dd = gr.Dropdown(label="Date column", choices=[])
835
- format_radio = gr.Radio(
836
- label="Data format", choices=["Wide", "Long"], value="Wide",
837
- )
838
-
839
- with gr.Column(visible=False) as long_col:
840
- group_col_dd = gr.Dropdown(label="Group column", choices=[])
841
- value_col_dd = gr.Dropdown(label="Value column", choices=[])
842
-
843
- y_cols_cbg = gr.CheckboxGroup(label="Value column(s)", choices=[])
844
-
845
- gr.Markdown("**Cleaning options**")
846
- dup_dd = gr.Dropdown(
847
- label="Duplicate dates",
848
- choices=["keep_last", "keep_first", "drop_all"],
849
- value="keep_last",
850
- )
851
- missing_dd = gr.Dropdown(
852
- label="Missing values",
853
- choices=["interpolate", "ffill", "drop"],
854
- value="interpolate",
855
- )
856
- freq_tb = gr.Textbox(
857
- label="Override frequency label (optional)",
858
- placeholder="e.g. Daily, Weekly, Monthly",
859
- )
860
- apply_btn = gr.Button("Apply setup", variant="primary")
861
- freq_info_md = gr.Markdown("")
862
-
863
- # ---- QueryChat placeholder ----
864
- with gr.Column(visible=False) as qc_col:
865
  gr.Markdown("---")
866
  gr.Markdown("### QueryChat")
867
  if check_querychat_available():
@@ -881,166 +1320,229 @@ with gr.Blocks(
881
  with gr.Column(visible=True) as welcome_col:
882
  gr.Markdown(_WELCOME_MD)
883
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
884
  # ===================================================================
885
  # Analysis panel (hidden until setup applied)
886
  # ===================================================================
887
  with gr.Column(visible=False) as analysis_col:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
888
  with gr.Accordion("Data Quality Report", open=False):
889
  quality_md = gr.Markdown("")
890
 
891
- with gr.Tabs():
892
- # ---------------------------------------------------------------
893
- # Tab: Single Series
894
- # ---------------------------------------------------------------
895
- with gr.Tab("Single Series"):
896
- with gr.Row():
897
- with gr.Column(scale=1, min_width=280):
898
- single_y_dd = gr.Dropdown(label="Value column", choices=[])
899
- dr_mode_radio = gr.Radio(
900
- label="Date range",
901
- choices=["All", "Last N years", "Custom"],
902
- value="All",
903
- )
904
- with gr.Column(visible=False) as dr_n_col:
905
- dr_n_slider = gr.Slider(
906
- label="Years", minimum=1, maximum=20,
907
- value=5, step=1,
908
- )
909
- with gr.Column(visible=False) as dr_custom_col:
910
- dr_start_tb = gr.Textbox(label="Start date", placeholder="YYYY-MM-DD")
911
- dr_end_tb = gr.Textbox(label="End date", placeholder="YYYY-MM-DD")
912
-
913
- single_chart_dd = gr.Dropdown(
914
- label="Chart type", choices=_CHART_TYPES,
915
- value=_CHART_TYPES[0],
916
  )
917
- single_pal_dd = gr.Dropdown(
918
- label="Color palette", choices=_PALETTE_NAMES,
919
- value=_PALETTE_NAMES[0],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
920
  )
921
- single_swatch = gr.Plot(label="Palette preview", show_label=False)
922
-
923
- with gr.Column(visible=False) as color_by_col:
924
- color_by_dd = gr.Dropdown(
925
- label="Color by",
926
- choices=["month", "quarter", "year", "day_of_week"],
927
- )
928
- with gr.Column(visible=False) as period_col:
929
- period_dd = gr.Dropdown(
930
- label="Period", choices=["month", "quarter"],
931
- value="month",
932
- )
933
- with gr.Column(visible=False) as window_col:
934
- window_slider = gr.Slider(
935
- label="Window", minimum=2, maximum=52,
936
- value=12, step=1,
937
- )
938
- with gr.Column(visible=False) as lag_col:
939
- lag_slider = gr.Slider(
940
- label="Lag", minimum=1, maximum=52,
941
- value=1, step=1,
942
- )
943
- with gr.Column(visible=False) as decomp_col:
944
- decomp_dd = gr.Dropdown(
945
- label="Model",
946
- choices=["additive", "multiplicative"],
947
- value="additive",
948
- )
949
- single_update_btn = gr.Button("Update chart", variant="primary")
950
-
951
- with gr.Column(scale=3):
952
- single_plot = gr.Plot(label="Chart")
953
- with gr.Accordion("Summary Statistics", open=False):
954
- single_stats_md = gr.Markdown("")
955
- with gr.Accordion("AI Chart Interpretation", open=False):
956
- gr.Markdown(
957
- "*The chart image (PNG) is sent to OpenAI for "
958
- "interpretation. Do not include sensitive data.*"
959
- )
960
- single_interp_btn = gr.Button(
961
- "Interpret Chart with AI", variant="secondary",
962
- )
963
- single_interp_md = gr.Markdown("")
964
-
965
- # ---------------------------------------------------------------
966
- # Tab: Few Series (Panel)
967
- # ---------------------------------------------------------------
968
- with gr.Tab("Few Series (Panel)"):
969
- with gr.Row():
970
- with gr.Column(scale=1, min_width=280):
971
- panel_cols_cbg = gr.CheckboxGroup(
972
- label="Columns to plot", choices=[],
973
  )
974
- panel_chart_dd = gr.Dropdown(
975
- label="Chart type", choices=["line", "bar"],
976
- value="line",
 
977
  )
978
- panel_shared_cb = gr.Checkbox(
979
- label="Shared Y axis", value=True,
 
 
980
  )
981
- panel_pal_dd = gr.Dropdown(
982
- label="Color palette", choices=_PALETTE_NAMES,
983
- value=_PALETTE_NAMES[0],
 
 
984
  )
985
- panel_update_btn = gr.Button("Update chart", variant="primary")
986
-
987
- with gr.Column(scale=3):
988
- panel_plot = gr.Plot(label="Panel Chart")
989
- with gr.Accordion("Per-series Summary", open=False):
990
- panel_summary_md = gr.Markdown("")
991
- with gr.Accordion("AI Chart Interpretation", open=False):
992
- gr.Markdown(
993
- "*The chart image (PNG) is sent to OpenAI for "
994
- "interpretation. Do not include sensitive data.*"
995
- )
996
- panel_interp_btn = gr.Button(
997
- "Interpret Chart with AI", variant="secondary",
998
- )
999
- panel_interp_md = gr.Markdown("")
1000
-
1001
- # ---------------------------------------------------------------
1002
- # Tab: Many Series (Spaghetti)
1003
- # ---------------------------------------------------------------
1004
- with gr.Tab("Many Series (Spaghetti)"):
1005
- with gr.Row():
1006
- with gr.Column(scale=1, min_width=280):
1007
- spag_cols_cbg = gr.CheckboxGroup(
1008
- label="Columns to include", choices=[],
1009
  )
1010
- spag_alpha_slider = gr.Slider(
1011
- label="Alpha (opacity)",
1012
- minimum=0.05, maximum=1.0, value=0.15, step=0.05,
1013
  )
1014
- spag_topn_num = gr.Number(
1015
- label="Highlight top N (0 = none)", value=0,
1016
- minimum=0, precision=0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1017
  )
1018
- spag_highlight_dd = gr.Dropdown(
1019
- label="Highlight series",
1020
- choices=["(none)"], value="(none)",
1021
  )
1022
- spag_median_cb = gr.Checkbox(
1023
- label="Show Median + IQR band", value=False,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1024
  )
1025
- spag_pal_dd = gr.Dropdown(
1026
- label="Color palette", choices=_PALETTE_NAMES,
1027
- value=_PALETTE_NAMES[0],
1028
  )
1029
- spag_update_btn = gr.Button("Update chart", variant="primary")
1030
-
1031
- with gr.Column(scale=3):
1032
- spag_plot = gr.Plot(label="Spaghetti Chart")
1033
- with gr.Accordion("Per-series Summary", open=False):
1034
- spag_summary_md = gr.Markdown("")
1035
- with gr.Accordion("AI Chart Interpretation", open=False):
1036
- gr.Markdown(
1037
- "*The chart image (PNG) is sent to OpenAI for "
1038
- "interpretation. Do not include sensitive data.*"
1039
- )
1040
- spag_interp_btn = gr.Button(
1041
- "Interpret Chart with AI", variant="secondary",
1042
- )
1043
- spag_interp_md = gr.Markdown("")
1044
 
1045
  # ===================================================================
1046
  # Event wiring
@@ -1049,7 +1551,11 @@ with gr.Blocks(
1049
  _DATA_LOAD_OUTPUTS = [
1050
  app_state, setup_col, date_col_dd, format_radio, long_col,
1051
  group_col_dd, value_col_dd, y_cols_cbg, delim_md,
 
 
1052
  welcome_col, analysis_col,
 
 
1053
  ]
1054
 
1055
  file_upload.change(
@@ -1067,11 +1573,16 @@ with gr.Blocks(
1067
  # Reset via page reload
1068
  reset_btn.click(fn=None, js="() => { window.location.reload(); }")
1069
 
1070
- # Format toggle
 
 
 
 
 
1071
  format_radio.change(
1072
- on_format_change,
1073
- inputs=[format_radio],
1074
- outputs=[long_col],
1075
  )
1076
 
1077
  # Long-format column changes update y_cols
@@ -1082,6 +1593,21 @@ with gr.Blocks(
1082
  outputs=[y_cols_cbg],
1083
  )
1084
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1085
  # Apply setup
1086
  _APPLY_OUTPUTS = [
1087
  app_state, # 0
@@ -1106,6 +1632,15 @@ with gr.Blocks(
1106
  spag_plot, # 16
1107
  spag_summary_md, # 17
1108
  spag_interp_md, # 18
 
 
 
 
 
 
 
 
 
1109
  ]
1110
 
1111
  apply_btn.click(
@@ -1115,6 +1650,16 @@ with gr.Blocks(
1115
  value_col_dd, y_cols_cbg, dup_dd, missing_dd, freq_tb,
1116
  ],
1117
  outputs=_APPLY_OUTPUTS,
 
 
 
 
 
 
 
 
 
 
1118
  )
1119
 
1120
  # Date range mode visibility
@@ -1124,6 +1669,18 @@ with gr.Blocks(
1124
  outputs=[dr_n_col, dr_custom_col],
1125
  )
1126
 
 
 
 
 
 
 
 
 
 
 
 
 
1127
  # Chart type conditional controls
1128
  single_chart_dd.change(
1129
  on_chart_type_change,
 
98
 
99
  _PALETTE_NAMES = ["Set2", "Dark2", "Set1", "Paired", "Pastel1", "Pastel2", "Accent"]
100
  _STYLE_DICT = get_miami_mpl_style()
101
+ _MODE_SINGLE = "Single Series"
102
+ _MODE_PANEL = "Compare Few (Panel)"
103
+ _MODE_SPAG = "Compare Many (Spaghetti)"
104
+ _DATE_HINT_TOKENS = ("date", "time", "year", "month", "day", "period")
105
 
106
  # ---------------------------------------------------------------------------
107
  # State helpers
 
120
  "panel_png": None,
121
  "spag_png": None,
122
  "qc": None,
123
+ "mode_choices": [_MODE_SINGLE],
124
+ "recommended_mode": _MODE_SINGLE,
125
  }
126
 
127
 
 
189
  return "\n".join(lines)
190
 
191
 
192
+ # ---------------------------------------------------------------------------
193
+ # UX helpers
194
+ # ---------------------------------------------------------------------------
195
+
196
+ def _preview_df(df: pd.DataFrame, n: int = 10) -> pd.DataFrame:
197
+ return df.head(n).copy()
198
+
199
+
200
+ def _format_sidebar_status_md(df: pd.DataFrame | None, date_col: str | None = None,
201
+ data_format: str | None = None, y_count: int | None = None,
202
+ freq_label: str | None = None, cleaned_rows: int | None = None) -> str:
203
+ if df is None:
204
+ return "*No data loaded yet.*"
205
+
206
+ row_count = cleaned_rows if cleaned_rows is not None else len(df)
207
+ col_count = len(df.columns)
208
+ parts = [
209
+ "### Dataset Status",
210
+ f"- Rows: **{row_count:,}**",
211
+ f"- Columns: **{col_count}**",
212
+ ]
213
+ if date_col:
214
+ parts.append(f"- Date column: **{date_col}**")
215
+ if data_format:
216
+ parts.append(f"- Structure: **{data_format}**")
217
+ if y_count is not None:
218
+ parts.append(f"- Value series selected: **{y_count}**")
219
+ if freq_label:
220
+ parts.append(f"- Frequency: **{freq_label}**")
221
+ return "\n".join(parts)
222
+
223
+
224
+ def _format_raw_profile_md(df: pd.DataFrame, date_col: str, data_format: str,
225
+ y_cols: list[str]) -> str:
226
+ numeric_cols = int(df.select_dtypes(include=[np.number]).shape[1])
227
+ object_cols = int(df.select_dtypes(include=["object"]).shape[1])
228
+ return "\n".join([
229
+ "### Dataset Profile",
230
+ "| Metric | Value |",
231
+ "|:--|:--|",
232
+ f"| Rows | {len(df):,} |",
233
+ f"| Columns | {len(df.columns)} |",
234
+ f"| Suggested date column | {date_col} |",
235
+ f"| Detected structure | {data_format} |",
236
+ f"| Numeric columns | {numeric_cols} |",
237
+ f"| Text columns | {object_cols} |",
238
+ f"| Value series selected | {len(y_cols)} |",
239
+ ])
240
+
241
+
242
def _get_mode_config(y_count: int) -> tuple[list[str], str, str]:
    """Pick the visualization modes to offer for a given number of series.

    Args:
        y_count: Number of selected value series.

    Returns:
        ``(mode_choices, recommended_mode, hint_text)``:
        one series or fewer offers single-series only; up to eight adds the
        panel mode and recommends it; more than eight also offers spaghetti
        and recommends it.
    """
    if y_count <= 1:
        return (
            [_MODE_SINGLE],
            _MODE_SINGLE,
            "Single series detected. Multi-series comparison modes are hidden.",
        )

    if y_count <= 8:
        return (
            [_MODE_SINGLE, _MODE_PANEL],
            _MODE_PANEL,
            "Best fit: compare a few series in panel view. Spaghetti is hidden to reduce clutter.",
        )

    return (
        [_MODE_SINGLE, _MODE_PANEL, _MODE_SPAG],
        _MODE_SPAG,
        "Many series detected. Spaghetti is the recommended default.",
    )
262
+
263
+
264
def _chart_availability(df_plot: pd.DataFrame, date_col: str, y_col: str,
                        freq_info: FrequencyInfo | None) -> dict[str, str]:
    """Map each chart type that cannot be drawn to the reason it is blocked.

    Args:
        df_plot: Frame that would be plotted.
        date_col: Name of the date column in ``df_plot``.
        y_col: Name of the value column in ``df_plot``.
        freq_info: Detected frequency; only its ``label`` is read. ``None``
            is treated as an "Unknown" frequency.

    Returns:
        ``{chart_name: reason}`` for blocked charts. Charts absent from the
        mapping are considered available. When ``y_col`` is missing or has
        fewer than two non-missing values, every entry of ``_CHART_TYPES``
        is blocked with the same reason.
    """
    if y_col not in df_plot.columns:
        return {name: "Value column not found in active data." for name in _CHART_TYPES}

    n_obs = int(df_plot[y_col].dropna().shape[0])
    if n_obs <= 1:
        return {name: "Need at least 2 non-missing observations." for name in _CHART_TYPES}

    # A missing date column is treated as zero coverage instead of raising
    # KeyError, mirroring the graceful handling of a missing value column.
    if date_col in df_plot.columns:
        date_series = pd.to_datetime(df_plot[date_col], errors="coerce").dropna()
    else:
        date_series = pd.Series(dtype="datetime64[ns]")
    span_days = int((date_series.max() - date_series.min()).days) if len(date_series) >= 2 else 0

    # The presence of a "month" column is used as the signal that calendar
    # features exist on the frame.
    has_time_features = "month" in df_plot.columns
    freq_label = freq_info.label if freq_info else "Unknown"
    period_map = {"Monthly": 12, "Quarterly": 4, "Weekly": 52, "Daily": 365}
    period = period_map.get(freq_label)

    # Insertion order is kept stable because callers list the reasons in
    # dictionary order.
    blocked: dict[str, str] = {}

    if not has_time_features:
        blocked["Line – Colored Markers"] = "Calendar features unavailable."
        blocked["Seasonal Plot"] = "Calendar features unavailable."
        blocked["Seasonal Sub-series"] = "Calendar features unavailable."
    elif n_obs < 12:
        blocked["Seasonal Plot"] = "Need at least 12 observations."
        blocked["Seasonal Sub-series"] = "Need at least 12 observations."

    if n_obs < 8:
        blocked["ACF / PACF"] = "Need at least 8 observations."

    if period is None:
        blocked["Decomposition"] = "Requires Daily/Weekly/Monthly/Quarterly frequency."
    else:
        # Decomposition needs two full cycles, and never fewer than 8 points.
        min_obs = max(8, period * 2)
        if n_obs < min_obs:
            blocked["Decomposition"] = f"Need at least {min_obs} observations."

    if span_days < 365:
        blocked["Year-over-Year Change"] = "Need at least one year of coverage."

    if n_obs < 3:
        blocked["Lag Plot"] = "Need at least 3 observations."

    return blocked
306
+
307
+
308
def _available_chart_choices(df_plot: pd.DataFrame, date_col: str, y_col: str,
                             freq_info: FrequencyInfo | None) -> tuple[list[str], str]:
    """Return the selectable chart types plus a Markdown note on what is gated.

    Args:
        df_plot: Frame that would be plotted.
        date_col: Name of the date column.
        y_col: Name of the value column.
        freq_info: Detected frequency, forwarded to ``_chart_availability``.

    Returns:
        ``(available, notes_md)``. ``available`` keeps the order of
        ``_CHART_TYPES``; when everything is blocked it falls back to
        ``["Line with Markers"]`` so the list is never empty.
    """
    blocked = _chart_availability(df_plot, date_col, y_col, freq_info)
    available = [name for name in _CHART_TYPES if name not in blocked]
    if not available:
        available = ["Line with Markers"]

    notes = ["**Chart availability (auto-gated):**"]
    if blocked:
        notes.extend(f"- {chart_name}: {reason}" for chart_name, reason in blocked.items())
    else:
        notes.append("- All chart types are available.")
    return available, "\n".join(notes)
321
+
322
+
323
def _mode_visibility(mode: str) -> tuple[bool, bool, bool]:
    """Return ``(single, panel, spaghetti)`` flags for the selected mode.

    Each flag is ``True`` only when ``mode`` equals the matching mode
    constant, so an unrecognized mode yields all ``False``.
    """
    return (
        mode == _MODE_SINGLE,
        mode == _MODE_PANEL,
        mode == _MODE_SPAG,
    )
329
+
330
+
331
+ def _choose_default_date_col(df: pd.DataFrame) -> str | None:
332
+ cols = list(df.columns)
333
+ if not cols:
334
+ return None
335
+
336
+ for col in cols:
337
+ if pd.api.types.is_datetime64_any_dtype(df[col]):
338
+ return col
339
+
340
+ for col in cols:
341
+ name = str(col).lower()
342
+ if any(tok in name for tok in _DATE_HINT_TOKENS):
343
+ return col
344
+
345
+ suggestions = suggest_date_columns(df)
346
+ if suggestions:
347
+ return suggestions[0]
348
+ return cols[0]
349
+
350
+
351
+ def _derive_setup_options(df: pd.DataFrame, date_col: str | None, data_format: str,
352
+ group_col: str | None = None, value_col: str | None = None,
353
+ current_y: list[str] | None = None) -> dict:
354
+ all_cols = list(df.columns)
355
+ resolved_date = date_col if date_col in all_cols else (all_cols[0] if all_cols else None)
356
+
357
+ if not resolved_date:
358
+ return {
359
+ "resolved_date": None,
360
+ "string_cols": [],
361
+ "value_options": [],
362
+ "group_default": None,
363
+ "value_default": None,
364
+ "available_y": [],
365
+ "default_y": [],
366
+ }
367
+
368
+ other_cols = [c for c in all_cols if c != resolved_date]
369
+ string_cols = [
370
+ c for c in other_cols
371
+ if df[c].dtype == object or pd.api.types.is_string_dtype(df[c])
372
+ ]
373
+ numeric_suggest = suggest_numeric_columns(df)
374
+
375
+ group_default = (
376
+ group_col if group_col and group_col in string_cols
377
+ else (string_cols[0] if string_cols else None)
378
+ )
379
+ value_options = [c for c in numeric_suggest if c != resolved_date and c != group_default]
380
+ value_default = (
381
+ value_col if value_col and value_col in value_options
382
+ else (value_options[0] if value_options else None)
383
+ )
384
+
385
+ if data_format == "Long" and group_default and value_default:
386
+ try:
387
+ effective = pivot_long_to_wide(df, resolved_date, group_default, value_default)
388
+ available_y = [c for c in effective.columns if c != resolved_date]
389
+ except Exception:
390
+ available_y = value_options.copy()
391
+ else:
392
+ available_y = value_options.copy()
393
+
394
+ kept = [c for c in (current_y or []) if c in available_y]
395
+ default_y = kept if kept else available_y[:4]
396
+
397
+ return {
398
+ "resolved_date": resolved_date,
399
+ "string_cols": string_cols,
400
+ "value_options": value_options,
401
+ "group_default": group_default,
402
+ "value_default": value_default,
403
+ "available_y": available_y,
404
+ "default_y": default_y,
405
+ }
406
+
407
+
408
  # ---------------------------------------------------------------------------
409
  # Data helpers
410
  # ---------------------------------------------------------------------------
 
575
 
576
  ---
577
 
578
+ ### Guided Workflow
579
 
580
+ <div style="display:grid; grid-template-columns:repeat(auto-fit, minmax(150px, 1fr)); gap:0.75rem; margin:1rem 0;">
581
  <div class="step-card">
582
  <div class="step-number">1</div>
583
  <div class="step-title">Load Data</div>
584
+ <div class="step-desc">Upload your CSV or load a demo dataset from the sidebar.</div>
585
  </div>
586
  <div class="step-card">
587
  <div class="step-number">2</div>
588
+ <div class="step-title">Understand</div>
589
+ <div class="step-desc">Review auto-detected structure and inspect a raw-data preview.</div>
590
  </div>
591
  <div class="step-card">
592
  <div class="step-number">3</div>
593
+ <div class="step-title">Prepare</div>
594
+ <div class="step-desc">Set date/value columns and apply cleaning options in the main canvas.</div>
595
+ </div>
596
+ <div class="step-card">
597
+ <div class="step-number">4</div>
598
+ <div class="step-title">Visualize</div>
599
+ <div class="step-desc">Only relevant visualization modes are shown based on series count.</div>
600
+ </div>
601
+ <div class="step-card">
602
+ <div class="step-number">5</div>
603
+ <div class="step-title">Interpret</div>
604
+ <div class="step-desc">Generate AI interpretation when your chart is ready.</div>
605
  </div>
606
  </div>
607
 
 
642
  state["raw_df_original"] = df
643
 
644
  all_cols = list(df.columns)
645
+ default_date = _choose_default_date_col(df)
646
+ infer_date = default_date if default_date in all_cols else (all_cols[0] if all_cols else None)
647
+ if infer_date is None:
648
+ return (
649
+ state, gr.Column(visible=False), gr.Dropdown(), gr.Radio(), gr.Column(visible=False),
650
+ gr.Dropdown(), gr.Dropdown(), gr.CheckboxGroup(choices=[], value=[]), "",
651
+ _format_sidebar_status_md(None), "", pd.DataFrame(), gr.Dropdown(), "",
652
+ gr.Column(visible=True), gr.Column(visible=False),
653
+ gr.Radio(choices=[_MODE_SINGLE], value=_MODE_SINGLE), "",
654
+ gr.Dropdown(choices=_CHART_TYPES, value=_CHART_TYPES[0]), "",
655
+ gr.Column(visible=False), gr.Column(visible=False), gr.Column(visible=False),
656
+ pd.DataFrame(),
657
+ )
658
 
659
+ is_long, _, _ = detect_long_format(df, infer_date)
660
  fmt = "Long" if is_long else "Wide"
661
+ setup_opts = _derive_setup_options(
662
+ df,
663
+ date_col=infer_date,
664
+ data_format=fmt,
665
+ group_col=None,
666
+ value_col=None,
667
+ current_y=None,
 
 
 
 
 
 
 
 
 
 
 
668
  )
669
+ resolved_date = setup_opts["resolved_date"]
670
+ group_default = setup_opts["group_default"]
671
+ value_default = setup_opts["value_default"]
672
+ available_y = setup_opts["available_y"]
673
+ default_y = setup_opts["default_y"]
 
 
 
 
 
 
 
 
674
  delim_text = f"Detected delimiter: `{repr(delim)}`" if delim else ""
675
+ profile_md = _format_raw_profile_md(df, resolved_date, fmt, default_y)
676
+ status_md = _format_sidebar_status_md(
677
+ df=df,
678
+ date_col=resolved_date,
679
+ data_format=fmt,
680
+ y_count=len(default_y),
681
+ )
682
 
683
  return (
684
  state, # app_state
685
  gr.Column(visible=True), # setup_col
686
+ gr.Dropdown(choices=all_cols, value=resolved_date), # date_col_dd
687
  gr.Radio(value=fmt), # format_radio
688
  gr.Column(visible=is_long), # long_col
689
+ gr.Dropdown(choices=setup_opts["string_cols"], value=group_default), # group_col_dd
690
+ gr.Dropdown(choices=setup_opts["value_options"], value=value_default), # value_col_dd
691
  gr.CheckboxGroup(choices=available_y, value=default_y), # y_cols_cbg
692
  delim_text, # delim_md
693
+ status_md, # status_md
694
+ profile_md, # raw_profile_md
695
+ _preview_df(df), # raw_preview_df
696
+ gr.Dropdown(choices=all_cols, value=resolved_date), # cast_col_dd
697
+ "", # cast_status_md
698
+ gr.Column(visible=False), # welcome_col
699
  gr.Column(visible=False), # analysis_col
700
+ gr.Radio(choices=[_MODE_SINGLE], value=_MODE_SINGLE), # viz_mode_radio
701
+ "Apply setup to unlock visualization modes.", # mode_hint_md
702
+ gr.Dropdown(choices=_CHART_TYPES, value=_CHART_TYPES[0]), # single_chart_dd
703
+ "*Apply setup to tailor chart options to your data.*", # single_gate_md
704
+ gr.Column(visible=False), # single_mode_col
705
+ gr.Column(visible=False), # panel_mode_col
706
+ gr.Column(visible=False), # spag_mode_col
707
+ pd.DataFrame(), # cleaned_preview_df
708
  )
709
 
710
 
 
716
  gr.Column(visible=False), gr.Dropdown(), gr.Radio(),
717
  gr.Column(visible=False), gr.Dropdown(), gr.Dropdown(),
718
  gr.CheckboxGroup(choices=[], value=[]), "",
719
+ "*No data loaded yet.*",
720
+ "",
721
+ pd.DataFrame(),
722
+ gr.Dropdown(),
723
+ "",
724
  gr.Column(visible=True), gr.Column(visible=False),
725
+ gr.Radio(choices=[_MODE_SINGLE], value=_MODE_SINGLE),
726
+ "",
727
+ gr.Dropdown(choices=_CHART_TYPES, value=_CHART_TYPES[0]),
728
+ "",
729
+ gr.Column(visible=False), gr.Column(visible=False), gr.Column(visible=False),
730
+ pd.DataFrame(),
731
  )
732
  path = file_obj if isinstance(file_obj, str) else str(file_obj)
733
  df, delim = _read_file_to_df(path)
 
741
  gr.Column(), gr.Dropdown(), gr.Radio(),
742
  gr.Column(), gr.Dropdown(), gr.Dropdown(),
743
  gr.CheckboxGroup(), "",
744
+ gr.Markdown(), gr.Markdown(), gr.Dataframe(), gr.Dropdown(), gr.Markdown(),
745
  gr.Column(), gr.Column(),
746
+ gr.Radio(), gr.Markdown(), gr.Dropdown(), gr.Markdown(),
747
+ gr.Column(), gr.Column(), gr.Column(),
748
+ gr.Dataframe(),
749
  )
750
  demo_path = _DEMO_FILES[choice]
751
  df = pd.read_csv(demo_path)
 
756
  return gr.Column(visible=(fmt == "Long"))
757
 
758
 
759
def on_setup_inputs_change(date_col, data_format, group_col, value_col, current_y, state):
    """Refresh the setup widgets after the date column or data format changes.

    Returns a 6-tuple of updates, in order: long-format panel visibility,
    group-column dropdown, value-column dropdown, value-column checkbox
    group, raw-profile markdown, and sidebar-status markdown.
    """
    show_long = data_format == "Long"
    source_df = state.get("raw_df_original")

    # No dataset yet: only the Long-format panel visibility can be decided.
    if source_df is None:
        return (
            gr.Column(visible=show_long),
            gr.Dropdown(),
            gr.Dropdown(),
            gr.CheckboxGroup(),
            "",
            _format_sidebar_status_md(None),
        )

    selected_y = list(current_y) if current_y else []
    setup = _derive_setup_options(
        source_df,
        date_col=date_col,
        data_format=data_format,
        group_col=group_col,
        value_col=value_col,
        current_y=selected_y,
    )
    date_name = setup["resolved_date"]

    # Build both markdown strings before constructing any widget updates.
    profile_text = _format_raw_profile_md(
        source_df, date_name, data_format, setup["default_y"]
    )
    status_text = _format_sidebar_status_md(
        source_df,
        date_col=date_name,
        data_format=data_format,
        y_count=len(setup["default_y"]),
    )

    long_panel = gr.Column(visible=show_long)
    group_dd = gr.Dropdown(choices=setup["string_cols"], value=setup["group_default"])
    value_dd = gr.Dropdown(choices=setup["value_options"], value=setup["value_default"])
    y_boxes = gr.CheckboxGroup(choices=setup["available_y"], value=setup["default_y"])
    return long_panel, group_dd, value_dd, y_boxes, profile_text, status_text
792
+
793
+
794
  def on_long_cols_change(date_col, group_col, value_col, state):
795
  raw_df = state.get("raw_df_original")
796
  if raw_df is None or not group_col or not value_col:
 
803
  return gr.CheckboxGroup(choices=[], value=[])
804
 
805
 
806
def on_y_selection_change(date_col, data_format, y_cols, state):
    """Rebuild the raw-profile and sidebar-status markdown for a new y selection.

    Returns ``(profile_md, status_md)``. When no raw dataframe is stored in
    ``state`` the profile is empty and the status is the no-data status.
    """
    source_df = state.get("raw_df_original")
    if source_df is None:
        return "", _format_sidebar_status_md(None)

    columns = list(source_df.columns)
    # Fall back to the first column (or an empty name) when the requested
    # date column is not present in the dataframe.
    if date_col in columns:
        date_name = date_col
    elif columns:
        date_name = columns[0]
    else:
        date_name = ""

    chosen = list(y_cols) if y_cols else []
    profile_text = _format_raw_profile_md(source_df, date_name, data_format, chosen)
    status_text = _format_sidebar_status_md(
        source_df,
        date_col=date_name,
        data_format=data_format,
        y_count=len(chosen),
    )
    return profile_text, status_text
822
+
823
+
824
def on_cast_apply(state, cast_col, cast_type, date_col, data_format, group_col, value_col, y_cols):
    """Cast one raw column to a new dtype and refresh the setup widgets.

    ``cast_type`` selects the conversion: ``"Numeric (coerce)"`` uses
    ``pd.to_numeric(errors="coerce")``, ``"Datetime (coerce)"`` uses
    ``pd.to_datetime(errors="coerce")``, and anything else casts to ``str``.
    On success the converted copy replaces ``state["raw_df_original"]``.

    Returns an 11-tuple of updates, in order: app state, raw preview
    dataframe, raw-profile markdown, sidebar-status markdown, date-column
    dropdown, long-format panel visibility, group-column dropdown,
    value-column dropdown, value-column checkbox group, cast-column
    dropdown, and cast-status markdown.

    When data is loaded but the cast is rejected or fails, the profile and
    status outputs are left untouched (argument-less ``gr.Markdown()``)
    instead of being blanked, because the underlying data did not change.
    """
    raw_df = state.get("raw_df_original")
    show_long = data_format == "Long"

    if raw_df is None:
        # No dataset at all: report the no-data status.
        return (
            state,
            gr.Dataframe(),
            "",
            _format_sidebar_status_md(None),
            gr.Dropdown(),
            gr.Column(visible=show_long),
            gr.Dropdown(),
            gr.Dropdown(),
            gr.CheckboxGroup(),
            gr.Dropdown(),
            "*Select a valid column to cast.*",
        )

    if not cast_col or cast_col not in raw_df.columns:
        # Data is loaded but the column choice is unusable: keep the existing
        # profile and status text rather than wiping them.
        return (
            state,
            gr.Dataframe(),
            gr.Markdown(),
            gr.Markdown(),
            gr.Dropdown(),
            gr.Column(visible=show_long),
            gr.Dropdown(),
            gr.Dropdown(),
            gr.CheckboxGroup(),
            gr.Dropdown(),
            "*Select a valid column to cast.*",
        )

    # Work on a copy so a failed cast never leaves state half-converted.
    updated = raw_df.copy()
    try:
        if cast_type == "Numeric (coerce)":
            updated[cast_col] = pd.to_numeric(updated[cast_col], errors="coerce")
        elif cast_type == "Datetime (coerce)":
            updated[cast_col] = pd.to_datetime(updated[cast_col], errors="coerce")
        else:
            updated[cast_col] = updated[cast_col].astype(str)
    except Exception as exc:  # UI boundary: surface any cast failure as a message
        return (
            state,
            gr.Dataframe(value=_preview_df(raw_df)),
            gr.Markdown(),  # profile unchanged: the raw data was not modified
            gr.Markdown(),  # status unchanged for the same reason
            gr.Dropdown(choices=list(raw_df.columns), value=date_col),
            gr.Column(visible=show_long),
            gr.Dropdown(),
            gr.Dropdown(),
            gr.CheckboxGroup(),
            gr.Dropdown(choices=list(raw_df.columns), value=cast_col),
            f"*Type cast failed: {exc}*",
        )

    state["raw_df_original"] = updated
    all_cols = list(updated.columns)
    next_date = date_col if date_col in all_cols else _choose_default_date_col(updated)

    # Re-derive the setup options because the cast may change which columns
    # qualify as group/value candidates.
    opts = _derive_setup_options(
        updated,
        date_col=next_date,
        data_format=data_format,
        group_col=group_col,
        value_col=value_col,
        current_y=list(y_cols) if y_cols else [],
    )

    profile_md = _format_raw_profile_md(
        updated, opts["resolved_date"], data_format, opts["default_y"]
    )
    status_md = _format_sidebar_status_md(
        updated,
        date_col=opts["resolved_date"],
        data_format=data_format,
        y_count=len(opts["default_y"]),
    )

    return (
        state,
        gr.Dataframe(value=_preview_df(updated)),
        profile_md,
        status_md,
        gr.Dropdown(choices=all_cols, value=opts["resolved_date"]),
        gr.Column(visible=show_long),
        gr.Dropdown(choices=opts["string_cols"], value=opts["group_default"]),
        gr.Dropdown(choices=opts["value_options"], value=opts["value_default"]),
        gr.CheckboxGroup(choices=opts["available_y"], value=opts["default_y"]),
        gr.Dropdown(choices=all_cols, value=cast_col),
        f"*Applied cast: `{cast_col}` -> {cast_type}*",
    )
898
+
899
+
900
+ def on_apply_setup(state, date_col, data_format, group_col, value_col,
901
+ y_cols, dup_action, missing_action, freq_override):
902
+ def _error_return(message: str):
903
+ return (
904
+ state, # 0 app_state
905
+ gr.Column(visible=False), # 1 welcome_col
906
+ gr.Column(visible=True), # 2 analysis_col
907
+ message, # 3 quality_md
908
+ "", # 4 freq_info_md
909
+ gr.Dropdown(), # 5 single_y_dd
910
+ gr.Dropdown(), # 6 color_by_dd
911
+ None, # 7 single_plot
912
+ "", # 8 single_stats_md
913
+ "", # 9 single_interp_md
914
+ gr.CheckboxGroup(), # 10 panel_cols_cbg
915
+ None, # 11 panel_plot
916
+ "", # 12 panel_summary_md
917
+ "", # 13 panel_interp_md
918
+ gr.CheckboxGroup(), # 14 spag_cols_cbg
919
+ gr.Dropdown(), # 15 spag_highlight_dd
920
+ None, # 16 spag_plot
921
+ "", # 17 spag_summary_md
922
+ "", # 18 spag_interp_md
923
+ gr.Radio(choices=[_MODE_SINGLE], value=_MODE_SINGLE), # 19 viz_mode_radio
924
+ "Load data and apply setup.", # 20 mode_hint_md
925
+ gr.Dropdown(choices=_CHART_TYPES, value=_CHART_TYPES[0]), # 21 single_chart_dd
926
+ "", # 22 single_gate_md
927
+ gr.Column(visible=False), # 23 single_mode_col
928
+ gr.Column(visible=False), # 24 panel_mode_col
929
+ gr.Column(visible=False), # 25 spag_mode_col
930
+ pd.DataFrame(), # 26 cleaned_preview_df
931
+ _format_sidebar_status_md(None), # 27 status_md
932
  )
933
 
934
+ if not y_cols:
935
+ return _error_return("*Select at least one value column.*")
936
+
937
+ raw_df = state.get("raw_df_original")
938
+ if raw_df is None:
939
+ return _error_return("*No data loaded.*")
940
+
941
  # Pivot if long format
942
  if data_format == "Long" and group_col and value_col:
943
  effective_df = pivot_long_to_wide(raw_df, date_col, group_col, value_col)
 
993
  y_list = list(y_cols)
994
  panel_default = y_list[:4] if len(y_list) >= 2 else y_list
995
  highlight_choices = ["(none)"] + y_list
996
+ mode_choices, recommended_mode, mode_hint = _get_mode_config(len(y_list))
997
+ single_visible, panel_visible, spag_visible = _mode_visibility(recommended_mode)
998
+ state["mode_choices"] = mode_choices
999
+ state["recommended_mode"] = recommended_mode
1000
+
1001
+ chart_choices, chart_gate_md = _available_chart_choices(
1002
+ cleaned, date_col, y_list[0], freq
1003
+ )
1004
+
1005
+ status_md = _format_sidebar_status_md(
1006
+ df=cleaned,
1007
+ date_col=date_col,
1008
+ data_format=data_format,
1009
+ y_count=len(y_list),
1010
+ freq_label=freq.label,
1011
+ cleaned_rows=report.rows_after,
1012
+ )
1013
 
1014
  return (
1015
  state, # 0 app_state
 
1017
  gr.Column(visible=True), # 2 analysis_col
1018
  quality_md, # 3 quality_md
1019
  freq_text, # 4 freq_info_md
 
1020
  gr.Dropdown(choices=y_list, value=y_list[0]), # 5 single_y_dd
1021
  gr.Dropdown(choices=color_by_choices,
1022
  value=color_by_choices[0] if color_by_choices else None),# 6 color_by_dd
1023
  None, # 7 single_plot
1024
  "", # 8 single_stats_md
1025
  "", # 9 single_interp_md
 
1026
  gr.CheckboxGroup(choices=y_list, value=panel_default), # 10 panel_cols_cbg
1027
  None, # 11 panel_plot
1028
  "", # 12 panel_summary_md
1029
  "", # 13 panel_interp_md
 
1030
  gr.CheckboxGroup(choices=y_list, value=y_list), # 14 spag_cols_cbg
1031
+ gr.Dropdown(choices=highlight_choices, value="(none)"), # 15 spag_highlight_dd
1032
  None, # 16 spag_plot
1033
  "", # 17 spag_summary_md
1034
  "", # 18 spag_interp_md
1035
+ gr.Radio(choices=mode_choices, value=recommended_mode), # 19 viz_mode_radio
1036
+ mode_hint, # 20 mode_hint_md
1037
+ gr.Dropdown(choices=chart_choices, value=chart_choices[0]), # 21 single_chart_dd
1038
+ chart_gate_md, # 22 single_gate_md
1039
+ gr.Column(visible=single_visible), # 23 single_mode_col
1040
+ gr.Column(visible=panel_visible), # 24 panel_mode_col
1041
+ gr.Column(visible=spag_visible), # 25 spag_mode_col
1042
+ _preview_df(cleaned), # 26 cleaned_preview_df
1043
+ status_md, # 27 status_md
1044
  )
1045
 
1046
 
 
1053
  )
1054
 
1055
 
1056
def on_viz_mode_change(mode):
    """Show only the control column that belongs to the selected mode.

    Returns visibility updates for the single, panel, and spaghetti mode
    columns, in that order.
    """
    show_single, show_panel, show_spag = _mode_visibility(mode)
    flags = (show_single, show_panel, show_spag)
    return tuple(gr.Column(visible=flag) for flag in flags)
1063
+
1064
+
1065
def on_single_y_change(state, y_col, current_chart):
    """Re-gate the single-series chart choices for a newly selected column.

    Returns ``(chart dropdown update, gate markdown)``. The current chart
    type is kept when it is still among the available choices; otherwise
    the first available choice is selected.
    """
    frame = state.get("cleaned_df")
    when_col = state.get("date_col")
    freq = state.get("freq_info")

    ready = frame is not None and y_col and when_col
    if not ready:
        # Without cleaned data, fall back to the full, ungated chart list.
        return gr.Dropdown(choices=_CHART_TYPES, value=_CHART_TYPES[0]), ""

    allowed, gate_text = _available_chart_choices(frame, when_col, y_col, freq)
    if current_chart in allowed:
        selected = current_chart
    else:
        selected = allowed[0]
    return gr.Dropdown(choices=allowed, value=selected), gate_text
1075
+
1076
+
1077
  def on_chart_type_change(chart_type):
1078
  return (
1079
  gr.Column(visible=("Colored Markers" in chart_type)),
 
1107
  if df_plot.empty:
1108
  return state, None, "*No data in selected range.*"
1109
 
1110
+ blocked = _chart_availability(df_plot, date_col, y_col, freq_info)
1111
+ if chart_type in blocked:
1112
+ return state, None, f"*{chart_type} unavailable: {blocked[chart_type]}*"
1113
+
1114
  fig, err = _generate_single_chart(
1115
  df_plot, date_col, y_col, chart_type, palette_colors,
1116
  color_by, period, window, lag, decomp_model, freq_info,
 
1237
  return render_interpretation_markdown(interp)
1238
 
1239
 
1240
def on_auto_generate(state, viz_mode,
                     single_y, dr_mode, dr_n, dr_start, dr_end,
                     single_chart, single_pal, color_by, period, window, lag, decomp_model,
                     panel_cols, panel_chart, panel_shared, panel_pal,
                     spag_cols, spag_alpha, spag_topn, spag_highlight, spag_median, spag_pal):
    """Render the chart for the active visualization mode.

    Delegates to the panel, spaghetti, or single-series update handler
    based on ``viz_mode`` (single-series is the fallback). The outputs for
    the two inactive modes are cleared.

    Returns ``(state, single_plot, single_stats_md, panel_plot,
    panel_summary_md, spag_plot, spag_summary_md)``.
    """
    # Start with every slot cleared; only the active mode fills its pair.
    single_fig, single_text = None, ""
    panel_fig, panel_text = None, ""
    spag_fig, spag_text = None, ""

    if viz_mode == _MODE_PANEL:
        new_state, panel_fig, panel_text = on_panel_update(
            state, panel_cols, panel_chart, panel_shared, panel_pal
        )
    elif viz_mode == _MODE_SPAG:
        new_state, spag_fig, spag_text = on_spag_update(
            state, spag_cols, spag_alpha, spag_topn, spag_highlight, spag_median, spag_pal
        )
    else:
        new_state, single_fig, single_text = on_single_update(
            state, single_y, dr_mode, dr_n, dr_start, dr_end,
            single_chart, single_pal, color_by, period, window, lag, decomp_model
        )

    return (
        new_state,
        single_fig, single_text,
        panel_fig, panel_text,
        spag_fig, spag_text,
    )
1262
+
1263
+
1264
  # ---------------------------------------------------------------------------
1265
  # Build the Gradio app
1266
  # ---------------------------------------------------------------------------
 
1281
  '<span class="subtitle-text">ISA 444 &middot; Miami University</span>'
1282
  '</div>'
1283
  )
 
 
 
 
 
1284
  gr.Markdown("### Data Input")
1285
 
1286
  file_upload = gr.File(
 
1295
  )
1296
  reset_btn = gr.Button("Reset all", variant="secondary", size="sm")
1297
  delim_md = gr.Markdown("")
1298
+ status_md = gr.Markdown("*No data loaded yet.*")
1299
 
1300
+ with gr.Accordion("About", open=False):
1301
+ gr.Markdown("**Vibe-Coded By**")
1302
+ gr.HTML(_DEVELOPER_CARD)
1303
+ gr.Markdown("v0.2.0 &middot; Last updated Feb 2026", elem_classes=["caption"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1304
  gr.Markdown("---")
1305
  gr.Markdown("### QueryChat")
1306
  if check_querychat_available():
 
1320
  with gr.Column(visible=True) as welcome_col:
1321
  gr.Markdown(_WELCOME_MD)
1322
 
1323
+ # ===================================================================
1324
+ # Setup panel (hidden until data loaded)
1325
+ # ===================================================================
1326
+ with gr.Column(visible=False) as setup_col:
1327
+ gr.Markdown("## Step 1. Understand Data")
1328
+ gr.Markdown("*Check inferred structure and preview your raw file before cleaning.*")
1329
+ raw_profile_md = gr.Markdown("")
1330
+ raw_preview_df = gr.Dataframe(
1331
+ label="Raw data preview (first 10 rows)",
1332
+ interactive=False,
1333
+ wrap=True,
1334
+ )
1335
+
1336
+ gr.Markdown("## Step 2. Prepare Data")
1337
+ gr.Markdown("*If the date guess is wrong, change the date column - value choices update automatically.*")
1338
+ with gr.Row():
1339
+ with gr.Column(scale=1, min_width=300):
1340
+ gr.Markdown("### Structure")
1341
+ date_col_dd = gr.Dropdown(label="Date column", choices=[])
1342
+ format_radio = gr.Radio(
1343
+ label="Data format", choices=["Wide", "Long"], value="Wide",
1344
+ )
1345
+ with gr.Column(visible=False) as long_col:
1346
+ group_col_dd = gr.Dropdown(label="Group column", choices=[])
1347
+ value_col_dd = gr.Dropdown(label="Value column", choices=[])
1348
+ y_cols_cbg = gr.CheckboxGroup(label="Value column(s)", choices=[])
1349
+
1350
+ with gr.Column(scale=1, min_width=300):
1351
+ gr.Markdown("### Cleaning")
1352
+ dup_dd = gr.Dropdown(
1353
+ label="Duplicate dates",
1354
+ choices=["keep_last", "keep_first", "drop_all"],
1355
+ value="keep_last",
1356
+ )
1357
+ missing_dd = gr.Dropdown(
1358
+ label="Missing values",
1359
+ choices=["interpolate", "ffill", "drop"],
1360
+ value="interpolate",
1361
+ )
1362
+ freq_tb = gr.Textbox(
1363
+ label="Override frequency label (optional)",
1364
+ placeholder="e.g. Daily, Weekly, Monthly",
1365
+ )
1366
+ apply_btn = gr.Button("Apply setup", variant="primary")
1367
+ freq_info_md = gr.Markdown("")
1368
+
1369
+ with gr.Accordion("Type Casting (optional)", open=False):
1370
+ gr.Markdown("*Use this when a column is read with the wrong dtype.*")
1371
+ cast_col_dd = gr.Dropdown(label="Column", choices=[])
1372
+ cast_type_dd = gr.Dropdown(
1373
+ label="Cast to",
1374
+ choices=["Numeric (coerce)", "Datetime (coerce)", "String"],
1375
+ value="Numeric (coerce)",
1376
+ )
1377
+ cast_apply_btn = gr.Button("Apply cast", variant="secondary", size="sm")
1378
+ cast_status_md = gr.Markdown("")
1379
+
1380
  # ===================================================================
1381
  # Analysis panel (hidden until setup applied)
1382
  # ===================================================================
1383
  with gr.Column(visible=False) as analysis_col:
1384
+ gr.Markdown("## Step 3. Visualize")
1385
+ mode_hint_md = gr.Markdown("")
1386
+ viz_mode_radio = gr.Radio(
1387
+ label="Visualization mode",
1388
+ choices=[_MODE_SINGLE],
1389
+ value=_MODE_SINGLE,
1390
+ )
1391
+
1392
+ with gr.Accordion("Cleaned Data Preview", open=False):
1393
+ cleaned_preview_df = gr.Dataframe(
1394
+ label="Cleaned data preview (first 10 rows)",
1395
+ interactive=False,
1396
+ wrap=True,
1397
+ )
1398
+
1399
  with gr.Accordion("Data Quality Report", open=False):
1400
  quality_md = gr.Markdown("")
1401
 
1402
+ with gr.Column(visible=False) as single_mode_col:
1403
+ with gr.Row():
1404
+ with gr.Column(scale=1, min_width=280):
1405
+ single_y_dd = gr.Dropdown(label="Value column", choices=[])
1406
+ dr_mode_radio = gr.Radio(
1407
+ label="Date range",
1408
+ choices=["All", "Last N years", "Custom"],
1409
+ value="All",
1410
+ )
1411
+ with gr.Column(visible=False) as dr_n_col:
1412
+ dr_n_slider = gr.Slider(
1413
+ label="Years", minimum=1, maximum=20,
1414
+ value=5, step=1,
 
 
 
 
 
 
 
 
 
 
 
 
1415
  )
1416
+ with gr.Column(visible=False) as dr_custom_col:
1417
+ dr_start_tb = gr.Textbox(label="Start date", placeholder="YYYY-MM-DD")
1418
+ dr_end_tb = gr.Textbox(label="End date", placeholder="YYYY-MM-DD")
1419
+
1420
+ single_chart_dd = gr.Dropdown(
1421
+ label="Chart type", choices=_CHART_TYPES,
1422
+ value=_CHART_TYPES[0],
1423
+ )
1424
+ single_gate_md = gr.Markdown("")
1425
+ single_pal_dd = gr.Dropdown(
1426
+ label="Color palette", choices=_PALETTE_NAMES,
1427
+ value=_PALETTE_NAMES[0],
1428
+ )
1429
+ single_swatch = gr.Plot(label="Palette preview", show_label=False)
1430
+
1431
+ with gr.Column(visible=False) as color_by_col:
1432
+ color_by_dd = gr.Dropdown(
1433
+ label="Color by",
1434
+ choices=["month", "quarter", "year", "day_of_week"],
1435
  )
1436
+ with gr.Column(visible=False) as period_col:
1437
+ period_dd = gr.Dropdown(
1438
+ label="Period", choices=["month", "quarter"],
1439
+ value="month",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1440
  )
1441
+ with gr.Column(visible=False) as window_col:
1442
+ window_slider = gr.Slider(
1443
+ label="Window", minimum=2, maximum=52,
1444
+ value=12, step=1,
1445
  )
1446
+ with gr.Column(visible=False) as lag_col:
1447
+ lag_slider = gr.Slider(
1448
+ label="Lag", minimum=1, maximum=52,
1449
+ value=1, step=1,
1450
  )
1451
+ with gr.Column(visible=False) as decomp_col:
1452
+ decomp_dd = gr.Dropdown(
1453
+ label="Model",
1454
+ choices=["additive", "multiplicative"],
1455
+ value="additive",
1456
  )
1457
+ single_update_btn = gr.Button("Update chart", variant="primary")
1458
+
1459
+ with gr.Column(scale=3):
1460
+ single_plot = gr.Plot(label="Chart")
1461
+ with gr.Accordion("Summary Statistics", open=False):
1462
+ single_stats_md = gr.Markdown("")
1463
+ with gr.Accordion("AI Chart Interpretation", open=False):
1464
+ gr.Markdown(
1465
+ "*The chart image (PNG) is sent to OpenAI for "
1466
+ "interpretation. Do not include sensitive data.*"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1467
  )
1468
+ single_interp_btn = gr.Button(
1469
+ "Interpret Chart with AI", variant="secondary",
 
1470
  )
1471
+ single_interp_md = gr.Markdown("")
1472
+
1473
+ with gr.Column(visible=False) as panel_mode_col:
1474
+ with gr.Row():
1475
+ with gr.Column(scale=1, min_width=280):
1476
+ panel_cols_cbg = gr.CheckboxGroup(
1477
+ label="Columns to plot", choices=[],
1478
+ )
1479
+ panel_chart_dd = gr.Dropdown(
1480
+ label="Chart type", choices=["line", "bar"],
1481
+ value="line",
1482
+ )
1483
+ panel_shared_cb = gr.Checkbox(
1484
+ label="Shared Y axis", value=True,
1485
+ )
1486
+ panel_pal_dd = gr.Dropdown(
1487
+ label="Color palette", choices=_PALETTE_NAMES,
1488
+ value=_PALETTE_NAMES[0],
1489
+ )
1490
+ panel_update_btn = gr.Button("Update chart", variant="primary")
1491
+
1492
+ with gr.Column(scale=3):
1493
+ panel_plot = gr.Plot(label="Panel Chart")
1494
+ with gr.Accordion("Per-series Summary", open=False):
1495
+ panel_summary_md = gr.Markdown("")
1496
+ with gr.Accordion("AI Chart Interpretation", open=False):
1497
+ gr.Markdown(
1498
+ "*The chart image (PNG) is sent to OpenAI for "
1499
+ "interpretation. Do not include sensitive data.*"
1500
  )
1501
+ panel_interp_btn = gr.Button(
1502
+ "Interpret Chart with AI", variant="secondary",
 
1503
  )
1504
+ panel_interp_md = gr.Markdown("")
1505
+
1506
+ with gr.Column(visible=False) as spag_mode_col:
1507
+ with gr.Row():
1508
+ with gr.Column(scale=1, min_width=280):
1509
+ spag_cols_cbg = gr.CheckboxGroup(
1510
+ label="Columns to include", choices=[],
1511
+ )
1512
+ spag_alpha_slider = gr.Slider(
1513
+ label="Alpha (opacity)",
1514
+ minimum=0.05, maximum=1.0, value=0.15, step=0.05,
1515
+ )
1516
+ spag_topn_num = gr.Number(
1517
+ label="Highlight top N (0 = none)", value=0,
1518
+ minimum=0, precision=0,
1519
+ )
1520
+ spag_highlight_dd = gr.Dropdown(
1521
+ label="Highlight series",
1522
+ choices=["(none)"], value="(none)",
1523
+ )
1524
+ spag_median_cb = gr.Checkbox(
1525
+ label="Show Median + IQR band", value=False,
1526
+ )
1527
+ spag_pal_dd = gr.Dropdown(
1528
+ label="Color palette", choices=_PALETTE_NAMES,
1529
+ value=_PALETTE_NAMES[0],
1530
+ )
1531
+ spag_update_btn = gr.Button("Update chart", variant="primary")
1532
+
1533
+ with gr.Column(scale=3):
1534
+ spag_plot = gr.Plot(label="Spaghetti Chart")
1535
+ with gr.Accordion("Per-series Summary", open=False):
1536
+ spag_summary_md = gr.Markdown("")
1537
+ with gr.Accordion("AI Chart Interpretation", open=False):
1538
+ gr.Markdown(
1539
+ "*The chart image (PNG) is sent to OpenAI for "
1540
+ "interpretation. Do not include sensitive data.*"
1541
  )
1542
+ spag_interp_btn = gr.Button(
1543
+ "Interpret Chart with AI", variant="secondary",
 
1544
  )
1545
+ spag_interp_md = gr.Markdown("")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1546
 
1547
  # ===================================================================
1548
  # Event wiring
 
1551
  _DATA_LOAD_OUTPUTS = [
1552
  app_state, setup_col, date_col_dd, format_radio, long_col,
1553
  group_col_dd, value_col_dd, y_cols_cbg, delim_md,
1554
+ status_md, raw_profile_md, raw_preview_df,
1555
+ cast_col_dd, cast_status_md,
1556
  welcome_col, analysis_col,
1557
+ viz_mode_radio, mode_hint_md, single_chart_dd, single_gate_md,
1558
+ single_mode_col, panel_mode_col, spag_mode_col, cleaned_preview_df,
1559
  ]
1560
 
1561
  file_upload.change(
 
1573
  # Reset via page reload
1574
  reset_btn.click(fn=None, js="() => { window.location.reload(); }")
1575
 
1576
+ date_col_dd.change(
1577
+ on_setup_inputs_change,
1578
+ inputs=[date_col_dd, format_radio, group_col_dd, value_col_dd, y_cols_cbg, app_state],
1579
+ outputs=[long_col, group_col_dd, value_col_dd, y_cols_cbg, raw_profile_md, status_md],
1580
+ )
1581
+
1582
  format_radio.change(
1583
+ on_setup_inputs_change,
1584
+ inputs=[date_col_dd, format_radio, group_col_dd, value_col_dd, y_cols_cbg, app_state],
1585
+ outputs=[long_col, group_col_dd, value_col_dd, y_cols_cbg, raw_profile_md, status_md],
1586
  )
1587
 
1588
  # Long-format column changes update y_cols
 
1593
  outputs=[y_cols_cbg],
1594
  )
1595
 
1596
+ y_cols_cbg.change(
1597
+ on_y_selection_change,
1598
+ inputs=[date_col_dd, format_radio, y_cols_cbg, app_state],
1599
+ outputs=[raw_profile_md, status_md],
1600
+ )
1601
+
1602
+ cast_apply_btn.click(
1603
+ on_cast_apply,
1604
+ inputs=[app_state, cast_col_dd, cast_type_dd, date_col_dd, format_radio, group_col_dd, value_col_dd, y_cols_cbg],
1605
+ outputs=[
1606
+ app_state, raw_preview_df, raw_profile_md, status_md, date_col_dd,
1607
+ long_col, group_col_dd, value_col_dd, y_cols_cbg, cast_col_dd, cast_status_md,
1608
+ ],
1609
+ )
1610
+
1611
  # Apply setup
1612
  _APPLY_OUTPUTS = [
1613
  app_state, # 0
 
1632
  spag_plot, # 16
1633
  spag_summary_md, # 17
1634
  spag_interp_md, # 18
1635
+ viz_mode_radio, # 19
1636
+ mode_hint_md, # 20
1637
+ single_chart_dd, # 21
1638
+ single_gate_md, # 22
1639
+ single_mode_col, # 23
1640
+ panel_mode_col, # 24
1641
+ spag_mode_col, # 25
1642
+ cleaned_preview_df, # 26
1643
+ status_md, # 27
1644
  ]
1645
 
1646
  apply_btn.click(
 
1650
  value_col_dd, y_cols_cbg, dup_dd, missing_dd, freq_tb,
1651
  ],
1652
  outputs=_APPLY_OUTPUTS,
1653
+ ).then(
1654
+ on_auto_generate,
1655
+ inputs=[
1656
+ app_state, viz_mode_radio,
1657
+ single_y_dd, dr_mode_radio, dr_n_slider, dr_start_tb, dr_end_tb,
1658
+ single_chart_dd, single_pal_dd, color_by_dd, period_dd, window_slider, lag_slider, decomp_dd,
1659
+ panel_cols_cbg, panel_chart_dd, panel_shared_cb, panel_pal_dd,
1660
+ spag_cols_cbg, spag_alpha_slider, spag_topn_num, spag_highlight_dd, spag_median_cb, spag_pal_dd,
1661
+ ],
1662
+ outputs=[app_state, single_plot, single_stats_md, panel_plot, panel_summary_md, spag_plot, spag_summary_md],
1663
  )
1664
 
1665
  # Date range mode visibility
 
1669
  outputs=[dr_n_col, dr_custom_col],
1670
  )
1671
 
1672
+ viz_mode_radio.change(
1673
+ on_viz_mode_change,
1674
+ inputs=[viz_mode_radio],
1675
+ outputs=[single_mode_col, panel_mode_col, spag_mode_col],
1676
+ )
1677
+
1678
+ single_y_dd.change(
1679
+ on_single_y_change,
1680
+ inputs=[app_state, single_y_dd, single_chart_dd],
1681
+ outputs=[single_chart_dd, single_gate_md],
1682
+ )
1683
+
1684
  # Chart type conditional controls
1685
  single_chart_dd.change(
1686
  on_chart_type_change,
src/cleaning.py CHANGED
@@ -100,27 +100,30 @@ def suggest_date_columns(df: pd.DataFrame) -> list[str]:
100
  candidates: list[str] = []
101
 
102
  for col in df.columns:
 
 
103
  # 1. Already datetime
104
  if pd.api.types.is_datetime64_any_dtype(df[col]):
105
  if col not in candidates:
106
  candidates.append(col)
107
  continue
108
 
109
- # 2. Parseable as datetime (check up to first 5 non-null values)
110
- sample = df[col].dropna().head(5)
111
- if not sample.empty:
112
  try:
113
  with warnings.catch_warnings():
114
  warnings.simplefilter("ignore", UserWarning)
115
- pd.to_datetime(sample)
116
- if col not in candidates:
117
- candidates.append(col)
118
- continue
 
119
  except (ValueError, TypeError, OverflowError):
120
  pass
121
 
122
  # 3. Column name heuristic
123
- if _DATE_NAME_TOKENS.search(str(col)):
124
  if col not in candidates:
125
  candidates.append(col)
126
 
 
100
  candidates: list[str] = []
101
 
102
  for col in df.columns:
103
+ name_has_token = bool(_DATE_NAME_TOKENS.search(str(col)))
104
+
105
  # 1. Already datetime
106
  if pd.api.types.is_datetime64_any_dtype(df[col]):
107
  if col not in candidates:
108
  candidates.append(col)
109
  continue
110
 
111
+ # 2. Parseable as datetime (check a sample of non-null values)
112
+ sample = df[col].dropna().head(20)
113
+ if not sample.empty and (name_has_token or not pd.api.types.is_numeric_dtype(df[col])):
114
  try:
115
  with warnings.catch_warnings():
116
  warnings.simplefilter("ignore", UserWarning)
117
+ parsed = pd.to_datetime(sample, errors="coerce")
118
+ if parsed.notna().mean() >= 0.8:
119
+ if col not in candidates:
120
+ candidates.append(col)
121
+ continue
122
  except (ValueError, TypeError, OverflowError):
123
  pass
124
 
125
  # 3. Column name heuristic
126
+ if name_has_token:
127
  if col not in candidates:
128
  candidates.append(col)
129