fmegahed commited on
Commit
22ced1b
·
1 Parent(s): 9e5d7c3

Moving app to gradio to avoid flickering issues

Browse files
Files changed (9) hide show
  1. .gitignore +2 -1
  2. .streamlit/config.toml +0 -7
  3. Dockerfile +4 -6
  4. README.md +1 -1
  5. app.py +1023 -1037
  6. requirements.txt +2 -2
  7. src/ai_interpretation.py +57 -52
  8. src/querychat_helpers.py +19 -7
  9. src/ui_theme.py +196 -135
.gitignore CHANGED
@@ -5,4 +5,5 @@ requirements.md
5
  DEPLOY.md
6
  *.Rproj
7
  .Rproj.user/
8
- nul
 
 
5
  DEPLOY.md
6
  *.Rproj
7
  .Rproj.user/
8
+ nul
9
+ CLAUDE.md
.streamlit/config.toml DELETED
@@ -1,7 +0,0 @@
1
- [server]
2
- headless = true
3
- fileWatcherType = "none"
4
- runOnSave = false
5
-
6
- [browser]
7
- gatherUsageStats = false
 
 
 
 
 
 
 
 
Dockerfile CHANGED
@@ -18,9 +18,7 @@ COPY --chown=user:user . .
18
 
19
  EXPOSE 7860
20
 
21
- CMD ["streamlit", "run", "app.py", \
22
- "--server.port=7860", \
23
- "--server.address=0.0.0.0", \
24
- "--server.enableXsrfProtection=false", \
25
- "--server.enableCORS=false", \
26
- "--browser.gatherUsageStats=false"]
 
18
 
19
  EXPOSE 7860
20
 
21
+ ENV GRADIO_SERVER_NAME="0.0.0.0"
22
+ ENV GRADIO_SERVER_PORT="7860"
23
+
24
+ CMD ["python", "app.py"]
 
 
README.md CHANGED
@@ -10,7 +10,7 @@ pinned: false
10
 
11
  # Time Series Visualizer + AI Chart Interpreter
12
 
13
- A Streamlit app for Miami University Business Analytics students to upload CSV
14
  time-series data, create publication-quality charts, and get AI-powered chart
15
  interpretation.
16
 
 
10
 
11
  # Time Series Visualizer + AI Chart Interpreter
12
 
13
+ A Gradio app for Miami University Business Analytics students to upload CSV
14
  time-series data, create publication-quality charts, and get AI-powered chart
15
  interpretation.
16
 
app.py CHANGED
@@ -1,14 +1,15 @@
1
  """
2
  Time Series Visualizer + AI Chart Interpreter
3
  =============================================
4
- Main Streamlit application. Run with:
5
 
6
- streamlit run app.py --server.port=7860
7
  """
8
 
9
  from __future__ import annotations
10
 
11
  import hashlib
 
12
  from pathlib import Path
13
 
14
  from dotenv import load_dotenv
@@ -17,17 +18,19 @@ load_dotenv()
17
  import matplotlib
18
  matplotlib.use("Agg")
19
 
 
20
  import pandas as pd
21
- import streamlit as st
22
 
23
  from src.ui_theme import (
24
- apply_miami_theme,
 
25
  get_miami_mpl_style,
26
  get_palette_colors,
27
  render_palette_preview,
28
  )
29
  from src.cleaning import (
30
- read_csv_upload,
31
  suggest_date_columns,
32
  suggest_numeric_columns,
33
  clean_dataframe,
@@ -42,7 +45,6 @@ from src.diagnostics import (
42
  compute_summary_stats,
43
  compute_acf_pacf,
44
  compute_decomposition,
45
- compute_rolling_stats,
46
  compute_yoy_change,
47
  compute_multi_series_summary,
48
  )
@@ -63,7 +65,7 @@ from src.plotting import (
63
  from src.ai_interpretation import (
64
  check_api_key_available,
65
  interpret_chart,
66
- render_interpretation,
67
  )
68
  from src.querychat_helpers import (
69
  check_querychat_available,
@@ -80,10 +82,11 @@ _DEMO_FILES = {
80
  "Manufacturing Employment by State (wide, monthly)": _DATA_DIR / "demo_manufacturing_wide.csv",
81
  "Manufacturing Employment by State (long, monthly)": _DATA_DIR / "demo_manufacturing_long.csv",
82
  }
 
83
 
84
  _CHART_TYPES = [
85
  "Line with Markers",
86
- "Line Colored Markers",
87
  "Seasonal Plot",
88
  "Seasonal Sub-series",
89
  "ACF / PACF",
@@ -94,1117 +97,1100 @@ _CHART_TYPES = [
94
  ]
95
 
96
  _PALETTE_NAMES = ["Set2", "Dark2", "Set1", "Paired", "Pastel1", "Pastel2", "Accent"]
97
- _VIEW_SPECS = [
98
- ("Single Series", "single"),
99
- ("Few Series (Panel)", "panel"),
100
- ("Many Series (Spaghetti)", "spaghetti"),
101
- ]
102
- _VIEW_LABELS = [label for label, _ in _VIEW_SPECS]
103
- _VIEW_SLUG_BY_LABEL = dict(_VIEW_SPECS)
104
- _VIEW_LABEL_BY_SLUG = {slug: label for label, slug in _VIEW_SPECS}
105
- _ANALYSIS_STATE_KEYS = [
106
- "tab_a_y", "dr_mode", "dr_n", "dr_custom",
107
- "chart_type_a", "pal_a", "color_by_a", "period_a", "window_a", "lag_a", "decomp_a",
108
- "_single_df_plot", "_single_fig", "_single_active_y", "_single_chart_type",
109
- "_single_input_key", "_single_stats",
110
- "panel_cols", "panel_chart", "panel_shared", "pal_b", "_panel_fig",
111
- "_panel_input_key", "_panel_summary_df",
112
- "spag_cols", "spag_alpha", "spag_topn", "spag_highlight", "spag_median", "pal_c", "_spag_fig",
113
- "_spag_input_key", "_spag_summary_df",
114
- ]
 
 
115
 
116
 
117
  # ---------------------------------------------------------------------------
118
- # Helpers
119
  # ---------------------------------------------------------------------------
120
 
121
- def _df_hash(df: pd.DataFrame) -> str:
122
- """Fast hash of a DataFrame for cache-key / change-detection."""
123
- return hashlib.md5(
124
- pd.util.hash_pandas_object(df).values.tobytes()
125
- ).hexdigest()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
 
128
- def _load_demo(path: Path) -> pd.DataFrame:
129
- return pd.read_csv(path)
 
130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
- def _scalar_query_param(value):
133
- """Return the first item for multi-valued query params."""
134
- if isinstance(value, list):
135
- return value[0] if value else None
136
- return value
137
 
 
 
 
138
 
139
- def _initial_view_label() -> str:
140
- """Resolve initial view from query params when available."""
141
- requested = _scalar_query_param(st.query_params.get("view"))
142
- return _VIEW_LABEL_BY_SLUG.get(requested, _VIEW_LABELS[0])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
- def _reset_all_state() -> None:
146
- """Clear all session/query state and rerun."""
147
- for key in list(st.session_state.keys()):
148
- del st.session_state[key]
149
- st.query_params.clear()
150
- st.rerun()
151
 
 
 
 
152
 
153
- def _sync_view_query_param() -> None:
154
- """Write current active view to URL query params."""
155
- active = st.session_state.get("active_view")
156
- if active in _VIEW_SLUG_BY_LABEL:
157
- st.query_params["view"] = _VIEW_SLUG_BY_LABEL[active]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
- def _clear_analysis_state(reset_querychat: bool = False) -> None:
161
- """Clear per-view chart controls/outputs."""
162
- for key in _ANALYSIS_STATE_KEYS:
163
- st.session_state.pop(key, None)
164
- if reset_querychat:
165
- st.session_state["qc"] = None
166
- st.session_state["qc_hash"] = None
167
- st.session_state["enable_querychat"] = False
168
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
 
170
- def _on_view_change() -> None:
171
- """Reset chart/data-filter state when users switch analysis views."""
172
- active = st.session_state.get("active_view")
173
- prev = st.session_state.get("_prev_active_view")
174
- if prev and prev != active:
175
- _clear_analysis_state(reset_querychat=True)
176
- st.session_state["_prev_active_view"] = active
177
- _sync_view_query_param()
 
 
 
178
 
 
 
 
 
 
179
 
180
- @st.cache_data(show_spinner=False)
181
- def _clean_pipeline(_raw_hash, raw_df, date_col, y_cols, dup_action, missing_action):
182
- cleaned, report = clean_dataframe(raw_df, date_col, list(y_cols),
183
- dup_action=dup_action,
184
- missing_action=missing_action)
185
  freq = detect_frequency(cleaned, date_col)
186
  cleaned = add_time_features(cleaned, date_col)
187
- return cleaned, report, freq
188
-
189
 
190
- @st.fragment
191
- def _querychat_fragment(cleaned_df, date_col, y_cols, freq_label):
192
- current_hash = _df_hash(cleaned_df) + str(y_cols)
193
- if st.session_state.qc_hash != current_hash:
194
- st.session_state.qc = create_querychat(
195
- cleaned_df, name="uploaded_data",
196
- date_col=date_col, y_cols=y_cols,
197
- freq_label=freq_label,
198
- )
199
- st.session_state.qc_hash = current_hash
200
- st.session_state.qc.ui()
201
-
202
-
203
- @st.fragment
204
- def _data_quality_fragment(report: CleaningReport | None) -> None:
205
- if report is None:
206
- return
207
- with st.expander("Data Quality Report", expanded=False):
208
- _render_cleaning_report(report)
209
-
210
-
211
- @st.fragment
212
- def _single_chart_fragment(working_df, date_col, y_cols, freq_info, style_dict):
213
- if len(y_cols) == 1:
214
- st.session_state["tab_a_y"] = y_cols[0]
215
- elif st.session_state.get("tab_a_y") not in y_cols:
216
- st.session_state["tab_a_y"] = y_cols[0]
217
-
218
- with st.form("single_chart_form", border=False):
219
- if len(y_cols) == 1:
220
- active_y = y_cols[0]
221
- st.caption(f"Value column: `{active_y}`")
222
- else:
223
- active_y = st.selectbox("Select value column", y_cols, key="tab_a_y")
224
-
225
- dr_mode = st.radio(
226
- "Date range",
227
- ["All", "Last N years", "Custom"],
228
- horizontal=True,
229
- key="dr_mode",
230
  )
231
 
232
- df_plot = working_df.copy()
233
- n_years = st.session_state.get("dr_n", 5)
234
- sel = st.session_state.get("dr_custom")
235
-
236
- if dr_mode == "Last N years":
237
- n_years = st.slider("Years", 1, 20, 5, key="dr_n")
238
- cutoff = df_plot[date_col].max() - pd.DateOffset(years=n_years)
239
- df_plot = df_plot[df_plot[date_col] >= cutoff]
240
- elif dr_mode == "Custom":
241
- d_min = df_plot[date_col].min().date()
242
- d_max = df_plot[date_col].max().date()
243
- sel = st.slider("Date range", d_min, d_max, (d_min, d_max), key="dr_custom")
244
- df_plot = df_plot[
245
- (df_plot[date_col].dt.date >= sel[0])
246
- & (df_plot[date_col].dt.date <= sel[1])
247
- ]
248
-
249
- chart_type = st.selectbox("Chart type", _CHART_TYPES, key="chart_type_a")
250
- palette_name = st.selectbox("Color palette", _PALETTE_NAMES, key="pal_a")
251
- palette_colors = get_palette_colors(palette_name, max(12, len(y_cols)))
252
- swatch_fig = render_palette_preview(palette_colors[:8])
253
- st.pyplot(swatch_fig, width="stretch")
254
-
255
- color_by = None
256
- if "Colored Markers" in chart_type:
257
- if "month" in working_df.columns:
258
- color_by = st.selectbox(
259
- "Color by",
260
- ["month", "quarter", "year", "day_of_week"],
261
- key="color_by_a",
262
- )
263
- else:
264
- other_cols = [c for c in working_df.columns if c not in (date_col, active_y)][:5]
265
- if other_cols:
266
- color_by = st.selectbox("Color by", other_cols, key="color_by_a")
267
-
268
- period_label = "month"
269
- window_size = 12
270
- lag_val = 1
271
- decomp_model = "additive"
272
-
273
- if chart_type in ("Seasonal Plot", "Seasonal Sub-series"):
274
- period_label = st.selectbox("Period", ["month", "quarter"], key="period_a")
275
- if chart_type == "Rolling Mean Overlay":
276
- window_size = st.slider("Window", 2, 52, 12, key="window_a")
277
- if chart_type == "Lag Plot":
278
- lag_val = st.slider("Lag", 1, 52, 1, key="lag_a")
279
- if chart_type == "Decomposition":
280
- decomp_model = st.selectbox("Model", ["additive", "multiplicative"], key="decomp_a")
281
-
282
- update_single = st.form_submit_button("Update chart", use_container_width=True)
283
-
284
- input_key = (
285
- _df_hash(working_df), active_y, dr_mode, n_years, sel,
286
- chart_type, palette_name, color_by, period_label, window_size, lag_val, decomp_model,
287
- freq_info.label if freq_info else None,
288
- )
289
- should_compute = update_single or st.session_state.get("_single_fig") is None
290
-
291
- if should_compute:
292
- fig = None
293
- stats = None
294
-
295
- if df_plot.empty:
296
- st.warning("No data in selected range.")
297
- else:
298
- try:
299
- if chart_type == "Line with Markers":
300
- fig = plot_line_with_markers(
301
- df_plot, date_col, active_y,
302
- title=f"{active_y} over Time",
303
- style_dict=style_dict, palette_colors=palette_colors,
304
- )
305
-
306
- elif "Colored Markers" in chart_type and color_by is not None:
307
- fig = plot_line_colored_markers(
308
- df_plot, date_col, active_y,
309
- color_by=color_by, palette_colors=palette_colors,
310
- title=f"{active_y} colored by {color_by}",
311
- style_dict=style_dict,
312
- )
313
-
314
- elif chart_type == "Seasonal Plot":
315
- fig = plot_seasonal(
316
- df_plot, date_col, active_y,
317
- period=period_label,
318
- palette_name_colors=palette_colors,
319
- title=f"Seasonal Plot - {active_y}",
320
- style_dict=style_dict,
321
- )
322
-
323
- elif chart_type == "Seasonal Sub-series":
324
- fig = plot_seasonal_subseries(
325
- df_plot, date_col, active_y,
326
- period=period_label,
327
- title=f"Seasonal Sub-series - {active_y}",
328
- style_dict=style_dict, palette_colors=palette_colors,
329
- )
330
-
331
- elif chart_type == "ACF / PACF":
332
- series = df_plot[active_y].dropna()
333
- acf_vals, acf_ci, pacf_vals, pacf_ci = compute_acf_pacf(series)
334
- fig = plot_acf_pacf(
335
- acf_vals, acf_ci, pacf_vals, pacf_ci,
336
- title=f"ACF / PACF - {active_y}",
337
- style_dict=style_dict,
338
- )
339
-
340
- elif chart_type == "Decomposition":
341
- period_int = None
342
- if freq_info and freq_info.label == "Monthly":
343
- period_int = 12
344
- elif freq_info and freq_info.label == "Quarterly":
345
- period_int = 4
346
- elif freq_info and freq_info.label == "Weekly":
347
- period_int = 52
348
- elif freq_info and freq_info.label == "Daily":
349
- period_int = 365
350
-
351
- result = compute_decomposition(
352
- df_plot, date_col, active_y,
353
- model=decomp_model, period=period_int,
354
- )
355
- fig = plot_decomposition(
356
- result,
357
- title=f"Decomposition - {active_y} ({decomp_model})",
358
- style_dict=style_dict,
359
- )
360
-
361
- elif chart_type == "Rolling Mean Overlay":
362
- fig = plot_rolling_overlay(
363
- df_plot, date_col, active_y,
364
- window=window_size,
365
- title=f"Rolling {window_size}-pt Mean - {active_y}",
366
- style_dict=style_dict, palette_colors=palette_colors,
367
- )
368
-
369
- elif chart_type == "Year-over-Year Change":
370
- yoy_result = compute_yoy_change(df_plot, date_col, active_y)
371
- yoy_df = pd.DataFrame({
372
- "date": yoy_result[date_col],
373
- "abs_change": yoy_result["yoy_abs_change"],
374
- "pct_change": yoy_result["yoy_pct_change"],
375
- }).dropna()
376
- fig = plot_yoy_change(
377
- df_plot, date_col, active_y, yoy_df,
378
- title=f"Year-over-Year Change - {active_y}",
379
- style_dict=style_dict,
380
- )
381
-
382
- elif chart_type == "Lag Plot":
383
- fig = plot_lag(
384
- df_plot[active_y],
385
- lag=lag_val,
386
- title=f"Lag-{lag_val} Plot - {active_y}",
387
- style_dict=style_dict,
388
- )
389
-
390
- except Exception as exc:
391
- st.error(f"Chart error: {exc}")
392
-
393
- if fig is not None:
394
- stats = compute_summary_stats(df_plot, date_col, active_y)
395
-
396
- st.session_state["_single_input_key"] = input_key
397
- st.session_state["_single_df_plot"] = df_plot if not df_plot.empty else None
398
- st.session_state["_single_fig"] = fig
399
- st.session_state["_single_active_y"] = active_y if not df_plot.empty else None
400
- st.session_state["_single_chart_type"] = chart_type if not df_plot.empty else None
401
- st.session_state["_single_stats"] = stats
402
-
403
- fig = st.session_state.get("_single_fig")
404
- if fig is not None:
405
- st.pyplot(fig, width="stretch")
406
  else:
407
- st.info("Choose options above, then click `Update chart`.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
408
 
409
 
410
- @st.fragment
411
- def _single_insights_fragment(freq_info, date_col):
412
- df_plot = st.session_state.get("_single_df_plot")
413
- active_y = st.session_state.get("_single_active_y")
414
- chart_type = st.session_state.get("_single_chart_type")
415
- fig = st.session_state.get("_single_fig")
416
- stats = st.session_state.get("_single_stats")
417
 
418
- if df_plot is None or active_y is None or stats is None:
419
- return
 
 
 
420
 
421
- with st.expander("Summary Statistics", expanded=False):
422
- _render_summary_stats(stats)
423
 
424
- _render_ai_interpretation(
425
- fig, chart_type, freq_info, df_plot, date_col, active_y, "interpret_a",
 
 
 
 
 
426
  )
427
 
428
 
429
- @st.fragment
430
- def _panel_chart_fragment(working_df, date_col, y_cols, style_dict):
431
- if len(y_cols) < 2:
432
- st.info("Select 2+ value columns in the sidebar to use panel plots.")
433
- st.session_state["_panel_fig"] = None
434
- st.session_state["_panel_summary_df"] = None
435
- return
436
 
437
- st.subheader("Panel Plot (Small Multiples)")
438
 
439
- if "panel_cols" not in st.session_state:
440
- st.session_state["panel_cols"] = y_cols[:4]
441
- else:
442
- st.session_state["panel_cols"] = [c for c in st.session_state["panel_cols"] if c in y_cols]
443
-
444
- with st.form("panel_chart_form", border=False):
445
- panel_cols = st.multiselect("Columns to plot", y_cols, key="panel_cols")
446
-
447
- pc1, pc2 = st.columns(2)
448
- with pc1:
449
- panel_chart = st.selectbox("Chart type", ["line", "bar"], key="panel_chart")
450
- with pc2:
451
- if "panel_shared" not in st.session_state:
452
- st.session_state["panel_shared"] = True
453
- shared_y = st.checkbox("Shared Y axis", key="panel_shared")
454
-
455
- palette_name_b = st.selectbox("Color palette", _PALETTE_NAMES, key="pal_b")
456
- update_panel = st.form_submit_button("Update chart", use_container_width=True)
457
-
458
- input_key = (_df_hash(working_df), tuple(panel_cols), panel_chart, shared_y, palette_name_b)
459
- should_compute = update_panel or st.session_state.get("_panel_fig") is None
460
-
461
- if should_compute:
462
- fig_panel = None
463
- summary_df = None
464
- if panel_cols:
465
- palette_b = get_palette_colors(palette_name_b, len(panel_cols))
466
- try:
467
- fig_panel = plot_panel(
468
- working_df, date_col, panel_cols,
469
- chart_type=panel_chart,
470
- shared_y=shared_y,
471
- title="Panel Comparison",
472
- style_dict=style_dict,
473
- palette_colors=palette_b,
474
- )
475
- summary_df = compute_multi_series_summary(working_df, date_col, panel_cols)
476
- except Exception as exc:
477
- st.error(f"Panel chart error: {exc}")
478
 
479
- st.session_state["_panel_input_key"] = input_key
480
- st.session_state["_panel_fig"] = fig_panel
481
- st.session_state["_panel_summary_df"] = summary_df
 
 
 
482
 
483
- fig_panel = st.session_state.get("_panel_fig")
484
- if fig_panel is not None:
485
- st.pyplot(fig_panel, width="stretch")
486
- else:
487
- st.info("Choose panel options above, then click `Update chart`.")
488
-
489
-
490
- @st.fragment
491
- def _panel_insights_fragment(working_df, date_col, freq_info):
492
- panel_cols = st.session_state.get("panel_cols") or []
493
- fig_panel = st.session_state.get("_panel_fig")
494
- panel_chart = st.session_state.get("panel_chart", "line")
495
- summary_df = st.session_state.get("_panel_summary_df")
496
-
497
- if not panel_cols or fig_panel is None or summary_df is None:
498
- return
499
-
500
- with st.expander("Per-series Summary", expanded=False):
501
- st.dataframe(
502
- summary_df.style.format({
503
- "mean": "{:,.2f}",
504
- "std": "{:,.2f}",
505
- "min": "{:,.2f}",
506
- "max": "{:,.2f}",
507
- "trend_slope": "{:,.4f}",
508
- "adf_pvalue": "{:.4f}",
509
- }),
510
- width="stretch",
511
- )
512
 
513
- _render_ai_interpretation(
514
- fig_panel, f"Panel ({panel_chart})", freq_info,
515
- working_df, date_col, ", ".join(panel_cols), "interpret_b",
516
- )
517
 
 
 
518
 
519
- @st.fragment
520
- def _spaghetti_chart_fragment(working_df, date_col, y_cols, style_dict):
521
- if len(y_cols) < 2:
522
- st.info("Select 2+ value columns in the sidebar to use spaghetti plots.")
523
- st.session_state["_spag_fig"] = None
524
- st.session_state["_spag_summary_df"] = None
525
- return
526
 
527
- st.subheader("Spaghetti Plot")
 
528
 
529
- if "spag_cols" not in st.session_state:
530
- st.session_state["spag_cols"] = list(y_cols)
531
- else:
532
- st.session_state["spag_cols"] = [c for c in st.session_state["spag_cols"] if c in y_cols]
533
-
534
- with st.form("spag_chart_form", border=False):
535
- spag_cols = st.multiselect("Columns to include", y_cols, key="spag_cols")
536
-
537
- sc1, sc2, sc3 = st.columns(3)
538
- with sc1:
539
- alpha_val = st.slider("Alpha", 0.05, 1.0, 0.15, 0.05, key="spag_alpha")
540
- with sc2:
541
- top_n = st.number_input("Highlight top N", 0, len(spag_cols), 0, key="spag_topn")
542
- top_n = top_n if top_n > 0 else None
543
- with sc3:
544
- highlight = st.selectbox(
545
- "Highlight series",
546
- ["(none)"] + spag_cols,
547
- key="spag_highlight",
548
- )
549
- highlight_col = highlight if highlight != "(none)" else None
550
 
551
- show_median = st.checkbox("Show Median + IQR band", key="spag_median")
552
- palette_name_c = st.selectbox("Color palette", _PALETTE_NAMES, key="pal_c")
553
- update_spag = st.form_submit_button("Update chart", use_container_width=True)
554
 
555
- input_key = (
556
- _df_hash(working_df), tuple(spag_cols), alpha_val, top_n, highlight_col,
557
- show_median, palette_name_c,
558
- )
559
- should_compute = update_spag or st.session_state.get("_spag_fig") is None
560
-
561
- if should_compute:
562
- fig_spag = None
563
- spag_summary = None
564
- if spag_cols:
565
- palette_c = get_palette_colors(palette_name_c, len(spag_cols))
566
- try:
567
- fig_spag = plot_spaghetti(
568
- working_df, date_col, spag_cols,
569
- alpha=alpha_val,
570
- highlight_col=highlight_col,
571
- top_n=top_n,
572
- show_median_band=show_median,
573
- title="Spaghetti Plot",
574
- style_dict=style_dict,
575
- palette_colors=palette_c,
576
- )
577
- spag_summary = compute_multi_series_summary(working_df, date_col, spag_cols)
578
- except Exception as exc:
579
- st.error(f"Spaghetti chart error: {exc}")
580
 
581
- st.session_state["_spag_input_key"] = input_key
582
- st.session_state["_spag_fig"] = fig_spag
583
- st.session_state["_spag_summary_df"] = spag_summary
584
 
585
- fig_spag = st.session_state.get("_spag_fig")
586
- if fig_spag is not None:
587
- st.pyplot(fig_spag, width="stretch")
588
- else:
589
- st.info("Choose spaghetti options above, then click `Update chart`.")
590
-
591
-
592
- @st.fragment
593
- def _spaghetti_insights_fragment(working_df, date_col, freq_info):
594
- spag_cols = st.session_state.get("spag_cols") or []
595
- fig_spag = st.session_state.get("_spag_fig")
596
- spag_summary = st.session_state.get("_spag_summary_df")
597
-
598
- if not spag_cols or fig_spag is None or spag_summary is None:
599
- return
600
-
601
- with st.expander("Per-series Summary", expanded=False):
602
- st.dataframe(
603
- spag_summary.style.format({
604
- "mean": "{:,.2f}",
605
- "std": "{:,.2f}",
606
- "min": "{:,.2f}",
607
- "max": "{:,.2f}",
608
- "trend_slope": "{:,.4f}",
609
- "adf_pvalue": "{:.4f}",
610
- }),
611
- width="stretch",
612
- )
613
 
614
- _render_ai_interpretation(
615
- fig_spag, "Spaghetti Plot", freq_info,
616
- working_df, date_col, ", ".join(spag_cols), "interpret_c",
617
- )
 
 
 
 
618
 
619
 
620
- def _render_cleaning_report(report: CleaningReport) -> None:
621
- """Show a data-quality card."""
622
- c1, c2, c3 = st.columns(3)
623
- c1.metric("Rows before", f"{report.rows_before:,}")
624
- c2.metric("Rows after", f"{report.rows_after:,}")
625
- c3.metric("Duplicates found", f"{report.duplicates_found:,}")
626
 
627
- if report.missing_before:
628
- with st.expander("Missing values"):
629
- cols = list(report.missing_before.keys())
630
- mc1, mc2 = st.columns(2)
631
- with mc1:
632
- st.write("**Before cleaning**")
633
- for c in cols:
634
- st.write(f"- {c}: {report.missing_before[c]}")
635
- with mc2:
636
- st.write("**After cleaning**")
637
- for c in cols:
638
- st.write(f"- {c}: {report.missing_after.get(c, 0)}")
639
 
640
- if report.parsing_warnings:
641
- with st.expander("Parsing warnings"):
642
- for w in report.parsing_warnings:
643
- st.warning(w)
644
-
645
-
646
- def _render_summary_stats(stats) -> None:
647
- """Render SummaryStats as metric cards (flat, no nesting)."""
648
- row1 = st.columns(4)
649
- row1[0].metric("Count", f"{stats.count:,}")
650
- row1[1].metric("Missing", f"{stats.missing_count} ({stats.missing_pct:.1f}%)")
651
- row1[2].metric("Mean", f"{stats.mean_val:,.2f}")
652
- row1[3].metric("Std Dev", f"{stats.std_val:,.2f}")
653
-
654
- row2 = st.columns(4)
655
- row2[0].metric("Min", f"{stats.min_val:,.2f}")
656
- row2[1].metric("25th %ile", f"{stats.p25:,.2f}")
657
- row2[2].metric("Median", f"{stats.median_val:,.2f}")
658
- row2[3].metric("75th %ile / Max", f"{stats.p75:,.2f} / {stats.max_val:,.2f}")
659
-
660
- row3 = st.columns(4)
661
- row3[0].metric(
662
- "Trend slope",
663
- f"{stats.trend_slope:,.4f}" if pd.notna(stats.trend_slope) else "N/A",
664
- help="Slope from OLS on a numeric index.",
665
- )
666
- row3[1].metric(
667
- "Trend p-value",
668
- f"{stats.trend_pvalue:.4f}" if pd.notna(stats.trend_pvalue) else "N/A",
669
- )
670
- row3[2].metric(
671
- "ADF statistic",
672
- f"{stats.adf_statistic:.4f}" if pd.notna(stats.adf_statistic) else "N/A",
673
- help="Augmented Dickey-Fuller test statistic.",
674
- )
675
- row3[3].metric(
676
- "ADF p-value",
677
- f"{stats.adf_pvalue:.4f}" if pd.notna(stats.adf_pvalue) else "N/A",
678
- help="p < 0.05 suggests the series is stationary.",
679
- )
680
- st.caption(
681
- f"Date range: {stats.date_start.date()} to {stats.date_end.date()} "
682
- f"({stats.date_span_days:,} days)"
683
- )
684
 
 
685
 
686
- def _render_ai_interpretation(fig, chart_type_label, freq_info, df_plot,
687
- date_col, y_label, button_key):
688
- """Reusable AI Chart Interpretation block for any tab."""
689
- with st.expander("AI Chart Interpretation", expanded=False):
690
- st.caption(
691
- "The chart image (PNG) is sent to OpenAI for interpretation. "
692
- "Do not include sensitive data in your charts."
693
  )
694
- if not check_api_key_available():
695
- st.warning("Set `OPENAI_API_KEY` to enable AI interpretation.")
696
- elif fig is not None:
697
- if st.button("Interpret Chart with AI", key=button_key):
698
- with st.spinner("Analyzing chart..."):
699
- png = fig_to_png_bytes(fig)
700
- date_range_str = (
701
- f"{df_plot[date_col].min().date()} to "
702
- f"{df_plot[date_col].max().date()}"
703
- )
704
- metadata = {
705
- "chart_type": chart_type_label,
706
- "frequency_label": freq_info.label if freq_info else "Unknown",
707
- "date_range": date_range_str,
708
- "y_column": y_label,
709
- }
710
- interp = interpret_chart(png, metadata)
711
- render_interpretation(interp)
712
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
713
 
714
- # ---------------------------------------------------------------------------
715
- # Page config
716
- # ---------------------------------------------------------------------------
717
- st.set_page_config(
718
- page_title="Time Series Visualizer",
719
- page_icon="\U0001f4c8",
720
- layout="wide",
721
- )
722
- apply_miami_theme()
723
- style_dict = get_miami_mpl_style()
724
 
725
  # ---------------------------------------------------------------------------
726
- # Session state initialisation
727
  # ---------------------------------------------------------------------------
728
- for key in [
729
- "raw_df", "raw_df_original", "cleaned_df", "cleaning_report", "freq_info",
730
- "date_col", "y_cols", "qc", "qc_hash",
731
- "_upload_id", "_upload_delim", "_clean_key",
732
- "_prev_data_format", "_prev_pivot_key", "_prev_active_view",
733
- "setup_applied", "_last_applied_settings_key",
734
- ]:
735
- if key not in st.session_state:
736
- st.session_state[key] = None
737
- if st.session_state["setup_applied"] is None:
738
- st.session_state["setup_applied"] = False
739
 
740
- # ---------------------------------------------------------------------------
741
- # Sidebar — Data input
742
- # ---------------------------------------------------------------------------
743
- with st.sidebar:
744
- st.markdown(
745
- """
746
- <div style="text-align:center; margin-bottom:0.5rem;">
747
- <span style="font-size:1.6rem; font-weight:800; color:#C41230;">
748
- Time Series Visualizer
749
- </span><br>
750
- <span style="font-size:0.82rem; color:#000;">
751
- ISA 444 &middot; Miami University
752
- </span>
753
- </div>
754
- """,
755
- unsafe_allow_html=True,
756
- )
757
- # st.divider()
758
- st.subheader("Vibe-Coded By")
759
- st.markdown(
760
- """
761
- <div class="dev-card">
762
- <div class="dev-row">
763
- <svg class="dev-avatar" viewBox="0 0 16 16" aria-hidden="true" focusable="false">
764
- <path d="M11 6a3 3 0 1 1-6 0 3 3 0 0 1 6 0"/>
765
- <path fill-rule="evenodd" d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8m8-7a7 7 0 0 0-5.468 11.37c.69-1.198 1.97-2.015 3.526-2.015h3.884c1.556 0 2.835.817 3.526 2.014A7 7 0 0 0 8 1"/>
766
- </svg>
767
- <div>
768
- <div class="dev-name">Fadel M. Megahed</div>
769
- <div class="dev-role">
770
- Raymond E. Glos Professor, Farmer School of Business<br>
771
- Miami University
772
- </div>
773
- </div>
774
- </div>
775
- <div class="dev-links">
776
- <a class="dev-link" href="mailto:fmegahed@miamioh.edu">
777
- <svg viewBox="0 0 16 16" aria-hidden="true" focusable="false">
778
- <path d="M0 4a2 2 0 0 1 2-2h12a2 2 0 0 1 2 2v8a2 2 0 0 1-2 2H2a2 2 0 0 1-2-2V4zm2-1a1 1 0 0 0-1 1v.217l7 4.2 7-4.2V4a1 1 0 0 0-1-1H2zm13 2.383-4.708 2.825L15 11.105zM14.247 12.6 9.114 8.98 8 9.67 6.886 8.98 1.753 12.6A1 1 0 0 0 2 13h12a1 1 0 0 0 .247-.4zM1 11.105l4.708-2.897L1 5.383z"/>
779
- </svg>
780
- Email
781
- </a>
782
- <a class="dev-link" href="https://www.linkedin.com/in/fadel-megahed-289046b4/" target="_blank">
783
- <svg viewBox="0 0 16 16" aria-hidden="true" focusable="false">
784
- <path d="M0 1.146C0 .513.526 0 1.175 0h13.65C15.475 0 16 .513 16 1.146v13.708c0 .633-.525 1.146-1.175 1.146H1.175C.526 16 0 15.487 0 14.854zM4.943 13.5V6H2.542v7.5zM3.743 4.927c.837 0 1.358-.554 1.358-1.248-.015-.709-.521-1.248-1.342-1.248-.821 0-1.358.54-1.358 1.248 0 .694.521 1.248 1.327 1.248zm4.908 8.573V9.359c0-.22.016-.44.08-.598.176-.44.576-.897 1.248-.897.88 0 1.232.676 1.232 1.667v4.0h2.401V9.247c0-2.22-1.184-3.252-2.764-3.252-1.274 0-1.845.7-2.165 1.193h.016V6H6.35c.03.7 0 7.5 0 7.5z"/>
785
- </svg>
786
- LinkedIn
787
- </a>
788
- <a class="dev-link" href="https://miamioh.edu/fsb/directory/?up=/directory/megahefm" target="_blank">
789
- <svg viewBox="0 0 16 16" aria-hidden="true" focusable="false">
790
- <path d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8m7-7a7 7 0 0 0-2.468.45c.303.393.58.825.82 1.3A5.5 5.5 0 0 1 7 3.5zm2 0v2.5a5.5 5.5 0 0 1 1.648-.75 7 7 0 0 0-.82-1.3A7 7 0 0 0 9 1m3.97 3.06a6.5 6.5 0 0 0-1.71-.9c.21.53.36 1.1.44 1.69h2.21a7 7 0 0 0-.94-.79M15 8a7 7 0 0 0-.33-2h-2.34a6.5 6.5 0 0 1 0 4h2.34c.22-.64.33-1.32.33-2m-1.03 3.94a7 7 0 0 0 .94-.79h-2.21a6.5 6.5 0 0 1-.44 1.69c.62-.22 1.2-.53 1.71-.9M9 15a7 7 0 0 0 1.648-.75c.24-.48.517-.91.82-1.3A7 7 0 0 0 9 15m-2 0v-2.5a5.5 5.5 0 0 1-1.648.75c.24.48.517.91.82 1.3A7 7 0 0 0 7 15M4.03 11.94a6.5 6.5 0 0 0 1.71.9A6.5 6.5 0 0 1 5.3 11.15H3.09c.25.3.58.57.94.79M1 8c0 .68.11 1.36.33 2h2.34a6.5 6.5 0 0 1 0-4H1.33A7 7 0 0 0 1 8m1.03-3.94c.36.37.78.68 1.24.9a6.5 6.5 0 0 1 .44-1.69H2.06a7 7 0 0 0-.03.79"/>
791
- </svg>
792
- Website
793
- </a>
794
- <a class="dev-link" href="https://github.com/fmegahed/" target="_blank">
795
- <svg viewBox="0 0 16 16" aria-hidden="true" focusable="false">
796
- <path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27s1.36.09 2 .27c1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.01 8.01 0 0 0 16 8c0-4.42-3.58-8-8-8"/>
797
- </svg>
798
- GitHub
799
- </a>
800
- </div>
801
- </div>
802
- """,
803
- unsafe_allow_html=True,
804
- )
805
- st.caption("v0.2.0 &middot; Last updated Feb 2026")
806
- st.divider()
807
- st.header("Data Input")
808
 
809
- uploaded = st.file_uploader("Upload a CSV file", type=["csv", "tsv", "txt"], key="csv_upload")
 
810
 
811
- demo_choice = st.selectbox(
812
- "Or load a demo dataset",
813
- ["(none)"] + list(_DEMO_FILES.keys()),
814
- key="demo_select",
815
- )
816
- if st.button("Reset all", key="reset_sidebar", use_container_width=True):
817
- _reset_all_state()
818
-
819
- # Load data
820
- def _on_new_data(df: pd.DataFrame) -> None:
821
- """Store new dataset and clear stale format/pivot keys."""
822
- st.session_state.raw_df_original = df
823
- st.session_state.raw_df = df
824
- st.session_state.cleaned_df = None
825
- st.session_state.cleaning_report = None
826
- st.session_state.freq_info = None
827
- st.session_state.date_col = None
828
- st.session_state.y_cols = None
829
- st.session_state._clean_key = None
830
- st.session_state["setup_applied"] = False
831
- st.session_state["_last_applied_settings_key"] = None
832
- # Clear format-related keys so auto-detection runs fresh
833
- for _k in ("sidebar_data_format", "sidebar_group_col",
834
- "sidebar_value_col", "sidebar_y_cols",
835
- "_prev_data_format", "_prev_pivot_key",
836
- "sidebar_dup_action", "sidebar_missing_action", "sidebar_freq_override"):
837
- st.session_state.pop(_k, None)
838
- _clear_analysis_state(reset_querychat=True)
839
- st.session_state["active_view"] = _VIEW_LABELS[0]
840
- st.session_state["_prev_active_view"] = st.session_state["active_view"]
841
- _sync_view_query_param()
842
-
843
- if uploaded is not None:
844
- file_id = (uploaded.name, uploaded.size)
845
- if st.session_state.get("_upload_id") != file_id:
846
- df_raw, delim = read_csv_upload(uploaded)
847
- _on_new_data(df_raw)
848
- st.session_state._upload_delim = delim
849
- st.session_state._upload_id = file_id
850
- st.caption(f"Detected delimiter: `{repr(st.session_state._upload_delim)}`")
851
- elif demo_choice != "(none)":
852
- demo_key = ("demo", demo_choice)
853
- if st.session_state.get("_upload_id") != demo_key:
854
- _on_new_data(_load_demo(_DEMO_FILES[demo_choice]))
855
- st.session_state._upload_id = demo_key
856
- # else: keep whatever was already in session state
857
-
858
- raw_df_orig: pd.DataFrame | None = st.session_state.raw_df_original
859
- raw_df: pd.DataFrame | None = st.session_state.raw_df
860
-
861
- if raw_df_orig is not None:
862
- st.divider()
863
- st.subheader("Column and Cleaning Setup")
864
- st.caption("Batch changes below, then click `Apply setup`.")
865
-
866
- date_suggestions = suggest_date_columns(raw_df_orig)
867
- all_cols = list(raw_df_orig.columns)
868
- default_date_idx = all_cols.index(date_suggestions[0]) if date_suggestions else 0
869
-
870
- if "sidebar_date_col" not in st.session_state:
871
- st.session_state["sidebar_date_col"] = all_cols[default_date_idx]
872
- if "sidebar_dup_action" not in st.session_state:
873
- st.session_state["sidebar_dup_action"] = "keep_last"
874
- if "sidebar_missing_action" not in st.session_state:
875
- st.session_state["sidebar_missing_action"] = "interpolate"
876
- if "sidebar_freq_override" not in st.session_state:
877
- st.session_state["sidebar_freq_override"] = ""
878
-
879
- with st.form("sidebar_setup_form", border=False):
880
- date_col = st.selectbox("Date column", all_cols, key="sidebar_date_col")
881
- is_long, auto_group, auto_value = detect_long_format(raw_df_orig, date_col)
882
-
883
- if "sidebar_data_format" not in st.session_state:
884
- st.session_state["sidebar_data_format"] = "Long" if is_long else "Wide"
885
-
886
- data_format = st.radio(
887
- "Data format",
888
- ["Wide", "Long"],
889
- key="sidebar_data_format",
890
- horizontal=True,
891
  )
892
 
893
- if st.session_state.get("_prev_data_format") != data_format:
894
- st.session_state.pop("sidebar_y_cols", None)
895
- st.session_state["_prev_data_format"] = data_format
896
-
897
- group_col = None
898
- value_col_sel = None
899
- if data_format == "Long":
900
- other_cols = [c for c in all_cols if c != date_col]
901
- string_cols = [
902
- c for c in other_cols
903
- if raw_df_orig[c].dtype == object
904
- or pd.api.types.is_string_dtype(raw_df_orig[c])
905
- ]
906
- numeric_cols = [
907
- c for c in other_cols
908
- if pd.api.types.is_numeric_dtype(raw_df_orig[c])
909
- ]
910
-
911
- if string_cols:
912
- if "sidebar_group_col" not in st.session_state:
913
- st.session_state["sidebar_group_col"] = (
914
- auto_group if auto_group and auto_group in string_cols
915
- else string_cols[0]
916
- )
917
- group_col = st.selectbox("Group column", string_cols, key="sidebar_group_col")
918
- else:
919
- st.warning("No categorical columns available for long-format grouping.")
920
 
921
- value_options = [c for c in numeric_cols if c != group_col] if group_col else numeric_cols
922
 
923
- if value_options:
924
- if "sidebar_value_col" not in st.session_state:
925
- st.session_state["sidebar_value_col"] = (
926
- auto_value if auto_value and auto_value in value_options
927
- else value_options[0]
928
- )
929
- value_col_sel = st.selectbox("Value column", value_options, key="sidebar_value_col")
930
- else:
931
- st.warning("No numeric value column available for long-format pivoting.")
932
-
933
- pivot_key = (group_col, value_col_sel)
934
- if st.session_state.get("_prev_pivot_key") != pivot_key:
935
- st.session_state.pop("sidebar_y_cols", None)
936
- st.session_state["_prev_pivot_key"] = pivot_key
937
-
938
- if group_col and value_col_sel:
939
- effective_df = pivot_long_to_wide(
940
- raw_df_orig, date_col, group_col, value_col_sel,
941
- )
942
- n_groups = raw_df_orig[group_col].nunique()
943
- st.caption(f"Pivot preview: **{n_groups}** groups from `{group_col}`")
944
- available_y = [c for c in effective_df.columns if c != date_col]
945
- else:
946
- effective_df = raw_df_orig
947
- available_y = []
948
- else:
949
- effective_df = raw_df_orig
950
- numeric_suggestions = suggest_numeric_columns(raw_df_orig)
951
- available_y = [c for c in numeric_suggestions if c != date_col]
952
-
953
- if "sidebar_y_cols" in st.session_state:
954
- st.session_state["sidebar_y_cols"] = [
955
- c for c in st.session_state["sidebar_y_cols"] if c in available_y
956
- ]
957
- if "sidebar_y_cols" not in st.session_state:
958
- st.session_state["sidebar_y_cols"] = available_y[:4] if available_y else []
959
- y_cols = st.multiselect("Value column(s)", available_y, key="sidebar_y_cols")
960
-
961
- st.markdown("##### Cleaning Options")
962
- dup_action = st.selectbox(
963
- "Duplicate dates",
964
- ["keep_last", "keep_first", "drop_all"],
965
- key="sidebar_dup_action",
966
  )
967
- missing_action = st.selectbox(
968
- "Missing values",
969
- ["interpolate", "ffill", "drop"],
970
- key="sidebar_missing_action",
971
  )
972
- freq_override = st.text_input(
973
- "Override frequency label (optional)",
974
- help="e.g. Daily, Weekly, Monthly, Quarterly, Yearly",
975
- key="sidebar_freq_override",
976
  )
977
- apply_setup = st.form_submit_button("Apply setup", use_container_width=True)
978
-
979
- if apply_setup:
980
- st.session_state.raw_df = effective_df
981
- st.session_state.date_col = date_col
982
- st.session_state.y_cols = y_cols
983
-
984
- settings_key = (
985
- st.session_state._upload_id,
986
- date_col,
987
- data_format,
988
- st.session_state.get("sidebar_group_col"),
989
- st.session_state.get("sidebar_value_col"),
990
- tuple(y_cols),
991
- dup_action,
992
- missing_action,
993
- freq_override.strip(),
994
- )
995
- if st.session_state.get("_last_applied_settings_key") != settings_key:
996
- _clear_analysis_state(reset_querychat=True)
997
- st.session_state["_last_applied_settings_key"] = settings_key
998
- st.session_state["setup_applied"] = True
999
-
1000
- if y_cols:
1001
- cleaned_df, report, freq_info = _clean_pipeline(
1002
- _df_hash(effective_df), effective_df, date_col, tuple(y_cols),
1003
- dup_action, missing_action,
1004
- )
1005
- if freq_override.strip():
1006
- freq_info = FrequencyInfo(
1007
- label=freq_override.strip(),
1008
- median_delta=freq_info.median_delta,
1009
- is_regular=freq_info.is_regular,
1010
- )
1011
-
1012
- st.session_state.cleaned_df = cleaned_df
1013
- st.session_state.cleaning_report = report
1014
- st.session_state.freq_info = freq_info
1015
- st.session_state._clean_key = (
1016
- date_col, tuple(y_cols), dup_action, missing_action,
1017
- st.session_state._upload_id,
1018
- )
1019
- else:
1020
- st.session_state.cleaned_df = None
1021
- st.session_state.cleaning_report = None
1022
- st.session_state.freq_info = None
1023
- st.session_state._clean_key = None
1024
- st.session_state.qc = None
1025
- st.session_state.qc_hash = None
1026
-
1027
- if not st.session_state.get("setup_applied"):
1028
- st.info("Configure columns and cleaning options, then click `Apply setup`.")
1029
-
1030
- if st.session_state.get("setup_applied") and st.session_state.get("y_cols"):
1031
- cleaned_df = st.session_state.cleaned_df
1032
- date_col = st.session_state.date_col
1033
- y_cols = st.session_state.y_cols
1034
- freq_info = st.session_state.freq_info
1035
-
1036
- st.success("Setup applied. Continue in the main panel to choose an analysis view.")
1037
- if freq_info is not None:
1038
- st.caption(f"Frequency: **{freq_info.label}** "
1039
- f"({'regular' if freq_info.is_regular else 'irregular'})")
1040
 
 
 
 
 
1041
  if check_querychat_available():
1042
- st.divider()
1043
- st.subheader("QueryChat")
1044
- enable_qc = st.toggle(
1045
- "Enable QueryChat filtering",
1046
- key="enable_querychat",
1047
- help="Use natural-language prompts to filter the dataset (e.g., 'last 5 years'); chart views then use the filtered data.",
1048
  )
1049
- if enable_qc and cleaned_df is not None and freq_info is not None:
1050
- _querychat_fragment(cleaned_df, date_col, y_cols, freq_info.label)
1051
- else:
1052
- st.session_state.qc = None
1053
- st.session_state.qc_hash = None
1054
  else:
1055
- st.divider()
1056
- st.info(
1057
- "Set `OPENAI_API_KEY` to enable QueryChat "
1058
- "(natural-language data filtering)."
1059
  )
1060
- # st.divider()
1061
- # st.caption(
1062
- # "**Privacy:** All processing is in-memory. "
1063
- # "If you click **Interpret Chart with AI**, the chart image is sent to OpenAI — "
1064
- # "do not include sensitive data in your charts. "
1065
- # "QueryChat protects your privacy by only passing metadata (not your data) to OpenAI."
1066
- # )
1067
 
1068
- # ---------------------------------------------------------------------------
1069
- # Main area — guard
1070
- # ---------------------------------------------------------------------------
1071
- cleaned_df: pd.DataFrame | None = st.session_state.cleaned_df
1072
- date_col: str | None = st.session_state.date_col
1073
- y_cols: list[str] | None = st.session_state.y_cols
1074
- freq_info: FrequencyInfo | None = st.session_state.freq_info
1075
- report: CleaningReport | None = st.session_state.cleaning_report
1076
-
1077
- if cleaned_df is None or not y_cols:
1078
- st.title("Time Series Visualizer")
1079
- st.caption("ISA 444 · Miami University · Farmer School of Business")
1080
-
1081
- st.markdown("") # spacer
1082
-
1083
- # --- Getting started steps as visual cards ---
1084
- st.markdown("#### Get Started in 3 Steps")
1085
- c1, c2, c3 = st.columns(3)
1086
- with c1:
1087
- st.markdown(
1088
- '<div style="background:#F5F5F5; border-radius:8px; padding:1rem; '
1089
- 'border-left:4px solid #C41230; height:100%;">'
1090
- '<div style="font-size:1.6rem; font-weight:700; color:#C41230;">1</div>'
1091
- '<div style="font-weight:600; margin:0.3rem 0 0.2rem;">Load Data</div>'
1092
- '<div style="font-size:0.82rem; color:#444;">'
1093
- 'Upload a CSV from the sidebar or pick one of the built-in demo datasets.'
1094
- '</div></div>',
1095
- unsafe_allow_html=True,
1096
- )
1097
- with c2:
1098
- st.markdown(
1099
- '<div style="background:#F5F5F5; border-radius:8px; padding:1rem; '
1100
- 'border-left:4px solid #C41230; height:100%;">'
1101
- '<div style="font-size:1.6rem; font-weight:700; color:#C41230;">2</div>'
1102
- '<div style="font-weight:600; margin:0.3rem 0 0.2rem;">Pick Columns</div>'
1103
- '<div style="font-size:0.82rem; color:#444;">'
1104
- 'Select a date column and one or more numeric value columns. '
1105
- 'The app auto-detects sensible defaults.'
1106
- '</div></div>',
1107
- unsafe_allow_html=True,
1108
- )
1109
- with c3:
1110
- st.markdown(
1111
- '<div style="background:#F5F5F5; border-radius:8px; padding:1rem; '
1112
- 'border-left:4px solid #C41230; height:100%;">'
1113
- '<div style="font-size:1.6rem; font-weight:700; color:#C41230;">3</div>'
1114
- '<div style="font-weight:600; margin:0.3rem 0 0.2rem;">Explore</div>'
1115
- '<div style="font-size:0.82rem; color:#444;">'
1116
- 'Choose from 9+ chart types, view summary statistics, '
1117
- 'and get AI-powered chart interpretation.'
1118
- '</div></div>',
1119
- unsafe_allow_html=True,
1120
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1121
 
1122
- st.markdown("") # spacer
1123
-
1124
- # --- Features and privacy ---
1125
- f1, f2 = st.columns(2)
1126
- with f1:
1127
- st.markdown("#### Features")
1128
- st.markdown(
1129
- "| | |\n"
1130
- "|:--|:--|\n"
1131
- "| **Smart Import** | Auto-detect delimiters, dates, and numeric formats |\n"
1132
- "| **9+ Chart Types** | Line, seasonal, ACF/PACF, decomposition, lag, and more |\n"
1133
- "| **Multi-Series** | Panel (small multiples) and spaghetti plots |\n"
1134
- "| **AI Insights** | GPT vision analyzes your charts and returns structured interpretation |\n"
1135
- "| **QueryChat** | Natural-language data filtering powered by DuckDB |"
1136
- )
1137
- with f2:
1138
- st.markdown("#### Good to Know")
1139
- st.info(
1140
- "**Privacy** — All data processing happens in-memory. "
1141
- "No data is stored on disk. Only chart images (never raw data) "
1142
- "are sent to OpenAI when you click *Interpret Chart with AI*.",
1143
- icon="\U0001f512",
1144
- )
1145
- st.info(
1146
- "**Demo Datasets** — Three built-in FRED datasets are available in the sidebar: "
1147
- "Ohio Unemployment Rate (single series), Manufacturing Employment for five "
1148
- "states in wide format, and the same data in long/stacked format. "
1149
- "All sourced from the Federal Reserve Economic Data (FRED).",
1150
- icon="\U0001f4ca",
1151
  )
1152
 
1153
- st.stop()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1154
 
1155
- # If QueryChat is active, use its filtered df
1156
- if st.session_state.qc is not None:
1157
- working_df = get_filtered_pandas_df(st.session_state.qc)
1158
- # Fall back if filtered df is empty or missing expected columns
1159
- required = {date_col} | set(y_cols)
1160
- if working_df.empty or not required.issubset(working_df.columns):
1161
- working_df = cleaned_df
1162
- else:
1163
- working_df = cleaned_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1164
 
1165
- # Data quality report
1166
- _data_quality_fragment(report)
1167
 
1168
  # ---------------------------------------------------------------------------
1169
- # View selector
1170
  # ---------------------------------------------------------------------------
1171
- if "active_view" not in st.session_state:
1172
- st.session_state["active_view"] = _initial_view_label()
1173
- if st.session_state.get("_prev_active_view") is None:
1174
- st.session_state["_prev_active_view"] = st.session_state["active_view"]
1175
-
1176
- st.subheader("Explore: Choose Analysis View")
1177
- st.caption("Switching views resets chart controls and filtered data for a clean start.")
1178
- view_col, reset_col = st.columns([6, 1])
1179
- with view_col:
1180
- active_view = st.radio(
1181
- "Analysis view",
1182
- _VIEW_LABELS,
1183
- key="active_view",
1184
- horizontal=True,
1185
- on_change=_on_view_change,
1186
  )
1187
- with reset_col:
1188
- if st.button("Reset all", key="reset_main", use_container_width=True):
1189
- _reset_all_state()
1190
-
1191
- # ===================================================================
1192
- # Tab A — Single Series
1193
- # ===================================================================
1194
- if active_view == "Single Series":
1195
- _single_chart_fragment(working_df, date_col, y_cols, freq_info, style_dict)
1196
- _single_insights_fragment(freq_info, date_col)
1197
-
1198
- # ===================================================================
1199
- # Tab B — Few Series (Panel)
1200
- # ===================================================================
1201
- elif active_view == "Few Series (Panel)":
1202
- _panel_chart_fragment(working_df, date_col, y_cols, style_dict)
1203
- _panel_insights_fragment(working_df, date_col, freq_info)
1204
-
1205
- # ===================================================================
1206
- # Tab C — Many Series (Spaghetti)
1207
- # ===================================================================
1208
- else:
1209
- _spaghetti_chart_fragment(working_df, date_col, y_cols, style_dict)
1210
- _spaghetti_insights_fragment(working_df, date_col, freq_info)
 
1
  """
2
  Time Series Visualizer + AI Chart Interpreter
3
  =============================================
4
+ Main Gradio application. Run with:
5
 
6
+ python app.py
7
  """
8
 
9
  from __future__ import annotations
10
 
11
  import hashlib
12
+ import io
13
  from pathlib import Path
14
 
15
  from dotenv import load_dotenv
 
18
  import matplotlib
19
  matplotlib.use("Agg")
20
 
21
+ import numpy as np
22
  import pandas as pd
23
+ import gradio as gr
24
 
25
  from src.ui_theme import (
26
+ MiamiTheme,
27
+ get_miami_css,
28
  get_miami_mpl_style,
29
  get_palette_colors,
30
  render_palette_preview,
31
  )
32
  from src.cleaning import (
33
+ detect_delimiter,
34
  suggest_date_columns,
35
  suggest_numeric_columns,
36
  clean_dataframe,
 
45
  compute_summary_stats,
46
  compute_acf_pacf,
47
  compute_decomposition,
 
48
  compute_yoy_change,
49
  compute_multi_series_summary,
50
  )
 
65
  from src.ai_interpretation import (
66
  check_api_key_available,
67
  interpret_chart,
68
+ render_interpretation_markdown,
69
  )
70
  from src.querychat_helpers import (
71
  check_querychat_available,
 
82
  "Manufacturing Employment by State (wide, monthly)": _DATA_DIR / "demo_manufacturing_wide.csv",
83
  "Manufacturing Employment by State (long, monthly)": _DATA_DIR / "demo_manufacturing_long.csv",
84
  }
85
+ _DEMO_CHOICES = ["(none)"] + list(_DEMO_FILES.keys())
86
 
87
  _CHART_TYPES = [
88
  "Line with Markers",
89
+ "Line \u2013 Colored Markers",
90
  "Seasonal Plot",
91
  "Seasonal Sub-series",
92
  "ACF / PACF",
 
97
  ]
98
 
99
  _PALETTE_NAMES = ["Set2", "Dark2", "Set1", "Paired", "Pastel1", "Pastel2", "Accent"]
100
+ _STYLE_DICT = get_miami_mpl_style()
101
+
102
+ # ---------------------------------------------------------------------------
103
+ # State helpers
104
+ # ---------------------------------------------------------------------------
105
+
106
+ def _make_empty_state() -> dict:
107
+ return {
108
+ "raw_df_original": None,
109
+ "cleaned_df": None,
110
+ "cleaning_report": None,
111
+ "freq_info": None,
112
+ "date_col": None,
113
+ "y_cols": None,
114
+ "setup_applied": False,
115
+ "single_png": None,
116
+ "panel_png": None,
117
+ "spag_png": None,
118
+ "qc": None,
119
+ }
120
 
121
 
122
  # ---------------------------------------------------------------------------
123
+ # Formatting helpers
124
  # ---------------------------------------------------------------------------
125
 
126
+ def _format_cleaning_report_md(report: CleaningReport) -> str:
127
+ lines = [
128
+ "| Metric | Value |", "|:--|:--|",
129
+ f"| **Rows before** | {report.rows_before:,} |",
130
+ f"| **Rows after** | {report.rows_after:,} |",
131
+ f"| **Duplicates found** | {report.duplicates_found:,} |",
132
+ ]
133
+ if report.missing_before:
134
+ lines += ["", "**Missing values:**", "| Column | Before | After |", "|:--|:--|:--|"]
135
+ for col in report.missing_before:
136
+ lines.append(f"| {col} | {report.missing_before[col]} | {report.missing_after.get(col, 0)} |")
137
+ if report.parsing_warnings:
138
+ lines += ["", "**Warnings:**"]
139
+ for w in report.parsing_warnings:
140
+ lines.append(f"- {w}")
141
+ return "\n".join(lines)
142
+
143
+
144
+ def _fmt(val, fmt_str):
145
+ return fmt_str.format(val) if pd.notna(val) else "N/A"
146
+
147
+
148
+ def _format_summary_stats_md(stats) -> str:
149
+ lines = [
150
+ "| Statistic | Value |", "|:--|:--|",
151
+ f"| **Count** | {stats.count:,} |",
152
+ f"| **Missing** | {stats.missing_count} ({stats.missing_pct:.1f}%) |",
153
+ f"| **Mean** | {stats.mean_val:,.2f} |",
154
+ f"| **Std Dev** | {stats.std_val:,.2f} |",
155
+ f"| **Min** | {stats.min_val:,.2f} |",
156
+ f"| **25th %ile** | {stats.p25:,.2f} |",
157
+ f"| **Median** | {stats.median_val:,.2f} |",
158
+ f"| **75th %ile** | {stats.p75:,.2f} |",
159
+ f"| **Max** | {stats.max_val:,.2f} |",
160
+ f"| **Trend slope** | {_fmt(stats.trend_slope, '{:,.4f}')} |",
161
+ f"| **Trend p-value** | {_fmt(stats.trend_pvalue, '{:.4f}')} |",
162
+ f"| **ADF statistic** | {_fmt(stats.adf_statistic, '{:.4f}')} |",
163
+ f"| **ADF p-value** | {_fmt(stats.adf_pvalue, '{:.4f}')} |",
164
+ "",
165
+ f"*Date range: {stats.date_start.date()} to {stats.date_end.date()} ({stats.date_span_days:,} days)*",
166
+ ]
167
+ return "\n".join(lines)
168
+
169
+
170
+ def _format_multi_summary_md(summary_df: pd.DataFrame) -> str:
171
+ lines = [
172
+ "| Variable | Count | Mean | Std | Min | Max | Trend Slope | ADF p |",
173
+ "|:--|--:|--:|--:|--:|--:|--:|--:|",
174
+ ]
175
+ for _, row in summary_df.iterrows():
176
+ adf = f"{row['adf_pvalue']:.4f}" if pd.notna(row['adf_pvalue']) else "N/A"
177
+ slope = f"{row['trend_slope']:,.4f}" if pd.notna(row['trend_slope']) else "N/A"
178
+ lines.append(
179
+ f"| {row['variable']} | {row['count']:,} | {row['mean']:,.2f} | "
180
+ f"{row['std']:,.2f} | {row['min']:,.2f} | {row['max']:,.2f} | "
181
+ f"{slope} | {adf} |"
182
+ )
183
+ return "\n".join(lines)
184
 
185
 
186
+ # ---------------------------------------------------------------------------
187
+ # Data helpers
188
+ # ---------------------------------------------------------------------------
189
 
190
+ def _read_file_to_df(file_path: str) -> tuple[pd.DataFrame, str]:
191
+ with open(file_path, "rb") as f:
192
+ raw = f.read()
193
+ delim = detect_delimiter(raw)
194
+ text = raw.decode("utf-8", errors="replace")
195
+ df = pd.read_csv(io.StringIO(text), sep=delim)
196
+ return df, delim
197
+
198
+
199
+ def _apply_date_filter(df, date_col, mode, n_years, custom_start, custom_end):
200
+ if mode == "Last N years" and n_years:
201
+ cutoff = df[date_col].max() - pd.DateOffset(years=int(n_years))
202
+ df = df[df[date_col] >= cutoff]
203
+ elif mode == "Custom":
204
+ try:
205
+ if custom_start and str(custom_start).strip():
206
+ df = df[df[date_col] >= pd.to_datetime(custom_start)]
207
+ except (ValueError, TypeError):
208
+ pass
209
+ try:
210
+ if custom_end and str(custom_end).strip():
211
+ df = df[df[date_col] <= pd.to_datetime(custom_end)]
212
+ except (ValueError, TypeError):
213
+ pass
214
+ return df
215
+
216
+
217
+ def _generate_single_chart(df_plot, date_col, active_y, chart_type, palette_colors,
218
+ color_by, period_label, window_size, lag_val, decomp_model,
219
+ freq_info):
220
+ """Generate a single chart figure. Returns ``(fig, error_msg)``."""
221
+ try:
222
+ if chart_type == "Line with Markers":
223
+ return plot_line_with_markers(
224
+ df_plot, date_col, active_y,
225
+ title=f"{active_y} over Time",
226
+ style_dict=_STYLE_DICT, palette_colors=palette_colors,
227
+ ), None
228
+
229
+ elif "Colored Markers" in chart_type and color_by:
230
+ return plot_line_colored_markers(
231
+ df_plot, date_col, active_y,
232
+ color_by=color_by, palette_colors=palette_colors,
233
+ title=f"{active_y} colored by {color_by}",
234
+ style_dict=_STYLE_DICT,
235
+ ), None
236
+
237
+ elif chart_type == "Seasonal Plot":
238
+ return plot_seasonal(
239
+ df_plot, date_col, active_y,
240
+ period=period_label or "month",
241
+ palette_name_colors=palette_colors,
242
+ title=f"Seasonal Plot - {active_y}",
243
+ style_dict=_STYLE_DICT,
244
+ ), None
245
+
246
+ elif chart_type == "Seasonal Sub-series":
247
+ return plot_seasonal_subseries(
248
+ df_plot, date_col, active_y,
249
+ period=period_label or "month",
250
+ title=f"Seasonal Sub-series - {active_y}",
251
+ style_dict=_STYLE_DICT, palette_colors=palette_colors,
252
+ ), None
253
+
254
+ elif chart_type == "ACF / PACF":
255
+ series = df_plot[active_y].dropna()
256
+ acf_vals, acf_ci, pacf_vals, pacf_ci = compute_acf_pacf(series)
257
+ return plot_acf_pacf(
258
+ acf_vals, acf_ci, pacf_vals, pacf_ci,
259
+ title=f"ACF / PACF - {active_y}",
260
+ style_dict=_STYLE_DICT,
261
+ ), None
262
+
263
+ elif chart_type == "Decomposition":
264
+ period_int = None
265
+ if freq_info:
266
+ period_int = {"Monthly": 12, "Quarterly": 4, "Weekly": 52, "Daily": 365}.get(freq_info.label)
267
+ result = compute_decomposition(
268
+ df_plot, date_col, active_y,
269
+ model=decomp_model or "additive", period=period_int,
270
+ )
271
+ return plot_decomposition(
272
+ result,
273
+ title=f"Decomposition - {active_y} ({decomp_model})",
274
+ style_dict=_STYLE_DICT,
275
+ ), None
276
+
277
+ elif chart_type == "Rolling Mean Overlay":
278
+ w = int(window_size) if window_size else 12
279
+ return plot_rolling_overlay(
280
+ df_plot, date_col, active_y,
281
+ window=w,
282
+ title=f"Rolling {w}-pt Mean - {active_y}",
283
+ style_dict=_STYLE_DICT, palette_colors=palette_colors,
284
+ ), None
285
+
286
+ elif chart_type == "Year-over-Year Change":
287
+ yoy_result = compute_yoy_change(df_plot, date_col, active_y)
288
+ yoy_df = pd.DataFrame({
289
+ "date": yoy_result[date_col],
290
+ "abs_change": yoy_result["yoy_abs_change"],
291
+ "pct_change": yoy_result["yoy_pct_change"],
292
+ }).dropna()
293
+ return plot_yoy_change(
294
+ df_plot, date_col, active_y, yoy_df,
295
+ title=f"Year-over-Year Change - {active_y}",
296
+ style_dict=_STYLE_DICT,
297
+ ), None
298
+
299
+ elif chart_type == "Lag Plot":
300
+ lag = int(lag_val) if lag_val else 1
301
+ return plot_lag(
302
+ df_plot[active_y],
303
+ lag=lag,
304
+ title=f"Lag-{lag} Plot - {active_y}",
305
+ style_dict=_STYLE_DICT,
306
+ ), None
307
+
308
+ except Exception as exc:
309
+ return None, str(exc)
310
+
311
+ return None, "Unknown chart type"
312
 
 
 
 
 
 
313
 
314
+ # ---------------------------------------------------------------------------
315
+ # HTML fragments
316
+ # ---------------------------------------------------------------------------
317
 
318
+ _DEVELOPER_CARD = """
319
+ <div class="dev-card">
320
+ <div class="dev-row">
321
+ <svg class="dev-avatar" viewBox="0 0 16 16" aria-hidden="true">
322
+ <path d="M11 6a3 3 0 1 1-6 0 3 3 0 0 1 6 0"/>
323
+ <path fill-rule="evenodd" d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8m8-7a7 7 0 0 0-5.468 11.37c.69-1.198 1.97-2.015 3.526-2.015h3.884c1.556 0 2.835.817 3.526 2.014A7 7 0 0 0 8 1"/>
324
+ </svg>
325
+ <div>
326
+ <div class="dev-name">Fadel M. Megahed</div>
327
+ <div class="dev-role">
328
+ Raymond E. Glos Professor, Farmer School of Business<br>
329
+ Miami University
330
+ </div>
331
+ </div>
332
+ </div>
333
+ <div class="dev-links">
334
+ <a class="dev-link" href="mailto:fmegahed@miamioh.edu">
335
+ <svg viewBox="0 0 16 16"><path d="M0 4a2 2 0 0 1 2-2h12a2 2 0 0 1 2 2v8a2 2 0 0 1-2 2H2a2 2 0 0 1-2-2V4zm2-1a1 1 0 0 0-1 1v.217l7 4.2 7-4.2V4a1 1 0 0 0-1-1H2zm13 2.383-4.708 2.825L15 11.105zM14.247 12.6 9.114 8.98 8 9.67 6.886 8.98 1.753 12.6A1 1 0 0 0 2 13h12a1 1 0 0 0 .247-.4zM1 11.105l4.708-2.897L1 5.383z"/></svg>
336
+ Email</a>
337
+ <a class="dev-link" href="https://www.linkedin.com/in/fadel-megahed-289046b4/" target="_blank">
338
+ <svg viewBox="0 0 16 16"><path d="M0 1.146C0 .513.526 0 1.175 0h13.65C15.475 0 16 .513 16 1.146v13.708c0 .633-.525 1.146-1.175 1.146H1.175C.526 16 0 15.487 0 14.854zM4.943 13.5V6H2.542v7.5zM3.743 4.927c.837 0 1.358-.554 1.358-1.248-.015-.709-.521-1.248-1.342-1.248-.821 0-1.358.54-1.358 1.248 0 .694.521 1.248 1.327 1.248zm4.908 8.573V9.359c0-.22.016-.44.08-.598.176-.44.576-.897 1.248-.897.88 0 1.232.676 1.232 1.667v4.0h2.401V9.247c0-2.22-1.184-3.252-2.764-3.252-1.274 0-1.845.7-2.165 1.193h.016V6H6.35c.03.7 0 7.5 0 7.5z"/></svg>
339
+ LinkedIn</a>
340
+ <a class="dev-link" href="https://miamioh.edu/fsb/directory/?up=/directory/megahefm" target="_blank">
341
+ <svg viewBox="0 0 16 16"><path d="M8 0a8 8 0 1 0 0 16A8 8 0 0 0 8 0M1.018 7.5h2.49a14 14 0 0 1 .535-3.55A6 6 0 0 0 1.018 7.5m0 1h2.49c.05 1.24.217 2.44.535 3.55a6 6 0 0 1-3.025-3.55m11.964 0a6 6 0 0 1-3.025 3.55c.318-1.11.485-2.31.535-3.55zm0-1a6 6 0 0 0-3.025-3.55c.318 1.11.485 2.31.535 3.55zM8 1.016q.347.372.643.812C9.157 2.6 9.545 3.71 9.757 5H6.243c.212-1.29.6-2.4 1.114-3.172Q7.653 1.388 8 1.016M8 15q-.347-.372-.643-.812C6.843 13.4 6.455 12.29 6.243 11h3.514c-.212 1.29-.6 2.4-1.114 3.172A6 6 0 0 1 8 14.984M5.494 7.5a13 13 0 0 0 0 1h5.012a13 13 0 0 0 0-1z"/></svg>
342
+ Website</a>
343
+ <a class="dev-link" href="https://github.com/fmegahed/" target="_blank">
344
+ <svg viewBox="0 0 16 16"><path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27s1.36.09 2 .27c1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.01 8.01 0 0 0 16 8c0-4.42-3.58-8-8-8"/></svg>
345
+ GitHub</a>
346
+ </div>
347
+ </div>
348
+ """
349
 
350
+ _WELCOME_MD = """
351
+ # Time Series Visualizer
352
+ *ISA 444 \u00b7 Miami University \u00b7 Farmer School of Business*
353
+
354
+ ---
355
+
356
+ ### Get Started in 3 Steps
357
+
358
+ <div style="display:grid; grid-template-columns:repeat(3, 1fr); gap:1rem; margin:1rem 0;">
359
+ <div class="step-card">
360
+ <div class="step-number">1</div>
361
+ <div class="step-title">Load Data</div>
362
+ <div class="step-desc">Upload a CSV from the sidebar or pick one of the built-in demo datasets.</div>
363
+ </div>
364
+ <div class="step-card">
365
+ <div class="step-number">2</div>
366
+ <div class="step-title">Pick Columns</div>
367
+ <div class="step-desc">Select a date column and one or more numeric value columns. The app auto-detects sensible defaults.</div>
368
+ </div>
369
+ <div class="step-card">
370
+ <div class="step-number">3</div>
371
+ <div class="step-title">Explore</div>
372
+ <div class="step-desc">Choose from 9+ chart types, view summary statistics, and get AI-powered chart interpretation.</div>
373
+ </div>
374
+ </div>
375
+
376
+ ---
377
+
378
+ ### Features
379
+
380
+ | | |
381
+ |:--|:--|
382
+ | **Smart Import** | Auto-detect delimiters, dates, and numeric formats |
383
+ | **9+ Chart Types** | Line, seasonal, ACF/PACF, decomposition, lag, and more |
384
+ | **Multi-Series** | Panel (small multiples) and spaghetti plots |
385
+ | **AI Insights** | GPT vision analyzes your charts and returns structured interpretation |
386
+ | **QueryChat** | Natural-language data filtering powered by DuckDB |
387
+
388
+ ### Good to Know
389
+
390
+ **Privacy** \u2014 All data processing happens in-memory.
391
+ No data is stored on disk. Only chart images (never raw data) are sent to
392
+ OpenAI when you click *Interpret Chart with AI*.
393
+
394
+ **Demo Datasets** \u2014 Three built-in FRED datasets are available in the
395
+ sidebar: Ohio Unemployment Rate (single series), Manufacturing Employment
396
+ for five states in wide format, and the same data in long/stacked format.
397
+ """
398
 
 
 
 
 
 
 
399
 
400
+ # ---------------------------------------------------------------------------
401
+ # Event handlers
402
+ # ---------------------------------------------------------------------------
403
 
404
+ def _process_new_data(df: pd.DataFrame, delim: str | None = None):
405
+ """Shared logic for file upload and demo select.
406
+
407
+ Returns a tuple of values matching ``_DATA_LOAD_OUTPUTS``.
408
+ """
409
+ state = _make_empty_state()
410
+ state["raw_df_original"] = df
411
+
412
+ all_cols = list(df.columns)
413
+ date_suggestions = suggest_date_columns(df)
414
+ default_date = date_suggestions[0] if date_suggestions else all_cols[0]
415
+
416
+ is_long, auto_group, auto_value = detect_long_format(df, default_date)
417
+ fmt = "Long" if is_long else "Wide"
418
+
419
+ other_cols = [c for c in all_cols if c != default_date]
420
+ string_cols = [
421
+ c for c in other_cols
422
+ if df[c].dtype == object or pd.api.types.is_string_dtype(df[c])
423
+ ]
424
+ numeric_cols = [
425
+ c for c in other_cols if pd.api.types.is_numeric_dtype(df[c])
426
+ ]
427
+
428
+ group_default = (
429
+ auto_group if auto_group and auto_group in string_cols
430
+ else (string_cols[0] if string_cols else None)
431
+ )
432
+ value_options = [c for c in numeric_cols if c != group_default] if group_default else numeric_cols
433
+ value_default = (
434
+ auto_value if auto_value and auto_value in value_options
435
+ else (value_options[0] if value_options else None)
436
+ )
437
 
438
+ # Compute initial y_cols
439
+ if is_long and group_default and value_default:
440
+ try:
441
+ effective = pivot_long_to_wide(df, default_date, group_default, value_default)
442
+ available_y = [c for c in effective.columns if c != default_date]
443
+ except Exception:
444
+ available_y = list(numeric_cols)
445
+ else:
446
+ numeric_suggest = suggest_numeric_columns(df)
447
+ available_y = [c for c in numeric_suggest if c != default_date]
448
+
449
+ default_y = available_y[:4] if available_y else []
450
+ delim_text = f"Detected delimiter: `{repr(delim)}`" if delim else ""
451
+
452
+ return (
453
+ state, # app_state
454
+ gr.Column(visible=True), # setup_col
455
+ gr.Dropdown(choices=all_cols, value=default_date), # date_col_dd
456
+ gr.Radio(value=fmt), # format_radio
457
+ gr.Column(visible=is_long), # long_col
458
+ gr.Dropdown(choices=string_cols, value=group_default), # group_col_dd
459
+ gr.Dropdown(choices=value_options, value=value_default), # value_col_dd
460
+ gr.CheckboxGroup(choices=available_y, value=default_y), # y_cols_cbg
461
+ delim_text, # delim_md
462
+ gr.Column(visible=True), # welcome_col
463
+ gr.Column(visible=False), # analysis_col
464
+ )
465
 
 
 
 
 
 
 
 
 
466
 
467
+ def on_file_upload(file_obj, state):
468
+ if file_obj is None:
469
+ empty = _make_empty_state()
470
+ return (
471
+ empty,
472
+ gr.Column(visible=False), gr.Dropdown(), gr.Radio(),
473
+ gr.Column(visible=False), gr.Dropdown(), gr.Dropdown(),
474
+ gr.CheckboxGroup(choices=[], value=[]), "",
475
+ gr.Column(visible=True), gr.Column(visible=False),
476
+ )
477
+ path = file_obj if isinstance(file_obj, str) else str(file_obj)
478
+ df, delim = _read_file_to_df(path)
479
+ return _process_new_data(df, delim)
480
+
481
+
482
+ def on_demo_select(choice, state):
483
+ if choice == "(none)" or choice is None:
484
+ return (
485
+ state,
486
+ gr.Column(), gr.Dropdown(), gr.Radio(),
487
+ gr.Column(), gr.Dropdown(), gr.Dropdown(),
488
+ gr.CheckboxGroup(), "",
489
+ gr.Column(), gr.Column(),
490
+ )
491
+ demo_path = _DEMO_FILES[choice]
492
+ df = pd.read_csv(demo_path)
493
+ return _process_new_data(df, None)
494
+
495
+
496
+ def on_format_change(fmt):
497
+ return gr.Column(visible=(fmt == "Long"))
498
+
499
+
500
+ def on_long_cols_change(date_col, group_col, value_col, state):
501
+ raw_df = state.get("raw_df_original")
502
+ if raw_df is None or not group_col or not value_col:
503
+ return gr.CheckboxGroup()
504
+ try:
505
+ effective = pivot_long_to_wide(raw_df, date_col, group_col, value_col)
506
+ available = [c for c in effective.columns if c != date_col]
507
+ return gr.CheckboxGroup(choices=available, value=available[:4])
508
+ except Exception:
509
+ return gr.CheckboxGroup(choices=[], value=[])
510
+
511
+
512
+ def on_apply_setup(state, date_col, data_format, group_col, value_col,
513
+ y_cols, dup_action, missing_action, freq_override):
514
+ if not y_cols:
515
+ return (
516
+ state,
517
+ gr.Column(visible=True), gr.Column(visible=False),
518
+ "*Select at least one value column.*", "",
519
+ gr.Dropdown(), gr.Dropdown(),
520
+ None, "", "",
521
+ gr.CheckboxGroup(), None, "", "",
522
+ gr.CheckboxGroup(), gr.Dropdown(), None, "", "",
523
+ )
524
 
525
+ raw_df = state.get("raw_df_original")
526
+ if raw_df is None:
527
+ return (
528
+ state,
529
+ gr.Column(visible=True), gr.Column(visible=False),
530
+ "*No data loaded.*", "",
531
+ gr.Dropdown(), gr.Dropdown(),
532
+ None, "", "",
533
+ gr.CheckboxGroup(), None, "", "",
534
+ gr.CheckboxGroup(), gr.Dropdown(), None, "", "",
535
+ )
536
 
537
+ # Pivot if long format
538
+ if data_format == "Long" and group_col and value_col:
539
+ effective_df = pivot_long_to_wide(raw_df, date_col, group_col, value_col)
540
+ else:
541
+ effective_df = raw_df
542
 
543
+ # Clean
544
+ cleaned, report = clean_dataframe(
545
+ effective_df, date_col, list(y_cols),
546
+ dup_action=dup_action, missing_action=missing_action,
547
+ )
548
  freq = detect_frequency(cleaned, date_col)
549
  cleaned = add_time_features(cleaned, date_col)
 
 
550
 
551
+ if freq_override and freq_override.strip():
552
+ freq = FrequencyInfo(
553
+ label=freq_override.strip(),
554
+ median_delta=freq.median_delta,
555
+ is_regular=freq.is_regular,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
556
  )
557
 
558
+ state["cleaned_df"] = cleaned
559
+ state["cleaning_report"] = report
560
+ state["freq_info"] = freq
561
+ state["date_col"] = date_col
562
+ state["y_cols"] = list(y_cols)
563
+ state["setup_applied"] = True
564
+ state["single_png"] = None
565
+ state["panel_png"] = None
566
+ state["spag_png"] = None
567
+
568
+ # Create QueryChat instance if available
569
+ if check_querychat_available():
570
+ try:
571
+ state["qc"] = create_querychat(
572
+ cleaned, name="uploaded_data",
573
+ date_col=date_col, y_cols=list(y_cols),
574
+ freq_label=freq.label,
575
+ )
576
+ except Exception:
577
+ state["qc"] = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
578
  else:
579
+ state["qc"] = None
580
+
581
+ quality_md = _format_cleaning_report_md(report)
582
+ freq_text = f"Frequency: **{freq.label}** ({'regular' if freq.is_regular else 'irregular'})"
583
+
584
+ # Color-by choices
585
+ color_by_choices = []
586
+ if "month" in cleaned.columns:
587
+ color_by_choices = ["month", "quarter", "year", "day_of_week"]
588
+
589
+ y_list = list(y_cols)
590
+ panel_default = y_list[:4] if len(y_list) >= 2 else y_list
591
+ highlight_choices = ["(none)"] + y_list
592
+
593
+ return (
594
+ state, # 0 app_state
595
+ gr.Column(visible=False), # 1 welcome_col
596
+ gr.Column(visible=True), # 2 analysis_col
597
+ quality_md, # 3 quality_md
598
+ freq_text, # 4 freq_info_md
599
+ # Single series tab
600
+ gr.Dropdown(choices=y_list, value=y_list[0]), # 5 single_y_dd
601
+ gr.Dropdown(choices=color_by_choices,
602
+ value=color_by_choices[0] if color_by_choices else None),# 6 color_by_dd
603
+ None, # 7 single_plot
604
+ "", # 8 single_stats_md
605
+ "", # 9 single_interp_md
606
+ # Panel tab
607
+ gr.CheckboxGroup(choices=y_list, value=panel_default), # 10 panel_cols_cbg
608
+ None, # 11 panel_plot
609
+ "", # 12 panel_summary_md
610
+ "", # 13 panel_interp_md
611
+ # Spaghetti tab
612
+ gr.CheckboxGroup(choices=y_list, value=y_list), # 14 spag_cols_cbg
613
+ gr.Dropdown(choices=highlight_choices, value="(none)"), # 15 spag_highlight_dd
614
+ None, # 16 spag_plot
615
+ "", # 17 spag_summary_md
616
+ "", # 18 spag_interp_md
617
+ )
618
 
619
 
620
+ # ---- Visibility toggles ----
 
 
 
 
 
 
621
 
622
+ def on_dr_mode_change(mode):
623
+ return (
624
+ gr.Column(visible=(mode == "Last N years")),
625
+ gr.Column(visible=(mode == "Custom")),
626
+ )
627
 
 
 
628
 
629
+ def on_chart_type_change(chart_type):
630
+ return (
631
+ gr.Column(visible=("Colored Markers" in chart_type)),
632
+ gr.Column(visible=(chart_type in ("Seasonal Plot", "Seasonal Sub-series"))),
633
+ gr.Column(visible=(chart_type == "Rolling Mean Overlay")),
634
+ gr.Column(visible=(chart_type == "Lag Plot")),
635
+ gr.Column(visible=(chart_type == "Decomposition")),
636
  )
637
 
638
 
639
+ def on_palette_change(pal_name):
640
+ colors = get_palette_colors(pal_name, 8)
641
+ return render_palette_preview(colors)
 
 
 
 
642
 
 
643
 
644
+ # ---- Single series ----
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
645
 
646
+ def on_single_update(state, y_col, dr_mode, dr_n, dr_start, dr_end,
647
+ chart_type, palette_name, color_by, period,
648
+ window, lag, decomp_model):
649
+ cleaned_df = state.get("cleaned_df")
650
+ date_col = state.get("date_col")
651
+ freq_info = state.get("freq_info")
652
 
653
+ if cleaned_df is None or not y_col:
654
+ return state, None, "*No data. Apply setup first.*"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
655
 
656
+ palette_colors = get_palette_colors(palette_name, 12)
657
+ df_plot = _apply_date_filter(cleaned_df.copy(), date_col, dr_mode, dr_n, dr_start, dr_end)
 
 
658
 
659
+ if df_plot.empty:
660
+ return state, None, "*No data in selected range.*"
661
 
662
+ fig, err = _generate_single_chart(
663
+ df_plot, date_col, y_col, chart_type, palette_colors,
664
+ color_by, period, window, lag, decomp_model, freq_info,
665
+ )
 
 
 
666
 
667
+ if err:
668
+ return state, None, f"**Chart error:** {err}"
669
 
670
+ # Summary stats
671
+ stats = compute_summary_stats(df_plot, date_col, y_col)
672
+ stats_md = _format_summary_stats_md(stats)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
673
 
674
+ # Store PNG for AI interpretation
675
+ state["single_png"] = fig_to_png_bytes(fig) if fig else None
 
676
 
677
+ return state, fig, stats_md
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
678
 
 
 
 
679
 
680
+ def on_single_interpret(state):
681
+ png = state.get("single_png")
682
+ if not png:
683
+ return "*Generate a chart first, then click Interpret.*"
684
+ if not check_api_key_available():
685
+ return "*Set `OPENAI_API_KEY` to enable AI interpretation.*"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
686
 
687
+ freq_info = state.get("freq_info")
688
+ metadata = {
689
+ "chart_type": "single series",
690
+ "frequency_label": freq_info.label if freq_info else "Unknown",
691
+ "y_column": state.get("y_cols", [""])[0],
692
+ }
693
+ interp = interpret_chart(png, metadata)
694
+ return render_interpretation_markdown(interp)
695
 
696
 
697
+ # ---- Panel ----
 
 
 
 
 
698
 
699
+ def on_panel_update(state, panel_cols, panel_chart, shared_y, palette_name):
700
+ cleaned_df = state.get("cleaned_df")
701
+ date_col = state.get("date_col")
 
 
 
 
 
 
 
 
 
702
 
703
+ if cleaned_df is None or not panel_cols or len(panel_cols) < 2:
704
+ return state, None, "*Select 2+ columns and apply setup first.*"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
705
 
706
+ palette_colors = get_palette_colors(palette_name, len(panel_cols))
707
 
708
+ try:
709
+ fig = plot_panel(
710
+ cleaned_df, date_col, list(panel_cols),
711
+ chart_type=panel_chart, shared_y=shared_y,
712
+ title="Panel Comparison",
713
+ style_dict=_STYLE_DICT, palette_colors=palette_colors,
 
714
  )
715
+ summary_df = compute_multi_series_summary(cleaned_df, date_col, list(panel_cols))
716
+ summary_md = _format_multi_summary_md(summary_df)
717
+ state["panel_png"] = fig_to_png_bytes(fig)
718
+ return state, fig, summary_md
719
+ except Exception as exc:
720
+ return state, None, f"**Panel chart error:** {exc}"
721
+
722
+
723
+ def on_panel_interpret(state):
724
+ png = state.get("panel_png")
725
+ if not png:
726
+ return "*Generate a panel chart first, then click Interpret.*"
727
+ if not check_api_key_available():
728
+ return "*Set `OPENAI_API_KEY` to enable AI interpretation.*"
729
+
730
+ freq_info = state.get("freq_info")
731
+ metadata = {
732
+ "chart_type": "panel (small multiples)",
733
+ "frequency_label": freq_info.label if freq_info else "Unknown",
734
+ "y_column": ", ".join(state.get("y_cols", [])),
735
+ }
736
+ interp = interpret_chart(png, metadata)
737
+ return render_interpretation_markdown(interp)
738
+
739
+
740
+ # ---- Spaghetti ----
741
+
742
+ def on_spag_update(state, spag_cols, alpha, topn, highlight, show_median, palette_name):
743
+ cleaned_df = state.get("cleaned_df")
744
+ date_col = state.get("date_col")
745
+
746
+ if cleaned_df is None or not spag_cols or len(spag_cols) < 2:
747
+ return state, None, "*Select 2+ columns and apply setup first.*"
748
+
749
+ highlight_col = highlight if highlight and highlight != "(none)" else None
750
+ top_n = int(topn) if topn and int(topn) > 0 else None
751
+ palette_colors = get_palette_colors(palette_name, len(spag_cols))
752
+
753
+ try:
754
+ fig = plot_spaghetti(
755
+ cleaned_df, date_col, list(spag_cols),
756
+ alpha=float(alpha),
757
+ highlight_col=highlight_col,
758
+ top_n=top_n,
759
+ show_median_band=bool(show_median),
760
+ title="Spaghetti Plot",
761
+ style_dict=_STYLE_DICT, palette_colors=palette_colors,
762
+ )
763
+ summary_df = compute_multi_series_summary(cleaned_df, date_col, list(spag_cols))
764
+ summary_md = _format_multi_summary_md(summary_df)
765
+ state["spag_png"] = fig_to_png_bytes(fig)
766
+ return state, fig, summary_md
767
+ except Exception as exc:
768
+ return state, None, f"**Spaghetti chart error:** {exc}"
769
+
770
+
771
+ def on_spag_interpret(state):
772
+ png = state.get("spag_png")
773
+ if not png:
774
+ return "*Generate a spaghetti chart first, then click Interpret.*"
775
+ if not check_api_key_available():
776
+ return "*Set `OPENAI_API_KEY` to enable AI interpretation.*"
777
+
778
+ freq_info = state.get("freq_info")
779
+ metadata = {
780
+ "chart_type": "spaghetti (overlay)",
781
+ "frequency_label": freq_info.label if freq_info else "Unknown",
782
+ "y_column": ", ".join(state.get("y_cols", [])),
783
+ }
784
+ interp = interpret_chart(png, metadata)
785
+ return render_interpretation_markdown(interp)
786
 
 
 
 
 
 
 
 
 
 
 
787
 
788
  # ---------------------------------------------------------------------------
789
+ # Build the Gradio app
790
  # ---------------------------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
791
 
792
+ with gr.Blocks(
793
+ title="Time Series Visualizer",
794
+ ) as demo:
795
+
796
+ app_state = gr.State(_make_empty_state())
797
+
798
+ # ===================================================================
799
+ # Sidebar
800
+ # ===================================================================
801
+ with gr.Sidebar():
802
+ gr.HTML(
803
+ '<div class="app-title">'
804
+ '<span class="title-text">Time Series Visualizer</span><br>'
805
+ '<span class="subtitle-text">ISA 444 &middot; Miami University</span>'
806
+ '</div>'
807
+ )
808
+ gr.Markdown("**Vibe-Coded By**")
809
+ gr.HTML(_DEVELOPER_CARD)
810
+ gr.Markdown("v0.2.0 &middot; Last updated Feb 2026", elem_classes=["caption"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
811
 
812
+ gr.Markdown("---")
813
+ gr.Markdown("### Data Input")
814
 
815
+ file_upload = gr.File(
816
+ label="Upload a CSV file",
817
+ file_types=[".csv", ".tsv", ".txt"],
818
+ type="filepath",
819
+ )
820
+ demo_dd = gr.Dropdown(
821
+ label="Or load a demo dataset",
822
+ choices=_DEMO_CHOICES,
823
+ value="(none)",
824
+ )
825
+ reset_btn = gr.Button("Reset all", variant="secondary", size="sm")
826
+ delim_md = gr.Markdown("")
827
+
828
+ # ---- Setup controls (hidden until data loaded) ----
829
+ with gr.Column(visible=False) as setup_col:
830
+ gr.Markdown("---")
831
+ gr.Markdown("### Column & Cleaning Setup")
832
+ gr.Markdown("*Configure below, then click **Apply setup**.*")
833
+
834
+ date_col_dd = gr.Dropdown(label="Date column", choices=[])
835
+ format_radio = gr.Radio(
836
+ label="Data format", choices=["Wide", "Long"], value="Wide",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
837
  )
838
 
839
+ with gr.Column(visible=False) as long_col:
840
+ group_col_dd = gr.Dropdown(label="Group column", choices=[])
841
+ value_col_dd = gr.Dropdown(label="Value column", choices=[])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
842
 
843
+ y_cols_cbg = gr.CheckboxGroup(label="Value column(s)", choices=[])
844
 
845
+ gr.Markdown("**Cleaning options**")
846
+ dup_dd = gr.Dropdown(
847
+ label="Duplicate dates",
848
+ choices=["keep_last", "keep_first", "drop_all"],
849
+ value="keep_last",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
850
  )
851
+ missing_dd = gr.Dropdown(
852
+ label="Missing values",
853
+ choices=["interpolate", "ffill", "drop"],
854
+ value="interpolate",
855
  )
856
+ freq_tb = gr.Textbox(
857
+ label="Override frequency label (optional)",
858
+ placeholder="e.g. Daily, Weekly, Monthly",
 
859
  )
860
+ apply_btn = gr.Button("Apply setup", variant="primary")
861
+ freq_info_md = gr.Markdown("")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
862
 
863
+ # ---- QueryChat placeholder ----
864
+ with gr.Column(visible=False) as qc_col:
865
+ gr.Markdown("---")
866
+ gr.Markdown("### QueryChat")
867
  if check_querychat_available():
868
+ gr.Markdown(
869
+ "QueryChat natural-language filtering is available. "
870
+ "Use the chat below to filter your dataset."
 
 
 
871
  )
 
 
 
 
 
872
  else:
873
+ gr.Markdown(
874
+ "*Set `OPENAI_API_KEY` and install `querychat[gradio]` "
875
+ "to enable natural-language data filtering.*"
 
876
  )
 
 
 
 
 
 
 
877
 
878
+ # ===================================================================
879
+ # Welcome screen
880
+ # ===================================================================
881
+ with gr.Column(visible=True) as welcome_col:
882
+ gr.Markdown(_WELCOME_MD)
883
+
884
+ # ===================================================================
885
+ # Analysis panel (hidden until setup applied)
886
+ # ===================================================================
887
+ with gr.Column(visible=False) as analysis_col:
888
+ with gr.Accordion("Data Quality Report", open=False):
889
+ quality_md = gr.Markdown("")
890
+
891
+ with gr.Tabs():
892
+ # ---------------------------------------------------------------
893
+ # Tab: Single Series
894
+ # ---------------------------------------------------------------
895
+ with gr.Tab("Single Series"):
896
+ with gr.Row():
897
+ with gr.Column(scale=1, min_width=280):
898
+ single_y_dd = gr.Dropdown(label="Value column", choices=[])
899
+ dr_mode_radio = gr.Radio(
900
+ label="Date range",
901
+ choices=["All", "Last N years", "Custom"],
902
+ value="All",
903
+ )
904
+ with gr.Column(visible=False) as dr_n_col:
905
+ dr_n_slider = gr.Slider(
906
+ label="Years", minimum=1, maximum=20,
907
+ value=5, step=1,
908
+ )
909
+ with gr.Column(visible=False) as dr_custom_col:
910
+ dr_start_tb = gr.Textbox(label="Start date", placeholder="YYYY-MM-DD")
911
+ dr_end_tb = gr.Textbox(label="End date", placeholder="YYYY-MM-DD")
912
+
913
+ single_chart_dd = gr.Dropdown(
914
+ label="Chart type", choices=_CHART_TYPES,
915
+ value=_CHART_TYPES[0],
916
+ )
917
+ single_pal_dd = gr.Dropdown(
918
+ label="Color palette", choices=_PALETTE_NAMES,
919
+ value=_PALETTE_NAMES[0],
920
+ )
921
+ single_swatch = gr.Plot(label="Palette preview", show_label=False)
922
+
923
+ with gr.Column(visible=False) as color_by_col:
924
+ color_by_dd = gr.Dropdown(
925
+ label="Color by",
926
+ choices=["month", "quarter", "year", "day_of_week"],
927
+ )
928
+ with gr.Column(visible=False) as period_col:
929
+ period_dd = gr.Dropdown(
930
+ label="Period", choices=["month", "quarter"],
931
+ value="month",
932
+ )
933
+ with gr.Column(visible=False) as window_col:
934
+ window_slider = gr.Slider(
935
+ label="Window", minimum=2, maximum=52,
936
+ value=12, step=1,
937
+ )
938
+ with gr.Column(visible=False) as lag_col:
939
+ lag_slider = gr.Slider(
940
+ label="Lag", minimum=1, maximum=52,
941
+ value=1, step=1,
942
+ )
943
+ with gr.Column(visible=False) as decomp_col:
944
+ decomp_dd = gr.Dropdown(
945
+ label="Model",
946
+ choices=["additive", "multiplicative"],
947
+ value="additive",
948
+ )
949
+ single_update_btn = gr.Button("Update chart", variant="primary")
950
+
951
+ with gr.Column(scale=3):
952
+ single_plot = gr.Plot(label="Chart")
953
+ with gr.Accordion("Summary Statistics", open=False):
954
+ single_stats_md = gr.Markdown("")
955
+ with gr.Accordion("AI Chart Interpretation", open=False):
956
+ gr.Markdown(
957
+ "*The chart image (PNG) is sent to OpenAI for "
958
+ "interpretation. Do not include sensitive data.*"
959
+ )
960
+ single_interp_btn = gr.Button(
961
+ "Interpret Chart with AI", variant="secondary",
962
+ )
963
+ single_interp_md = gr.Markdown("")
964
+
965
+ # ---------------------------------------------------------------
966
+ # Tab: Few Series (Panel)
967
+ # ---------------------------------------------------------------
968
+ with gr.Tab("Few Series (Panel)"):
969
+ with gr.Row():
970
+ with gr.Column(scale=1, min_width=280):
971
+ panel_cols_cbg = gr.CheckboxGroup(
972
+ label="Columns to plot", choices=[],
973
+ )
974
+ panel_chart_dd = gr.Dropdown(
975
+ label="Chart type", choices=["line", "bar"],
976
+ value="line",
977
+ )
978
+ panel_shared_cb = gr.Checkbox(
979
+ label="Shared Y axis", value=True,
980
+ )
981
+ panel_pal_dd = gr.Dropdown(
982
+ label="Color palette", choices=_PALETTE_NAMES,
983
+ value=_PALETTE_NAMES[0],
984
+ )
985
+ panel_update_btn = gr.Button("Update chart", variant="primary")
986
+
987
+ with gr.Column(scale=3):
988
+ panel_plot = gr.Plot(label="Panel Chart")
989
+ with gr.Accordion("Per-series Summary", open=False):
990
+ panel_summary_md = gr.Markdown("")
991
+ with gr.Accordion("AI Chart Interpretation", open=False):
992
+ gr.Markdown(
993
+ "*The chart image (PNG) is sent to OpenAI for "
994
+ "interpretation. Do not include sensitive data.*"
995
+ )
996
+ panel_interp_btn = gr.Button(
997
+ "Interpret Chart with AI", variant="secondary",
998
+ )
999
+ panel_interp_md = gr.Markdown("")
1000
+
1001
+ # ---------------------------------------------------------------
1002
+ # Tab: Many Series (Spaghetti)
1003
+ # ---------------------------------------------------------------
1004
+ with gr.Tab("Many Series (Spaghetti)"):
1005
+ with gr.Row():
1006
+ with gr.Column(scale=1, min_width=280):
1007
+ spag_cols_cbg = gr.CheckboxGroup(
1008
+ label="Columns to include", choices=[],
1009
+ )
1010
+ spag_alpha_slider = gr.Slider(
1011
+ label="Alpha (opacity)",
1012
+ minimum=0.05, maximum=1.0, value=0.15, step=0.05,
1013
+ )
1014
+ spag_topn_num = gr.Number(
1015
+ label="Highlight top N (0 = none)", value=0,
1016
+ minimum=0, precision=0,
1017
+ )
1018
+ spag_highlight_dd = gr.Dropdown(
1019
+ label="Highlight series",
1020
+ choices=["(none)"], value="(none)",
1021
+ )
1022
+ spag_median_cb = gr.Checkbox(
1023
+ label="Show Median + IQR band", value=False,
1024
+ )
1025
+ spag_pal_dd = gr.Dropdown(
1026
+ label="Color palette", choices=_PALETTE_NAMES,
1027
+ value=_PALETTE_NAMES[0],
1028
+ )
1029
+ spag_update_btn = gr.Button("Update chart", variant="primary")
1030
+
1031
+ with gr.Column(scale=3):
1032
+ spag_plot = gr.Plot(label="Spaghetti Chart")
1033
+ with gr.Accordion("Per-series Summary", open=False):
1034
+ spag_summary_md = gr.Markdown("")
1035
+ with gr.Accordion("AI Chart Interpretation", open=False):
1036
+ gr.Markdown(
1037
+ "*The chart image (PNG) is sent to OpenAI for "
1038
+ "interpretation. Do not include sensitive data.*"
1039
+ )
1040
+ spag_interp_btn = gr.Button(
1041
+ "Interpret Chart with AI", variant="secondary",
1042
+ )
1043
+ spag_interp_md = gr.Markdown("")
1044
+
1045
+ # ===================================================================
1046
+ # Event wiring
1047
+ # ===================================================================
1048
+
1049
+ _DATA_LOAD_OUTPUTS = [
1050
+ app_state, setup_col, date_col_dd, format_radio, long_col,
1051
+ group_col_dd, value_col_dd, y_cols_cbg, delim_md,
1052
+ welcome_col, analysis_col,
1053
+ ]
1054
+
1055
+ file_upload.change(
1056
+ on_file_upload,
1057
+ inputs=[file_upload, app_state],
1058
+ outputs=_DATA_LOAD_OUTPUTS,
1059
+ )
1060
 
1061
+ demo_dd.change(
1062
+ on_demo_select,
1063
+ inputs=[demo_dd, app_state],
1064
+ outputs=_DATA_LOAD_OUTPUTS,
1065
+ )
1066
+
1067
+ # Reset via page reload
1068
+ reset_btn.click(fn=None, js="() => { window.location.reload(); }")
1069
+
1070
+ # Format toggle
1071
+ format_radio.change(
1072
+ on_format_change,
1073
+ inputs=[format_radio],
1074
+ outputs=[long_col],
1075
+ )
1076
+
1077
+ # Long-format column changes update y_cols
1078
+ for _comp in [group_col_dd, value_col_dd]:
1079
+ _comp.change(
1080
+ on_long_cols_change,
1081
+ inputs=[date_col_dd, group_col_dd, value_col_dd, app_state],
1082
+ outputs=[y_cols_cbg],
 
 
 
 
 
 
 
1083
  )
1084
 
1085
+ # Apply setup
1086
+ _APPLY_OUTPUTS = [
1087
+ app_state, # 0
1088
+ welcome_col, # 1
1089
+ analysis_col, # 2
1090
+ quality_md, # 3
1091
+ freq_info_md, # 4
1092
+ # Single
1093
+ single_y_dd, # 5
1094
+ color_by_dd, # 6
1095
+ single_plot, # 7
1096
+ single_stats_md, # 8
1097
+ single_interp_md, # 9
1098
+ # Panel
1099
+ panel_cols_cbg, # 10
1100
+ panel_plot, # 11
1101
+ panel_summary_md, # 12
1102
+ panel_interp_md, # 13
1103
+ # Spaghetti
1104
+ spag_cols_cbg, # 14
1105
+ spag_highlight_dd, # 15
1106
+ spag_plot, # 16
1107
+ spag_summary_md, # 17
1108
+ spag_interp_md, # 18
1109
+ ]
1110
+
1111
+ apply_btn.click(
1112
+ on_apply_setup,
1113
+ inputs=[
1114
+ app_state, date_col_dd, format_radio, group_col_dd,
1115
+ value_col_dd, y_cols_cbg, dup_dd, missing_dd, freq_tb,
1116
+ ],
1117
+ outputs=_APPLY_OUTPUTS,
1118
+ )
1119
 
1120
+ # Date range mode visibility
1121
+ dr_mode_radio.change(
1122
+ on_dr_mode_change,
1123
+ inputs=[dr_mode_radio],
1124
+ outputs=[dr_n_col, dr_custom_col],
1125
+ )
1126
+
1127
+ # Chart type conditional controls
1128
+ single_chart_dd.change(
1129
+ on_chart_type_change,
1130
+ inputs=[single_chart_dd],
1131
+ outputs=[color_by_col, period_col, window_col, lag_col, decomp_col],
1132
+ )
1133
+
1134
+ # Palette swatch preview
1135
+ single_pal_dd.change(on_palette_change, [single_pal_dd], [single_swatch])
1136
+
1137
+ # Initialise swatch on load
1138
+ demo.load(on_palette_change, [single_pal_dd], [single_swatch])
1139
+
1140
+ # ---- Single series chart + stats ----
1141
+ single_update_btn.click(
1142
+ on_single_update,
1143
+ inputs=[
1144
+ app_state, single_y_dd, dr_mode_radio, dr_n_slider,
1145
+ dr_start_tb, dr_end_tb, single_chart_dd, single_pal_dd,
1146
+ color_by_dd, period_dd, window_slider, lag_slider, decomp_dd,
1147
+ ],
1148
+ outputs=[app_state, single_plot, single_stats_md],
1149
+ )
1150
+
1151
+ single_interp_btn.click(
1152
+ on_single_interpret,
1153
+ inputs=[app_state],
1154
+ outputs=[single_interp_md],
1155
+ )
1156
+
1157
+ # ---- Panel chart + stats ----
1158
+ panel_update_btn.click(
1159
+ on_panel_update,
1160
+ inputs=[app_state, panel_cols_cbg, panel_chart_dd, panel_shared_cb, panel_pal_dd],
1161
+ outputs=[app_state, panel_plot, panel_summary_md],
1162
+ )
1163
+
1164
+ panel_interp_btn.click(
1165
+ on_panel_interpret,
1166
+ inputs=[app_state],
1167
+ outputs=[panel_interp_md],
1168
+ )
1169
+
1170
+ # ---- Spaghetti chart + stats ----
1171
+ spag_update_btn.click(
1172
+ on_spag_update,
1173
+ inputs=[
1174
+ app_state, spag_cols_cbg, spag_alpha_slider, spag_topn_num,
1175
+ spag_highlight_dd, spag_median_cb, spag_pal_dd,
1176
+ ],
1177
+ outputs=[app_state, spag_plot, spag_summary_md],
1178
+ )
1179
+
1180
+ spag_interp_btn.click(
1181
+ on_spag_interpret,
1182
+ inputs=[app_state],
1183
+ outputs=[spag_interp_md],
1184
+ )
1185
 
 
 
1186
 
1187
  # ---------------------------------------------------------------------------
1188
+ # Launch
1189
  # ---------------------------------------------------------------------------
1190
+ if __name__ == "__main__":
1191
+ demo.launch(
1192
+ server_name="0.0.0.0",
1193
+ server_port=7860,
1194
+ theme=MiamiTheme(),
1195
+ css=get_miami_css(),
 
 
 
 
 
 
 
 
 
1196
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,11 +1,11 @@
1
- streamlit==1.54.0
2
  pandas==2.3.3
3
  numpy==2.4.2
4
  matplotlib==3.10.8
5
  statsmodels==0.14.6
6
  scipy==1.17.0
7
  openai==2.2.0
8
- querychat[streamlit]==0.5.1
9
  duckdb==1.4.4
10
  palettable==3.3.3
11
  pydantic==2.12.5
 
1
+ gradio>=6.0.0
2
  pandas==2.3.3
3
  numpy==2.4.2
4
  matplotlib==3.10.8
5
  statsmodels==0.14.6
6
  scipy==1.17.0
7
  openai==2.2.0
8
+ querychat[gradio]==0.5.1
9
  duckdb==1.4.4
10
  palettable==3.3.3
11
  pydantic==2.12.5
src/ai_interpretation.py CHANGED
@@ -7,7 +7,7 @@ Pydantic structured output.
7
  Provides:
8
  - Pydantic models for structured chart analysis results
9
  - Vision-based chart interpretation via OpenAI's GPT-5.2 model
10
- - Streamlit rendering of interpretation results
11
  """
12
 
13
  from __future__ import annotations
@@ -19,7 +19,6 @@ from typing import Literal
19
 
20
  import openai
21
  from pydantic import BaseModel, ConfigDict
22
- import streamlit as st
23
 
24
 
25
  # ---------------------------------------------------------------------------
@@ -184,7 +183,7 @@ def interpret_chart(
184
 
185
 
186
  # ---------------------------------------------------------------------------
187
- # Streamlit rendering
188
  # ---------------------------------------------------------------------------
189
 
190
  _DIRECTION_EMOJI = {
@@ -201,69 +200,75 @@ _SEVERITY_COLOR = {
201
  }
202
 
203
 
204
- def render_interpretation(interp: ChartInterpretation) -> None:
205
- """Render a :class:`ChartInterpretation` as a styled Streamlit card.
206
 
207
- Uses ``st.markdown``, ``st.expander``, and related widgets to lay out
208
- the interpretation in an easy-to-read format with sections for trend,
209
- seasonality, stationarity, anomalies, key observations, summary, and
210
- recommendations.
211
  """
 
212
 
213
- st.markdown("### AI Chart Interpretation")
214
- st.markdown(
215
- f"**Detected chart type:** {interp.chart_type_detected}"
216
- )
217
 
218
  # ---- Summary ----------------------------------------------------------
219
- st.markdown("---")
220
- st.markdown(f"**Summary:** {interp.summary}")
 
221
 
222
  # ---- Key observations -------------------------------------------------
223
- with st.expander("Key Observations", expanded=True):
224
- for obs in interp.key_observations:
225
- st.markdown(f"- {obs}")
 
226
 
227
  # ---- Trend ------------------------------------------------------------
228
- with st.expander("Trend Analysis"):
229
- arrow = _DIRECTION_EMOJI.get(interp.trend.direction, "")
230
- st.markdown(
231
- f"**Direction:** {interp.trend.direction.capitalize()} {arrow}"
232
- )
233
- st.markdown(interp.trend.description)
234
 
235
  # ---- Seasonality ------------------------------------------------------
236
- with st.expander("Seasonality"):
237
- status = "Detected" if interp.seasonality.detected else "Not detected"
238
- st.markdown(f"**Status:** {status}")
239
- if interp.seasonality.period:
240
- st.markdown(f"**Period:** {interp.seasonality.period}")
241
- st.markdown(interp.seasonality.description)
 
 
242
 
243
  # ---- Stationarity -----------------------------------------------------
244
- with st.expander("Stationarity"):
245
- label = (
246
- "Likely stationary"
247
- if interp.stationarity.likely_stationary
248
- else "Likely non-stationary"
249
- )
250
- st.markdown(f"**Assessment:** {label}")
251
- st.markdown(interp.stationarity.description)
 
 
252
 
253
  # ---- Anomalies --------------------------------------------------------
254
- with st.expander("Anomalies"):
255
- if not interp.anomalies:
256
- st.markdown("No anomalies detected.")
257
- else:
258
- for anomaly in interp.anomalies:
259
- color = _SEVERITY_COLOR.get(anomaly.severity, "gray")
260
- st.markdown(
261
- f"- **[{anomaly.approximate_location}]** "
262
- f":{color}[{anomaly.severity.upper()}] "
263
- f"-- {anomaly.description}"
264
- )
265
 
266
  # ---- Recommendations --------------------------------------------------
267
- with st.expander("Recommended Next Steps"):
268
- for rec in interp.recommendations:
269
- st.markdown(f"1. {rec}")
 
 
 
7
  Provides:
8
  - Pydantic models for structured chart analysis results
9
  - Vision-based chart interpretation via OpenAI's GPT-5.2 model
10
+ - Markdown rendering of interpretation results (framework-agnostic)
11
  """
12
 
13
  from __future__ import annotations
 
19
 
20
  import openai
21
  from pydantic import BaseModel, ConfigDict
 
22
 
23
 
24
  # ---------------------------------------------------------------------------
 
183
 
184
 
185
  # ---------------------------------------------------------------------------
186
+ # Markdown rendering (framework-agnostic)
187
  # ---------------------------------------------------------------------------
188
 
189
  _DIRECTION_EMOJI = {
 
200
  }
201
 
202
 
203
def render_interpretation_markdown(interp: ChartInterpretation) -> str:
    """Format a :class:`ChartInterpretation` as a Markdown document.

    Produces a multi-section Markdown string (summary, key observations,
    trend, seasonality, stationarity, anomalies, recommendations) that can
    be displayed with ``gr.Markdown`` or any other Markdown renderer.

    Parameters
    ----------
    interp:
        Structured interpretation result to render.

    Returns
    -------
    str
        The rendered Markdown text.
    """
    # Header + summary ------------------------------------------------------
    parts: list[str] = [
        "### AI Chart Interpretation",
        f"**Detected chart type:** {interp.chart_type_detected}",
        "",
        "---",
        f"**Summary:** {interp.summary}",
        "",
        "#### Key Observations",
    ]
    parts.extend(f"- {item}" for item in interp.key_observations)
    parts.append("")

    # Trend -----------------------------------------------------------------
    direction = interp.trend.direction
    parts += [
        "#### Trend Analysis",
        f"**Direction:** {direction.capitalize()} {_DIRECTION_EMOJI.get(direction, '')}",
        "",
        interp.trend.description,
        "",
    ]

    # Seasonality -----------------------------------------------------------
    season = interp.seasonality
    parts.append("#### Seasonality")
    parts.append(f"**Status:** {'Detected' if season.detected else 'Not detected'}")
    if season.period:
        parts.append(f"**Period:** {season.period}")
    parts += ["", season.description, ""]

    # Stationarity ----------------------------------------------------------
    verdict = (
        "Likely stationary"
        if interp.stationarity.likely_stationary
        else "Likely non-stationary"
    )
    parts += [
        "#### Stationarity",
        f"**Assessment:** {verdict}",
        "",
        interp.stationarity.description,
        "",
    ]

    # Anomalies -------------------------------------------------------------
    parts.append("#### Anomalies")
    if interp.anomalies:
        parts.extend(
            f"- **[{a.approximate_location}]** *{a.severity.upper()}* -- {a.description}"
            for a in interp.anomalies
        )
    else:
        parts.append("No anomalies detected.")
    parts.append("")

    # Recommendations -------------------------------------------------------
    parts.append("#### Recommended Next Steps")
    parts.extend(f"{i}. {rec}" for i, rec in enumerate(interp.recommendations, 1))

    return "\n".join(parts)
src/querychat_helpers.py CHANGED
@@ -2,7 +2,7 @@
2
  QueryChat initialization and filtered DataFrame helpers.
3
 
4
  Provides convenience wrappers around the ``querychat`` library for
5
- natural-language filtering of time-series DataFrames inside a Streamlit
6
  app. All functions degrade gracefully when the package or an API key
7
  is unavailable.
8
  """
@@ -13,10 +13,9 @@ import os
13
  from typing import List, Optional
14
 
15
  import pandas as pd
16
- import streamlit as st
17
 
18
  try:
19
- from querychat.streamlit import QueryChat as _QueryChat
20
 
21
  _QUERYCHAT_AVAILABLE = True
22
  except ImportError: # pragma: no cover
@@ -78,7 +77,7 @@ def create_querychat(
78
  if not _QUERYCHAT_AVAILABLE:
79
  raise RuntimeError(
80
  "The 'querychat' package is not installed. "
81
- "Install it with: pip install 'querychat[streamlit]'"
82
  )
83
 
84
  if y_cols is None:
@@ -125,7 +124,7 @@ def create_querychat(
125
  # Filtered DataFrame extraction
126
  # ---------------------------------------------------------------------------
127
 
128
- def get_filtered_pandas_df(qc) -> pd.DataFrame:
129
  """Extract the currently filtered DataFrame from a QueryChat instance.
130
 
131
  The underlying ``qc.df()`` may return a *narwhals* DataFrame rather
@@ -136,6 +135,9 @@ def get_filtered_pandas_df(qc) -> pd.DataFrame:
136
  ----------
137
  qc:
138
  A QueryChat instance previously created via :func:`create_querychat`.
 
 
 
139
 
140
  Returns
141
  -------
@@ -143,12 +145,22 @@ def get_filtered_pandas_df(qc) -> pd.DataFrame:
143
  The filtered data as a pandas DataFrame.
144
  """
145
  try:
146
- result = qc.df()
 
 
 
147
 
148
  # narwhals (or polars) DataFrames expose .to_pandas()
149
  if hasattr(result, "to_pandas"):
150
  return result.to_pandas()
151
 
 
 
 
 
 
 
 
152
  # Already a pandas DataFrame
153
  if isinstance(result, pd.DataFrame):
154
  return result
@@ -159,7 +171,7 @@ def get_filtered_pandas_df(qc) -> pd.DataFrame:
159
  # If anything goes wrong, surface the unfiltered data so the app
160
  # can continue to function.
161
  try:
162
- raw = qc.df()
163
  if isinstance(raw, pd.DataFrame):
164
  return raw
165
  except Exception: # noqa: BLE001
 
2
  QueryChat initialization and filtered DataFrame helpers.
3
 
4
  Provides convenience wrappers around the ``querychat`` library for
5
+ natural-language filtering of time-series DataFrames inside a Gradio
6
  app. All functions degrade gracefully when the package or an API key
7
  is unavailable.
8
  """
 
13
  from typing import List, Optional
14
 
15
  import pandas as pd
 
16
 
17
  try:
18
+ from querychat.gradio import QueryChat as _QueryChat
19
 
20
  _QUERYCHAT_AVAILABLE = True
21
  except ImportError: # pragma: no cover
 
77
  if not _QUERYCHAT_AVAILABLE:
78
  raise RuntimeError(
79
  "The 'querychat' package is not installed. "
80
+ "Install it with: pip install 'querychat[gradio]'"
81
  )
82
 
83
  if y_cols is None:
 
124
  # Filtered DataFrame extraction
125
  # ---------------------------------------------------------------------------
126
 
127
+ def get_filtered_pandas_df(qc, state_dict=None) -> pd.DataFrame:
128
  """Extract the currently filtered DataFrame from a QueryChat instance.
129
 
130
  The underlying ``qc.df()`` may return a *narwhals* DataFrame rather
 
135
  ----------
136
  qc:
137
  A QueryChat instance previously created via :func:`create_querychat`.
138
+ state_dict:
139
+ The Gradio state dictionary from ``qc.ui()``. Required for the
140
+ Gradio variant of QueryChat.
141
 
142
  Returns
143
  -------
 
145
  The filtered data as a pandas DataFrame.
146
  """
147
  try:
148
+ if state_dict is not None:
149
+ result = qc.df(state_dict)
150
+ else:
151
+ result = qc.df()
152
 
153
  # narwhals (or polars) DataFrames expose .to_pandas()
154
  if hasattr(result, "to_pandas"):
155
  return result.to_pandas()
156
 
157
+ # narwhals also has .to_native() which may give pandas directly
158
+ if hasattr(result, "to_native"):
159
+ native = result.to_native()
160
+ if isinstance(native, pd.DataFrame):
161
+ return native
162
+ return pd.DataFrame(native)
163
+
164
  # Already a pandas DataFrame
165
  if isinstance(result, pd.DataFrame):
166
  return result
 
171
  # If anything goes wrong, surface the unfiltered data so the app
172
  # can continue to function.
173
  try:
174
+ raw = qc.df() if state_dict is None else qc.df(state_dict)
175
  if isinstance(raw, pd.DataFrame):
176
  return raw
177
  except Exception: # noqa: BLE001
src/ui_theme.py CHANGED
@@ -1,10 +1,11 @@
1
  """
2
  ui_theme.py
3
  -----------
4
- Miami University branded theme and styling utilities for Streamlit apps.
5
 
6
  Provides:
7
- - CSS injection for Streamlit components (buttons, sidebar, metrics, cards)
 
8
  - Matplotlib rcParams styled with Miami branding
9
  - ColorBrewer palette loading via palettable with graceful fallback
10
  - Color-swatch preview figure generation
@@ -15,9 +16,11 @@ from __future__ import annotations
15
  import itertools
16
  from typing import Dict, List, Optional
17
 
 
 
 
18
  import matplotlib.figure
19
  import matplotlib.pyplot as plt
20
- import streamlit as st
21
 
22
  # ---------------------------------------------------------------------------
23
  # Brand constants — Miami University (Ohio) official palette
@@ -36,139 +39,197 @@ _HOVER_RED = "#9E0E26"
36
 
37
 
38
  # ---------------------------------------------------------------------------
39
- # Streamlit CSS injection
40
  # ---------------------------------------------------------------------------
41
- def apply_miami_theme() -> None:
42
- """Inject Miami-branded CSS into the active Streamlit page.
43
-
44
- Styles affected:
45
- * Primary buttons -- Miami Red background with white text
46
- * Card containers -- subtle border and rounded corners
47
- * Sidebar header -- Miami Red accent bar
48
- * Metric cards -- light background with left red accent
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  """
50
- css = f"""
51
- <style>
52
- /* ---- Primary buttons ---- */
53
- .stButton > button[kind="primary"],
54
- .stButton > button {{
55
- background-color: {MIAMI_RED};
56
- color: {_WHITE};
57
- border: none;
58
- border-radius: 6px;
59
- padding: 0.5rem 1.25rem;
60
- font-weight: 600;
61
- transition: background-color 0.2s ease;
62
- }}
63
- .stButton > button:hover {{
64
- background-color: {_HOVER_RED};
65
- color: {_WHITE};
66
- border: none;
67
- }}
68
- .stButton > button:active,
69
- .stButton > button:focus {{
70
- background-color: {_HOVER_RED};
71
- color: {_WHITE};
72
- box-shadow: none;
73
- }}
74
-
75
- /* ---- Expander card borders (box-shadow to avoid layout shift) ---- */
76
- div[data-testid="stExpander"] {{
77
- box-shadow: 0 0 0 1px {_BORDER_GRAY};
78
- border-radius: 8px;
79
- }}
80
-
81
- /* ---- Sidebar header accent ---- */
82
- section[data-testid="stSidebar"] > div:first-child {{
83
- border-top: 4px solid {MIAMI_RED};
84
- }}
85
- section[data-testid="stSidebar"] h1,
86
- section[data-testid="stSidebar"] h2,
87
- section[data-testid="stSidebar"] h3 {{
88
- color: {MIAMI_RED};
89
- }}
90
-
91
- /* ---- Metric cards (inset shadow for left accent, no layout impact) ---- */
92
- div[data-testid="stMetric"] {{
93
- background-color: {_LIGHT_GRAY};
94
- box-shadow: inset 4px 0 0 0 {MIAMI_RED};
95
- border-radius: 6px;
96
- padding: 0.6rem 0.75rem 0.6rem 1rem;
97
- }}
98
- div[data-testid="stMetric"] label {{
99
- color: {_BLACK};
100
- font-size: 0.78rem;
101
- }}
102
- div[data-testid="stMetric"] div[data-testid="stMetricValue"] {{
103
- color: {_BLACK};
104
- font-weight: 700;
105
- font-size: 0.95rem;
106
- }}
107
-
108
- /* ---- Sidebar developer card ---- */
109
- .dev-card {{
110
- padding: 0;
111
- background: transparent;
112
- }}
113
- .dev-row {{
114
- display: flex;
115
- gap: 0.5rem;
116
- align-items: flex-start;
117
- }}
118
- .dev-avatar {{
119
- width: 28px;
120
- height: 28px;
121
- min-width: 28px;
122
- fill: {_BLACK};
123
- }}
124
- .dev-name {{
125
- font-weight: 600;
126
- color: {_BLACK};
127
- font-size: 0.82rem;
128
- line-height: 1.3;
129
- }}
130
- .dev-role {{
131
- font-size: 0.7rem;
132
- color: #6c757d;
133
- line-height: 1.3;
134
- }}
135
- .dev-links {{
136
- display: flex;
137
- gap: 0.3rem;
138
- flex-wrap: wrap;
139
- margin-top: 0.35rem;
140
- }}
141
- .dev-link,
142
- .dev-link:visited,
143
- .dev-link:link {{
144
- display: inline-flex;
145
- align-items: center;
146
- gap: 0.2rem;
147
- padding: 0.15rem 0.4rem;
148
- border: 1px solid {MIAMI_RED};
149
- border-radius: 4px;
150
- font-size: 0.65rem;
151
- color: {MIAMI_RED} !important;
152
- text-decoration: none;
153
- background: {_WHITE};
154
- line-height: 1.4;
155
- white-space: nowrap;
156
- }}
157
- .dev-link svg {{
158
- width: 11px;
159
- height: 11px;
160
- fill: {MIAMI_RED};
161
- }}
162
- .dev-link:hover {{
163
- background-color: {MIAMI_RED};
164
- color: {_WHITE} !important;
165
- }}
166
- .dev-link:hover svg {{
167
- fill: {_WHITE};
168
- }}
169
- </style>
 
170
  """
171
- st.markdown(css, unsafe_allow_html=True)
172
 
173
 
174
  # ---------------------------------------------------------------------------
@@ -359,7 +420,7 @@ def render_palette_preview(
359
  Returns
360
  -------
361
  matplotlib.figure.Figure
362
- A Figure instance ready to be passed to ``st.pyplot()`` or saved.
363
  """
364
  n = len(colors)
365
  fig_width = max(swatch_width * n, 2.0)
@@ -384,5 +445,5 @@ def render_palette_preview(
384
  ax.set_aspect("equal")
385
  ax.axis("off")
386
  fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
387
- plt.close(fig) # prevent display in non-Streamlit contexts
388
  return fig
 
1
  """
2
  ui_theme.py
3
  -----------
4
+ Miami University branded theme and styling utilities.
5
 
6
  Provides:
7
+ - Gradio theme subclass (MiamiTheme) with Miami branding
8
+ - Custom CSS string for elements beyond theme control
9
  - Matplotlib rcParams styled with Miami branding
10
  - ColorBrewer palette loading via palettable with graceful fallback
11
  - Color-swatch preview figure generation
 
16
  import itertools
17
  from typing import Dict, List, Optional
18
 
19
+ import gradio as gr
20
+ from gradio.themes.base import Base
21
+ from gradio.themes.utils import colors, fonts, sizes
22
  import matplotlib.figure
23
  import matplotlib.pyplot as plt
 
24
 
25
  # ---------------------------------------------------------------------------
26
  # Brand constants — Miami University (Ohio) official palette
 
39
 
40
 
41
  # ---------------------------------------------------------------------------
42
+ # Gradio theme
43
  # ---------------------------------------------------------------------------
44
+
45
# Shades derived from Miami Red (#C41230): lighter tints toward c50,
# darker shades toward c950, following Gradio's palette convention.
_miami_red_palette = colors.Color(
    name="miami_red",
    c50="#fff5f6",
    c100="#ffe0e4",
    c200="#ffc7ce",
    c300="#ffa3ad",
    c400="#ff6b7d",
    c500="#C41230",
    c600="#a30f27",
    c700="#850c1f",
    c800="#6b0a19",
    c900="#520714",
    c950="#3d0510",
)


class MiamiTheme(Base):
    """Gradio theme subclass with Miami University branding.

    Uses Miami Red as the primary hue, neutral grays elsewhere, and the
    Source Sans Pro typeface. Pass an instance to ``gr.Blocks(theme=...)``.
    """

    def __init__(self, **kwargs):
        body_fonts = (
            fonts.GoogleFont("Source Sans Pro"),
            fonts.Font("ui-sans-serif"),
            fonts.Font("system-ui"),
            fonts.Font("sans-serif"),
        )
        mono_fonts = (
            fonts.Font("ui-monospace"),
            fonts.Font("SFMono-Regular"),
            fonts.Font("monospace"),
        )
        super().__init__(
            primary_hue=_miami_red_palette,
            secondary_hue=colors.gray,
            neutral_hue=colors.gray,
            spacing_size=sizes.spacing_md,
            radius_size=sizes.radius_sm,
            text_size=sizes.text_md,
            font=body_fonts,
            font_mono=mono_fonts,
            **kwargs,
        )
        # Fine-grained overrides layered on top of the base palette.
        self.set(
            # Buttons: solid Miami Red with a darker hover state.
            button_primary_background_fill="*primary_500",
            button_primary_background_fill_hover="*primary_700",
            button_primary_text_color="white",
            button_primary_border_color="*primary_500",
            # Block titles in brand red.
            block_title_text_weight="600",
            block_title_text_color="*primary_500",
            # Body text.
            body_text_color="*neutral_900",
            # Subtle block borders.
            block_border_width="1px",
            block_border_color="*neutral_200",
            # Checkbox / radio selection color.
            checkbox_background_color_selected="*primary_500",
            checkbox_border_color_selected="*primary_500",
        )
103
+
104
+
105
def get_miami_css() -> str:
    """Build the custom CSS passed to ``gr.Blocks(css=...)``.

    Covers styling that the :class:`MiamiTheme` token system cannot
    express: the sidebar accent bar, the developer card, stat cards,
    welcome-screen step cards, and the sidebar app title.

    Returns
    -------
    str
        A CSS stylesheet with Miami brand colors interpolated.
    """
    stylesheet = f"""
/* ---- Sidebar header accent ---- */
.sidebar > .panel {{
    border-top: 4px solid {MIAMI_RED} !important;
}}

/* ---- Developer card ---- */
.dev-card {{
    padding: 0;
    background: transparent;
}}
.dev-row {{
    display: flex;
    gap: 0.5rem;
    align-items: flex-start;
}}
.dev-avatar {{
    width: 28px;
    height: 28px;
    min-width: 28px;
    fill: {_BLACK};
}}
.dev-name {{
    font-weight: 600;
    color: {_BLACK};
    font-size: 0.82rem;
    line-height: 1.3;
}}
.dev-role {{
    font-size: 0.7rem;
    color: #6c757d;
    line-height: 1.3;
}}
.dev-links {{
    display: flex;
    gap: 0.3rem;
    flex-wrap: wrap;
    margin-top: 0.35rem;
}}
.dev-link,
.dev-link:visited,
.dev-link:link {{
    display: inline-flex;
    align-items: center;
    gap: 0.2rem;
    padding: 0.15rem 0.4rem;
    border: 1px solid {MIAMI_RED};
    border-radius: 4px;
    font-size: 0.65rem;
    color: {MIAMI_RED} !important;
    text-decoration: none;
    background: {_WHITE};
    line-height: 1.4;
    white-space: nowrap;
}}
.dev-link svg {{
    width: 11px;
    height: 11px;
    fill: {MIAMI_RED};
}}
.dev-link:hover {{
    background-color: {MIAMI_RED};
    color: {_WHITE} !important;
}}
.dev-link:hover svg {{
    fill: {_WHITE};
}}

/* ---- Metric-like stat cards ---- */
.stat-card {{
    background-color: {_LIGHT_GRAY};
    box-shadow: inset 4px 0 0 0 {MIAMI_RED};
    border-radius: 6px;
    padding: 0.6rem 0.75rem 0.6rem 1rem;
}}
.stat-card .stat-label {{
    color: {_BLACK};
    font-size: 0.78rem;
}}
.stat-card .stat-value {{
    color: {_BLACK};
    font-weight: 700;
    font-size: 0.95rem;
}}

/* ---- Step cards on welcome screen ---- */
.step-card {{
    background: {_LIGHT_GRAY};
    border-radius: 8px;
    padding: 1rem;
    border-left: 4px solid {MIAMI_RED};
    height: 100%;
}}
.step-card .step-number {{
    font-size: 1.6rem;
    font-weight: 700;
    color: {MIAMI_RED};
}}
.step-card .step-title {{
    font-weight: 600;
    margin: 0.3rem 0 0.2rem;
}}
.step-card .step-desc {{
    font-size: 0.82rem;
    color: #444;
}}

/* ---- App title in sidebar ---- */
.app-title {{
    text-align: center;
    margin-bottom: 0.5rem;
}}
.app-title .title-text {{
    font-size: 1.6rem;
    font-weight: 800;
    color: {MIAMI_RED};
}}
.app-title .subtitle-text {{
    font-size: 0.82rem;
    color: {_BLACK};
}}
"""
    return stylesheet
 
233
 
234
 
235
  # ---------------------------------------------------------------------------
 
420
  Returns
421
  -------
422
  matplotlib.figure.Figure
423
+ A Figure instance ready to be passed to ``gr.Plot`` or saved.
424
  """
425
  n = len(colors)
426
  fig_width = max(swatch_width * n, 2.0)
 
445
  ax.set_aspect("equal")
446
  ax.axis("off")
447
  fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
448
+ plt.close(fig) # prevent display in non-Gradio contexts
449
  return fig