tarekmasryo commited on
Commit
0d51bbb
·
verified ·
1 Parent(s): f6ab830

Delete src/app.py

Browse files
Files changed (1) hide show
  1. src/app.py +0 -1668
src/app.py DELETED
@@ -1,1668 +0,0 @@
1
- import warnings
2
- import io
3
- import zipfile
4
- from datetime import datetime
5
-
6
- warnings.filterwarnings("ignore")
7
-
8
- import numpy as np
9
- import pandas as pd
10
- import matplotlib.pyplot as plt
11
- import seaborn as sns
12
- import streamlit as st
13
- from matplotlib.ticker import FuncFormatter
14
- from scipy import stats
15
-
16
- # ==================== PAGE CONFIG ====================
17
# Page-wide Streamlit settings: browser-tab title and icon, wide layout,
# and the sidebar expanded on first load.
st.set_page_config(
    page_title="Seaborn & Matplotlib Visual Lab",
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded",
)
23
-
24
- # ==================== GLOBAL STYLE ====================
25
- st.markdown(
26
- """
27
- <style>
28
- @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700;800&display=swap');
29
-
30
- * {
31
- font-family: 'Inter', sans-serif;
32
- }
33
-
34
- .stApp {
35
- background: radial-gradient(circle at 0% 0%, #020617 0, #020617 45%, #020617 100%);
36
- color: #e5e7eb;
37
- }
38
-
39
- .block-container {
40
- padding-top: 1.4rem;
41
- padding-bottom: 3rem;
42
- }
43
-
44
- .main-header {
45
- font-size: 3.1rem;
46
- font-weight: 800;
47
- background: linear-gradient(135deg, #38bdf8 0%, #6366f1 40%, #a855f7 100%);
48
- -webkit-background-clip: text;
49
- -webkit-text-fill-color: transparent;
50
- margin-bottom: 0.2rem;
51
- }
52
-
53
- .subtitle {
54
- font-size: 1.05rem;
55
- color: #9ca3af;
56
- margin-bottom: 1.1rem;
57
- }
58
-
59
- .metric-row {
60
- display: flex;
61
- flex-wrap: wrap;
62
- gap: 0.9rem;
63
- margin-top: 0.3rem;
64
- margin-bottom: 1.0rem;
65
- }
66
-
67
- .metric-card {
68
- background: radial-gradient(circle at 0% 0%, rgba(56, 189, 248, 0.18), rgba(15, 23, 42, 0.96));
69
- padding: 0.9rem 1.2rem;
70
- border-radius: 14px;
71
- color: #e5e7eb;
72
- box-shadow:
73
- 0 14px 40px rgba(15, 23, 42, 0.9),
74
- 0 0 0 1px rgba(148, 163, 184, 0.45);
75
- min-width: 160px;
76
- }
77
-
78
- .metric-card-label {
79
- font-size: 0.75rem;
80
- letter-spacing: 0.08em;
81
- text-transform: uppercase;
82
- color: #9ca3af;
83
- }
84
-
85
- .metric-card-value {
86
- font-size: 1.45rem;
87
- font-weight: 700;
88
- margin-top: 0.15rem;
89
- }
90
-
91
- .info-box {
92
- background: radial-gradient(circle at 0% 0%, rgba(45, 212, 191, 0.25), rgba(56, 189, 248, 0.10));
93
- padding: 0.9rem 1.2rem;
94
- border-radius: 14px;
95
- border-left: 4px solid #22d3ee;
96
- margin: 0.7rem 0 1.0rem 0;
97
- color: #e0f2fe;
98
- box-shadow: 0 16px 40px rgba(15, 23, 42, 0.9);
99
- backdrop-filter: blur(18px);
100
- }
101
-
102
- .tip-box {
103
- background: linear-gradient(135deg, rgba(250, 204, 21, 0.12), rgba(251, 191, 36, 0.04));
104
- padding: 0.75rem 1rem;
105
- border-radius: 10px;
106
- border-left: 3px solid rgba(250, 204, 21, 0.7);
107
- margin: 0.4rem 0;
108
- font-size: 0.9rem;
109
- color: #facc15;
110
- }
111
-
112
- .code-box {
113
- background: #020617;
114
- color: #e5e7eb;
115
- padding: 0.9rem 1rem;
116
- border-radius: 12px;
117
- border: 1px solid rgba(148, 163, 184, 0.5);
118
- margin: 0.5rem 0;
119
- font-size: 0.9rem;
120
- box-shadow: 0 14px 36px rgba(15, 23, 42, 0.9);
121
- }
122
-
123
- .plot-container {
124
- background: radial-gradient(circle at 0% 0%, rgba(148, 163, 184, 0.16), rgba(15, 23, 42, 0.96));
125
- padding: 1.4rem 1.5rem;
126
- border-radius: 18px;
127
- box-shadow:
128
- 0 20px 50px rgba(15, 23, 42, 0.95),
129
- 0 0 0 1px rgba(148, 163, 184, 0.4);
130
- margin: 1.0rem 0 1.4rem 0;
131
- border: 1px solid rgba(148, 163, 184, 0.35);
132
- }
133
-
134
- .control-panel {
135
- background: linear-gradient(145deg, rgba(15, 23, 42, 0.98), rgba(30, 64, 175, 0.85));
136
- padding: 0.8rem 1.1rem;
137
- border-radius: 999px;
138
- box-shadow:
139
- 0 16px 40px rgba(15, 23, 42, 0.9),
140
- 0 0 0 1px rgba(129, 140, 248, 0.7);
141
- color: #e5e7eb;
142
- margin-bottom: 0.7rem;
143
- }
144
-
145
- .control-panel-header {
146
- font-size: 0.85rem;
147
- font-weight: 700;
148
- text-transform: uppercase;
149
- letter-spacing: 0.12em;
150
- margin: 0;
151
- color: #e5e7eb;
152
- opacity: 0.98;
153
- }
154
-
155
- .stTabs [data-baseweb="tab-list"] {
156
- gap: 0.7rem;
157
- background: transparent;
158
- padding: 0.4rem 0 0.8rem 0;
159
- border-radius: 0;
160
- border-bottom: 1px solid rgba(148, 163, 184, 0.35);
161
- }
162
-
163
- .stTabs [data-baseweb="tab"] {
164
- height: 3.3rem;
165
- padding: 0 1.8rem;
166
- font-weight: 600;
167
- border-radius: 999px;
168
- background: rgba(15, 23, 42, 0.86);
169
- border: 1px solid rgba(148, 163, 184, 0.5);
170
- color: #e5e7eb;
171
- transition: transform 0.18s ease, background 0.18s ease,
172
- border-color 0.18s ease, box-shadow 0.18s ease;
173
- }
174
-
175
- .stTabs [data-baseweb="tab"]:hover {
176
- transform: translateY(-1px);
177
- border-color: rgba(129, 140, 248, 0.9);
178
- box-shadow: 0 10px 22px rgba(15, 23, 42, 0.85);
179
- }
180
-
181
- .stTabs [data-baseweb="tab"][aria-selected="true"] {
182
- background: linear-gradient(135deg, #6366f1, #ec4899);
183
- color: #ffffff;
184
- border-color: transparent;
185
- box-shadow:
186
- 0 0 0 1px rgba(15, 23, 42, 0.9),
187
- 0 14px 30px rgba(15, 23, 42, 0.95);
188
- }
189
- </style>
190
- """,
191
- unsafe_allow_html=True,
192
- )
193
-
194
- # ==================== SESSION STATE ====================
195
# Seed per-session defaults once; keys that already exist are left untouched.
_SESSION_DEFAULTS = {"gallery": [], "export_dpi": 300}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
200
-
201
- # ==================== HELPERS ====================
202
def use_theme(context: str = "notebook", style: str = "whitegrid", palette: str = "deep") -> None:
    """Apply the chosen Seaborn theme and palette, then the app's Matplotlib defaults.

    Args:
        context: Seaborn plotting context passed to ``sns.set_theme``.
        style: Seaborn axes style passed to ``sns.set_theme``.
        palette: Palette name passed to ``sns.set_palette``.
    """
    sns.set_theme(context=context, style=style)
    sns.set_palette(palette)

    figure_rc = {
        "figure.figsize": (10, 6),
        "savefig.dpi": 300,
        "figure.dpi": 150,
        "figure.autolayout": True,
    }
    axes_rc = {
        "axes.spines.top": False,
        "axes.spines.right": False,
        "grid.alpha": 0.3,
        "grid.linestyle": "--",
    }
    # NOTE(review): these fixed sizes are written after set_theme, so they
    # presumably override any font scaling implied by `context` -- confirm.
    font_rc = {
        "font.size": 10,
        "axes.labelsize": 11,
        "axes.titlesize": 13,
        "legend.fontsize": 9,
    }
    plt.rcParams.update({**figure_rc, **axes_rc, **font_rc})
221
-
222
-
223
def apply_dark(fig: plt.Figure, dark: bool = False) -> None:
    """Recolor a finished figure for display on the app's dark background.

    Does nothing when ``dark`` is false. Otherwise the figure patch, every
    axes (background, ticks, spines, title, axis labels, tick labels) and
    every legend are switched to the dark palette. Figure-level text and
    figure-level legends are restyled as well, because they are not
    reachable through the individual axes.

    Args:
        fig: Figure to restyle in place.
        dark: Whether dark styling should be applied.
    """
    if not dark:
        return

    background = "#020617"
    foreground = "#e5e7eb"
    spine_color = "#4b5563"

    def _restyle_legend(legend) -> None:
        """Give one legend a dark frame and light title/entry text."""
        legend.get_frame().set_facecolor(background)
        legend.get_title().set_color(foreground)
        for entry in legend.get_texts():
            entry.set_color(foreground)

    fig.patch.set_facecolor(background)

    # Text and legends attached to the figure itself (e.g. a suptitle or a
    # shared legend) would otherwise keep their dark default colors.
    for fig_text in fig.texts:
        fig_text.set_color(foreground)
    for fig_legend in fig.legends:
        _restyle_legend(fig_legend)

    for ax in fig.get_axes():
        ax.set_facecolor(background)
        ax.tick_params(colors=foreground)
        for spine in ax.spines.values():
            spine.set_color(spine_color)
        for label in (ax.title, ax.xaxis.label, ax.yaxis.label):
            label.set_color(foreground)
        for tick_label in ax.get_xticklabels() + ax.get_yticklabels():
            tick_label.set_color(foreground)
        legend = ax.get_legend()
        if legend is not None:
            _restyle_legend(legend)
242
-
243
-
244
@st.cache_data
def load_builtin_data() -> dict:
    """Load the bundled Seaborn example datasets, keyed by display label.

    Returns:
        Mapping of label to DataFrame. Penguins has rows with missing values
        dropped; Diamonds is a fixed 1,000-row sample (``random_state=42``).
    """
    datasets = {}
    datasets["Tips"] = sns.load_dataset("tips")
    datasets["Penguins"] = sns.load_dataset("penguins").dropna()
    datasets["Flights"] = sns.load_dataset("flights")
    datasets["Iris"] = sns.load_dataset("iris")
    diamonds = sns.load_dataset("diamonds")
    datasets["Diamonds (1K sample)"] = diamonds.sample(1000, random_state=42)
    datasets["Titanic"] = sns.load_dataset("titanic")
    datasets["Car Crashes"] = sns.load_dataset("car_crashes")
    return datasets
255
-
256
-
257
def save_to_gallery(fig: plt.Figure, name: str, description: str) -> None:
    """Render ``fig`` to PNG bytes and append it to the session gallery.

    The export resolution comes from ``st.session_state["export_dpi"]``
    (300 when unset). The figure's own face color is used so dark figures
    keep their background.

    Args:
        fig: Figure to export.
        name: Title stored with the gallery entry.
        description: Free-text description stored with the entry.
    """
    png_buffer = io.BytesIO()
    export_dpi = st.session_state.get("export_dpi", 300)
    fig.savefig(
        png_buffer,
        format="png",
        dpi=export_dpi,
        bbox_inches="tight",
        facecolor=fig.get_facecolor(),
    )
    entry = {
        "name": name,
        "description": description,
        "image": png_buffer.getvalue(),
        "timestamp": datetime.now(),
    }
    st.session_state["gallery"].append(entry)
276
-
277
-
278
def show_code_example(code: str, description: str = "") -> None:
    """Show an optional tip line followed by a Python code snippet.

    Args:
        code: Source text rendered with ``st.code``.
        description: Tip text; when empty, no tip box is emitted.
    """
    if description:
        tip_html = f'<div class="tip-box"><strong>Tip:</strong> {description}</div>'
        st.markdown(tip_html, unsafe_allow_html=True)

    # The opening and closing code-box tags are sent as separate markdown
    # elements around the st.code call.
    box_open, box_close = '<div class="code-box">', "</div>"
    st.markdown(box_open, unsafe_allow_html=True)
    st.code(code, language="python")
    st.markdown(box_close, unsafe_allow_html=True)
287
-
288
- # ==================== HEADER ====================
289
# Page title and subtitle, each rendered as its own raw-HTML markdown element.
_HEADER_HTML = (
    '<h1 class="main-header">Seaborn & Matplotlib Visual Lab</h1>',
    '<p class="subtitle">Interactive environment to explore, compare, and export visualizations with Seaborn and Matplotlib.</p>',
)
for _header_fragment in _HEADER_HTML:
    st.markdown(_header_fragment, unsafe_allow_html=True)
297
-
298
- # ==================== SIDEBAR ====================
299
- with st.sidebar:
300
- st.markdown("### Data settings")
301
-
302
- # Built-in datasets only
303
- builtin = load_builtin_data()
304
- dataset_label = st.selectbox(
305
- "Built-in only",
306
- list(builtin.keys()),
307
- key="sb_dataset",
308
- )
309
- df = builtin[dataset_label]
310
-
311
- st.markdown("---")
312
-
313
- with st.expander("Visual theme", expanded=False):
314
- context = st.selectbox(
315
- "Seaborn context",
316
- ["notebook", "paper", "talk", "poster"],
317
- index=0,
318
- key="sb_context",
319
- )
320
- style = st.selectbox(
321
- "Seaborn style",
322
- ["whitegrid", "darkgrid", "white", "dark", "ticks"],
323
- index=0,
324
- key="sb_style",
325
- )
326
- palette = st.selectbox(
327
- "Color palette",
328
- ["deep", "muted", "bright", "pastel", "dark", "colorblind", "Set2", "husl"],
329
- index=0,
330
- key="sb_palette",
331
- )
332
- use_theme(context, style, palette)
333
-
334
- theme_mode = st.radio(
335
- "Figure mode",
336
- ["Light", "Dark"],
337
- index=1,
338
- horizontal=True,
339
- key="sb_theme_mode",
340
- )
341
- DARK = theme_mode == "Dark"
342
-
343
- st.markdown("---")
344
-
345
- st.markdown("### Export settings")
346
# Discrete DPI presets. The previous numeric slider used min=72 with step=50,
# whose grid is 72, 122, 172, ...; the 300 default was off that grid and could
# not be selected again once the handle had moved.
dpi = st.select_slider(
    "Image quality (DPI)",
    options=[72, 100, 150, 200, 300, 400, 500, 600],
    value=300,
    key="sb_dpi",
)
# Mirror the choice into session state, where save_to_gallery reads it.
st.session_state["export_dpi"] = dpi
355
-
356
- if st.session_state["gallery"]:
357
- st.success(f"{len(st.session_state['gallery'])} plots in gallery")
358
- if st.button("Clear gallery", key="sb_clear_gallery"):
359
- st.session_state["gallery"] = []
360
- st.rerun()
361
-
362
- # fallback
363
# Defensive default: fall back to the Tips dataset if nothing was selected.
if df is None:
    dataset_label = "Tips"
    df = builtin[dataset_label]

# Column groups used by every tab to populate its selectors.
_numeric_frame = df.select_dtypes(include=[np.number])
_categorical_frame = df.select_dtypes(include=["object", "category"])
numeric_cols_all = _numeric_frame.columns.tolist()
categorical_cols_all = _categorical_frame.columns.tolist()

# Mean of the per-column missing fractions, expressed as a percentage.
_missing_by_column = df.isna().mean()
missing_ratio = float(_missing_by_column.mean() * 100)
370
-
371
- # ==================== TOP METRICS ====================
372
- st.markdown(
373
- f"""
374
- <div class="metric-row">
375
- <div class="metric-card">
376
- <div class="metric-card-label">Dataset</div>
377
- <div class="metric-card-value">{dataset_label}</div>
378
- </div>
379
- <div class="metric-card">
380
- <div class="metric-card-label">Rows</div>
381
- <div class="metric-card-value">{len(df):,}</div>
382
- </div>
383
- <div class="metric-card">
384
- <div class="metric-card-label">Columns</div>
385
- <div class="metric-card-value">{len(df.columns):,}</div>
386
- </div>
387
- <div class="metric-card">
388
- <div class="metric-card-label">Numeric features</div>
389
- <div class="metric-card-value">{len(numeric_cols_all)}</div>
390
- </div>
391
- <div class="metric-card">
392
- <div class="metric-card-label">Categorical features</div>
393
- <div class="metric-card-value">{len(categorical_cols_all)}</div>
394
- </div>
395
- <div class="metric-card">
396
- <div class="metric-card-label">Missing ratio</div>
397
- <div class="metric-card-value">{missing_ratio:.1f}%</div>
398
- </div>
399
- </div>
400
- """,
401
- unsafe_allow_html=True,
402
- )
403
-
404
- # ==================== TABS ====================
405
# One tab per app section; the label order fixes the on-screen order.
_TAB_LABELS = [
    "Overview",
    "Seaborn builder",
    "Matplotlib builder",
    "Compare",
    "Gallery",
]
(
    tab_overview,
    tab_seaborn,
    tab_mpl,
    tab_compare,
    tab_gallery,
) = st.tabs(_TAB_LABELS)
414
-
415
- # ==================== TAB: OVERVIEW ====================
416
- with tab_overview:
417
- st.markdown("## Overview")
418
- st.markdown(
419
- '<div class="info-box"><strong>Goal:</strong> Quick health check of the current dataset and a first look at its distributions.</div>',
420
- unsafe_allow_html=True,
421
- )
422
-
423
- col_left, col_right = st.columns([2, 1])
424
-
425
- with col_left:
426
- st.markdown("### Sample")
427
- st.dataframe(df.head(10), use_container_width=True)
428
-
429
- if numeric_cols_all:
430
- st.markdown("### Quick distribution")
431
- dist_col = st.selectbox(
432
- "Numeric column",
433
- numeric_cols_all,
434
- key="ov_dist_col",
435
- )
436
# Histogram with KDE overlay for the selected numeric column.
fig, ax = plt.subplots(figsize=(10, 4))
sns.histplot(df, x=dist_col, bins=30, kde=True, ax=ax)
ax.set_title(f"{dist_col} distribution", fontsize=13, fontweight="bold")
apply_dark(fig, DARK)
st.pyplot(fig)
# The script reruns on every interaction; close the figure once rendered so
# pyplot does not keep an ever-growing set of open figures in memory.
plt.close(fig)
441
-
442
- with col_right:
443
- st.markdown("### Types & missing")
444
- schema_data = {
445
- "column": df.columns,
446
- "dtype": df.dtypes.astype(str),
447
- "missing_%": (df.isna().mean() * 100).round(1),
448
- }
449
- schema_df = pd.DataFrame(schema_data)
450
- st.dataframe(schema_df, height=260, use_container_width=True)
451
-
452
- if len(numeric_cols_all) >= 2:
453
- st.markdown("### Small correlation view")
454
# Compact correlation heatmap over (at most) the first four numeric columns.
cols_small = numeric_cols_all[:4]
corr = df[cols_small].corr()
fig2, ax2 = plt.subplots(figsize=(4, 4))
sns.heatmap(
    corr,
    annot=True,
    fmt=".2f",
    cmap="vlag",
    center=0,
    square=True,
    cbar=False,
    ax=ax2,
)
ax2.set_title("Correlation (subset)", fontsize=11, fontweight="bold")
apply_dark(fig2, DARK)
st.pyplot(fig2)
# Close after rendering so reruns do not accumulate open pyplot figures.
plt.close(fig2)
470
-
471
- # ==================== TAB: SEABORN BUILDER ====================
472
- with tab_seaborn:
473
- st.markdown("## Seaborn builder")
474
- st.markdown(
475
- '<div class="info-box"><strong>Goal:</strong> Build Seaborn plots by selecting columns and options. The code snippet updates automatically.</div>',
476
- unsafe_allow_html=True,
477
- )
478
-
479
- if df.empty:
480
- st.warning("No data loaded.")
481
- else:
482
- col_plot, col_ctrl = st.columns([7, 3])
483
-
484
- with col_ctrl:
485
- # Pill with header INSIDE
486
- st.markdown(
487
- """
488
- <div class="control-panel">
489
- <div class="control-panel-header">PLOT SETUP</div>
490
- </div>
491
- """,
492
- unsafe_allow_html=True,
493
- )
494
-
495
- family = st.selectbox(
496
- "Plot family",
497
- [
498
- "Distribution",
499
- "Relationship",
500
- "Category",
501
- "Matrix / Heatmap",
502
- "Multi-variable",
503
- ],
504
- key="sb_family",
505
- )
506
-
507
- code_str = ""
508
- description = ""
509
- fig_seaborn = None
510
-
511
- if family == "Distribution":
512
- kind = st.selectbox(
513
- "Plot type",
514
- [
515
- "Histogram",
516
- "KDE",
517
- "Histogram + KDE",
518
- "Box",
519
- "Violin",
520
- "ECDF",
521
- ],
522
- key="sb_dist_kind",
523
- )
524
- if not numeric_cols_all:
525
- num_col = None
526
- st.error("No numeric columns in this dataset.")
527
- else:
528
- num_col = st.selectbox(
529
- "Numeric column",
530
- numeric_cols_all,
531
- key="sb_dist_num",
532
- )
533
-
534
- hue_col = None
535
- if categorical_cols_all and kind in ["Histogram", "KDE", "Histogram + KDE", "ECDF"]:
536
- use_hue_dist = st.checkbox(
537
- "Color by category",
538
- value=False,
539
- key="sb_dist_use_hue",
540
- )
541
- if use_hue_dist:
542
- hue_col = st.selectbox(
543
- "Hue",
544
- categorical_cols_all,
545
- key="sb_dist_hue",
546
- )
547
- bins = st.slider(
548
- "Bins (for histogram)",
549
- 5,
550
- 80,
551
- 30,
552
- key="sb_dist_bins",
553
- )
554
- log_scale = st.checkbox(
555
- "Log scale on x",
556
- value=False,
557
- key="sb_dist_log",
558
- )
559
-
560
- elif family == "Relationship":
561
- rel_kind = st.selectbox(
562
- "Plot type",
563
- [
564
- "Scatter",
565
- "Regression",
566
- "Line",
567
- ],
568
- key="sb_rel_kind",
569
- )
570
- if len(numeric_cols_all) < 2:
571
- x_rel = y_rel = None
572
- st.error("Need at least two numeric columns.")
573
- else:
574
- x_rel = st.selectbox(
575
- "X variable",
576
- numeric_cols_all,
577
- key="sb_rel_x",
578
- )
579
- y_rel = st.selectbox(
580
- "Y variable",
581
- [c for c in numeric_cols_all if c != x_rel],
582
- key="sb_rel_y",
583
- )
584
- hue_rel = None
585
- if categorical_cols_all and rel_kind in ["Scatter", "Line"]:
586
- use_hue_rel = st.checkbox(
587
- "Color by category",
588
- value=False,
589
- key="sb_rel_use_hue",
590
- )
591
- if use_hue_rel:
592
- hue_rel = st.selectbox(
593
- "Hue",
594
- categorical_cols_all,
595
- key="sb_rel_hue",
596
- )
597
- alpha_rel = st.slider(
598
- "Point transparency",
599
- 0.1,
600
- 1.0,
601
- 0.7,
602
- 0.05,
603
- key="sb_rel_alpha",
604
- )
605
-
606
- elif family == "Category":
607
- if not categorical_cols_all:
608
- st.error("No categorical columns in this dataset.")
609
- cat_var = num_cat = None
610
- else:
611
- cat_var = st.selectbox(
612
- "Category",
613
- categorical_cols_all,
614
- key="sb_cat_var",
615
- )
616
- cat_kind = st.selectbox(
617
- "Plot type",
618
- [
619
- "Count",
620
- "Bar (mean)",
621
- "Box",
622
- "Violin",
623
- ],
624
- key="sb_cat_kind",
625
- )
626
- num_cat = None
627
- if cat_kind in ["Bar (mean)", "Box", "Violin"]:
628
- if not numeric_cols_all:
629
- st.error("No numeric columns for this plot type.")
630
- else:
631
- num_cat = st.selectbox(
632
- "Numeric column",
633
- numeric_cols_all,
634
- key="sb_cat_num",
635
- )
636
-
637
if cat_var is not None:
    n_categories = int(df[cat_var].nunique())
    if n_categories > 3:
        order_top = st.slider(
            "Top categories",
            3,
            min(15, n_categories),
            min(8, n_categories),
            key="sb_cat_top",
        )
    else:
        # With three or fewer categories the slider bounds would collapse or
        # invert (min 3 vs. max n_categories), so skip the widget and simply
        # show every category.
        order_top = n_categories
645
-
646
- elif family == "Matrix / Heatmap":
647
- if len(numeric_cols_all) < 2:
648
- st.error("Need at least two numeric columns.")
649
- selected_hm = []
650
- else:
651
- selected_hm = st.multiselect(
652
- "Numeric variables",
653
- numeric_cols_all,
654
- default=numeric_cols_all[: min(6, len(numeric_cols_all))],
655
- key="sb_hm_vars",
656
- )
657
- annot_hm = st.checkbox(
658
- "Show values",
659
- value=True,
660
- key="sb_hm_annot",
661
- )
662
- center_zero = st.checkbox(
663
- "Center at zero",
664
- value=True,
665
- key="sb_hm_center",
666
- )
667
-
668
- else: # Multi-variable
669
- if len(numeric_cols_all) < 2:
670
- st.error("Need at least two numeric columns.")
671
- multi_vars = []
672
- else:
673
- multi_vars = st.multiselect(
674
- "Numeric variables",
675
- numeric_cols_all,
676
- default=numeric_cols_all[: min(4, len(numeric_cols_all))],
677
- key="sb_multi_vars",
678
- )
679
n_rows = len(df)
if n_rows > 100:
    sample_n = st.slider(
        "Sample rows",
        100,
        min(n_rows, 1000),
        min(400, n_rows),
        key="sb_multi_sample",
    )
else:
    # Datasets with 100 rows or fewer cannot satisfy the slider's fixed
    # minimum of 100 (max would be <= min); use every row instead.
    sample_n = n_rows
686
- hue_multi = None
687
- if categorical_cols_all:
688
- use_hue_multi = st.checkbox(
689
- "Color by category",
690
- value=False,
691
- key="sb_multi_use_hue",
692
- )
693
- if use_hue_multi:
694
- hue_multi = st.selectbox(
695
- "Hue",
696
- categorical_cols_all,
697
- key="sb_multi_hue",
698
- )
699
-
700
- with col_plot:
701
- st.markdown('<div class="plot-container">', unsafe_allow_html=True)
702
-
703
- # ------- Distribution -------
704
- if family == "Distribution" and numeric_cols_all and num_col is not None:
705
- fig_seaborn, ax = plt.subplots(figsize=(10, 5))
706
-
707
- if kind == "Histogram":
708
- sns.histplot(
709
- data=df,
710
- x=num_col,
711
- bins=bins,
712
- hue=hue_col,
713
- kde=False,
714
- ax=ax,
715
- log_scale=log_scale,
716
- )
717
- elif kind == "KDE":
718
- sns.kdeplot(
719
- data=df,
720
- x=num_col,
721
- hue=hue_col,
722
- fill=True,
723
- ax=ax,
724
- log_scale=log_scale,
725
- )
726
- elif kind == "Histogram + KDE":
727
- sns.histplot(
728
- data=df,
729
- x=num_col,
730
- bins=bins,
731
- hue=hue_col,
732
- kde=True,
733
- ax=ax,
734
- log_scale=log_scale,
735
- )
736
- elif kind == "Box":
737
- sns.boxplot(
738
- data=df,
739
- x=num_col,
740
- ax=ax,
741
- )
742
- elif kind == "Violin":
743
- sns.violinplot(
744
- data=df,
745
- x=num_col,
746
- ax=ax,
747
- )
748
- else: # ECDF
749
# Honor the "Log scale on x" checkbox here too, matching the histogram/KDE
# branches and the generated code preview for ECDF.
sns.ecdfplot(
    data=df,
    x=num_col,
    hue=hue_col,
    log_scale=log_scale,
    ax=ax,
)
755
- ax.yaxis.set_major_formatter(
756
- FuncFormatter(lambda y, _: f"{y:.0%}")
757
- )
758
-
759
- ax.set_title(f"{kind} for {num_col}", fontsize=13, fontweight="bold")
760
- apply_dark(fig_seaborn, DARK)
761
- st.pyplot(fig_seaborn)
762
-
763
# Build the pieces of the generated snippet for the distribution family.
hue_part = f', hue="{hue_col}"' if hue_col else ""

# Seaborn function name for each plot type; anything else is the ECDF case.
fn = {
    "Histogram": "histplot",
    "Histogram + KDE": "histplot",
    "KDE": "kdeplot",
    "Box": "boxplot",
    "Violin": "violinplot",
}.get(kind, "ecdfplot")

# Collect keyword arguments additively so that no option overwrites another
# (previously "Histogram + KDE" dropped log_scale=True from the preview).
extra_parts = []
if kind in ("Histogram", "Histogram + KDE"):
    extra_parts.append(f"bins={bins}")
if kind == "Histogram + KDE":
    extra_parts.append("kde=True")
if log_scale and kind in ("Histogram", "Histogram + KDE", "KDE", "ECDF"):
    extra_parts.append("log_scale=True")
extra_kwargs = "".join(f", {part}" for part in extra_parts)
784
-
785
- code_str = f"""fig, ax = plt.subplots(figsize=(10, 5))
786
- sns.{fn}(data=df, x="{num_col}"{hue_part}{extra_kwargs}, ax=ax)
787
- ax.set_title("{kind} for {num_col}")
788
- plt.show()"""
789
- description = "Distribution pattern: shape, spread, and tails of a single numeric column."
790
-
791
- # ------- Relationship -------
792
- elif family == "Relationship" and len(numeric_cols_all) >= 2 and x_rel is not None:
793
- fig_seaborn, ax = plt.subplots(figsize=(10, 5))
794
-
795
- if rel_kind == "Scatter":
796
- sns.scatterplot(
797
- data=df,
798
- x=x_rel,
799
- y=y_rel,
800
- hue=hue_rel,
801
- alpha=alpha_rel,
802
- s=70,
803
- ax=ax,
804
- )
805
- elif rel_kind == "Line":
806
- sns.lineplot(
807
- data=df,
808
- x=x_rel,
809
- y=y_rel,
810
- hue=hue_rel,
811
- ax=ax,
812
- )
813
- else: # Regression
814
- sns.regplot(
815
- data=df,
816
- x=x_rel,
817
- y=y_rel,
818
- ax=ax,
819
- scatter_kws={"alpha": alpha_rel, "s": 60},
820
- line_kws={"linewidth": 2},
821
- )
822
-
823
- ax.set_title(
824
- f"{rel_kind}: {y_rel} vs {x_rel}",
825
- fontsize=13,
826
- fontweight="bold",
827
- )
828
- apply_dark(fig_seaborn, DARK)
829
- st.pyplot(fig_seaborn)
830
-
831
# Generated snippet for the relationship family. The transparency shown in
# the preview follows the "Point transparency" slider instead of a fixed 0.7,
# so the snippet reproduces the plot that is actually displayed.
alpha_repr = f"{alpha_rel:g}"
hue_part = f', hue="{hue_rel}"' if hue_rel else ""

if rel_kind == "Scatter":
    code_str = f"""fig, ax = plt.subplots(figsize=(10, 5))
sns.scatterplot(
    data=df,
    x="{x_rel}",
    y="{y_rel}"{hue_part},
    alpha={alpha_repr},
    s=70,
    ax=ax,
)
ax.set_title("Scatter: {y_rel} vs {x_rel}")
plt.show()"""
elif rel_kind == "Line":
    code_str = f"""fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(
    data=df,
    x="{x_rel}",
    y="{y_rel}"{hue_part},
    ax=ax,
)
ax.set_title("Line: {y_rel} vs {x_rel}")
plt.show()"""
else:
    code_str = f"""fig, ax = plt.subplots(figsize=(10, 5))
sns.regplot(
    data=df,
    x="{x_rel}",
    y="{y_rel}",
    scatter_kws={{"alpha": {alpha_repr}, "s": 60}},
    line_kws={{"linewidth": 2}},
    ax=ax,
)
ax.set_title("Regression: {y_rel} vs {x_rel}")
plt.show()"""
867
- description = "Relationship pattern: how two numeric variables move together."
868
-
869
- # ------- Category -------
870
- elif family == "Category" and categorical_cols_all and cat_var is not None:
871
- fig_seaborn, ax = plt.subplots(figsize=(10, 5))
872
-
873
- df_tmp = df.copy()
874
- top_cats = (
875
- df_tmp[cat_var]
876
- .value_counts()
877
- .head(order_top)
878
- .index
879
- )
880
- df_tmp = df_tmp[df_tmp[cat_var].isin(top_cats)]
881
-
882
- if cat_kind == "Count":
883
- sns.countplot(
884
- data=df_tmp,
885
- y=cat_var,
886
- order=top_cats,
887
- ax=ax,
888
- )
889
- for container in ax.containers:
890
- ax.bar_label(container, padding=3)
891
- elif cat_kind == "Bar (mean)":
892
# Mean bar per category with the library's default 95% confidence interval.
# The explicit `ci=95` argument was dropped: it only restated the default and
# the `ci` parameter is deprecated in recent seaborn releases.
sns.barplot(
    data=df_tmp,
    y=cat_var,
    x=num_cat,
    order=top_cats,
    ax=ax,
)
900
- elif cat_kind == "Box":
901
- sns.boxplot(
902
- data=df_tmp,
903
- y=cat_var,
904
- x=num_cat,
905
- order=top_cats,
906
- ax=ax,
907
- )
908
- else: # Violin
909
- sns.violinplot(
910
- data=df_tmp,
911
- y=cat_var,
912
- x=num_cat,
913
- order=top_cats,
914
- ax=ax,
915
- )
916
-
917
- ax.set_title(
918
- f"{cat_kind} for {cat_var}",
919
- fontsize=13,
920
- fontweight="bold",
921
- )
922
- apply_dark(fig_seaborn, DARK)
923
- st.pyplot(fig_seaborn)
924
-
925
- if cat_kind == "Count":
926
- code_str = f"""fig, ax = plt.subplots(figsize=(10, 5))
927
- sns.countplot(
928
- data=df,
929
- y="{cat_var}",
930
- ax=ax,
931
- )
932
- ax.set_title("Count for {cat_var}")
933
- plt.show()"""
934
- elif cat_kind == "Bar (mean)":
935
- code_str = f"""fig, ax = plt.subplots(figsize=(10, 5))
936
- sns.barplot(
937
- data=df,
938
- y="{cat_var}",
939
- x="{num_cat}",
940
- ci=95,
941
- ax=ax,
942
- )
943
- ax.set_title("Mean {num_cat} by {cat_var}")
944
- plt.show()"""
945
- elif cat_kind == "Box":
946
- code_str = f"""fig, ax = plt.subplots(figsize=(10, 5))
947
- sns.boxplot(
948
- data=df,
949
- y="{cat_var}",
950
- x="{num_cat}",
951
- ax=ax,
952
- )
953
- ax.set_title("Box: {num_cat} by {cat_var}")
954
- plt.show()"""
955
- else:
956
- code_str = f"""fig, ax = plt.subplots(figsize=(10, 5))
957
- sns.violinplot(
958
- data=df,
959
- y="{cat_var}",
960
- x="{num_cat}",
961
- ax=ax,
962
- )
963
- ax.set_title("Violin: {num_cat} by {cat_var}")
964
- plt.show()"""
965
- description = "Category pattern: compare distributions or means across groups."
966
-
967
- # ------- Matrix / Heatmap -------
968
- elif family == "Matrix / Heatmap" and selected_hm:
969
- corr = df[selected_hm].corr()
970
- fig_seaborn, ax = plt.subplots(figsize=(7, 6))
971
- sns.heatmap(
972
- corr,
973
- annot=annot_hm,
974
- fmt=".2f",
975
- cmap="vlag",
976
- center=0 if center_zero else None,
977
- square=True,
978
- linewidths=1,
979
- cbar_kws={"shrink": 0.8},
980
- ax=ax,
981
- )
982
- ax.set_title("Correlation heatmap", fontsize=13, fontweight="bold")
983
- apply_dark(fig_seaborn, DARK)
984
- st.pyplot(fig_seaborn)
985
-
986
- center_value = "0" if center_zero else "None"
987
- code_str = f"""corr = df[{selected_hm}].corr()
988
- fig, ax = plt.subplots(figsize=(7, 6))
989
- sns.heatmap(
990
- corr,
991
- annot={annot_hm},
992
- fmt=".2f",
993
- cmap="vlag",
994
- center={center_value},
995
- square=True,
996
- linewidths=1,
997
- cbar_kws={{"shrink": 0.8}},
998
- ax=ax,
999
- )
1000
- ax.set_title("Correlation heatmap")
1001
- plt.show()"""
1002
- description = "Matrix pattern: scan many pairwise relationships at once."
1003
-
1004
- # ------- Multi-variable (pairplot) -------
1005
- elif family == "Multi-variable" and multi_vars:
1006
# Drop incomplete rows first, then cap the sample size by what is actually
# left; capping by len(df) could request more rows than remain after dropna()
# and make DataFrame.sample raise.
cols_to_use = multi_vars + ([hue_multi] if hue_multi else [])
df_complete = df[cols_to_use].dropna()
sample_size = min(sample_n, len(df_complete))
df_sample = df_complete.sample(sample_size, random_state=42)
1009
-
1010
- with st.spinner("Building pairplot..."):
1011
- g = sns.pairplot(
1012
- df_sample,
1013
- vars=multi_vars,
1014
- hue=hue_multi,
1015
- corner=True,
1016
- diag_kind="kde",
1017
- plot_kws={"alpha": 0.6},
1018
- diag_kws={"alpha": 0.7},
1019
- )
1020
- g.fig.suptitle("Pairplot", y=1.01, fontweight="bold")
1021
- fig_seaborn = g.fig
1022
- apply_dark(fig_seaborn, DARK)
1023
- st.pyplot(fig_seaborn)
1024
-
1025
- code_str = f"""sample = df[{multi_vars + ([hue_multi] if hue_multi else [])}].dropna().sample({sample_n}, random_state=42)
1026
- g = sns.pairplot(
1027
- sample,
1028
- vars={multi_vars},
1029
- hue={repr(hue_multi)},
1030
- corner=True,
1031
- diag_kind="kde",
1032
- plot_kws={{"alpha": 0.6}},
1033
- )
1034
- g.fig.suptitle("Pairplot", y=1.01)
1035
- plt.show()"""
1036
- description = "Multi-variable view: every pair of variables in one grid."
1037
-
1038
- st.markdown("</div>", unsafe_allow_html=True)
1039
-
1040
- st.markdown("### Code preview")
1041
- if code_str:
1042
- show_code_example(code_str, description)
1043
-
1044
- if "fig_seaborn" in locals() and fig_seaborn is not None:
1045
- if st.button("Save last Seaborn plot to gallery", key="sb_save_gallery"):
1046
- save_to_gallery(fig_seaborn, f"Seaborn: {family}", "Seaborn builder plot")
1047
- st.success("Saved to gallery.")
1048
-
1049
- # ==================== TAB: MATPLOTLIB BUILDER ====================
1050
- with tab_mpl:
1051
- st.markdown("## Matplotlib builder")
1052
- st.markdown(
1053
- '<div class="info-box"><strong>Goal:</strong> Build Matplotlib plots with fine-grained control on axes and layouts.</div>',
1054
- unsafe_allow_html=True,
1055
- )
1056
-
1057
- if df.empty:
1058
- st.warning("No data loaded.")
1059
- else:
1060
- col_plot, col_ctrl = st.columns([7, 3])
1061
-
1062
- with col_ctrl:
1063
- st.markdown(
1064
- """
1065
- <div class="control-panel">
1066
- <div class="control-panel-header">PLOT SETUP</div>
1067
- </div>
1068
- """,
1069
- unsafe_allow_html=True,
1070
- )
1071
-
1072
- mpl_type = st.selectbox(
1073
- "Plot type",
1074
- [
1075
- "Line",
1076
- "Scatter",
1077
- "Bar",
1078
- "Histogram",
1079
- "Box",
1080
- "Subplots overview",
1081
- ],
1082
- key="mpl_type",
1083
- )
1084
-
1085
- code_mpl = ""
1086
- fig_mpl = None
1087
-
1088
- if mpl_type == "Line":
1089
- x_line = st.selectbox(
1090
- "X (numeric or index)",
1091
- ["index"] + numeric_cols_all,
1092
- key="mpl_line_x",
1093
- )
1094
- y_line = st.selectbox(
1095
- "Y (numeric)",
1096
- numeric_cols_all,
1097
- key="mpl_line_y",
1098
- )
1099
- marker = st.selectbox(
1100
- "Marker",
1101
- ["o", "s", "None"],
1102
- index=0,
1103
- key="mpl_line_marker",
1104
- )
1105
- use_grid = st.checkbox(
1106
- "Show grid",
1107
- value=True,
1108
- key="mpl_line_grid",
1109
- )
1110
-
1111
- elif mpl_type == "Scatter":
1112
- if len(numeric_cols_all) < 2:
1113
- st.error("Need at least two numeric columns for scatter.")
1114
- x_sc = st.selectbox(
1115
- "X (numeric)",
1116
- numeric_cols_all,
1117
- key="mpl_sc_x",
1118
- )
1119
- y_sc = st.selectbox(
1120
- "Y (numeric)",
1121
- [c for c in numeric_cols_all if c != x_sc],
1122
- key="mpl_sc_y",
1123
- )
1124
- color_by = None
1125
- if categorical_cols_all:
1126
- use_color = st.checkbox(
1127
- "Color by category",
1128
- value=False,
1129
- key="mpl_sc_use_color",
1130
- )
1131
- if use_color:
1132
- color_by = st.selectbox(
1133
- "Category",
1134
- categorical_cols_all,
1135
- key="mpl_sc_color_by",
1136
- )
1137
- alpha_sc = st.slider(
1138
- "Point transparency",
1139
- 0.1,
1140
- 1.0,
1141
- 0.7,
1142
- 0.05,
1143
- key="mpl_sc_alpha",
1144
- )
1145
- size_sc = st.slider(
1146
- "Point size",
1147
- 20,
1148
- 200,
1149
- 70,
1150
- key="mpl_sc_size",
1151
- )
1152
-
1153
- elif mpl_type == "Bar":
1154
- cat_for_bar = None
1155
- if categorical_cols_all:
1156
- cat_for_bar = st.selectbox(
1157
- "Category",
1158
- categorical_cols_all,
1159
- key="mpl_bar_cat",
1160
- )
1161
- else:
1162
- st.error("Need a categorical column for bar plot.")
1163
- num_for_bar = st.selectbox(
1164
- "Value",
1165
- numeric_cols_all,
1166
- key="mpl_bar_num",
1167
- )
1168
- agg_bar = st.selectbox(
1169
- "Aggregation",
1170
- ["mean", "sum", "count"],
1171
- key="mpl_bar_agg",
1172
- )
1173
- horiz = st.checkbox(
1174
- "Horizontal bars",
1175
- value=True,
1176
- key="mpl_bar_horiz",
1177
- )
1178
-
1179
- elif mpl_type == "Histogram":
1180
- num_hist = st.selectbox(
1181
- "Numeric column",
1182
- numeric_cols_all,
1183
- key="mpl_hist_num",
1184
- )
1185
- bins_hist = st.slider(
1186
- "Bins",
1187
- 5,
1188
- 80,
1189
- 30,
1190
- key="mpl_hist_bins",
1191
- )
1192
- density_hist = st.checkbox(
1193
- "Show density instead of counts",
1194
- value=False,
1195
- key="mpl_hist_density",
1196
- )
1197
-
1198
- elif mpl_type == "Box":
1199
- nums_box = st.multiselect(
1200
- "Numeric columns",
1201
- numeric_cols_all,
1202
- default=numeric_cols_all[: min(4, len(numeric_cols_all))],
1203
- key="mpl_box_nums",
1204
- )
1205
-
1206
- else: # Subplots overview
1207
- nums_over = st.multiselect(
1208
- "Numeric columns",
1209
- numeric_cols_all,
1210
- default=numeric_cols_all[: min(3, len(numeric_cols_all))],
1211
- key="mpl_over_nums",
1212
- )
1213
- use_kde = st.checkbox(
1214
- "Overlay KDE on histograms",
1215
- value=True,
1216
- key="mpl_over_kde",
1217
- )
1218
-
1219
with col_plot:
    # Render the chart selected in the control column and build ``code_mpl``,
    # a copy-pasteable Matplotlib snippet that mirrors what was drawn.
    #
    # NOTE(review): ``fig_mpl`` and ``code_mpl`` are assumed to be initialised
    # (to None / "") before this section, because the code that follows tests
    # them even when a branch below only reports an error -- confirm.
    #
    # Column names are interpolated into the generated code with ``!r`` so that
    # names containing quotes or backslashes still yield valid Python.
    st.markdown('<div class="plot-container">', unsafe_allow_html=True)

    if mpl_type == "Line":
        if not numeric_cols_all:
            st.error("No numeric columns for line plot.")
        else:
            if x_line == "index":
                x_vals = np.arange(len(df))
                x_label = "Index"
                x_expr = "np.arange(len(df))"
            else:
                x_vals = df[x_line].values
                x_label = x_line
                x_expr = f"df[{x_line!r}]"
            y_vals = df[y_line].values
            # The widget offers the literal string "None" for "no marker".
            line_marker = None if marker == "None" else marker
            line_title = f"Line: {y_line} over {x_label}"

            fig_mpl, ax = plt.subplots(figsize=(10, 5))
            ax.plot(x_vals, y_vals, marker=line_marker, lw=2)
            ax.set_title(line_title, fontsize=13, fontweight="bold")
            ax.set_xlabel(x_label)
            ax.set_ylabel(y_line)
            if use_grid:
                ax.grid(alpha=0.3)
            apply_dark(fig_mpl, DARK)
            st.pyplot(fig_mpl)

            code_lines = [
                "fig, ax = plt.subplots(figsize=(10, 5))",
                "ax.plot(",
                f"    {x_expr},",
                f"    df[{y_line!r}],",
                f"    marker={line_marker!r},",
                "    lw=2,",
                ")",
                f"ax.set_title({line_title!r})",
                f"ax.set_xlabel({x_label!r})",
                f"ax.set_ylabel({y_line!r})",
            ]
            # Only advertise the grid call when the grid was actually drawn.
            if use_grid:
                code_lines.append("ax.grid(alpha=0.3)")
            code_lines.append("plt.show()")
            code_mpl = "\n".join(code_lines)

    elif mpl_type == "Scatter":
        if len(numeric_cols_all) < 2:
            st.error("Need at least two numeric columns for scatter plot.")
        else:
            scatter_title = f"Scatter: {y_sc} vs {x_sc}"

            fig_mpl, ax = plt.subplots(figsize=(10, 5))
            if color_by:
                cmap = plt.get_cmap("tab10")
                for idx, val in enumerate(df[color_by].dropna().unique()):
                    mask = df[color_by] == val
                    ax.scatter(
                        df.loc[mask, x_sc],
                        df.loc[mask, y_sc],
                        alpha=alpha_sc,
                        s=size_sc,
                        label=str(val),
                        # tab10 has ten colours; wrap around for more groups.
                        color=cmap(idx % 10),
                    )
                ax.legend(title=color_by)
            else:
                ax.scatter(
                    df[x_sc],
                    df[y_sc],
                    alpha=alpha_sc,
                    s=size_sc,
                )
            ax.set_title(scatter_title, fontsize=13, fontweight="bold")
            ax.set_xlabel(x_sc)
            ax.set_ylabel(y_sc)
            ax.grid(alpha=0.3)
            apply_dark(fig_mpl, DARK)
            st.pyplot(fig_mpl)

            code_lines = ["fig, ax = plt.subplots(figsize=(10, 5))"]
            if color_by:
                # Mirror the per-category loop so the snippet matches the plot.
                code_lines += [
                    'cmap = plt.get_cmap("tab10")',
                    f"for idx, val in enumerate(df[{color_by!r}].dropna().unique()):",
                    f"    mask = df[{color_by!r}] == val",
                    "    ax.scatter(",
                    f"        df.loc[mask, {x_sc!r}],",
                    f"        df.loc[mask, {y_sc!r}],",
                    f"        alpha={alpha_sc},",
                    f"        s={size_sc},",
                    "        label=str(val),",
                    "        color=cmap(idx % 10),",
                    "    )",
                    f"ax.legend(title={color_by!r})",
                ]
            else:
                code_lines += [
                    "ax.scatter(",
                    f"    df[{x_sc!r}],",
                    f"    df[{y_sc!r}],",
                    f"    alpha={alpha_sc},",
                    f"    s={size_sc},",
                    ")",
                ]
            code_lines += [
                f"ax.set_title({scatter_title!r})",
                f"ax.set_xlabel({x_sc!r})",
                f"ax.set_ylabel({y_sc!r})",
                "ax.grid(alpha=0.3)",
                "plt.show()",
            ]
            code_mpl = "\n".join(code_lines)

    elif mpl_type == "Bar":
        if cat_for_bar is None:
            st.error("Select a categorical column for the bar plot.")
        elif num_for_bar is None:
            # A selectbox over an empty option list yields None; without this
            # guard the groupby below would fail with a KeyError.
            st.error("Need a numeric column for bar plot.")
        else:
            grouped = getattr(df.groupby(cat_for_bar)[num_for_bar], agg_bar)()
            grouped = grouped.sort_values(ascending=True)
            bar_title = f"{agg_bar} of {num_for_bar} by {cat_for_bar}"
            grid_axis = "x" if horiz else "y"

            fig_mpl, ax = plt.subplots(figsize=(9, 5))
            if horiz:
                ax.barh(grouped.index, grouped.values)
                ax.set_xlabel(num_for_bar)
                ax.set_ylabel(cat_for_bar)
            else:
                ax.bar(grouped.index, grouped.values)
                ax.set_ylabel(num_for_bar)
                ax.set_xlabel(cat_for_bar)
                plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
            ax.set_title(bar_title, fontsize=13, fontweight="bold")
            ax.grid(axis=grid_axis, alpha=0.3)
            apply_dark(fig_mpl, DARK)
            st.pyplot(fig_mpl)

            code_lines = [
                f"grouped = df.groupby({cat_for_bar!r})[{num_for_bar!r}].{agg_bar}().sort_values()",
                "fig, ax = plt.subplots(figsize=(9, 5))",
            ]
            if horiz:
                code_lines += [
                    "ax.barh(grouped.index, grouped.values)",
                    f"ax.set_xlabel({num_for_bar!r})",
                    f"ax.set_ylabel({cat_for_bar!r})",
                ]
            else:
                code_lines += [
                    "ax.bar(grouped.index, grouped.values)",
                    f"ax.set_ylabel({num_for_bar!r})",
                    f"ax.set_xlabel({cat_for_bar!r})",
                    'plt.setp(ax.get_xticklabels(), rotation=45, ha="right")',
                ]
            code_lines += [
                f"ax.set_title({bar_title!r})",
                f"ax.grid(axis={grid_axis!r}, alpha=0.3)",
                "plt.show()",
            ]
            code_mpl = "\n".join(code_lines)

    elif mpl_type == "Histogram":
        if num_hist is None:
            # Same empty-selectbox situation as in the Bar branch.
            st.error("No numeric columns for histogram.")
        else:
            hist_title = f"Histogram of {num_hist}"
            hist_ylabel = "Density" if density_hist else "Count"

            fig_mpl, ax = plt.subplots(figsize=(9, 5))
            ax.hist(
                df[num_hist].dropna().values,
                bins=bins_hist,
                density=density_hist,
                alpha=0.85,
            )
            ax.set_title(hist_title, fontsize=13, fontweight="bold")
            ax.set_xlabel(num_hist)
            ax.set_ylabel(hist_ylabel)
            ax.grid(alpha=0.3)
            apply_dark(fig_mpl, DARK)
            st.pyplot(fig_mpl)

            code_mpl = "\n".join(
                [
                    "fig, ax = plt.subplots(figsize=(9, 5))",
                    "ax.hist(",
                    f"    df[{num_hist!r}].dropna().values,",
                    f"    bins={bins_hist},",
                    f"    density={density_hist},",
                    "    alpha=0.85,",
                    ")",
                    f"ax.set_title({hist_title!r})",
                    f"ax.set_xlabel({num_hist!r})",
                    f"ax.set_ylabel({hist_ylabel!r})",
                    "ax.grid(alpha=0.3)",
                    "plt.show()",
                ]
            )

    elif mpl_type == "Box":
        if not nums_box:
            st.warning("Select at least one numeric column.")
        else:
            box_positions = list(range(1, len(nums_box) + 1))

            fig_mpl, ax = plt.subplots(figsize=(10, 5))
            ax.boxplot([df[c].dropna().values for c in nums_box])
            # Label the ticks explicitly instead of passing ``labels=``, which
            # Matplotlib 3.9 deprecated in favour of ``tick_labels``; this form
            # works on both older and newer releases.
            ax.set_xticks(box_positions)
            ax.set_xticklabels(nums_box)
            ax.set_title("Box plots", fontsize=13, fontweight="bold")
            ax.grid(alpha=0.3)
            apply_dark(fig_mpl, DARK)
            st.pyplot(fig_mpl)

            box_data_expr = ", ".join(f"df[{c!r}].dropna().values" for c in nums_box)
            code_mpl = "\n".join(
                [
                    "fig, ax = plt.subplots(figsize=(10, 5))",
                    f"ax.boxplot([{box_data_expr}])",
                    f"ax.set_xticks({box_positions!r})",
                    f"ax.set_xticklabels({list(nums_box)!r})",
                    'ax.set_title("Box plots")',
                    "ax.grid(alpha=0.3)",
                    "plt.show()",
                ]
            )

    else:  # Subplots overview
        if not nums_over:
            st.warning("Select at least one numeric column.")
        else:
            k = len(nums_over)
            fig_mpl, axes = plt.subplots(
                1,
                k,
                figsize=(4 * k, 4),
                squeeze=False,
            )
            for idx, col_name in enumerate(nums_over):
                ax = axes[0, idx]
                data = df[col_name].dropna().values
                ax.hist(data, bins=30, alpha=0.8, density=True)
                # gaussian_kde cannot be fitted to constant data (singular
                # covariance), so require some spread as well as enough points.
                if use_kde and len(data) > 10 and data.min() < data.max():
                    x_vals = np.linspace(data.min(), data.max(), 200)
                    kde = stats.gaussian_kde(data)
                    ax.plot(x_vals, kde(x_vals), lw=2)
                ax.set_title(col_name)
                ax.grid(alpha=0.3)
            fig_mpl.suptitle("Numeric overview", fontsize=13, fontweight="bold")
            fig_mpl.tight_layout()
            apply_dark(fig_mpl, DARK)
            st.pyplot(fig_mpl)

            code_lines = [
                f"cols = {list(nums_over)!r}",
                "fig, axes = plt.subplots(1, len(cols), figsize=(4 * len(cols), 4), squeeze=False)",
                "for idx, name in enumerate(cols):",
                "    ax = axes[0, idx]",
                "    data = df[name].dropna().values",
                "    ax.hist(data, bins=30, density=True, alpha=0.8)",
            ]
            if use_kde:
                code_lines += [
                    "    if len(data) > 10 and data.min() < data.max():",
                    "        xs = np.linspace(data.min(), data.max(), 200)",
                    "        ax.plot(xs, stats.gaussian_kde(data)(xs), lw=2)",
                ]
            code_lines += [
                "    ax.set_title(name)",
                "    ax.grid(alpha=0.3)",
                "plt.tight_layout()",
                "plt.show()",
            ]
            code_mpl = "\n".join(code_lines)

    st.markdown("</div>", unsafe_allow_html=True)
1423
-
1424
# Show the generated snippet and offer to store the rendered figure.
st.markdown("### Code preview")
if code_mpl:
    show_code_example(code_mpl, "Matplotlib commands that reproduce the current plot.")

if fig_mpl is not None:
    if st.button("Save last Matplotlib plot to gallery", key="mpl_save_gallery"):
        save_to_gallery(fig_mpl, f"Matplotlib: {mpl_type}", "Matplotlib builder plot")
        st.success("Saved to gallery.")
    # pyplot keeps every figure alive until it is closed, and Streamlit re-runs
    # this script on each interaction, so release the figure once it has been
    # displayed and (optionally) saved.
    # NOTE(review): assumes save_to_gallery renders the figure to image data
    # at call time rather than keeping a reference to it -- confirm.
    plt.close(fig_mpl)
1432
-
1433
# ==================== TAB: COMPARE ====================
with tab_compare:
    # Draw the same data once with Seaborn and once with plain Matplotlib,
    # side by side. Figures are closed as soon as they are no longer needed,
    # because pyplot keeps them registered and the script re-runs on every
    # widget interaction.
    st.markdown("## Compare Seaborn and Matplotlib")
    st.markdown(
        '<div class="info-box"><strong>Goal:</strong> See the same idea expressed once with Seaborn and once with Matplotlib.</div>',
        unsafe_allow_html=True,
    )

    if df.empty or not numeric_cols_all:
        st.warning("Need at least one numeric column in the dataset.")
    else:
        compare_kind = st.selectbox(
            "Comparison pattern",
            [
                "Distribution (histogram + KDE)",
                "Relationship (scatter)",
            ],
            key="cmp_kind",
        )

        if compare_kind == "Distribution (histogram + KDE)":
            num_cmp = st.selectbox(
                "Numeric column",
                numeric_cols_all,
                key="cmp_dist_num",
            )
            hue_cmp = None
            if categorical_cols_all:
                use_hue_cmp = st.checkbox(
                    "Color by category (Seaborn only)",
                    value=False,
                    key="cmp_dist_use_hue",
                )
                if use_hue_cmp:
                    hue_cmp = st.selectbox(
                        "Hue",
                        categorical_cols_all,
                        key="cmp_dist_hue",
                    )

            col_s, col_m = st.columns(2)

            with col_s:
                st.markdown("### Seaborn view")
                fig_s, ax_s = plt.subplots(figsize=(7, 4))
                sns.histplot(
                    data=df,
                    x=num_cmp,
                    hue=hue_cmp,
                    kde=True,
                    bins=30,
                    ax=ax_s,
                )
                ax_s.set_title("Seaborn: histogram + KDE", fontsize=12, fontweight="bold")
                apply_dark(fig_s, DARK)
                st.pyplot(fig_s)

            with col_m:
                st.markdown("### Matplotlib view")
                fig_m, ax_m = plt.subplots(figsize=(7, 4))
                values = df[num_cmp].dropna().values
                # A column may hold only missing values; skip drawing rather
                # than fail on min()/max() of an empty array.
                if values.size:
                    ax_m.hist(values, bins=30, alpha=0.85, density=True)
                # gaussian_kde needs at least two points with some spread;
                # otherwise the covariance is singular and fitting raises.
                if values.size > 1 and values.min() < values.max():
                    x_vals = np.linspace(values.min(), values.max(), 200)
                    kde = stats.gaussian_kde(values)
                    ax_m.plot(x_vals, kde(x_vals), lw=2)
                ax_m.set_title("Matplotlib: histogram + KDE", fontsize=12, fontweight="bold")
                ax_m.set_xlabel(num_cmp)
                ax_m.set_ylabel("Density")
                ax_m.grid(alpha=0.3)
                apply_dark(fig_m, DARK)
                st.pyplot(fig_m)
                plt.close(fig_m)

            if st.button("Save Seaborn comparison plot to gallery", key="cmp_dist_save"):
                save_to_gallery(fig_s, "Compare: Distribution", "Seaborn vs Matplotlib distribution")
                st.success("Saved Seaborn figure to gallery.")
            # NOTE(review): assumes save_to_gallery renders the figure at call
            # time and does not keep a reference to it -- confirm.
            plt.close(fig_s)

        else:  # Relationship (scatter)
            if len(numeric_cols_all) < 2:
                st.warning("Need at least two numeric columns.")
            else:
                x_cmp = st.selectbox(
                    "X",
                    numeric_cols_all,
                    key="cmp_rel_x",
                )
                y_cmp = st.selectbox(
                    "Y",
                    [c for c in numeric_cols_all if c != x_cmp],
                    key="cmp_rel_y",
                )
                hue_cmp_rel = None
                if categorical_cols_all:
                    use_hue_cmp_rel = st.checkbox(
                        "Color by category (Seaborn only)",
                        value=False,
                        key="cmp_rel_use_hue",
                    )
                    if use_hue_cmp_rel:
                        hue_cmp_rel = st.selectbox(
                            "Hue",
                            categorical_cols_all,
                            key="cmp_rel_hue",
                        )

                col_s2, col_m2 = st.columns(2)

                with col_s2:
                    st.markdown("### Seaborn view")
                    fig_s2, ax_s2 = plt.subplots(figsize=(7, 4))
                    sns.scatterplot(
                        data=df,
                        x=x_cmp,
                        y=y_cmp,
                        hue=hue_cmp_rel,
                        alpha=0.7,
                        s=70,
                        ax=ax_s2,
                    )
                    ax_s2.set_title("Seaborn: scatterplot", fontsize=12, fontweight="bold")
                    apply_dark(fig_s2, DARK)
                    st.pyplot(fig_s2)

                with col_m2:
                    st.markdown("### Matplotlib view")
                    fig_m2, ax_m2 = plt.subplots(figsize=(7, 4))
                    ax_m2.scatter(df[x_cmp], df[y_cmp], alpha=0.7)
                    ax_m2.set_title("Matplotlib: scatter", fontsize=12, fontweight="bold")
                    ax_m2.set_xlabel(x_cmp)
                    ax_m2.set_ylabel(y_cmp)
                    ax_m2.grid(alpha=0.3)
                    apply_dark(fig_m2, DARK)
                    st.pyplot(fig_m2)
                    plt.close(fig_m2)

                if st.button("Save Seaborn comparison plot to gallery", key="cmp_rel_save"):
                    save_to_gallery(fig_s2, "Compare: Relationship", "Seaborn vs Matplotlib scatter")
                    st.success("Saved Seaborn figure to gallery.")
                plt.close(fig_s2)
1569
-
1570
# ==================== TAB: GALLERY ====================
def _gallery_file_stem(name):
    """Return *name* reduced to a portable file-name stem.

    Saved plot names contain characters such as ``:`` (for example
    "Matplotlib: Line") that are not valid in Windows file names and are
    awkward inside ZIP archives. Every run of characters other than letters,
    digits, ``-`` and ``.`` collapses to a single underscore; an empty result
    falls back to ``"plot"``.
    """
    kept = "".join(ch if ch.isalnum() or ch in "-." else " " for ch in str(name))
    return "_".join(kept.split()) or "plot"


with tab_gallery:
    # Browse, download and clear the plots stored in
    # ``st.session_state["gallery"]``. Each item is read through its "name",
    # "description", "timestamp" and "image" keys.
    st.markdown("## Gallery")

    if not st.session_state["gallery"]:
        st.info("Gallery is empty. Build a plot in any tab and save it here.")
        st.markdown(
            """
            **How this gallery works**

            1. Create a visualization in one of the tabs
            2. Click the **Save to gallery** button
            3. Return here to review the saved visuals
            4. Download individual PNG files or a ZIP archive
            """
        )
    else:
        st.success(f"{len(st.session_state['gallery'])} visualizations stored.")

        col_zip, col_clear, _ = st.columns([2, 2, 1])

        with col_zip:
            if st.button("Prepare ZIP archive", key="gal_zip_btn", use_container_width=True):
                zip_buf = io.BytesIO()
                with zipfile.ZipFile(zip_buf, "w", zipfile.ZIP_DEFLATED) as zf:
                    # The running number keeps entries unique even when two
                    # plots share a name.
                    for idx, item in enumerate(st.session_state["gallery"], start=1):
                        filename = f"{idx:02d}_{_gallery_file_stem(item['name'])}.png"
                        zf.writestr(filename, item["image"])

                st.download_button(
                    "Download ZIP",
                    data=zip_buf.getvalue(),
                    file_name=f"visual_lab_gallery_{datetime.now():%Y%m%d_%H%M%S}.zip",
                    mime="application/zip",
                    use_container_width=True,
                    key="gal_zip_dl",
                )

        with col_clear:
            if st.button("Clear gallery", key="gal_clear_btn", use_container_width=True):
                st.session_state["gallery"] = []
                st.rerun()

        st.markdown("---")

        cols_per_row = 2
        for i in range(0, len(st.session_state["gallery"]), cols_per_row):
            cols = st.columns(cols_per_row)
            for j, c in enumerate(cols):
                item_idx = i + j
                # The last row may have fewer items than columns.
                if item_idx < len(st.session_state["gallery"]):
                    item = st.session_state["gallery"][item_idx]
                    with c:
                        st.markdown('<div class="plot-container">', unsafe_allow_html=True)
                        st.image(item["image"], use_container_width=True)
                        st.markdown(f"**{item['name']}**")
                        st.caption(item["description"])
                        st.caption(
                            f"Saved at {item['timestamp'].strftime('%Y-%m-%d %H:%M')}"
                        )
                        st.download_button(
                            "Download PNG",
                            data=item["image"],
                            file_name=f"{_gallery_file_stem(item['name'])}.png",
                            mime="image/png",
                            key=f"gal_dl_{item_idx}",
                            use_container_width=True,
                        )
                        st.markdown("</div>", unsafe_allow_html=True)
1639
-
1640
# ==================== FOOTER ====================
st.markdown("---")
st.markdown("### Quick reference")

# One markdown cheat-sheet per footer column, listed left to right.
quick_reference_notes = (
    """
    **Distribution**
    - Histogram / KDE / ECDF
    - Box / Violin
    """,
    """
    **Relationships & groups**
    - Scatter / Regression / Line
    - Category summaries
    """,
    """
    **Matrix & multi-view**
    - Correlation heatmaps
    - Pairplot grids
    """,
)

for footer_col, note in zip(st.columns(3), quick_reference_notes):
    with footer_col:
        st.markdown(note)