ayushsahu45 committed on
Commit
9746dae
·
verified ·
1 Parent(s): d7df271

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -766
app.py DELETED
@@ -1,766 +0,0 @@
1
- """
2
- Multi-AI Analytics Platform — app.py
3
- Author: Ayush
4
- Run: streamlit run app.py
5
- """
6
-
7
- import streamlit as st
8
- import pandas as pd
9
- import numpy as np
10
- from PIL import Image
11
- import os
12
- import sys
13
- import json
14
- from pathlib import Path
15
-
16
- ROOT = Path(__file__).parent
17
- sys.path.insert(0, str(ROOT))
18
-
19
- from config import Config, OUTPUT_DIR
20
- from data.data_loader import DataLoader
21
- from data.powerbi_export import PowerBIExporter
22
- from models.ml_models import MLPipeline, XGBoostPipeline, EnsemblePipeline
23
- from models.generative_ai import GenerativeAI
24
- from utils.helpers import (
25
- create_feature_importance_chart,
26
- create_metrics_dashboard,
27
- create_confusion_matrix,
28
- create_correlation_heatmap,
29
- create_class_distribution,
30
- create_actual_vs_predicted,
31
- create_scatter_plot,
32
- ChartGenerator,
33
- )
34
-
35
- try:
36
- from models.deep_learning import DeepLearningPipeline, ImageClassifier, NLPClassifier
37
- DL_IMPORT_OK = True
38
- except Exception as _dl_err:
39
- DeepLearningPipeline = None
40
- ImageClassifier = None
41
- NLPClassifier = None
42
- DL_IMPORT_OK = False
43
-
44
- try:
45
- from models.ml_models import LightGBMPipeline
46
- LGB_OK = True
47
- except Exception:
48
- LightGBMPipeline = None
49
- LGB_OK = False
50
-
51
# ---------------------------------------------------------------------------
# Page chrome: browser-tab settings, global stylesheet and the title banner.
# ---------------------------------------------------------------------------

# Page-level options, applied before any other Streamlit element in this script.
_PAGE_SETTINGS = {
    "page_title": "Multi-AI Analytics Platform",
    "page_icon": "🤖",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)

# Custom theme, injected as raw HTML (hence unsafe_allow_html below).
_GLOBAL_CSS = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;600&display=swap');

html, body, [class*="css"] { font-family: 'Space Grotesk', sans-serif; }

.main { background: #0d0d1a; }

.main-header {
font-size: 2.6rem; font-weight: 700;
background: linear-gradient(135deg, #6C63FF 0%, #00D9FF 100%);
-webkit-background-clip: text; -webkit-text-fill-color: transparent;
text-align: center; margin-bottom: 0.3rem;
}
.sub-header {
font-size: 1.05rem; color: #8888aa;
text-align: center; margin-bottom: 1.5rem; letter-spacing: 0.05em;
}

section[data-testid="stSidebar"] { background: #0f0f1e !important; }

.stTabs [data-baseweb="tab-list"] { gap: 1rem; background: transparent; }
.stTabs [data-baseweb="tab"] {
padding: 0.6rem 1.4rem; border-radius: 8px;
font-weight: 600; color: #8888aa;
}
.stTabs [aria-selected="true"] {
background: linear-gradient(135deg, #6C63FF22, #00D9FF22) !important;
color: #6C63FF !important; border-bottom: 2px solid #6C63FF;
}

.stButton > button {
background: linear-gradient(135deg, #6C63FF, #00D9FF);
color: white; border: none; border-radius: 8px;
font-weight: 600; transition: opacity 0.2s;
}
.stButton > button:hover { opacity: 0.85; }

[data-testid="stMetricValue"] { color: #6C63FF !important; font-weight: 700; }

.footer-text { text-align: center; color: #555577; font-size: 13px; }
.brand { color: #6C63FF; font-weight: 700; font-family: 'JetBrains Mono', monospace; }
</style>
"""
st.markdown(_GLOBAL_CSS, unsafe_allow_html=True)

# Title and tagline use the .main-header / .sub-header classes defined above.
_TITLE_HTML = '<h1 class="main-header">🤖 Multi-AI Analytics Platform</h1>'
_TAGLINE_HTML = '<p class="sub-header">Machine Learning &nbsp;•&nbsp; Deep Learning &nbsp;•&nbsp; Generative AI &nbsp;•&nbsp; Power BI Export</p>'
st.markdown(_TITLE_HTML, unsafe_allow_html=True)
st.markdown(_TAGLINE_HTML, unsafe_allow_html=True)
108
-
109
import importlib


def _show_library_version(label: str, module_name: str, missing_text: str) -> None:
    """Write one status line: the module's ``__version__`` or a 'missing' note.

    Args:
        label: Text shown before the colon (emoji + library name).
        module_name: Importable module name to probe.
        missing_text: Text shown after the colon when the import fails.
    """
    try:
        module = importlib.import_module(module_name)
    except ImportError:
        st.markdown(f"{label}: {missing_text}")
    else:
        st.markdown(f"{label}: `{module.__version__}`")


# ---------------------------------------------------------------------------
# Sidebar: generative-AI credentials, environment status, and a reset button.
# ---------------------------------------------------------------------------
with st.sidebar:
    st.markdown("## ⚙️ Settings")
    st.divider()

    st.markdown("### 🔑 Generative AI")
    provider_choice = st.selectbox(
        "Provider",
        ["openai", "google", "anthropic"],
        format_func=lambda x: {"openai": "OpenAI GPT-4", "google": "Google Gemini", "anthropic": "Anthropic Claude"}[x],
    )
    api_key_input = st.text_input("API Key", type="password", placeholder="Paste your key here…")

    st.divider()
    st.markdown("### ℹ️ Status")
    st.markdown(f"🐍 Python: `{sys.version.split()[0]}`")
    # One probe per optional dependency; the messages match the originals.
    _show_library_version("🔥 PyTorch", "torch", "❌ not installed")
    _show_library_version("🤖 sklearn", "sklearn", "❌")
    _show_library_version("⚡ XGBoost", "xgboost", "❌")

    st.divider()
    if st.button("🔄 Reset All", width="stretch"):
        # Drop every stored key, then rerun so the defaults are re-seeded.
        st.session_state.clear()
        st.rerun()
145
-
146
def _init_session():
    """Seed ``st.session_state`` with every key the app reads.

    Keys that already exist are left untouched. Service objects are created
    through factories, so they are only constructed when their key is missing
    rather than being built and thrown away on each call.
    """
    state = st.session_state

    # Objects that are built once and then kept in the session.
    service_factories = {
        "data_loader": DataLoader,
        "powerbi_exporter": lambda: PowerBIExporter(OUTPUT_DIR),
        "gen_ai": GenerativeAI,
    }
    for key, build in service_factories.items():
        if key not in state:
            state[key] = build()

    # Plain slots that start empty and are filled in by the tabs.
    empty_slots = (
        "df",
        "data_summary",
        "ml_pipeline",
        "ml_results",
        "ml_metrics",
        "dl_pipeline",
        "dl_results",
        "uploaded_image",
        "target_column",
    )
    for key in empty_slots:
        if key not in state:
            state[key] = None

    if state.dl_pipeline is None and DL_IMPORT_OK:
        try:
            state.dl_pipeline = DeepLearningPipeline()
        except Exception:
            # Deliberate best effort: the Deep Learning tab retries the
            # construction itself and shows the error to the user there.
            state.dl_pipeline = None
170
-
171
_init_session()

# Swap in a keyed generative-AI client only when the sidebar provider/key pair
# actually changes, instead of constructing a new client on every script run.
if api_key_input:
    _requested_credentials = (provider_choice, api_key_input)
    if st.session_state.get("_gen_ai_credentials") != _requested_credentials:
        st.session_state.gen_ai = GenerativeAI(api_key=api_key_input, provider=provider_choice)
        st.session_state["_gen_ai_credentials"] = _requested_credentials
175
-
176
def render_metrics_row(metrics: dict, max_cols: int = 4):
    """Show the first numeric metrics as a single row of ``st.metric`` cards.

    Args:
        metrics: Mapping of metric name to value. Non-numeric values (strings,
            lists, ...) and booleans are ignored.
        max_cols: Maximum number of cards to show; defaults to 4.

    Returns:
        None. Nothing is rendered when there is no numeric metric or when
        ``max_cols`` is below 1.
    """
    import numbers  # local import keeps this block self-contained

    # numbers.Real also matches NumPy integer/float scalars, which a plain
    # (int, float) check drops. bool is an int subclass, so exclude it.
    numeric = {
        name: value
        for name, value in metrics.items()
        if isinstance(value, numbers.Real) and not isinstance(value, bool)
    }
    if not numeric or max_cols < 1:
        return

    shown = list(numeric.items())[:max_cols]
    cols = st.columns(len(shown))
    for col, (name, value) in zip(cols, shown):
        if isinstance(value, numbers.Integral):
            text = str(value)
        else:
            text = f"{float(value):.4f}"
        col.metric(name.replace("_", " ").title(), text)
186
-
187
def section_header(icon: str, title: str):
    """Render a level-3 markdown heading made of ``icon`` followed by ``title``."""
    heading = "### {} {}".format(icon, title)
    st.markdown(heading)
189
-
190
# The five top-level tabs; each `with tabN:` section fills one of them.
tab1, tab2, tab3, tab4, tab5 = st.tabs([
    "📊 Data", "🧠 ML Pipeline", "🔮 Deep Learning", "💡 Generative AI", "📤 Power BI"
])

# ---------------------------------------------------------------------------
# Tab 1 — upload a dataset or image, then explore the loaded DataFrame.
# ---------------------------------------------------------------------------
with tab1:
    section_header("📊", "Data Loading & Exploration")

    col_up, col_sum = st.columns([2, 1])

    with col_up:
        uploaded_file = st.file_uploader(
            "Upload CSV, Excel, JSON, or Image",
            type=["csv", "xlsx", "xls", "json", "png", "jpg", "jpeg", "bmp", "webp"],
            help="Supports tabular data and images.",
        )

        if uploaded_file:
            try:
                if uploaded_file.type.startswith("image"):
                    img = Image.open(uploaded_file).convert("RGB")
                    # A fixed pixel width is enough; this matches the other
                    # st.image calls in this file.
                    st.image(img, caption=uploaded_file.name, width=400)
                    st.session_state.uploaded_image = img
                    st.success(f"✅ Image loaded: **{uploaded_file.name}** ({img.size[0]}×{img.size[1]} px)")
                else:
                    name = uploaded_file.name.lower()
                    if name.endswith(".csv"):
                        df = pd.read_csv(uploaded_file)
                    elif name.endswith((".xlsx", ".xls")):
                        df = pd.read_excel(uploaded_file)
                    elif name.endswith(".json"):
                        df = pd.read_json(uploaded_file)
                    else:
                        # Fallback for any other extension: show it as text.
                        txt = uploaded_file.read().decode("utf-8")
                        st.text_area("📄 Text Content", txt, height=200)
                        df = None

                    if df is not None:
                        st.session_state.df = df
                        st.session_state.data_summary = st.session_state.data_loader.get_data_summary(df)
                        st.success(f"✅ **{uploaded_file.name}** — {df.shape[0]:,} rows × {df.shape[1]} columns")

            except Exception as e:
                st.error(f"❌ Load error: {e}")

    with col_sum:
        section_header("📋", "Summary")
        if st.session_state.data_summary:
            s = st.session_state.data_summary
            st.metric("Rows", f"{s['row_count']:,}")
            st.metric("Columns", s["features"])
            st.metric("Missing Values", sum(v for v in s["missing_values"].values() if isinstance(v, (int, float))))
            st.metric("Duplicate Rows", s.get("duplicate_rows", 0))

    if st.session_state.df is not None:
        df = st.session_state.df
        st.divider()

        t_prev, t_info, t_stats, t_corr, t_dist = st.tabs(
            ["🔍 Preview", "📋 Column Info", "📊 Statistics", "🔗 Correlation", "📈 Distributions"]
        )

        with t_prev:
            total_rows = len(df)
            if total_rows > 5:
                # Clamp the default so it never exceeds the slider maximum on
                # frames with fewer than 10 rows.
                n = st.slider(
                    "Rows to preview", 5, min(100, total_rows), min(10, total_rows), key="prev_n"
                )
            else:
                # Too few rows for a valid 5..max range: show all of them.
                n = total_rows
            st.dataframe(df.head(n), width="stretch")

        with t_info:
            null_counts = df.isnull().sum()
            info_df = pd.DataFrame({
                "Column": df.columns,
                "Type": df.dtypes.astype(str).values,
                "Non-Null": df.count().values,
                "Null": null_counts.values,
                "Null %": (null_counts / len(df) * 100).round(1).values,
                "Unique": df.nunique().values,
            })
            st.dataframe(info_df, width="stretch")

        with t_stats:
            numeric_df = df.select_dtypes(include=[np.number])
            if not numeric_df.empty:
                st.dataframe(numeric_df.describe().round(4), width="stretch")
            else:
                st.info("No numeric columns to describe.")

        with t_corr:
            numeric_df = df.select_dtypes(include=[np.number])
            if numeric_df.shape[1] >= 2:
                fig_corr = create_correlation_heatmap(df)
                st.plotly_chart(fig_corr, width="stretch")
            else:
                st.info("Need at least 2 numeric columns for correlation.")

        with t_dist:
            num_cols = df.select_dtypes(include=[np.number]).columns.tolist()
            cat_cols = df.select_dtypes(include=["object", "category"]).columns.tolist()
            # Offer the plot whenever any column is plottable, so datasets
            # with only categorical columns are not locked out.
            plot_cols = num_cols + cat_cols
            if plot_cols:
                sel_col = st.selectbox("Select column", plot_cols, key="dist_col")
                if df[sel_col].dtype == "object" or df[sel_col].nunique() <= 20:
                    fig_d = create_class_distribution(df[sel_col], title=f"Distribution: {sel_col}")
                else:
                    import plotly.express as px
                    fig_d = px.histogram(df, x=sel_col, title=f"Distribution: {sel_col}",
                                         template="plotly_dark", color_discrete_sequence=["#6C63FF"])
                st.plotly_chart(fig_d, width="stretch")
            else:
                st.info("No columns available for distribution plot.")
295
-
296
# ---------------------------------------------------------------------------
# Tab 2 — configure, train and inspect a classical ML model on the dataset.
# ---------------------------------------------------------------------------
with tab2:
    section_header("🧠", "Machine Learning Pipeline")

    if st.session_state.df is None:
        st.info("👆 Load a dataset in the **Data** tab first.")
    else:
        df = st.session_state.df

        c1, c2, c3 = st.columns(3)
        with c1:
            target_col = st.selectbox("🎯 Target Column", df.columns, key="ml_target")
            st.session_state.target_column = target_col
            unique_count = df[target_col].nunique()
            auto_task = st.session_state.data_loader.detect_task_type(df[target_col])
            st.caption(f"Unique values: **{unique_count}** — Suggested: **{auto_task}**")

        with c2:
            task_type = st.selectbox(
                "📊 Task Type", ["classification", "regression"],
                index=0 if auto_task == "classification" else 1,
            )

        with c3:
            model_options = ["Random Forest", "Gradient Boosting", "Logistic Regression", "SVM", "Ensemble"]
            # Optional back-ends are offered only when they can be imported.
            try:
                import xgboost  # noqa: F401
                model_options.insert(2, "XGBoost")
            except ImportError:
                pass
            if LGB_OK:
                model_options.insert(3, "LightGBM")

            model_choice = st.selectbox("🤖 Model", model_options)

        c4, c5 = st.columns(2)
        with c4:
            test_size = st.slider("Test Split", 0.1, 0.4, 0.2, 0.05)
        with c5:
            show_importance = st.checkbox("Show Feature Importance", value=True)

        with st.expander("📊 Target Distribution", expanded=False):
            fig_target = create_class_distribution(df[target_col], title=f"Target: {target_col}")
            st.plotly_chart(fig_target, width="stretch")

        if st.button("🚀 Train Model", type="primary", width="stretch"):
            with st.spinner(f"Training **{model_choice}** ({task_type})…"):
                try:
                    X_raw = df.drop(columns=[target_col])
                    y_raw = df[target_col]

                    if model_choice == "XGBoost":
                        pipeline = XGBoostPipeline(task_type=task_type)
                    elif model_choice == "LightGBM" and LGB_OK:
                        pipeline = LightGBMPipeline(task_type=task_type)
                    elif model_choice == "Ensemble":
                        pipeline = EnsemblePipeline(task_type=task_type)
                    else:
                        pipeline = MLPipeline(task_type=task_type, model_name=model_choice)

                    if task_type == "classification":
                        from sklearn.preprocessing import LabelEncoder as _LE
                        _le = _LE()
                        y = pd.Series(_le.fit_transform(y_raw.astype(str)), name=target_col)
                        pipeline.label_encoder = _le
                        pipeline.classes_ = _le.classes_
                    else:
                        y = y_raw.astype(float)

                    # One-hot encode once and reuse the result. The column
                    # names are captured before preprocessing in case the
                    # pipeline alters the frame it is given.
                    X_encoded = pd.get_dummies(
                        X_raw.select_dtypes(exclude=["datetime64"]), drop_first=True
                    )
                    encoded_columns = X_encoded.columns.tolist()
                    X_proc = pipeline._preprocess_X(X_encoded, fit=True)
                    pipeline.feature_names = encoded_columns

                    metrics = pipeline.train(X_proc, y.values, test_size=test_size)

                    st.session_state.ml_pipeline = pipeline
                    st.session_state.ml_metrics = metrics
                    st.session_state.ml_results = {
                        "metrics": metrics,
                        "feature_importance": pipeline.get_feature_importance().to_dict("records") if show_importance else [],
                        "model_name": model_choice,
                        "task_type": task_type,
                        # Remembered so a later export drops the column the
                        # model was trained on, not the current selection.
                        "target_column": target_col,
                        "y_test": pipeline.y_test.tolist() if pipeline.y_test is not None else [],
                        "y_pred": pipeline.y_pred.tolist() if pipeline.y_pred is not None else [],
                    }

                    st.success(f"🎉 **{model_choice}** trained successfully!")

                except Exception as e:
                    st.error(f"❌ Training failed: {e}")
                    import traceback
                    with st.expander("🔍 Error details"):
                        st.code(traceback.format_exc())

        if st.session_state.ml_results:
            res = st.session_state.ml_results
            metrics = res["metrics"]

            st.divider()
            st.subheader(f"📈 Results — {res.get('model_name', 'Model')}")
            render_metrics_row(metrics)

            r1, r2 = st.tabs(["📊 Charts", "📋 Report"])

            with r1:
                ch1, ch2 = st.columns(2)
                with ch1:
                    fig_metrics = create_metrics_dashboard(metrics)
                    st.plotly_chart(fig_metrics, width="stretch")

                with ch2:
                    if res.get("feature_importance"):
                        fig_imp = create_feature_importance_chart(
                            pd.DataFrame(res["feature_importance"]), top_n=15
                        )
                        st.plotly_chart(fig_imp, width="stretch")

                y_test = res.get("y_test", [])
                y_pred = res.get("y_pred", [])

                if y_test and y_pred:
                    if res["task_type"] == "classification":
                        classes = (
                            [str(c) for c in st.session_state.ml_pipeline.classes_]
                            if st.session_state.ml_pipeline and st.session_state.ml_pipeline.classes_ is not None
                            else None
                        )
                        unique_encoded = sorted(set(y_test + y_pred))
                        # Map encoded integers back to class names when the
                        # stored classes cover every encoded value.
                        if classes and len(classes) >= len(unique_encoded):
                            label_map = {i: c for i, c in enumerate(classes)}
                            y_test_labels = [label_map.get(v, str(v)) for v in y_test]
                            y_pred_labels = [label_map.get(v, str(v)) for v in y_pred]
                            display_classes = [label_map.get(i, str(i)) for i in unique_encoded]
                        else:
                            y_test_labels = [str(v) for v in y_test]
                            y_pred_labels = [str(v) for v in y_pred]
                            display_classes = [str(v) for v in unique_encoded]

                        fig_cm = create_confusion_matrix(y_test_labels, y_pred_labels, labels=display_classes)
                        st.plotly_chart(fig_cm, width="stretch")
                    else:
                        fig_av = create_actual_vs_predicted(y_test, y_pred)
                        st.plotly_chart(fig_av, width="stretch")

            with r2:
                if "classification_report" in metrics:
                    st.text(metrics["classification_report"])
                if "confusion_matrix" in metrics:
                    with st.expander("Raw Confusion Matrix"):
                        st.write(np.array(metrics["confusion_matrix"]))
                with st.expander("All Metrics (JSON)"):
                    safe_metrics = {k: v for k, v in metrics.items() if isinstance(v, (int, float, str))}
                    st.json(safe_metrics)

            if st.button("📤 Export Predictions to CSV"):
                try:
                    pipeline = st.session_state.ml_pipeline
                    # Use the target the model was trained on; fall back to
                    # the current selection for results stored without it.
                    trained_target = res.get("target_column", target_col)
                    X_export = df.drop(columns=[trained_target])
                    X_proc_export = pipeline._preprocess_X(
                        pd.get_dummies(X_export.select_dtypes(exclude=["datetime64"]), drop_first=True),
                        fit=False,
                    )
                    preds = pipeline.predict(X_proc_export)
                    export_df = df.copy()
                    export_df["prediction"] = preds
                    csv_path = st.session_state.powerbi_exporter.export_to_csv(export_df, "ml_predictions")
                    st.success(f"✅ Saved → `{csv_path}`")
                    st.download_button(
                        "⬇️ Download CSV",
                        export_df.to_csv(index=False).encode(),
                        file_name="ml_predictions.csv",
                        mime="text/csv",
                    )
                except Exception as e:
                    st.error(f"❌ Export error: {e}")
475
-
476
# ---------------------------------------------------------------------------
# Tab 3 — image classification, text embeddings and text classification.
# ---------------------------------------------------------------------------
with tab3:
    section_header("🔮", "Advanced AI — Deep Learning")

    dl = st.session_state.dl_pipeline

    if dl is None:
        if DL_IMPORT_OK:
            # Retry here so the construction error is visible to the user.
            try:
                st.session_state.dl_pipeline = DeepLearningPipeline()
                dl = st.session_state.dl_pipeline
            except Exception as e:
                st.error(f"Could not initialise DL pipeline: {e}")
        else:
            st.warning("⚠️ Deep learning import failed.")

    if dl is not None:
        status = dl.get_status()
        c1, c2, c3 = st.columns(3)
        c1.metric("PyTorch", "✅ Available" if status["torch_available"] else "❌ Not installed")
        c2.metric("Transformers", "✅ Available" if status["transformers_available"] else "⚠️ Fallback mode")
        c3.metric("Image / Text", "✅ Ready")

        if not status["torch_available"]:
            st.info("📦 Install PyTorch for GPU-accelerated inference")

        dl_option = st.radio(
            "Select Feature",
            ["🖼️ Image Classification", "📝 Text Embeddings", "🏷️ Text Classification"],
            horizontal=True,
        )

        if dl_option == "🖼️ Image Classification":
            ci1, ci2 = st.columns(2)
            with ci1:
                section_header("📤", "Input Image")
                if st.session_state.uploaded_image:
                    st.image(st.session_state.uploaded_image, caption="Loaded from Data tab", width=300)
                else:
                    local_img = st.file_uploader("Upload image here", type=["jpg", "jpeg", "png", "bmp", "webp"], key="dl_img")
                    if local_img:
                        img = Image.open(local_img).convert("RGB")
                        st.session_state.uploaded_image = img
                        st.image(img, width=300)

                model_choice_dl = st.selectbox("Model", ["resnet50", "efficientnet_b0", "mobilenet_v3"])

                if st.button("🔍 Classify", type="primary"):
                    if st.session_state.uploaded_image and dl:
                        with st.spinner("Running inference…"):
                            try:
                                # Select the architecture and clear the cached
                                # model before running inference.
                                dl.image_classifier.model_name = model_choice_dl
                                dl.image_classifier.model = None
                                results = dl.process_image(st.session_state.uploaded_image)
                                st.session_state.dl_results = results
                                st.success("✅ Done!")
                            except Exception as e:
                                st.error(f"❌ {e}")
                    else:
                        st.warning("⚠️ Upload an image first.")

            with ci2:
                section_header("📊", "Predictions")
                if st.session_state.dl_results and st.session_state.dl_results.get("type") == "image":
                    preds = st.session_state.dl_results.get("predictions", [])
                    if preds:
                        import plotly.graph_objects as go
                        labels = [p.get("label", f"class_{p.get('class_id','?')}") for p in preds]
                        probs = [p.get("probability", 0) for p in preds]
                        fig_pred = go.Figure(go.Bar(
                            x=probs, y=labels, orientation="h",
                            marker=dict(color=probs, colorscale="Viridis"),
                        ))
                        fig_pred.update_layout(
                            title="Class Probabilities", yaxis=dict(autorange="reversed"),
                            template="plotly_dark", paper_bgcolor="rgba(0,0,0,0)",
                        )
                        st.plotly_chart(fig_pred, width="stretch")
                else:
                    st.info("No predictions yet.")

        elif dl_option == "📝 Text Embeddings":
            section_header("🔢", "Generate Text Embeddings")
            st.info("Text embeddings map words/sentences into high-dimensional vectors.")

            text_input = st.text_area("Enter text", "The quick brown fox jumps over the lazy dog.", height=120)
            compare_text = st.text_input("Compare with (optional)", "")

            if st.button("⚡ Generate Embeddings", type="primary"):
                with st.spinner("Processing…"):
                    try:
                        results = dl.process_text(text_input)
                        st.session_state.dl_results = results
                        st.success(f"✅ Embeddings generated via **{results.get('method', 'unknown')}**")

                        cm1, cm2, cm3 = st.columns(3)
                        cm1.metric("Shape", str(results.get("embeddings_shape", "N/A")))
                        cm2.metric("Dimension", results.get("embedding_dim", "N/A"))
                        cm3.metric("Device", results.get("device", "N/A"))

                        if compare_text:
                            sim = dl.text_embedder.semantic_similarity(text_input, compare_text)
                            st.metric("🔗 Cosine Similarity", f"{sim:.4f}")

                    except Exception as e:
                        st.error(f"❌ {e}")

        elif dl_option == "🏷️ Text Classification":
            section_header("🎯", "Train Text Classifier")

            if st.session_state.df is not None:
                df = st.session_state.df
                text_cols = df.select_dtypes(include=["object"]).columns.tolist()

                if text_cols:
                    tc1, tc2 = st.columns(2)
                    with tc1:
                        text_col = st.selectbox("Text Column", text_cols)
                    with tc2:
                        label_col = st.selectbox("Label Column", df.columns.tolist())

                    epochs = st.slider("Training Epochs", 1, 10, 3, key="nlp_epochs")

                    if st.button("🎓 Train Classifier", type="primary"):
                        with st.spinner("Training…"):
                            try:
                                # Keep only rows where both text and label are
                                # present, so each text stays paired with its
                                # own label. Dropping nulls per column shifts
                                # the pairs when nulls are in different rows.
                                valid_rows = df[text_col].notna() & df[label_col].notna()
                                texts = df.loc[valid_rows, text_col].astype(str).tolist()
                                labels = df.loc[valid_rows, label_col].astype(str).tolist()
                                min_len = len(texts)

                                if min_len < 5:
                                    st.error("❌ Need at least 5 samples.")
                                else:
                                    dl.nlp_classifier.train_classifier(texts, labels, epochs=epochs)
                                    st.success("🎉 Classifier trained!")

                                    sample = texts[:3]
                                    preds = dl.nlp_classifier.predict(sample)
                                    st.subheader("Sample Predictions")
                                    for t, p in zip(sample, preds):
                                        st.markdown(f"**Text:** {t[:80]}… \n**→ {p['predicted_class']}** ({p['confidence']:.2%})")
                            except Exception as e:
                                st.error(f"❌ {e}")

                    st.divider()
                    custom_text = st.text_area("🔍 Classify custom text", key="nlp_custom")
                    if st.button("Predict", key="nlp_predict") and custom_text:
                        try:
                            result = dl.nlp_classifier.predict([custom_text])
                            if result:
                                st.metric("Predicted Class", result[0]["predicted_class"])
                                st.metric("Confidence", f"{result[0]['confidence']:.2%}")
                                st.json(result[0]["probabilities"])
                        except Exception as e:
                            st.error(f"❌ {e}")
                else:
                    st.warning("⚠️ No text columns found in the loaded dataset.")
            else:
                st.info("👆 Load a dataset with text columns in the **Data** tab first.")
635
-
636
# ---------------------------------------------------------------------------
# Tab 4 — LLM-backed insights, report generation and free-form Q&A.
# ---------------------------------------------------------------------------
with tab4:
    section_header("💡", "Generative AI Integration")

    gen_ai = st.session_state.gen_ai
    if gen_ai.is_available():
        st.success(f"✅ **{gen_ai._provider_config['name']}** connected")
    else:
        st.warning("⚠️ No API key configured — running in offline fallback mode.")
        st.info(f"Add your key in the sidebar. Get one from: {gen_ai._provider_config['url']}")

    gen_option = st.selectbox(
        "Analysis Type",
        ["General Insights", "Trends Analysis", "Anomaly Detection", "Recommendations", "Generate Report", "Q&A"],
    )

    # Menu entries that map directly onto a generate_insights() analysis type.
    analysis_map = {
        "General Insights": "general",
        "Trends Analysis": "trends_analysis",
        "Anomaly Detection": "anomaly_detection",
        "Recommendations": "recommendations",
    }

    if gen_option in analysis_map:
        if st.button("✨ Generate", type="primary"):
            if st.session_state.data_summary:
                with st.spinner("🤖 Generating…"):
                    insights = gen_ai.generate_insights(
                        st.session_state.data_summary, analysis_map[gen_option]
                    )
                st.markdown("### 📝 Result")
                st.markdown(insights)
            else:
                st.warning("⚠️ Load data first in the **Data** tab.")

    elif gen_option == "Generate Report":
        if st.button("📄 Generate Report", type="primary"):
            payload = {
                "ml_metrics": st.session_state.ml_metrics or {},
                "data_summary": st.session_state.data_summary or {},
                "model": st.session_state.ml_results.get("model_name", "N/A") if st.session_state.ml_results else "N/A",
            }
            with st.spinner("📝 Writing report…"):
                report = gen_ai.generate_report(payload)

            st.markdown("### 📄 Report")
            # Give the widget a real label and hide it, instead of passing an
            # empty string; the heading above already names the field.
            st.text_area("Report", report, height=400, label_visibility="collapsed")
            st.download_button(
                "⬇️ Download Report",
                report.encode(),
                file_name="ai_report.txt",
                mime="text/plain",
            )

    elif gen_option == "Q&A":
        question = st.text_input("❓ Ask a question about your data")
        if st.button("🔍 Ask", type="primary") and question:
            # The data summary, serialised to JSON, is the only context sent.
            context = json.dumps(st.session_state.data_summary, default=str) if st.session_state.data_summary else None
            with st.spinner("Thinking…"):
                answer = gen_ai.answer_question(question, context)
            st.markdown("### 💬 Answer")
            st.info(answer)
697
-
698
# ---------------------------------------------------------------------------
# Tab 5 — export the dataset (and feature importance) for Power BI.
# ---------------------------------------------------------------------------
with tab5:
    section_header("📤", "Power BI Export")

    exporter = st.session_state.powerbi_exporter

    if st.session_state.df is None:
        st.info("👆 Load data first.")
    else:
        df = st.session_state.df

        # Build the feature-importance frame once; it is used by the dataset
        # list, the bulk export and the direct download below.
        fi_records = (
            st.session_state.ml_results.get("feature_importance")
            if st.session_state.ml_results
            else None
        )
        fi_df = pd.DataFrame(fi_records) if fi_records else None

        st.markdown("### 📦 Export Options")
        ec1, ec2 = st.columns(2)

        with ec1:
            include_parquet = st.checkbox("Include Parquet files", value=True)
            export_name = st.text_input("Dataset name", value="main_data")

        # A blank name would give an empty dict key and a ".csv" file name,
        # so fall back to the widget's default.
        dataset_name = export_name.strip() or "main_data"

        with ec2:
            st.markdown("**Available datasets:**")
            available = {"Main Data": df}
            if fi_df is not None:
                available["Feature Importance"] = fi_df
            for name in available:
                st.markdown(f"• {name}")

        if st.button("📊 Export All for Power BI", type="primary", width="stretch"):
            with st.spinner("Exporting…"):
                try:
                    named = {dataset_name: df}
                    if "Feature Importance" in available:
                        named["feature_importance"] = available["Feature Importance"]
                    paths = exporter.export_all(named, include_parquet=include_parquet)
                    st.success(f"✅ Exported **{len(paths)}** files to `{OUTPUT_DIR}`")
                    for p in paths:
                        st.markdown(f" • `{p.name}`")
                except Exception as e:
                    st.error(f"❌ Export error: {e}")

        st.divider()
        st.markdown("### 📋 Power BI Instructions")
        st.info(exporter.generate_powerbi_instructions())

        st.markdown("### ⬇️ Direct Download")
        dl1, dl2 = st.columns(2)
        with dl1:
            st.download_button(
                "⬇️ Download Main Data (CSV)",
                df.to_csv(index=False).encode(),
                file_name=f"{dataset_name}.csv",
                mime="text/csv",
                width="stretch",
            )
        with dl2:
            if fi_df is not None:
                st.download_button(
                    "⬇️ Download Feature Importance (CSV)",
                    fi_df.to_csv(index=False).encode(),
                    file_name="feature_importance.csv",
                    mime="text/csv",
                    width="stretch",
                )
760
-
761
# Page footer: a divider followed by the branded credit line, styled by the
# .footer-text / .brand CSS classes.
_FOOTER_HTML = (
    '<p class="footer-text">Analytics-with-Ayush &nbsp;|&nbsp; Machine Learning • Deep Learning • Generative AI • Power BI'
    '&nbsp;&nbsp;<span class="brand">| KYOTO-Z |</span></p>'
)
st.divider()
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)