Federaxt committed on
Commit
f889168
·
verified ·
1 Parent(s): 20e45f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +300 -228
app.py CHANGED
@@ -207,35 +207,67 @@ def load_kpis() -> Dict[str, Any]:
207
 
208
 
209
  # =========================================================
210
- # AI DASHBOARD -- LLM picks what to display
211
  # =========================================================
212
 
213
- DASHBOARD_SYSTEM = """You are an AI dashboard assistant for a book-sales analytics app.
214
- The user asks questions or requests about their data. You have access to pre-computed
215
- artifacts from a Python analysis pipeline.
 
 
 
 
 
 
 
 
 
 
216
 
217
- AVAILABLE ARTIFACTS (only reference ones that exist):
218
- {artifacts_json}
219
 
220
- KPI SUMMARY: {kpis_json}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
 
222
- YOUR JOB:
223
- 1. Answer the user's question conversationally using the KPIs and your knowledge of the artifacts.
224
- 2. At the END of your response, output a JSON block (fenced with ```json ... ```) that tells
225
- the dashboard which artifact to display. The JSON must have this shape:
226
- {{"show": "figure"|"table"|"none", "scope": "python", "filename": "..."}}
227
 
228
- - Use "show": "figure" to display a chart image.
229
- - Use "show": "table" to display a CSV/JSON table.
230
- - Use "show": "none" if no artifact is relevant.
231
 
232
- RULES:
233
- - If the user asks about sales trends or forecasting by title, show sales_trends or arima figures.
234
- - If the user asks about sentiment, show sentiment figure or sentiment_counts table.
235
- - If the user asks about forecast accuracy or ARIMA, show arima figures.
236
- - If the user asks about top sellers, show top_titles_by_units_sold.csv.
237
- - If the user asks a general data question, pick the most relevant artifact.
238
- - Keep your answer concise (2-4 sentences), then the JSON block.
 
 
 
 
 
 
 
 
 
239
  """
240
 
241
  JSON_BLOCK_RE = re.compile(r"```json\s*(\{.*?\})\s*```", re.DOTALL)
@@ -259,34 +291,73 @@ def _parse_display_directive(text: str) -> Dict[str, str]:
259
 
260
 
261
  def _clean_response(text: str) -> str:
262
- """Strip the JSON directive block from the displayed response."""
263
  return JSON_BLOCK_RE.sub("", text).strip()
264
 
265
 
266
- def _n8n_call(msg: str) -> Tuple[str, Dict]:
267
- """Call the student's n8n webhook and return (reply, directive)."""
268
- import requests as req
269
- try:
270
- resp = req.post(N8N_WEBHOOK_URL, json={"question": msg}, timeout=20)
271
- data = resp.json()
272
- answer = data.get("answer", "No response from n8n workflow.")
273
- chart = data.get("chart", "none")
274
- if chart and chart != "none":
275
- return answer, {"show": "figure", "chart": chart}
276
- return answer, {"show": "none"}
277
- except Exception as e:
278
- return f"n8n error: {e}. Falling back to keyword matching.", None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
 
280
 
281
  def ai_chat(user_msg: str, history: list):
282
- """Chat function for the AI Dashboard tab."""
283
  if not user_msg or not user_msg.strip():
284
  return history, "", None, None
285
 
286
  idx = artifacts_index()
287
  kpis = load_kpis()
288
 
289
- # Priority: n8n webhook > HF LLM > keyword fallback
290
  if N8N_WEBHOOK_URL:
291
  reply, directive = _n8n_call(user_msg)
292
  if directive is None:
@@ -295,10 +366,7 @@ def ai_chat(user_msg: str, history: list):
295
  elif not LLM_ENABLED:
296
  reply, directive = _keyword_fallback(user_msg, idx, kpis)
297
  else:
298
- system = DASHBOARD_SYSTEM.format(
299
- artifacts_json=json.dumps(idx, indent=2),
300
- kpis_json=json.dumps(kpis, indent=2) if kpis else "(no KPIs yet, run the pipeline first)",
301
- )
302
  msgs = [{"role": "system", "content": system}]
303
  for entry in (history or [])[-6:]:
304
  msgs.append(entry)
@@ -324,39 +392,26 @@ def ai_chat(user_msg: str, history: list):
324
  reply_fb, directive = _keyword_fallback(user_msg, idx, kpis)
325
  reply += "\n\n" + reply_fb
326
 
327
- # Resolve artifacts — build interactive Plotly charts when possible
328
  chart_out = None
329
  tab_out = None
330
  show = directive.get("show", "none")
331
  fname = directive.get("filename", "")
332
- chart_name = directive.get("chart", "")
333
-
334
- # Interactive chart builders keyed by name
335
- chart_builders = {
336
- "sales": build_sales_chart,
337
- "sentiment": build_sentiment_chart,
338
- "top_sellers": build_top_sellers_chart,
339
- }
340
 
341
- if chart_name and chart_name in chart_builders:
342
- chart_out = chart_builders[chart_name]()
343
- elif show == "figure" and fname:
344
- # Fallback: try to match filename to a chart builder
345
- if "sales_trend" in fname:
346
- chart_out = build_sales_chart()
347
- elif "sentiment" in fname:
348
- chart_out = build_sentiment_chart()
349
- elif "arima" in fname or "forecast" in fname:
350
- chart_out = build_sales_chart() # closest interactive equivalent
351
- else:
352
- chart_out = _empty_chart(f"No interactive chart for {fname}")
353
-
354
- if show == "table" and fname:
355
- fp = PY_TAB_DIR / fname
356
- if fp.exists():
357
- tab_out = _load_table_safe(fp)
358
- else:
359
- reply += f"\n\n*(Could not find table: {fname})*"
360
 
361
  new_history = (history or []) + [
362
  {"role": "user", "content": user_msg},
@@ -366,73 +421,8 @@ def ai_chat(user_msg: str, history: list):
366
  return new_history, "", chart_out, tab_out
367
 
368
 
369
- def _keyword_fallback(msg: str, idx: Dict, kpis: Dict) -> Tuple[str, Dict]:
370
- """Simple keyword matcher when LLM is unavailable."""
371
- msg_lower = msg.lower()
372
-
373
- if not idx["python"]["figures"] and not idx["python"]["tables"]:
374
- return (
375
- "No artifacts found yet. Please run the pipeline first (Tab 1), "
376
- "then come back here to explore the results.",
377
- {"show": "none"},
378
- )
379
-
380
- kpi_text = ""
381
- if kpis:
382
- total = kpis.get("total_units_sold", 0)
383
- kpi_text = (
384
- f"Quick summary: **{kpis.get('n_titles', '?')}** book titles across "
385
- f"**{kpis.get('n_months', '?')}** months, with **{total:,.0f}** total units sold."
386
- )
387
-
388
- if any(w in msg_lower for w in ["trend", "sales trend", "monthly sale"]):
389
- return (
390
- f"Here are the sales trends. {kpi_text}",
391
- {"show": "figure", "chart": "sales"},
392
- )
393
-
394
- if any(w in msg_lower for w in ["sentiment", "review", "positive", "negative"]):
395
- return (
396
- f"Here is the sentiment distribution across sampled book titles. {kpi_text}",
397
- {"show": "figure", "chart": "sentiment"},
398
- )
399
-
400
- if any(w in msg_lower for w in ["arima", "forecast", "predict"]):
401
- return (
402
- f"Here are the sales trends and forecasts. {kpi_text}",
403
- {"show": "figure", "chart": "sales"},
404
- )
405
-
406
- if any(w in msg_lower for w in ["top", "best sell", "popular", "rank"]):
407
- return (
408
- f"Here are the top-selling titles by units sold. {kpi_text}",
409
- {"show": "table", "scope": "python", "filename": "top_titles_by_units_sold.csv"},
410
- )
411
-
412
- if any(w in msg_lower for w in ["price", "pricing", "decision"]):
413
- return (
414
- f"Here are the pricing decisions. {kpi_text}",
415
- {"show": "table", "scope": "python", "filename": "pricing_decisions.csv"},
416
- )
417
-
418
- if any(w in msg_lower for w in ["dashboard", "overview", "summary", "kpi"]):
419
- return (
420
- f"Dashboard overview: {kpi_text}\n\nAsk me about sales trends, sentiment, forecasts, "
421
- "pricing, or top sellers to see specific visualizations.",
422
- {"show": "table", "scope": "python", "filename": "df_dashboard.csv"},
423
- )
424
-
425
- # Default
426
- return (
427
- f"I can show you various analyses. {kpi_text}\n\n"
428
- "Try asking about: **sales trends**, **sentiment**, **ARIMA forecasts**, "
429
- "**pricing decisions**, **top sellers**, or **dashboard overview**.",
430
- {"show": "none"},
431
- )
432
-
433
-
434
  # =========================================================
435
- # KPI CARDS (BubbleBusters style)
436
  # =========================================================
437
 
438
  def render_kpi_cards() -> str:
@@ -465,10 +455,13 @@ def render_kpi_cards() -> str:
465
  </div>"""
466
 
467
  kpi_config = [
468
- ("n_titles", "📚", "Book Titles", "#a48de8"),
469
- ("n_months", "📅", "Time Periods", "#7aa6f8"),
470
- ("total_units_sold", "📦", "Units Sold", "#6ee7c7"),
471
- ("total_revenue", "💰", "Revenue", "#3dcba8"),
 
 
 
472
  ]
473
 
474
  html = (
@@ -479,28 +472,21 @@ def render_kpi_cards() -> str:
479
  val = kpis.get(key)
480
  if val is None:
481
  continue
482
- if isinstance(val, (int, float)) and val > 100:
483
- val = f"{val:,.0f}"
484
  html += card(icon, label, str(val), colour)
485
- # Extra KPIs not in config
486
- known = {k for k, *_ in kpi_config}
487
- for key, val in kpis.items():
488
- if key not in known:
489
- label = key.replace("_", " ").title()
490
- if isinstance(val, (int, float)) and val > 100:
491
- val = f"{val:,.0f}"
492
- html += card("📈", label, str(val), "#8fa8f8")
493
  html += "</div>"
494
  return html
495
 
496
 
497
  # =========================================================
498
- # INTERACTIVE PLOTLY CHARTS (BubbleBusters style)
499
  # =========================================================
500
 
501
  CHART_PALETTE = ["#7c5cbf", "#2ec4a0", "#e8537a", "#e8a230", "#5e8fef",
502
  "#c45ea8", "#3dbacc", "#a0522d", "#6aaa3a", "#d46060"]
503
 
 
504
  def _styled_layout(**kwargs) -> dict:
505
  defaults = dict(
506
  template="plotly_white",
@@ -508,11 +494,6 @@ def _styled_layout(**kwargs) -> dict:
508
  plot_bgcolor="rgba(255,255,255,0.98)",
509
  font=dict(family="system-ui, sans-serif", color="#2d1f4e", size=12),
510
  margin=dict(l=60, r=20, t=70, b=70),
511
- legend=dict(
512
- orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1,
513
- bgcolor="rgba(255,255,255,0.92)",
514
- bordercolor="rgba(124,92,191,0.35)", borderwidth=1,
515
- ),
516
  title=dict(font=dict(size=15, color="#4b2d8a")),
517
  )
518
  defaults.update(kwargs)
@@ -522,90 +503,181 @@ def _styled_layout(**kwargs) -> dict:
522
  def _empty_chart(title: str) -> go.Figure:
523
  fig = go.Figure()
524
  fig.update_layout(
525
- title=title, height=420, template="plotly_white",
 
 
526
  paper_bgcolor="rgba(255,255,255,0.95)",
527
- annotations=[dict(text="Run the pipeline to generate data",
528
- x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False,
529
- font=dict(size=14, color="rgba(124,92,191,0.5)"))],
 
 
 
530
  )
531
  return fig
532
 
533
 
534
- def build_sales_chart() -> go.Figure:
535
- path = PY_TAB_DIR / "df_dashboard.csv"
536
- if not path.exists():
537
- return _empty_chart("Sales Trends — run the pipeline first")
538
- df = pd.read_csv(path)
539
- date_col = next((c for c in df.columns if "month" in c.lower() or "date" in c.lower()), None)
540
- val_cols = [c for c in df.columns if c != date_col and df[c].dtype in ("float64", "int64")]
541
- if not date_col or not val_cols:
542
- return _empty_chart("Could not auto-detect columns in df_dashboard.csv")
543
- df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
544
- fig = go.Figure()
545
- for i, col in enumerate(val_cols):
546
- fig.add_trace(go.Scatter(
547
- x=df[date_col], y=df[col], name=col.replace("_", " ").title(),
548
- mode="lines+markers", line=dict(color=CHART_PALETTE[i % len(CHART_PALETTE)], width=2),
549
- marker=dict(size=4),
550
- hovertemplate=f"<b>{col.replace('_',' ').title()}</b><br>%{{x|%b %Y}}: %{{y:,.0f}}<extra></extra>",
551
- ))
552
- fig.update_layout(**_styled_layout(height=450, hovermode="x unified",
553
- title=dict(text="Monthly Overview")))
554
- fig.update_xaxes(gridcolor="rgba(124,92,191,0.15)", showgrid=True)
555
- fig.update_yaxes(gridcolor="rgba(124,92,191,0.15)", showgrid=True)
556
- return fig
557
 
 
 
 
 
 
 
 
 
 
 
 
558
 
559
- def build_sentiment_chart() -> go.Figure:
560
- path = PY_TAB_DIR / "sentiment_counts_sampled.csv"
561
- if not path.exists():
562
- return _empty_chart("Sentiment Distribution — run the pipeline first")
563
- df = pd.read_csv(path)
564
- title_col = df.columns[0]
565
- sent_cols = [c for c in ["negative", "neutral", "positive"] if c in df.columns]
566
- if not sent_cols:
567
- return _empty_chart("No sentiment columns found in CSV")
568
- colors = {"negative": "#e8537a", "neutral": "#5e8fef", "positive": "#2ec4a0"}
569
- fig = go.Figure()
570
- for col in sent_cols:
571
- fig.add_trace(go.Bar(
572
- name=col.title(), y=df[title_col], x=df[col],
573
- orientation="h", marker_color=colors.get(col, "#888"),
574
- hovertemplate=f"<b>{col.title()}</b>: %{{x}}<extra></extra>",
575
- ))
576
  fig.update_layout(**_styled_layout(
577
- height=max(400, len(df) * 28), barmode="stack",
578
- title=dict(text="Sentiment Distribution by Book"),
 
579
  ))
580
- fig.update_xaxes(title="Number of Reviews")
581
  fig.update_yaxes(autorange="reversed")
 
582
  return fig
583
 
584
 
585
- def build_top_sellers_chart() -> go.Figure:
586
- path = PY_TAB_DIR / "top_titles_by_units_sold.csv"
587
- if not path.exists():
588
- return _empty_chart("Top Sellers run the pipeline first")
589
- df = pd.read_csv(path).head(15)
590
- title_col = next((c for c in df.columns if "title" in c.lower()), df.columns[0])
591
- val_col = next((c for c in df.columns if "unit" in c.lower() or "sold" in c.lower()), df.columns[-1])
592
- fig = go.Figure(go.Bar(
593
- y=df[title_col], x=df[val_col], orientation="h",
594
- marker=dict(color=df[val_col], colorscale=[[0, "#c5b4f0"], [1, "#7c5cbf"]]),
595
- hovertemplate="<b>%{y}</b><br>Units: %{x:,.0f}<extra></extra>",
 
 
 
 
 
 
 
 
596
  ))
 
597
  fig.update_layout(**_styled_layout(
598
- height=max(400, len(df) * 30),
599
- title=dict(text="Top Selling Titles"), showlegend=False,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
600
  ))
601
- fig.update_yaxes(autorange="reversed")
602
- fig.update_xaxes(title="Total Units Sold")
603
  return fig
604
 
605
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
606
  def refresh_dashboard():
607
- return render_kpi_cards(), build_sales_chart(), build_sentiment_chart(), build_top_sellers_chart()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
608
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
609
 
610
  # =========================================================
611
  # UI
@@ -661,9 +733,9 @@ with gr.Blocks(title="AIBDM 2026 Workshop App") as demo:
661
  refresh_btn = gr.Button("Refresh Dashboard", variant="primary")
662
 
663
  gr.Markdown("#### Interactive Charts")
664
- chart_sales = gr.Plot(label="Monthly Overview")
665
- chart_sentiment = gr.Plot(label="Sentiment Distribution")
666
- chart_top = gr.Plot(label="Top Sellers")
667
 
668
  gr.Markdown("#### Static Figures (from notebooks)")
669
  gallery = gr.Gallery(
@@ -729,12 +801,12 @@ with gr.Blocks(title="AIBDM 2026 Workshop App") as demo:
729
  )
730
  gr.Examples(
731
  examples=[
732
- "Show me the sales trends",
733
- "What does the sentiment look like?",
734
- "Which titles sell the most?",
735
- "Show the ARIMA forecasts",
736
- "What are the pricing decisions?",
737
- "Give me a dashboard overview",
738
  ],
739
  inputs=user_input,
740
  )
 
207
 
208
 
209
  # =========================================================
210
+ # DATA LOADER FOR YOUR DATASET
211
  # =========================================================
212
 
213
+ def load_main_dataset() -> pd.DataFrame:
214
+ for candidate in [
215
+ BASE_DIR / "final_dataset.csv",
216
+ BASE_DIR / "datareal.csv",
217
+ ]:
218
+ if candidate.exists():
219
+ try:
220
+ if candidate.name == "datareal.csv":
221
+ return pd.read_csv(candidate, sep=";")
222
+ return pd.read_csv(candidate)
223
+ except Exception:
224
+ pass
225
+ return pd.DataFrame()
226
 
 
 
227
 
228
+ def load_kpis() -> Dict[str, Any]:
229
+ df = load_main_dataset()
230
+ if df.empty:
231
+ return {}
232
+
233
+ kpis = {
234
+ "n_rows": len(df),
235
+ "n_countries": df["COUNTRY"].nunique() if "COUNTRY" in df.columns else None,
236
+ "avg_job_satisfaction": round(df["AVG_JOB_SATISFACTION"].mean(), 2)
237
+ if "AVG_JOB_SATISFACTION" in df.columns else None,
238
+ "avg_income": round(df["MEAN_NET_INCOME"].mean(), 2)
239
+ if "MEAN_NET_INCOME" in df.columns else None,
240
+ "avg_work_life_balance": round(df["WORK_LIFE_BALANCE"].mean(), 2)
241
+ if "WORK_LIFE_BALANCE" in df.columns else None,
242
+ "avg_stress_level": round(df["STRESS_LEVEL"].mean(), 2)
243
+ if "STRESS_LEVEL" in df.columns else None,
244
+ "avg_weekly_hours": round(df["AVG_WEEKLY_WORKING_HOURS"].mean(), 2)
245
+ if "AVG_WEEKLY_WORKING_HOURS" in df.columns else None,
246
+ }
247
+
248
+ return {k: v for k, v in kpis.items() if v is not None}
249
 
 
 
 
 
 
250
 
251
+ # =========================================================
252
+ # AI DASHBOARD -- adapted to your dataset
253
+ # =========================================================
254
 
255
+ DASHBOARD_SYSTEM = """You are an AI dashboard assistant for a country-level job satisfaction analytics app.
256
+ The dataset contains variables such as COUNTRY, AVG_JOB_SATISFACTION, WORK_LIFE_BALANCE,
257
+ STRESS_LEVEL, MEAN_NET_INCOME, and AVG_WEEKLY_WORKING_HOURS.
258
+
259
+ Your job:
260
+ 1. Answer the user's question briefly and clearly.
261
+ 2. At the end, output a JSON block inside ```json ... ``` with:
262
+ {"show": "figure"|"table"|"none", "scope": "python", "filename": "..."}
263
+
264
+ Use these filenames:
265
+ - "job_satisfaction_by_country" for country ranking chart
266
+ - "correlation_heatmap" for correlation chart
267
+ - "income_vs_satisfaction" for scatter chart
268
+ - "top_countries_table" for top countries table
269
+ - "bottom_countries_table" for bottom countries table
270
+ - "full_dataset_table" for full dataset preview
271
  """
272
 
273
  JSON_BLOCK_RE = re.compile(r"```json\s*(\{.*?\})\s*```", re.DOTALL)
 
291
 
292
 
293
  def _clean_response(text: str) -> str:
 
294
  return JSON_BLOCK_RE.sub("", text).strip()
295
 
296
 
297
+ def _keyword_fallback(msg: str, idx: Dict, kpis: Dict) -> Tuple[str, Dict]:
298
+ msg_lower = msg.lower()
299
+
300
+ if not kpis:
301
+ return (
302
+ "No dataset found yet. Please run the pipeline first.",
303
+ {"show": "none"},
304
+ )
305
+
306
+ summary = (
307
+ f"The dataset contains **{kpis.get('n_rows', '?')}** rows and "
308
+ f"**{kpis.get('n_countries', '?')}** countries. "
309
+ f"Average job satisfaction is **{kpis.get('avg_job_satisfaction', '?')}**."
310
+ )
311
+
312
+ if any(w in msg_lower for w in ["country", "countries", "ranking", "top countries", "bottom countries"]):
313
+ return (
314
+ f"Here is the country-level job satisfaction ranking. {summary}",
315
+ {"show": "figure", "filename": "job_satisfaction_by_country"},
316
+ )
317
+
318
+ if any(w in msg_lower for w in ["correlation", "heatmap", "relationship"]):
319
+ return (
320
+ f"Here is the correlation overview for the numeric variables. {summary}",
321
+ {"show": "figure", "filename": "correlation_heatmap"},
322
+ )
323
+
324
+ if any(w in msg_lower for w in ["income", "salary", "net income"]):
325
+ return (
326
+ f"Here is the relationship between income and job satisfaction. {summary}",
327
+ {"show": "figure", "filename": "income_vs_satisfaction"},
328
+ )
329
+
330
+ if any(w in msg_lower for w in ["top", "best", "highest"]):
331
+ return (
332
+ f"Here are the top countries by job satisfaction. {summary}",
333
+ {"show": "table", "scope": "python", "filename": "top_countries_table"},
334
+ )
335
+
336
+ if any(w in msg_lower for w in ["bottom", "lowest", "worst"]):
337
+ return (
338
+ f"Here are the bottom countries by job satisfaction. {summary}",
339
+ {"show": "table", "scope": "python", "filename": "bottom_countries_table"},
340
+ )
341
+
342
+ if any(w in msg_lower for w in ["overview", "summary", "dataset", "data", "kpi"]):
343
+ return (
344
+ f"Here is an overview of the dataset. {summary}",
345
+ {"show": "table", "scope": "python", "filename": "full_dataset_table"},
346
+ )
347
+
348
+ return (
349
+ f"{summary} Ask about country rankings, correlations, income effects, or top/bottom countries.",
350
+ {"show": "none"},
351
+ )
352
 
353
 
354
  def ai_chat(user_msg: str, history: list):
 
355
  if not user_msg or not user_msg.strip():
356
  return history, "", None, None
357
 
358
  idx = artifacts_index()
359
  kpis = load_kpis()
360
 
 
361
  if N8N_WEBHOOK_URL:
362
  reply, directive = _n8n_call(user_msg)
363
  if directive is None:
 
366
  elif not LLM_ENABLED:
367
  reply, directive = _keyword_fallback(user_msg, idx, kpis)
368
  else:
369
+ system = DASHBOARD_SYSTEM
 
 
 
370
  msgs = [{"role": "system", "content": system}]
371
  for entry in (history or [])[-6:]:
372
  msgs.append(entry)
 
392
  reply_fb, directive = _keyword_fallback(user_msg, idx, kpis)
393
  reply += "\n\n" + reply_fb
394
 
 
395
  chart_out = None
396
  tab_out = None
397
  show = directive.get("show", "none")
398
  fname = directive.get("filename", "")
 
 
 
 
 
 
 
 
399
 
400
+ if show == "figure":
401
+ if fname == "job_satisfaction_by_country":
402
+ chart_out = build_job_satisfaction_chart()
403
+ elif fname == "correlation_heatmap":
404
+ chart_out = build_correlation_chart()
405
+ elif fname == "income_vs_satisfaction":
406
+ chart_out = build_income_chart()
407
+
408
+ if show == "table":
409
+ if fname == "top_countries_table":
410
+ tab_out = get_top_countries_table()
411
+ elif fname == "bottom_countries_table":
412
+ tab_out = get_bottom_countries_table()
413
+ elif fname == "full_dataset_table":
414
+ tab_out = get_dataset_preview()
 
 
 
 
415
 
416
  new_history = (history or []) + [
417
  {"role": "user", "content": user_msg},
 
421
  return new_history, "", chart_out, tab_out
422
 
423
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
424
  # =========================================================
425
+ # KPI CARDS
426
  # =========================================================
427
 
428
  def render_kpi_cards() -> str:
 
455
  </div>"""
456
 
457
  kpi_config = [
458
+ ("n_rows", "📄", "Rows", "#a48de8"),
459
+ ("n_countries", "🌍", "Countries", "#7aa6f8"),
460
+ ("avg_job_satisfaction", "😊", "Avg Job Satisfaction", "#6ee7c7"),
461
+ ("avg_income", "💰", "Avg Net Income", "#3dcba8"),
462
+ ("avg_work_life_balance", "⚖️", "Work-Life Balance", "#f4b942"),
463
+ ("avg_stress_level", "🔥", "Stress Level", "#ff6b6b"),
464
+ ("avg_weekly_hours", "⏱️", "Weekly Hours", "#8fa8f8"),
465
  ]
466
 
467
  html = (
 
472
  val = kpis.get(key)
473
  if val is None:
474
  continue
475
+ if isinstance(val, (int, float)):
476
+ val = f"{val:,.2f}" if abs(val) < 1000 else f"{val:,.0f}"
477
  html += card(icon, label, str(val), colour)
 
 
 
 
 
 
 
 
478
  html += "</div>"
479
  return html
480
 
481
 
482
  # =========================================================
483
+ # INTERACTIVE PLOTLY CHARTS
484
  # =========================================================
485
 
486
  CHART_PALETTE = ["#7c5cbf", "#2ec4a0", "#e8537a", "#e8a230", "#5e8fef",
487
  "#c45ea8", "#3dbacc", "#a0522d", "#6aaa3a", "#d46060"]
488
 
489
+
490
  def _styled_layout(**kwargs) -> dict:
491
  defaults = dict(
492
  template="plotly_white",
 
494
  plot_bgcolor="rgba(255,255,255,0.98)",
495
  font=dict(family="system-ui, sans-serif", color="#2d1f4e", size=12),
496
  margin=dict(l=60, r=20, t=70, b=70),
 
 
 
 
 
497
  title=dict(font=dict(size=15, color="#4b2d8a")),
498
  )
499
  defaults.update(kwargs)
 
503
  def _empty_chart(title: str) -> go.Figure:
504
  fig = go.Figure()
505
  fig.update_layout(
506
+ title=title,
507
+ height=420,
508
+ template="plotly_white",
509
  paper_bgcolor="rgba(255,255,255,0.95)",
510
+ annotations=[dict(
511
+ text="Run the pipeline to generate data",
512
+ x=0.5, y=0.5, xref="paper", yref="paper",
513
+ showarrow=False,
514
+ font=dict(size=14, color="rgba(124,92,191,0.5)")
515
+ )],
516
  )
517
  return fig
518
 
519
 
520
+ def build_job_satisfaction_chart() -> go.Figure:
521
+ df = load_main_dataset()
522
+ if df.empty or "COUNTRY" not in df.columns or "AVG_JOB_SATISFACTION" not in df.columns:
523
+ return _empty_chart("Job Satisfaction by Country — run the pipeline first")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
524
 
525
+ plot_df = df[["COUNTRY", "AVG_JOB_SATISFACTION"]].sort_values(
526
+ "AVG_JOB_SATISFACTION", ascending=False
527
+ ).head(20)
528
+
529
+ fig = go.Figure(go.Bar(
530
+ x=plot_df["AVG_JOB_SATISFACTION"],
531
+ y=plot_df["COUNTRY"],
532
+ orientation="h",
533
+ marker=dict(color=plot_df["AVG_JOB_SATISFACTION"], colorscale="Viridis"),
534
+ hovertemplate="<b>%{y}</b><br>Job Satisfaction: %{x:.2f}<extra></extra>",
535
+ ))
536
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
537
  fig.update_layout(**_styled_layout(
538
+ height=max(450, len(plot_df) * 28),
539
+ title=dict(text="Top Countries by Job Satisfaction"),
540
+ showlegend=False,
541
  ))
 
542
  fig.update_yaxes(autorange="reversed")
543
+ fig.update_xaxes(title="Average Job Satisfaction")
544
  return fig
545
 
546
 
547
+ def build_income_chart() -> go.Figure:
548
+ df = load_main_dataset()
549
+ needed = {"MEAN_NET_INCOME", "AVG_JOB_SATISFACTION", "COUNTRY"}
550
+ if df.empty or not needed.issubset(df.columns):
551
+ return _empty_chart("Income vs Job Satisfaction — run the pipeline first")
552
+
553
+ fig = go.Figure(go.Scatter(
554
+ x=df["MEAN_NET_INCOME"],
555
+ y=df["AVG_JOB_SATISFACTION"],
556
+ mode="markers+text",
557
+ text=df["COUNTRY"],
558
+ textposition="top center",
559
+ marker=dict(
560
+ size=10,
561
+ color=df["AVG_JOB_SATISFACTION"],
562
+ colorscale="Viridis",
563
+ showscale=True,
564
+ ),
565
+ hovertemplate="<b>%{text}</b><br>Income: %{x:,.0f}<br>Job Satisfaction: %{y:.2f}<extra></extra>",
566
  ))
567
+
568
  fig.update_layout(**_styled_layout(
569
+ height=500,
570
+ title=dict(text="Income vs Job Satisfaction"),
571
+ ))
572
+ fig.update_xaxes(title="Mean Net Income")
573
+ fig.update_yaxes(title="Average Job Satisfaction")
574
+ return fig
575
+
576
+
577
+ def build_correlation_chart() -> go.Figure:
578
+ df = load_main_dataset()
579
+ if df.empty:
580
+ return _empty_chart("Correlation Heatmap — run the pipeline first")
581
+
582
+ num_df = df.select_dtypes(include=["number"])
583
+ if num_df.empty:
584
+ return _empty_chart("No numeric columns found")
585
+
586
+ corr = num_df.corr(numeric_only=True)
587
+
588
+ fig = go.Figure(data=go.Heatmap(
589
+ z=corr.values,
590
+ x=corr.columns,
591
+ y=corr.columns,
592
+ colorscale="RdBu",
593
+ zmin=-1,
594
+ zmax=1,
595
+ hovertemplate="X: %{x}<br>Y: %{y}<br>Corr: %{z:.2f}<extra></extra>",
596
+ ))
597
+
598
+ fig.update_layout(**_styled_layout(
599
+ height=600,
600
+ title=dict(text="Correlation Heatmap"),
601
  ))
 
 
602
  return fig
603
 
604
 
605
+ def get_top_countries_table() -> pd.DataFrame:
606
+ df = load_main_dataset()
607
+ if df.empty or "COUNTRY" not in df.columns or "AVG_JOB_SATISFACTION" not in df.columns:
608
+ return pd.DataFrame([{"info": "No data available"}])
609
+ return df[["COUNTRY", "AVG_JOB_SATISFACTION"]].sort_values(
610
+ "AVG_JOB_SATISFACTION", ascending=False
611
+ ).head(10)
612
+
613
+
614
+ def get_bottom_countries_table() -> pd.DataFrame:
615
+ df = load_main_dataset()
616
+ if df.empty or "COUNTRY" not in df.columns or "AVG_JOB_SATISFACTION" not in df.columns:
617
+ return pd.DataFrame([{"info": "No data available"}])
618
+ return df[["COUNTRY", "AVG_JOB_SATISFACTION"]].sort_values(
619
+ "AVG_JOB_SATISFACTION", ascending=True
620
+ ).head(10)
621
+
622
+
623
+ def get_dataset_preview() -> pd.DataFrame:
624
+ df = load_main_dataset()
625
+ if df.empty:
626
+ return pd.DataFrame([{"info": "No data available"}])
627
+ return df.head(20)
628
+
629
+
630
  def refresh_dashboard():
631
+ return (
632
+ render_kpi_cards(),
633
+ build_job_satisfaction_chart(),
634
+ build_correlation_chart(),
635
+ build_income_chart(),
636
+ )
637
+
638
+
639
+ # =========================================================
640
+ # GALLERY / TABLE REFRESH
641
+ # =========================================================
642
+
643
+ def refresh_gallery():
644
+ figures = _load_all_figures()
645
+
646
+ table_choices = []
647
+ if (BASE_DIR / "final_dataset.csv").exists():
648
+ table_choices.append("final_dataset.csv")
649
+ if (BASE_DIR / "datareal.csv").exists():
650
+ table_choices.append("datareal.csv")
651
+
652
+ default_df = pd.DataFrame()
653
+ if table_choices:
654
+ first_path = BASE_DIR / table_choices[0]
655
+ if first_path.name == "datareal.csv":
656
+ default_df = pd.read_csv(first_path, sep=";", nrows=MAX_PREVIEW_ROWS)
657
+ else:
658
+ default_df = pd.read_csv(first_path, nrows=MAX_PREVIEW_ROWS)
659
 
660
+ return (
661
+ figures if figures else [],
662
+ gr.update(choices=table_choices, value=table_choices[0] if table_choices else None),
663
+ default_df,
664
+ )
665
+
666
+
667
+ def on_table_select(choice: str):
668
+ if not choice:
669
+ return pd.DataFrame([{"hint": "Select a table above."}])
670
+
671
+ path = BASE_DIR / choice
672
+ if not path.exists():
673
+ return pd.DataFrame([{"error": f"File not found: {choice}"}])
674
+
675
+ try:
676
+ if path.name == "datareal.csv":
677
+ return pd.read_csv(path, sep=";", nrows=MAX_PREVIEW_ROWS)
678
+ return pd.read_csv(path, nrows=MAX_PREVIEW_ROWS)
679
+ except Exception as e:
680
+ return pd.DataFrame([{"error": str(e)}])
681
 
682
  # =========================================================
683
  # UI
 
733
  refresh_btn = gr.Button("Refresh Dashboard", variant="primary")
734
 
735
  gr.Markdown("#### Interactive Charts")
736
+ chart_sales = gr.Plot(label="Job Satisfaction by Country")
737
+ chart_sentiment = gr.Plot(label="Correlation Heatmap")
738
+ chart_top = gr.Plot(label="Income vs Job Satisfaction")
739
 
740
  gr.Markdown("#### Static Figures (from notebooks)")
741
  gallery = gr.Gallery(
 
801
  )
802
  gr.Examples(
803
  examples=[
804
+ "Show me the top countries by job satisfaction",
805
+ "Show me the correlation heatmap",
806
+ "How does income relate to job satisfaction?",
807
+ "Which countries have the lowest job satisfaction?",
808
+ "Give me a dataset overview",
809
+ "Show me the top 10 countries",
810
  ],
811
  inputs=user_input,
812
  )