elodie122 committed on
Commit
a98f27b
·
verified ·
1 Parent(s): 690fe96

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +174 -257
app.py CHANGED
@@ -58,9 +58,6 @@ def ensure_dirs():
58
  def stamp():
59
  return time.strftime("%Y%m%d-%H%M%S")
60
 
61
- def tail(text: str, n: int = MAX_LOG_CHARS) -> str:
62
- return (text or "")[-n:]
63
-
64
  def _ls(dir_path: Path, exts: Tuple[str, ...]) -> List[str]:
65
  if not dir_path.is_dir():
66
  return []
@@ -102,7 +99,6 @@ def run_notebook(nb_name: str) -> str:
102
  )
103
  return f"Executed {nb_name}"
104
 
105
-
106
  def run_datacreation() -> str:
107
  try:
108
  log = run_notebook(NB1)
@@ -111,7 +107,6 @@ def run_datacreation() -> str:
111
  except Exception as e:
112
  return f"FAILED {e}\n\n{traceback.format_exc()[-2000:]}"
113
 
114
-
115
  def run_pythonanalysis() -> str:
116
  try:
117
  log = run_notebook(NB2)
@@ -126,33 +121,29 @@ def run_pythonanalysis() -> str:
126
  except Exception as e:
127
  return f"FAILED {e}\n\n{traceback.format_exc()[-2000:]}"
128
 
129
-
130
  def run_full_pipeline() -> str:
131
  logs = []
132
  logs.append("=" * 50)
133
- logs.append("STEP 1/2: Data Creation (web scraping + synthetic data)")
134
  logs.append("=" * 50)
135
  logs.append(run_datacreation())
136
  logs.append("")
137
  logs.append("=" * 50)
138
- logs.append("STEP 2/2: Python Analysis (sentiment, ARIMA, dashboard)")
139
  logs.append("=" * 50)
140
  logs.append(run_pythonanalysis())
141
  return "\n".join(logs)
142
 
143
-
144
  # =========================================================
145
  # GALLERY LOADERS
146
  # =========================================================
147
 
148
  def _load_all_figures() -> List[Tuple[str, str]]:
149
- """Return list of (filepath, caption) for Gallery."""
150
  items = []
151
  for p in sorted(PY_FIG_DIR.glob("*.png")):
152
- items.append((str(p), p.stem.replace('_', ' ').title()))
153
  return items
154
 
155
-
156
  def _load_table_safe(path: Path) -> pd.DataFrame:
157
  try:
158
  if path.suffix == ".json":
@@ -164,9 +155,7 @@ def _load_table_safe(path: Path) -> pd.DataFrame:
164
  except Exception as e:
165
  return pd.DataFrame([{"error": str(e)}])
166
 
167
-
168
  def refresh_gallery():
169
- """Called when user clicks Refresh on Gallery tab."""
170
  figures = _load_all_figures()
171
  idx = artifacts_index()
172
 
@@ -182,7 +171,6 @@ def refresh_gallery():
182
  default_df,
183
  )
184
 
185
-
186
  def on_table_select(choice: str):
187
  if not choice:
188
  return pd.DataFrame([{"hint": "Select a table above."}])
@@ -191,57 +179,54 @@ def on_table_select(choice: str):
191
  return pd.DataFrame([{"error": f"File not found: {choice}"}])
192
  return _load_table_safe(path)
193
 
194
-
195
  # =========================================================
196
  # KPI LOADER
197
  # =========================================================
198
 
199
  def load_kpis() -> Dict[str, Any]:
200
- for candidate in [PY_TAB_DIR / "kpis.json", PY_FIG_DIR / "kpis.json"]:
201
- if candidate.exists():
202
- try:
203
- return _read_json(candidate)
204
- except Exception:
205
- pass
206
  return {}
207
 
208
-
209
  # =========================================================
210
- # AI DASHBOARD -- LLM picks what to display
211
  # =========================================================
212
 
213
- DASHBOARD_SYSTEM = """You are an AI dashboard assistant for a book-sales analytics app.
214
- The user asks questions or requests about their data. You have access to pre-computed
215
- artifacts from a Python analysis pipeline.
 
 
216
 
217
  AVAILABLE ARTIFACTS (only reference ones that exist):
218
  {artifacts_json}
219
 
220
- KPI SUMMARY: {kpis_json}
 
221
 
222
  YOUR JOB:
223
- 1. Answer the user's question conversationally using the KPIs and your knowledge of the artifacts.
224
- 2. At the END of your response, output a JSON block (fenced with ```json ... ```) that tells
225
- the dashboard which artifact to display. The JSON must have this shape:
226
  {{"show": "figure"|"table"|"none", "scope": "python", "filename": "..."}}
227
 
228
- - Use "show": "figure" to display a chart image.
229
- - Use "show": "table" to display a CSV/JSON table.
230
- - Use "show": "none" if no artifact is relevant.
231
-
232
- RULES:
233
- - If the user asks about sales trends or forecasting by title, show sales_trends or arima figures.
234
- - If the user asks about sentiment, show sentiment figure or sentiment_counts table.
235
- - If the user asks about forecast accuracy or ARIMA, show arima figures.
236
- - If the user asks about top sellers, show top_titles_by_units_sold.csv.
237
- - If the user asks a general data question, pick the most relevant artifact.
238
- - Keep your answer concise (2-4 sentences), then the JSON block.
239
  """
240
 
241
  JSON_BLOCK_RE = re.compile(r"```json\s*(\{.*?\})\s*```", re.DOTALL)
242
  FALLBACK_JSON_RE = re.compile(r"\{[^{}]*\"show\"[^{}]*\}", re.DOTALL)
243
 
244
-
245
  def _parse_display_directive(text: str) -> Dict[str, str]:
246
  m = JSON_BLOCK_RE.search(text)
247
  if m:
@@ -257,14 +242,10 @@ def _parse_display_directive(text: str) -> Dict[str, str]:
257
  pass
258
  return {"show": "none"}
259
 
260
-
261
  def _clean_response(text: str) -> str:
262
- """Strip the JSON directive block from the displayed response."""
263
  return JSON_BLOCK_RE.sub("", text).strip()
264
 
265
-
266
- def _n8n_call(msg: str) -> Tuple[str, Dict]:
267
- """Call the student's n8n webhook and return (reply, directive)."""
268
  import requests as req
269
  try:
270
  resp = req.post(N8N_WEBHOOK_URL, json={"question": msg}, timeout=20)
@@ -277,16 +258,13 @@ def _n8n_call(msg: str) -> Tuple[str, Dict]:
277
  except Exception as e:
278
  return f"n8n error: {e}. Falling back to keyword matching.", None
279
 
280
-
281
  def ai_chat(user_msg: str, history: list):
282
- """Chat function for the AI Dashboard tab."""
283
  if not user_msg or not user_msg.strip():
284
  return history, "", None, None
285
 
286
  idx = artifacts_index()
287
  kpis = load_kpis()
288
 
289
- # Priority: n8n webhook > HF LLM > keyword fallback
290
  if N8N_WEBHOOK_URL:
291
  reply, directive = _n8n_call(user_msg)
292
  if directive is None:
@@ -324,32 +302,30 @@ def ai_chat(user_msg: str, history: list):
324
  reply_fb, directive = _keyword_fallback(user_msg, idx, kpis)
325
  reply += "\n\n" + reply_fb
326
 
327
- # Resolve artifacts — build interactive Plotly charts when possible
328
  chart_out = None
329
  tab_out = None
330
  show = directive.get("show", "none")
331
  fname = directive.get("filename", "")
332
  chart_name = directive.get("chart", "")
333
 
334
- # Interactive chart builders keyed by name
335
  chart_builders = {
336
- "sales": build_sales_chart,
337
- "sentiment": build_sentiment_chart,
338
- "top_sellers": build_top_sellers_chart,
 
339
  }
340
 
341
  if chart_name and chart_name in chart_builders:
342
  chart_out = chart_builders[chart_name]()
343
  elif show == "figure" and fname:
344
- # Fallback: try to match filename to a chart builder
345
- if "sales_trend" in fname:
346
- chart_out = build_sales_chart()
347
- elif "sentiment" in fname:
348
- chart_out = build_sentiment_chart()
349
- elif "arima" in fname or "forecast" in fname:
350
- chart_out = build_sales_chart() # closest interactive equivalent
351
- else:
352
- chart_out = _empty_chart(f"No interactive chart for {fname}")
353
 
354
  if show == "table" and fname:
355
  fp = PY_TAB_DIR / fname
@@ -365,74 +341,66 @@ def ai_chat(user_msg: str, history: list):
365
 
366
  return new_history, "", chart_out, tab_out
367
 
368
-
369
- def _keyword_fallback(msg: str, idx: Dict, kpis: Dict) -> Tuple[str, Dict]:
370
- """Simple keyword matcher when LLM is unavailable."""
371
  msg_lower = msg.lower()
372
 
373
  if not idx["python"]["figures"] and not idx["python"]["tables"]:
374
  return (
375
- "No artifacts found yet. Please run the pipeline first (Tab 1), "
376
- "then come back here to explore the results.",
377
  {"show": "none"},
378
  )
379
 
380
  kpi_text = ""
381
  if kpis:
382
- total = kpis.get("total_units_sold", 0)
383
  kpi_text = (
384
- f"Quick summary: **{kpis.get('n_titles', '?')}** book titles across "
385
- f"**{kpis.get('n_months', '?')}** months, with **{total:,.0f}** total units sold."
 
386
  )
387
 
388
- if any(w in msg_lower for w in ["trend", "sales trend", "monthly sale"]):
389
  return (
390
- f"Here are the sales trends. {kpi_text}",
391
- {"show": "figure", "chart": "sales"},
392
  )
393
 
394
- if any(w in msg_lower for w in ["sentiment", "review", "positive", "negative"]):
395
  return (
396
- f"Here is the sentiment distribution across sampled book titles. {kpi_text}",
397
- {"show": "figure", "chart": "sentiment"},
398
  )
399
 
400
- if any(w in msg_lower for w in ["arima", "forecast", "predict"]):
401
  return (
402
- f"Here are the sales trends and forecasts. {kpi_text}",
403
- {"show": "figure", "chart": "sales"},
404
  )
405
 
406
- if any(w in msg_lower for w in ["top", "best sell", "popular", "rank"]):
407
  return (
408
- f"Here are the top-selling titles by units sold. {kpi_text}",
409
- {"show": "table", "scope": "python", "filename": "top_titles_by_units_sold.csv"},
410
  )
411
 
412
- if any(w in msg_lower for w in ["price", "pricing", "decision"]):
413
  return (
414
- f"Here are the pricing decisions. {kpi_text}",
415
- {"show": "table", "scope": "python", "filename": "pricing_decisions.csv"},
416
  )
417
 
418
- if any(w in msg_lower for w in ["dashboard", "overview", "summary", "kpi"]):
419
  return (
420
- f"Dashboard overview: {kpi_text}\n\nAsk me about sales trends, sentiment, forecasts, "
421
- "pricing, or top sellers to see specific visualizations.",
422
- {"show": "table", "scope": "python", "filename": "df_dashboard.csv"},
423
  )
424
 
425
- # Default
426
  return (
427
- f"I can show you various analyses. {kpi_text}\n\n"
428
- "Try asking about: **sales trends**, **sentiment**, **ARIMA forecasts**, "
429
- "**pricing decisions**, **top sellers**, or **dashboard overview**.",
430
  {"show": "none"},
431
  )
432
 
433
-
434
  # =========================================================
435
- # KPI CARDS (BubbleBusters style)
436
  # =========================================================
437
 
438
  def render_kpi_cards() -> str:
@@ -443,11 +411,9 @@ def render_kpi_cards() -> str:
443
  'border-radius:20px;padding:28px;text-align:center;'
444
  'border:1.5px solid rgba(255,255,255,.7);'
445
  'box-shadow:0 8px 32px rgba(124,92,191,.08);">'
446
- '<div style="font-size:36px;margin-bottom:10px;">📊</div>'
447
- '<div style="color:#a48de8;font-size:14px;'
448
- 'font-weight:800;margin-bottom:6px;">No data yet</div>'
449
- '<div style="color:#9d8fc4;font-size:12px;">'
450
- 'Run the pipeline to populate these cards.</div>'
451
  '</div>'
452
  )
453
 
@@ -465,16 +431,13 @@ def render_kpi_cards() -> str:
465
  </div>"""
466
 
467
  kpi_config = [
468
- ("n_titles", "📚", "Book Titles", "#a48de8"),
469
- ("n_months", "📅", "Time Periods", "#7aa6f8"),
470
- ("total_units_sold", "📦", "Units Sold", "#6ee7c7"),
471
- ("total_revenue", "💰", "Revenue", "#3dcba8"),
472
  ]
473
 
474
- html = (
475
- '<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(140px,1fr));'
476
- 'gap:12px;margin-bottom:24px;">'
477
- )
478
  for key, icon, label, colour in kpi_config:
479
  val = kpis.get(key)
480
  if val is None:
@@ -482,24 +445,14 @@ def render_kpi_cards() -> str:
482
  if isinstance(val, (int, float)) and val > 100:
483
  val = f"{val:,.0f}"
484
  html += card(icon, label, str(val), colour)
485
- # Extra KPIs not in config
486
- known = {k for k, *_ in kpi_config}
487
- for key, val in kpis.items():
488
- if key not in known:
489
- label = key.replace("_", " ").title()
490
- if isinstance(val, (int, float)) and val > 100:
491
- val = f"{val:,.0f}"
492
- html += card("📈", label, str(val), "#8fa8f8")
493
  html += "</div>"
494
  return html
495
 
496
-
497
  # =========================================================
498
- # INTERACTIVE PLOTLY CHARTS (BubbleBusters style)
499
  # =========================================================
500
 
501
- CHART_PALETTE = ["#7c5cbf", "#2ec4a0", "#e8537a", "#e8a230", "#5e8fef",
502
- "#c45ea8", "#3dbacc", "#a0522d", "#6aaa3a", "#d46060"]
503
 
504
  def _styled_layout(**kwargs) -> dict:
505
  defaults = dict(
@@ -508,104 +461,114 @@ def _styled_layout(**kwargs) -> dict:
508
  plot_bgcolor="rgba(255,255,255,0.98)",
509
  font=dict(family="system-ui, sans-serif", color="#2d1f4e", size=12),
510
  margin=dict(l=60, r=20, t=70, b=70),
511
- legend=dict(
512
- orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1,
513
- bgcolor="rgba(255,255,255,0.92)",
514
- bordercolor="rgba(124,92,191,0.35)", borderwidth=1,
515
- ),
516
  title=dict(font=dict(size=15, color="#4b2d8a")),
517
  )
518
  defaults.update(kwargs)
519
  return defaults
520
 
521
-
522
  def _empty_chart(title: str) -> go.Figure:
523
  fig = go.Figure()
524
  fig.update_layout(
525
- title=title, height=420, template="plotly_white",
 
 
526
  paper_bgcolor="rgba(255,255,255,0.95)",
527
- annotations=[dict(text="Run the pipeline to generate data",
528
- x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False,
529
- font=dict(size=14, color="rgba(124,92,191,0.5)"))],
 
 
 
530
  )
531
  return fig
532
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
533
 
534
- def build_sales_chart() -> go.Figure:
535
- path = PY_TAB_DIR / "df_dashboard.csv"
536
  if not path.exists():
537
- return _empty_chart("Sales Trends — run the pipeline first")
538
  df = pd.read_csv(path)
539
- date_col = next((c for c in df.columns if "month" in c.lower() or "date" in c.lower()), None)
540
- val_cols = [c for c in df.columns if c != date_col and df[c].dtype in ("float64", "int64")]
541
- if not date_col or not val_cols:
542
- return _empty_chart("Could not auto-detect columns in df_dashboard.csv")
543
- df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
544
- fig = go.Figure()
545
- for i, col in enumerate(val_cols):
546
- fig.add_trace(go.Scatter(
547
- x=df[date_col], y=df[col], name=col.replace("_", " ").title(),
548
- mode="lines+markers", line=dict(color=CHART_PALETTE[i % len(CHART_PALETTE)], width=2),
549
- marker=dict(size=4),
550
- hovertemplate=f"<b>{col.replace('_',' ').title()}</b><br>%{{x|%b %Y}}: %{{y:,.0f}}<extra></extra>",
551
- ))
552
- fig.update_layout(**_styled_layout(height=450, hovermode="x unified",
553
- title=dict(text="Monthly Overview")))
554
- fig.update_xaxes(gridcolor="rgba(124,92,191,0.15)", showgrid=True)
555
- fig.update_yaxes(gridcolor="rgba(124,92,191,0.15)", showgrid=True)
556
- return fig
557
 
 
 
 
 
 
 
 
 
 
 
 
558
 
559
- def build_sentiment_chart() -> go.Figure:
560
- path = PY_TAB_DIR / "sentiment_counts_sampled.csv"
561
  if not path.exists():
562
- return _empty_chart("Sentiment Distribution — run the pipeline first")
563
  df = pd.read_csv(path)
564
- title_col = df.columns[0]
565
- sent_cols = [c for c in ["negative", "neutral", "positive"] if c in df.columns]
566
- if not sent_cols:
567
- return _empty_chart("No sentiment columns found in CSV")
568
- colors = {"negative": "#e8537a", "neutral": "#5e8fef", "positive": "#2ec4a0"}
569
  fig = go.Figure()
570
- for col in sent_cols:
 
571
  fig.add_trace(go.Bar(
572
- name=col.title(), y=df[title_col], x=df[col],
573
- orientation="h", marker_color=colors.get(col, "#888"),
574
- hovertemplate=f"<b>{col.title()}</b>: %{{x}}<extra></extra>",
575
  ))
 
576
  fig.update_layout(**_styled_layout(
577
- height=max(400, len(df) * 28), barmode="stack",
578
- title=dict(text="Sentiment Distribution by Book"),
 
579
  ))
580
- fig.update_xaxes(title="Number of Reviews")
581
- fig.update_yaxes(autorange="reversed")
582
  return fig
583
 
584
-
585
- def build_top_sellers_chart() -> go.Figure:
586
- path = PY_TAB_DIR / "top_titles_by_units_sold.csv"
587
  if not path.exists():
588
- return _empty_chart("Top Sellers — run the pipeline first")
589
- df = pd.read_csv(path).head(15)
590
- title_col = next((c for c in df.columns if "title" in c.lower()), df.columns[0])
591
- val_col = next((c for c in df.columns if "unit" in c.lower() or "sold" in c.lower()), df.columns[-1])
592
- fig = go.Figure(go.Bar(
593
- y=df[title_col], x=df[val_col], orientation="h",
594
- marker=dict(color=df[val_col], colorscale=[[0, "#c5b4f0"], [1, "#7c5cbf"]]),
595
- hovertemplate="<b>%{y}</b><br>Units: %{x:,.0f}<extra></extra>",
596
- ))
 
 
 
597
  fig.update_layout(**_styled_layout(
598
- height=max(400, len(df) * 30),
599
- title=dict(text="Top Selling Titles"), showlegend=False,
 
600
  ))
601
- fig.update_yaxes(autorange="reversed")
602
- fig.update_xaxes(title="Total Units Sold")
603
  return fig
604
 
605
-
606
  def refresh_dashboard():
607
- return render_kpi_cards(), build_sales_chart(), build_sentiment_chart(), build_top_sellers_chart()
608
-
609
 
610
  # =========================================================
611
  # UI
@@ -617,21 +580,15 @@ def load_css() -> str:
617
  css_path = BASE_DIR / "style.css"
618
  return css_path.read_text(encoding="utf-8") if css_path.exists() else ""
619
 
620
-
621
- with gr.Blocks(title="AIBDM 2026 Workshop App") as demo:
622
 
623
  gr.Markdown(
624
  "# SE21 App Template\n"
625
- "*This is an app template for SE21 students*",
626
  elem_id="escp_title",
627
  )
628
 
629
- # ===========================================================
630
- # TAB 1 -- Pipeline Runner
631
- # ===========================================================
632
  with gr.Tab("Pipeline Runner"):
633
- gr.Markdown()
634
-
635
  with gr.Row():
636
  with gr.Column(scale=1):
637
  btn_nb1 = gr.Button("Step 1: Data Creation", variant="secondary")
@@ -641,48 +598,27 @@ with gr.Blocks(title="AIBDM 2026 Workshop App") as demo:
641
  with gr.Row():
642
  btn_all = gr.Button("Run Full Pipeline (Both Steps)", variant="primary")
643
 
644
- run_log = gr.Textbox(
645
- label="Execution Log",
646
- lines=18,
647
- max_lines=30,
648
- interactive=False,
649
- )
650
 
651
  btn_nb1.click(run_datacreation, outputs=[run_log])
652
  btn_nb2.click(run_pythonanalysis, outputs=[run_log])
653
  btn_all.click(run_full_pipeline, outputs=[run_log])
654
 
655
- # ===========================================================
656
- # TAB 2 -- Dashboard (KPIs + Interactive Charts + Gallery)
657
- # ===========================================================
658
  with gr.Tab("Dashboard"):
659
  kpi_html = gr.HTML(value=render_kpi_cards)
660
-
661
  refresh_btn = gr.Button("Refresh Dashboard", variant="primary")
662
 
663
  gr.Markdown("#### Interactive Charts")
664
- chart_sales = gr.Plot(label="Monthly Overview")
665
- chart_sentiment = gr.Plot(label="Sentiment Distribution")
666
- chart_top = gr.Plot(label="Top Sellers")
667
 
668
  gr.Markdown("#### Static Figures (from notebooks)")
669
- gallery = gr.Gallery(
670
- label="Generated Figures",
671
- columns=2,
672
- height=480,
673
- object_fit="contain",
674
- )
675
 
676
  gr.Markdown("#### Data Tables")
677
- table_dropdown = gr.Dropdown(
678
- label="Select a table to view",
679
- choices=[],
680
- interactive=True,
681
- )
682
- table_display = gr.Dataframe(
683
- label="Table Preview",
684
- interactive=False,
685
- )
686
 
687
  def _on_refresh():
688
  kpi, c1, c2, c3 = refresh_dashboard()
@@ -691,62 +627,44 @@ with gr.Blocks(title="AIBDM 2026 Workshop App") as demo:
691
 
692
  refresh_btn.click(
693
  _on_refresh,
694
- outputs=[kpi_html, chart_sales, chart_sentiment, chart_top,
695
- gallery, table_dropdown, table_display],
696
- )
697
- table_dropdown.change(
698
- on_table_select,
699
- inputs=[table_dropdown],
700
- outputs=[table_display],
701
  )
 
702
 
703
- # ===========================================================
704
- # TAB 3 -- AI Dashboard
705
- # ===========================================================
706
  with gr.Tab('"AI" Dashboard'):
707
  _ai_status = (
708
  "Connected to your **n8n workflow**." if N8N_WEBHOOK_URL
709
  else "**LLM active.**" if LLM_ENABLED
710
- else "Using **keyword matching**. Upgrade options: "
711
- "set `N8N_WEBHOOK_URL` to connect your n8n workflow, "
712
- "or set `HF_API_KEY` for direct LLM access."
713
  )
714
  gr.Markdown(
715
  "### Ask questions, get interactive visualisations\n\n"
716
- f"Type a question and the system will pick the right interactive chart or table. {_ai_status}"
717
  )
718
 
719
  with gr.Row(equal_height=True):
720
  with gr.Column(scale=1):
721
- chatbot = gr.Chatbot(
722
- label="Conversation",
723
- height=380,
724
- )
725
  user_input = gr.Textbox(
726
  label="Ask about your data",
727
- placeholder="e.g. Show me sales trends / What are the top sellers? / Sentiment analysis",
728
  lines=1,
729
  )
730
  gr.Examples(
731
  examples=[
732
- "Show me the sales trends",
733
- "What does the sentiment look like?",
734
- "Which titles sell the most?",
735
- "Show the ARIMA forecasts",
736
- "What are the pricing decisions?",
737
  "Give me a dashboard overview",
738
  ],
739
  inputs=user_input,
740
  )
741
 
742
  with gr.Column(scale=1):
743
- ai_figure = gr.Plot(
744
- label="Interactive Chart",
745
- )
746
- ai_table = gr.Dataframe(
747
- label="Data Table",
748
- interactive=False,
749
- )
750
 
751
  user_input.submit(
752
  ai_chat,
@@ -754,5 +672,4 @@ with gr.Blocks(title="AIBDM 2026 Workshop App") as demo:
754
  outputs=[chatbot, user_input, ai_figure, ai_table],
755
  )
756
 
757
-
758
  demo.launch(css=load_css(), allowed_paths=[str(BASE_DIR)])
 
58
  def stamp():
59
  return time.strftime("%Y%m%d-%H%M%S")
60
 
 
 
 
61
  def _ls(dir_path: Path, exts: Tuple[str, ...]) -> List[str]:
62
  if not dir_path.is_dir():
63
  return []
 
99
  )
100
  return f"Executed {nb_name}"
101
 
 
102
  def run_datacreation() -> str:
103
  try:
104
  log = run_notebook(NB1)
 
107
  except Exception as e:
108
  return f"FAILED {e}\n\n{traceback.format_exc()[-2000:]}"
109
 
 
110
  def run_pythonanalysis() -> str:
111
  try:
112
  log = run_notebook(NB2)
 
121
  except Exception as e:
122
  return f"FAILED {e}\n\n{traceback.format_exc()[-2000:]}"
123
 
 
124
  def run_full_pipeline() -> str:
125
  logs = []
126
  logs.append("=" * 50)
127
+ logs.append("STEP 1/2: Data Creation")
128
  logs.append("=" * 50)
129
  logs.append(run_datacreation())
130
  logs.append("")
131
  logs.append("=" * 50)
132
+ logs.append("STEP 2/2: Python Analysis")
133
  logs.append("=" * 50)
134
  logs.append(run_pythonanalysis())
135
  return "\n".join(logs)
136
 
 
137
  # =========================================================
138
  # GALLERY LOADERS
139
  # =========================================================
140
 
141
  def _load_all_figures() -> List[Tuple[str, str]]:
 
142
  items = []
143
  for p in sorted(PY_FIG_DIR.glob("*.png")):
144
+ items.append((str(p), p.stem.replace("_", " ").title()))
145
  return items
146
 
 
147
  def _load_table_safe(path: Path) -> pd.DataFrame:
148
  try:
149
  if path.suffix == ".json":
 
155
  except Exception as e:
156
  return pd.DataFrame([{"error": str(e)}])
157
 
 
158
  def refresh_gallery():
 
159
  figures = _load_all_figures()
160
  idx = artifacts_index()
161
 
 
171
  default_df,
172
  )
173
 
 
174
  def on_table_select(choice: str):
175
  if not choice:
176
  return pd.DataFrame([{"hint": "Select a table above."}])
 
179
  return pd.DataFrame([{"error": f"File not found: {choice}"}])
180
  return _load_table_safe(path)
181
 
 
182
  # =========================================================
183
  # KPI LOADER
184
  # =========================================================
185
 
186
  def load_kpis() -> Dict[str, Any]:
187
+ candidate = PY_TAB_DIR / "kpis.json"
188
+ if candidate.exists():
189
+ try:
190
+ return _read_json(candidate)
191
+ except Exception:
192
+ pass
193
  return {}
194
 
 
195
  # =========================================================
196
+ # AI DASHBOARD
197
  # =========================================================
198
 
199
+ DASHBOARD_SYSTEM = """You are an AI dashboard assistant for a food and nutrition analytics app.
200
+ The user asks questions about food products, calories, sugar, fat, protein, fiber, salt,
201
+ Nutri-Score, health labels, and nutrition trends.
202
+
203
+ You have access to pre-computed artifacts from a Python analysis pipeline.
204
 
205
  AVAILABLE ARTIFACTS (only reference ones that exist):
206
  {artifacts_json}
207
 
208
+ KPI SUMMARY:
209
+ {kpis_json}
210
 
211
  YOUR JOB:
212
+ 1. Answer the user's question conversationally using the KPIs and available artifacts.
213
+ 2. At the END of your response, output a JSON block fenced with ```json ... ```.
214
+ 3. The JSON must have this shape:
215
  {{"show": "figure"|"table"|"none", "scope": "python", "filename": "..."}}
216
 
217
+ Rules:
218
+ - If the user asks about calories, energy, or nutrition overview, prefer food_dashboard.csv or calorie charts.
219
+ - If the user asks about sugar, fat, salt, protein, or fiber by health label, show the relevant table or figure.
220
+ - If the user asks about health label distribution, show the health label figure or table.
221
+ - If the user asks about Nutri-Score versus health label, show the comparison figure or table.
222
+ - If the user asks for recommendations, show recommendations.csv.
223
+ - If no artifact is relevant, return show = none.
224
+ - Keep your answer concise.
 
 
 
225
  """
226
 
227
  JSON_BLOCK_RE = re.compile(r"```json\s*(\{.*?\})\s*```", re.DOTALL)
228
  FALLBACK_JSON_RE = re.compile(r"\{[^{}]*\"show\"[^{}]*\}", re.DOTALL)
229
 
 
230
  def _parse_display_directive(text: str) -> Dict[str, str]:
231
  m = JSON_BLOCK_RE.search(text)
232
  if m:
 
242
  pass
243
  return {"show": "none"}
244
 
 
245
  def _clean_response(text: str) -> str:
 
246
  return JSON_BLOCK_RE.sub("", text).strip()
247
 
248
+ def _n8n_call(msg: str):
 
 
249
  import requests as req
250
  try:
251
  resp = req.post(N8N_WEBHOOK_URL, json={"question": msg}, timeout=20)
 
258
  except Exception as e:
259
  return f"n8n error: {e}. Falling back to keyword matching.", None
260
 
 
261
  def ai_chat(user_msg: str, history: list):
 
262
  if not user_msg or not user_msg.strip():
263
  return history, "", None, None
264
 
265
  idx = artifacts_index()
266
  kpis = load_kpis()
267
 
 
268
  if N8N_WEBHOOK_URL:
269
  reply, directive = _n8n_call(user_msg)
270
  if directive is None:
 
302
  reply_fb, directive = _keyword_fallback(user_msg, idx, kpis)
303
  reply += "\n\n" + reply_fb
304
 
 
305
  chart_out = None
306
  tab_out = None
307
  show = directive.get("show", "none")
308
  fname = directive.get("filename", "")
309
  chart_name = directive.get("chart", "")
310
 
 
311
  chart_builders = {
312
+ "calories": build_calories_chart,
313
+ "health_label": build_health_label_chart,
314
+ "nutriscore": build_nutriscore_chart,
315
+ "macros": build_macros_chart,
316
  }
317
 
318
  if chart_name and chart_name in chart_builders:
319
  chart_out = chart_builders[chart_name]()
320
  elif show == "figure" and fname:
321
+ if "calorie" in fname or "energy" in fname:
322
+ chart_out = build_calories_chart()
323
+ elif "health_label" in fname:
324
+ chart_out = build_health_label_chart()
325
+ elif "nutriscore" in fname:
326
+ chart_out = build_nutriscore_chart()
327
+ elif "macro" in fname or "nutrition" in fname:
328
+ chart_out = build_macros_chart()
 
329
 
330
  if show == "table" and fname:
331
  fp = PY_TAB_DIR / fname
 
341
 
342
  return new_history, "", chart_out, tab_out
343
 
344
+ def _keyword_fallback(msg: str, idx: Dict, kpis: Dict):
 
 
345
  msg_lower = msg.lower()
346
 
347
  if not idx["python"]["figures"] and not idx["python"]["tables"]:
348
  return (
349
+ "No artifacts found yet. Please run the pipeline first, then come back here.",
 
350
  {"show": "none"},
351
  )
352
 
353
  kpi_text = ""
354
  if kpis:
 
355
  kpi_text = (
356
+ f"Quick summary: **{kpis.get('n_products', '?')}** food products, "
357
+ f"average calories **{kpis.get('avg_calories_per_100g', '?')} kcal/100g**, "
358
+ f"and **{kpis.get('healthy_count', '?')}** products labelled healthy."
359
  )
360
 
361
+ if any(w in msg_lower for w in ["calorie", "calories", "energy"]):
362
  return (
363
+ f"Here is the calorie overview for your food dataset. {kpi_text}",
364
+ {"show": "figure", "chart": "calories"},
365
  )
366
 
367
+ if any(w in msg_lower for w in ["health label", "healthy", "unhealthy", "moderate"]):
368
  return (
369
+ f"Here is the health label distribution. {kpi_text}",
370
+ {"show": "figure", "chart": "health_label"},
371
  )
372
 
373
+ if any(w in msg_lower for w in ["nutriscore", "nutri-score", "grade"]):
374
  return (
375
+ f"Here is the Nutri-Score overview. {kpi_text}",
376
+ {"show": "figure", "chart": "nutriscore"},
377
  )
378
 
379
+ if any(w in msg_lower for w in ["protein", "fat", "sugar", "salt", "fiber", "nutrition", "macros"]):
380
  return (
381
+ f"Here is the nutrition breakdown across health labels. {kpi_text}",
382
+ {"show": "figure", "chart": "macros"},
383
  )
384
 
385
+ if any(w in msg_lower for w in ["recommendation", "recommend", "action"]):
386
  return (
387
+ f"Here are the recommendation actions for the products. {kpi_text}",
388
+ {"show": "table", "scope": "python", "filename": "recommendations.csv"},
389
  )
390
 
391
+ if any(w in msg_lower for w in ["overview", "dashboard", "summary", "kpi"]):
392
  return (
393
+ f"Dashboard overview: {kpi_text}",
394
+ {"show": "table", "scope": "python", "filename": "food_dashboard.csv"},
 
395
  )
396
 
 
397
  return (
398
+ f"I can help with calories, protein, fat, sugar, salt, fiber, Nutri-Score, health labels, and recommendations. {kpi_text}",
 
 
399
  {"show": "none"},
400
  )
401
 
 
402
  # =========================================================
403
+ # KPI CARDS
404
  # =========================================================
405
 
406
  def render_kpi_cards() -> str:
 
411
  'border-radius:20px;padding:28px;text-align:center;'
412
  'border:1.5px solid rgba(255,255,255,.7);'
413
  'box-shadow:0 8px 32px rgba(124,92,191,.08);">'
414
+ '<div style="font-size:36px;margin-bottom:10px;">🍽️</div>'
415
+ '<div style="color:#a48de8;font-size:14px;font-weight:800;margin-bottom:6px;">No data yet</div>'
416
+ '<div style="color:#9d8fc4;font-size:12px;">Run the pipeline to populate these cards.</div>'
 
 
417
  '</div>'
418
  )
419
 
 
431
  </div>"""
432
 
433
  kpi_config = [
434
+ ("n_products", "🍎", "Products", "#a48de8"),
435
+ ("avg_calories_per_100g", "🔥", "Avg Calories", "#7aa6f8"),
436
+ ("healthy_count", "🥗", "Healthy", "#6ee7c7"),
437
+ ("unhealthy_count", "⚠️", "Unhealthy", "#3dcba8"),
438
  ]
439
 
440
+ html = '<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(140px,1fr));gap:12px;margin-bottom:24px;">'
 
 
 
441
  for key, icon, label, colour in kpi_config:
442
  val = kpis.get(key)
443
  if val is None:
 
445
  if isinstance(val, (int, float)) and val > 100:
446
  val = f"{val:,.0f}"
447
  html += card(icon, label, str(val), colour)
 
 
 
 
 
 
 
 
448
  html += "</div>"
449
  return html
450
 
 
451
  # =========================================================
452
+ # CHARTS
453
  # =========================================================
454
 
455
+ CHART_PALETTE = ["#7c5cbf", "#2ec4a0", "#e8537a", "#e8a230", "#5e8fef"]
 
456
 
457
  def _styled_layout(**kwargs) -> dict:
458
  defaults = dict(
 
461
  plot_bgcolor="rgba(255,255,255,0.98)",
462
  font=dict(family="system-ui, sans-serif", color="#2d1f4e", size=12),
463
  margin=dict(l=60, r=20, t=70, b=70),
 
 
 
 
 
464
  title=dict(font=dict(size=15, color="#4b2d8a")),
465
  )
466
  defaults.update(kwargs)
467
  return defaults
468
 
 
469
  def _empty_chart(title: str) -> go.Figure:
470
  fig = go.Figure()
471
  fig.update_layout(
472
+ title=title,
473
+ height=420,
474
+ template="plotly_white",
475
  paper_bgcolor="rgba(255,255,255,0.95)",
476
+ annotations=[dict(
477
+ text="Run the pipeline to generate data",
478
+ x=0.5, y=0.5, xref="paper", yref="paper",
479
+ showarrow=False,
480
+ font=dict(size=14, color="rgba(124,92,191,0.5)")
481
+ )],
482
  )
483
  return fig
484
 
485
def build_calories_chart() -> go.Figure:
    """Build a horizontal bar chart of the 15 highest-calorie products.

    Reads ``food_dashboard.csv`` from ``PY_TAB_DIR`` and ranks rows by the
    ``energy-kcal_100g`` column, labelling bars with ``product_name``.

    Returns:
        The bar chart, or the ``_empty_chart`` placeholder when the file is
        missing, empty, has no rows, or lacks one of the two columns above.
    """
    placeholder_title = "Calories Overview — run the pipeline first"
    required = {"energy-kcal_100g", "product_name"}

    path = PY_TAB_DIR / "food_dashboard.csv"
    if not path.exists():
        return _empty_chart(placeholder_title)
    try:
        df = pd.read_csv(path)
    except pd.errors.EmptyDataError:
        # A zero-byte CSV is treated like a pipeline that has not run yet.
        return _empty_chart(placeholder_title)
    if df.empty or not required.issubset(df.columns):
        return _empty_chart(placeholder_title)

    top = df.sort_values("energy-kcal_100g", ascending=False).head(15)

    fig = go.Figure(go.Bar(
        x=top["energy-kcal_100g"],
        y=top["product_name"],
        orientation="h",
    ))
    fig.update_layout(**_styled_layout(
        height=500,
        title=dict(text="Top 15 Products by Calories (per 100g)"),
    ))
    # Reverse the category axis so the highest-calorie product is on top.
    fig.update_yaxes(autorange="reversed")
    fig.update_xaxes(title="Calories per 100g")
    return fig
503
 
504
def build_health_label_chart() -> go.Figure:
    """Build a bar chart of product counts per health label.

    Reads ``health_label_counts.csv`` from ``PY_TAB_DIR`` and plots the
    ``count`` column against the ``health_label`` column.

    Returns:
        The bar chart, or the ``_empty_chart`` placeholder when the file is
        missing, empty, has no rows, or lacks one of the two columns above.
    """
    placeholder_title = "Health Label Distribution — run the pipeline first"
    required = {"health_label", "count"}

    path = PY_TAB_DIR / "health_label_counts.csv"
    if not path.exists():
        return _empty_chart(placeholder_title)
    try:
        df = pd.read_csv(path)
    except pd.errors.EmptyDataError:
        # A zero-byte CSV is treated like a pipeline that has not run yet.
        return _empty_chart(placeholder_title)
    if df.empty or not required.issubset(df.columns):
        return _empty_chart(placeholder_title)

    fig = go.Figure(go.Bar(
        x=df["health_label"],
        y=df["count"],
    ))
    fig.update_layout(**_styled_layout(
        height=420,
        title=dict(text="Health Label Distribution"),
    ))
    fig.update_xaxes(title="Health Label")
    fig.update_yaxes(title="Count")
    return fig
521
 
522
def build_nutriscore_chart() -> go.Figure:
    """Build a stacked bar chart of counts per Nutri-Score grade and health label.

    Reads ``nutriscore_vs_health.csv`` from ``PY_TAB_DIR``. Each distinct
    ``health_label`` value becomes one stacked trace whose bars plot
    ``count`` against ``nutriscore_grade``.

    Returns:
        The stacked chart, or the ``_empty_chart`` placeholder when the file
        is missing, empty, has no rows, or lacks one of the three columns
        above.
    """
    placeholder_title = "Nutri-Score vs Health Label — run the pipeline first"
    required = {"nutriscore_grade", "health_label", "count"}

    path = PY_TAB_DIR / "nutriscore_vs_health.csv"
    if not path.exists():
        return _empty_chart(placeholder_title)
    try:
        df = pd.read_csv(path)
    except pd.errors.EmptyDataError:
        # A zero-byte CSV is treated like a pipeline that has not run yet.
        return _empty_chart(placeholder_title)
    if df.empty or not required.issubset(df.columns):
        return _empty_chart(placeholder_title)

    fig = go.Figure()
    # One trace per health label, in order of first appearance in the file.
    for label in df["health_label"].unique():
        rows = df[df["health_label"] == label]
        fig.add_trace(go.Bar(
            x=rows["nutriscore_grade"],
            y=rows["count"],
            name=label,
        ))

    fig.update_layout(**_styled_layout(
        height=450,
        barmode="stack",
        title=dict(text="Nutri-Score vs Health Label"),
    ))
    fig.update_xaxes(title="Nutri-Score Grade")
    fig.update_yaxes(title="Count")
    return fig
545
 
546
def build_macros_chart() -> go.Figure:
    """Build a grouped bar chart of nutrient values per health label.

    Reads ``nutrition_by_health_label.csv`` from ``PY_TAB_DIR``. Each of the
    known nutrient columns that is present in the file becomes one trace
    plotted against ``health_label``; absent nutrient columns are skipped.

    Returns:
        The grouped chart, or the ``_empty_chart`` placeholder when the file
        is missing, empty, has no rows, or lacks the ``health_label`` column.
    """
    placeholder_title = "Nutrition by Health Label — run the pipeline first"
    nutrient_cols = (
        "sugars_100g",
        "fat_100g",
        "salt_100g",
        "proteins_100g",
        "fiber_100g",
    )

    path = PY_TAB_DIR / "nutrition_by_health_label.csv"
    if not path.exists():
        return _empty_chart(placeholder_title)
    try:
        df = pd.read_csv(path)
    except pd.errors.EmptyDataError:
        # A zero-byte CSV is treated like a pipeline that has not run yet.
        return _empty_chart(placeholder_title)
    if df.empty or "health_label" not in df.columns:
        return _empty_chart(placeholder_title)

    fig = go.Figure()
    for col in nutrient_cols:
        if col not in df.columns:
            continue
        fig.add_trace(go.Bar(
            x=df["health_label"],
            y=df[col],
            # "sugars_100g" -> "Sugars" for the legend entry.
            name=col.replace("_100g", "").replace("_", " ").title(),
        ))

    fig.update_layout(**_styled_layout(
        height=450,
        barmode="group",
        title=dict(text="Nutrition by Health Label"),
    ))
    fig.update_xaxes(title="Health Label")
    fig.update_yaxes(title="Average per 100g")
    return fig
569
 
 
570
  def refresh_dashboard():
571
+ return render_kpi_cards(), build_calories_chart(), build_health_label_chart(), build_nutriscore_chart()
 
572
 
573
  # =========================================================
574
  # UI
 
580
  css_path = BASE_DIR / "style.css"
581
  return css_path.read_text(encoding="utf-8") if css_path.exists() else ""
582
 
583
+ with gr.Blocks(title="Food Nutrition Dashboard") as demo:
 
584
 
585
  gr.Markdown(
586
  "# SE21 App Template\n"
587
+ "*Food and nutrition analytics dashboard*",
588
  elem_id="escp_title",
589
  )
590
 
 
 
 
591
  with gr.Tab("Pipeline Runner"):
 
 
592
  with gr.Row():
593
  with gr.Column(scale=1):
594
  btn_nb1 = gr.Button("Step 1: Data Creation", variant="secondary")
 
598
  with gr.Row():
599
  btn_all = gr.Button("Run Full Pipeline (Both Steps)", variant="primary")
600
 
601
+ run_log = gr.Textbox(label="Execution Log", lines=18, max_lines=30, interactive=False)
 
 
 
 
 
602
 
603
  btn_nb1.click(run_datacreation, outputs=[run_log])
604
  btn_nb2.click(run_pythonanalysis, outputs=[run_log])
605
  btn_all.click(run_full_pipeline, outputs=[run_log])
606
 
 
 
 
607
  with gr.Tab("Dashboard"):
608
  kpi_html = gr.HTML(value=render_kpi_cards)
 
609
  refresh_btn = gr.Button("Refresh Dashboard", variant="primary")
610
 
611
  gr.Markdown("#### Interactive Charts")
612
+ chart_calories = gr.Plot(label="Calories Overview")
613
+ chart_health = gr.Plot(label="Health Label Distribution")
614
+ chart_nutri = gr.Plot(label="Nutri-Score Comparison")
615
 
616
  gr.Markdown("#### Static Figures (from notebooks)")
617
+ gallery = gr.Gallery(label="Generated Figures", columns=2, height=480, object_fit="contain")
 
 
 
 
 
618
 
619
  gr.Markdown("#### Data Tables")
620
+ table_dropdown = gr.Dropdown(label="Select a table to view", choices=[], interactive=True)
621
+ table_display = gr.Dataframe(label="Table Preview", interactive=False)
 
 
 
 
 
 
 
622
 
623
  def _on_refresh():
624
  kpi, c1, c2, c3 = refresh_dashboard()
 
627
 
628
  refresh_btn.click(
629
  _on_refresh,
630
+ outputs=[kpi_html, chart_calories, chart_health, chart_nutri, gallery, table_dropdown, table_display],
 
 
 
 
 
 
631
  )
632
+ table_dropdown.change(on_table_select, inputs=[table_dropdown], outputs=[table_display])
633
 
 
 
 
634
  with gr.Tab('"AI" Dashboard'):
635
  _ai_status = (
636
  "Connected to your **n8n workflow**." if N8N_WEBHOOK_URL
637
  else "**LLM active.**" if LLM_ENABLED
638
+ else "Using **keyword matching**."
 
 
639
  )
640
  gr.Markdown(
641
  "### Ask questions, get interactive visualisations\n\n"
642
+ f"Type a question and the system will pick the right chart or table. {_ai_status}"
643
  )
644
 
645
  with gr.Row(equal_height=True):
646
  with gr.Column(scale=1):
647
+ chatbot = gr.Chatbot(label="Conversation", height=380)
 
 
 
648
  user_input = gr.Textbox(
649
  label="Ask about your data",
650
+ placeholder="e.g. Show me calories / Which foods have the most protein? / Show health label distribution",
651
  lines=1,
652
  )
653
  gr.Examples(
654
  examples=[
655
+ "Show me calories",
656
+ "Which products are highest in sugar?",
657
+ "Show health label distribution",
658
+ "Compare Nutri-Score and health label",
659
+ "Show me protein and fat by health label",
660
  "Give me a dashboard overview",
661
  ],
662
  inputs=user_input,
663
  )
664
 
665
  with gr.Column(scale=1):
666
+ ai_figure = gr.Plot(label="Interactive Chart")
667
+ ai_table = gr.Dataframe(label="Data Table", interactive=False)
 
 
 
 
 
668
 
669
  user_input.submit(
670
  ai_chat,
 
672
  outputs=[chatbot, user_input, ai_figure, ai_table],
673
  )
674
 
 
675
  demo.launch(css=load_css(), allowed_paths=[str(BASE_DIR)])