Upload 4 files

#126
by amar7llis - opened
Files changed (4) hide show
  1. app.py +235 -693
  2. pythonanalysis.ipynb +955 -0
  3. requirements.txt +12 -17
  4. style.css +206 -256
app.py CHANGED
@@ -1,758 +1,300 @@
1
- import os
2
- import re
3
- import json
4
- import time
5
- import traceback
6
  from pathlib import Path
7
  from typing import Dict, Any, List, Tuple
8
-
9
  import pandas as pd
10
  import gradio as gr
11
  import papermill as pm
12
  import plotly.graph_objects as go
13
 
14
- # Optional LLM (HuggingFace Inference API)
15
  try:
16
  from huggingface_hub import InferenceClient
17
  except Exception:
18
  InferenceClient = None
19
 
20
- # =========================================================
21
- # CONFIG
22
- # =========================================================
23
-
24
  BASE_DIR = Path(__file__).resolve().parent
25
-
26
  NB1 = os.environ.get("NB1", "datacreation.ipynb").strip()
27
  NB2 = os.environ.get("NB2", "pythonanalysis.ipynb").strip()
28
-
29
- RUNS_DIR = BASE_DIR / "runs"
30
- ART_DIR = BASE_DIR / "artifacts"
31
  PY_FIG_DIR = ART_DIR / "py" / "figures"
32
  PY_TAB_DIR = ART_DIR / "py" / "tables"
33
-
34
  PAPERMILL_TIMEOUT = int(os.environ.get("PAPERMILL_TIMEOUT", "1800"))
35
- MAX_PREVIEW_ROWS = int(os.environ.get("MAX_FILE_PREVIEW_ROWS", "50"))
36
- MAX_LOG_CHARS = int(os.environ.get("MAX_LOG_CHARS", "8000"))
37
-
38
- HF_API_KEY = os.environ.get("HF_API_KEY", "").strip()
39
- MODEL_NAME = os.environ.get("MODEL_NAME", "deepseek-ai/DeepSeek-R1").strip()
40
- HF_PROVIDER = os.environ.get("HF_PROVIDER", "novita").strip()
41
- N8N_WEBHOOK_URL = os.environ.get("N8N_WEBHOOK_URL", "").strip()
42
-
43
  LLM_ENABLED = bool(HF_API_KEY) and InferenceClient is not None
44
- llm_client = (
45
- InferenceClient(provider=HF_PROVIDER, api_key=HF_API_KEY)
46
- if LLM_ENABLED
47
- else None
48
- )
49
-
50
- # =========================================================
51
- # HELPERS
52
- # =========================================================
53
 
54
  def ensure_dirs():
55
  for p in [RUNS_DIR, ART_DIR, PY_FIG_DIR, PY_TAB_DIR]:
56
  p.mkdir(parents=True, exist_ok=True)
57
 
58
- def stamp():
59
- return time.strftime("%Y%m%d-%H%M%S")
60
 
61
- def tail(text: str, n: int = MAX_LOG_CHARS) -> str:
62
- return (text or "")[-n:]
 
63
 
64
- def _ls(dir_path: Path, exts: Tuple[str, ...]) -> List[str]:
65
- if not dir_path.is_dir():
66
- return []
67
- return sorted(p.name for p in dir_path.iterdir() if p.is_file() and p.suffix.lower() in exts)
68
 
69
- def _read_csv(path: Path) -> pd.DataFrame:
70
- return pd.read_csv(path, nrows=MAX_PREVIEW_ROWS)
71
-
72
- def _read_json(path: Path):
73
- with path.open(encoding="utf-8") as f:
74
- return json.load(f)
75
-
76
- def artifacts_index() -> Dict[str, Any]:
77
- return {
78
- "python": {
79
- "figures": _ls(PY_FIG_DIR, (".png", ".jpg", ".jpeg")),
80
- "tables": _ls(PY_TAB_DIR, (".csv", ".json")),
81
- },
82
- }
83
 
84
- # =========================================================
85
- # PIPELINE RUNNERS
86
- # =========================================================
 
 
 
 
 
87
 
88
- def run_notebook(nb_name: str) -> str:
89
  ensure_dirs()
90
- nb_in = BASE_DIR / nb_name
91
- if not nb_in.exists():
92
- return f"ERROR: {nb_name} not found."
93
- nb_out = RUNS_DIR / f"run_{stamp()}_{nb_name}"
94
- pm.execute_notebook(
95
- input_path=str(nb_in),
96
- output_path=str(nb_out),
97
- cwd=str(BASE_DIR),
98
- log_output=True,
99
- progress_bar=False,
100
- request_save_on_cell_execute=True,
101
- execution_timeout=PAPERMILL_TIMEOUT,
102
- )
103
- return f"Executed {nb_name}"
104
-
105
-
106
- def run_datacreation() -> str:
107
  try:
108
  log = run_notebook(NB1)
109
  csvs = [f.name for f in BASE_DIR.glob("*.csv")]
110
- return f"OK {log}\n\nCSVs now in /app:\n" + "\n".join(f" - {c}" for c in sorted(csvs))
111
  except Exception as e:
112
  return f"FAILED {e}\n\n{traceback.format_exc()[-2000:]}"
113
 
114
-
115
- def run_pythonanalysis() -> str:
116
  try:
117
  log = run_notebook(NB2)
118
  idx = artifacts_index()
119
- figs = idx["python"]["figures"]
120
- tabs = idx["python"]["tables"]
121
- return (
122
- f"OK {log}\n\n"
123
- f"Figures: {', '.join(figs) or '(none)'}\n"
124
- f"Tables: {', '.join(tabs) or '(none)'}"
125
- )
126
  except Exception as e:
127
  return f"FAILED {e}\n\n{traceback.format_exc()[-2000:]}"
128
 
129
-
130
- def run_full_pipeline() -> str:
131
- logs = []
132
- logs.append("=" * 50)
133
- logs.append("STEP 1/2: Data Creation (web scraping + synthetic data)")
134
- logs.append("=" * 50)
135
- logs.append(run_datacreation())
136
- logs.append("")
137
- logs.append("=" * 50)
138
- logs.append("STEP 2/2: Python Analysis (sentiment, ARIMA, dashboard)")
139
- logs.append("=" * 50)
140
- logs.append(run_pythonanalysis())
141
- return "\n".join(logs)
142
-
143
-
144
- # =========================================================
145
- # GALLERY LOADERS
146
- # =========================================================
147
-
148
- def _load_all_figures() -> List[Tuple[str, str]]:
149
- """Return list of (filepath, caption) for Gallery."""
150
- items = []
151
- for p in sorted(PY_FIG_DIR.glob("*.png")):
152
- items.append((str(p), p.stem.replace('_', ' ').title()))
153
- return items
154
-
155
-
156
- def _load_table_safe(path: Path) -> pd.DataFrame:
157
  try:
158
- if path.suffix == ".json":
159
- obj = _read_json(path)
160
- if isinstance(obj, dict):
161
- return pd.DataFrame([obj])
162
- return pd.DataFrame(obj)
163
- return _read_csv(path)
164
- except Exception as e:
165
- return pd.DataFrame([{"error": str(e)}])
166
-
167
-
168
- def refresh_gallery():
169
- """Called when user clicks Refresh on Gallery tab."""
170
- figures = _load_all_figures()
171
- idx = artifacts_index()
172
-
173
- table_choices = list(idx["python"]["tables"])
174
-
175
- default_df = pd.DataFrame()
176
- if table_choices:
177
- default_df = _load_table_safe(PY_TAB_DIR / table_choices[0])
178
-
179
- return (
180
- figures if figures else [],
181
- gr.update(choices=table_choices, value=table_choices[0] if table_choices else None),
182
- default_df,
183
- )
184
-
185
-
186
- def on_table_select(choice: str):
187
- if not choice:
188
- return pd.DataFrame([{"hint": "Select a table above."}])
189
- path = PY_TAB_DIR / choice
190
- if not path.exists():
191
- return pd.DataFrame([{"error": f"File not found: {choice}"}])
192
- return _load_table_safe(path)
193
-
194
-
195
- # =========================================================
196
- # KPI LOADER
197
- # =========================================================
198
-
199
- def load_kpis() -> Dict[str, Any]:
200
- for candidate in [PY_TAB_DIR / "kpis.json", PY_FIG_DIR / "kpis.json"]:
201
- if candidate.exists():
202
- try:
203
- return _read_json(candidate)
204
- except Exception:
205
- pass
206
- return {}
207
-
208
-
209
- # =========================================================
210
- # AI DASHBOARD -- LLM picks what to display
211
- # =========================================================
212
-
213
- DASHBOARD_SYSTEM = """You are an AI dashboard assistant for a book-sales analytics app.
214
- The user asks questions or requests about their data. You have access to pre-computed
215
- artifacts from a Python analysis pipeline.
216
-
217
- AVAILABLE ARTIFACTS (only reference ones that exist):
218
- {artifacts_json}
219
-
220
  KPI SUMMARY: {kpis_json}
 
 
221
 
222
- YOUR JOB:
223
- 1. Answer the user's question conversationally using the KPIs and your knowledge of the artifacts.
224
- 2. At the END of your response, output a JSON block (fenced with ```json ... ```) that tells
225
- the dashboard which artifact to display. The JSON must have this shape:
226
- {{"show": "figure"|"table"|"none", "scope": "python", "filename": "..."}}
227
-
228
- - Use "show": "figure" to display a chart image.
229
- - Use "show": "table" to display a CSV/JSON table.
230
- - Use "show": "none" if no artifact is relevant.
231
-
232
- RULES:
233
- - If the user asks about sales trends or forecasting by title, show sales_trends or arima figures.
234
- - If the user asks about sentiment, show sentiment figure or sentiment_counts table.
235
- - If the user asks about forecast accuracy or ARIMA, show arima figures.
236
- - If the user asks about top sellers, show top_titles_by_units_sold.csv.
237
- - If the user asks a general data question, pick the most relevant artifact.
238
- - Keep your answer concise (2-4 sentences), then the JSON block.
239
- """
240
 
241
- JSON_BLOCK_RE = re.compile(r"```json\s*(\{.*?\})\s*```", re.DOTALL)
242
- FALLBACK_JSON_RE = re.compile(r"\{[^{}]*\"show\"[^{}]*\}", re.DOTALL)
243
-
244
-
245
- def _parse_display_directive(text: str) -> Dict[str, str]:
246
- m = JSON_BLOCK_RE.search(text)
247
  if m:
248
- try:
249
- return json.loads(m.group(1))
250
- except json.JSONDecodeError:
251
- pass
252
- m = FALLBACK_JSON_RE.search(text)
253
  if m:
254
- try:
255
- return json.loads(m.group(0))
256
- except json.JSONDecodeError:
257
- pass
258
- return {"show": "none"}
259
 
 
260
 
261
- def _clean_response(text: str) -> str:
262
- """Strip the JSON directive block from the displayed response."""
263
- return JSON_BLOCK_RE.sub("", text).strip()
264
-
265
-
266
- def _n8n_call(msg: str) -> Tuple[str, Dict]:
267
- """Call the student's n8n webhook and return (reply, directive)."""
268
- import requests as req
269
- try:
270
- resp = req.post(N8N_WEBHOOK_URL, json={"question": msg}, timeout=20)
271
- data = resp.json()
272
- answer = data.get("answer", "No response from n8n workflow.")
273
- chart = data.get("chart", "none")
274
- if chart and chart != "none":
275
- return answer, {"show": "figure", "chart": chart}
276
- return answer, {"show": "none"}
277
- except Exception as e:
278
- return f"n8n error: {e}. Falling back to keyword matching.", None
279
-
280
-
281
- def ai_chat(user_msg: str, history: list):
282
- """Chat function for the AI Dashboard tab."""
283
- if not user_msg or not user_msg.strip():
284
- return history, "", None, None
285
-
286
- idx = artifacts_index()
287
- kpis = load_kpis()
288
-
289
- # Priority: n8n webhook > HF LLM > keyword fallback
290
  if N8N_WEBHOOK_URL:
291
- reply, directive = _n8n_call(user_msg)
292
- if directive is None:
293
- reply_fb, directive = _keyword_fallback(user_msg, idx, kpis)
294
- reply += "\n\n" + reply_fb
295
- elif not LLM_ENABLED:
296
- reply, directive = _keyword_fallback(user_msg, idx, kpis)
297
- else:
298
- system = DASHBOARD_SYSTEM.format(
299
- artifacts_json=json.dumps(idx, indent=2),
300
- kpis_json=json.dumps(kpis, indent=2) if kpis else "(no KPIs yet, run the pipeline first)",
301
- )
302
- msgs = [{"role": "system", "content": system}]
303
- for entry in (history or [])[-6:]:
304
- msgs.append(entry)
305
- msgs.append({"role": "user", "content": user_msg})
306
-
307
  try:
308
- r = llm_client.chat_completion(
309
- model=MODEL_NAME,
310
- messages=msgs,
311
- temperature=0.3,
312
- max_tokens=600,
313
- stream=False,
314
- )
315
- raw = (
316
- r["choices"][0]["message"]["content"]
317
- if isinstance(r, dict)
318
- else r.choices[0].message.content
319
- )
320
- directive = _parse_display_directive(raw)
321
- reply = _clean_response(raw)
322
  except Exception as e:
323
- reply = f"LLM error: {e}. Falling back to keyword matching."
324
- reply_fb, directive = _keyword_fallback(user_msg, idx, kpis)
325
- reply += "\n\n" + reply_fb
326
-
327
- # Resolve artifacts — build interactive Plotly charts when possible
328
- chart_out = None
329
- tab_out = None
330
- show = directive.get("show", "none")
331
- fname = directive.get("filename", "")
332
- chart_name = directive.get("chart", "")
333
-
334
- # Interactive chart builders keyed by name
335
- chart_builders = {
336
- "sales": build_sales_chart,
337
- "sentiment": build_sentiment_chart,
338
- "top_sellers": build_top_sellers_chart,
339
- }
340
-
341
- if chart_name and chart_name in chart_builders:
342
- chart_out = chart_builders[chart_name]()
343
- elif show == "figure" and fname:
344
- # Fallback: try to match filename to a chart builder
345
- if "sales_trend" in fname:
346
- chart_out = build_sales_chart()
347
- elif "sentiment" in fname:
348
- chart_out = build_sentiment_chart()
349
- elif "arima" in fname or "forecast" in fname:
350
- chart_out = build_sales_chart() # closest interactive equivalent
351
- else:
352
- chart_out = _empty_chart(f"No interactive chart for {fname}")
353
-
354
- if show == "table" and fname:
355
- fp = PY_TAB_DIR / fname
356
- if fp.exists():
357
- tab_out = _load_table_safe(fp)
358
  else:
359
- reply += f"\n\n*(Could not find table: {fname})*"
360
-
361
- new_history = (history or []) + [
362
- {"role": "user", "content": user_msg},
363
- {"role": "assistant", "content": reply},
364
- ]
365
-
366
- return new_history, "", chart_out, tab_out
367
-
368
-
369
- def _keyword_fallback(msg: str, idx: Dict, kpis: Dict) -> Tuple[str, Dict]:
370
- """Simple keyword matcher when LLM is unavailable."""
371
- msg_lower = msg.lower()
372
-
373
- if not idx["python"]["figures"] and not idx["python"]["tables"]:
374
- return (
375
- "No artifacts found yet. Please run the pipeline first (Tab 1), "
376
- "then come back here to explore the results.",
377
- {"show": "none"},
378
- )
379
-
380
- kpi_text = ""
381
- if kpis:
382
- total = kpis.get("total_units_sold", 0)
383
- kpi_text = (
384
- f"Quick summary: **{kpis.get('n_titles', '?')}** book titles across "
385
- f"**{kpis.get('n_months', '?')}** months, with **{total:,.0f}** total units sold."
386
- )
387
-
388
- if any(w in msg_lower for w in ["trend", "sales trend", "monthly sale"]):
389
- return (
390
- f"Here are the sales trends. {kpi_text}",
391
- {"show": "figure", "chart": "sales"},
392
- )
393
-
394
- if any(w in msg_lower for w in ["sentiment", "review", "positive", "negative"]):
395
- return (
396
- f"Here is the sentiment distribution across sampled book titles. {kpi_text}",
397
- {"show": "figure", "chart": "sentiment"},
398
- )
399
-
400
- if any(w in msg_lower for w in ["arima", "forecast", "predict"]):
401
- return (
402
- f"Here are the sales trends and forecasts. {kpi_text}",
403
- {"show": "figure", "chart": "sales"},
404
- )
405
-
406
- if any(w in msg_lower for w in ["top", "best sell", "popular", "rank"]):
407
- return (
408
- f"Here are the top-selling titles by units sold. {kpi_text}",
409
- {"show": "table", "scope": "python", "filename": "top_titles_by_units_sold.csv"},
410
- )
411
-
412
- if any(w in msg_lower for w in ["price", "pricing", "decision"]):
413
- return (
414
- f"Here are the pricing decisions. {kpi_text}",
415
- {"show": "table", "scope": "python", "filename": "pricing_decisions.csv"},
416
- )
417
-
418
- if any(w in msg_lower for w in ["dashboard", "overview", "summary", "kpi"]):
419
- return (
420
- f"Dashboard overview: {kpi_text}\n\nAsk me about sales trends, sentiment, forecasts, "
421
- "pricing, or top sellers to see specific visualizations.",
422
- {"show": "table", "scope": "python", "filename": "df_dashboard.csv"},
423
- )
424
-
425
- # Default
426
- return (
427
- f"I can show you various analyses. {kpi_text}\n\n"
428
- "Try asking about: **sales trends**, **sentiment**, **ARIMA forecasts**, "
429
- "**pricing decisions**, **top sellers**, or **dashboard overview**.",
430
- {"show": "none"},
431
- )
432
-
433
-
434
- # =========================================================
435
- # KPI CARDS (BubbleBusters style)
436
- # =========================================================
437
-
438
- def render_kpi_cards() -> str:
439
- kpis = load_kpis()
440
- if not kpis:
441
- return (
442
- '<div style="background:rgba(255,255,255,.65);backdrop-filter:blur(16px);'
443
- 'border-radius:20px;padding:28px;text-align:center;'
444
- 'border:1.5px solid rgba(255,255,255,.7);'
445
- 'box-shadow:0 8px 32px rgba(124,92,191,.08);">'
446
- '<div style="font-size:36px;margin-bottom:10px;">📊</div>'
447
- '<div style="color:#a48de8;font-size:14px;'
448
- 'font-weight:800;margin-bottom:6px;">No data yet</div>'
449
- '<div style="color:#9d8fc4;font-size:12px;">'
450
- 'Run the pipeline to populate these cards.</div>'
451
- '</div>'
452
- )
453
-
454
- def card(icon, label, value, colour):
455
- return f"""
456
- <div style="background:rgba(255,255,255,.72);backdrop-filter:blur(16px);
457
- border-radius:20px;padding:18px 14px 16px;text-align:center;
458
- border:1.5px solid rgba(255,255,255,.8);
459
- box-shadow:0 4px 16px rgba(124,92,191,.08);
460
- border-top:3px solid {colour};">
461
- <div style="font-size:26px;margin-bottom:7px;line-height:1;">{icon}</div>
462
- <div style="color:#9d8fc4;font-size:9.5px;text-transform:uppercase;
463
- letter-spacing:1.8px;margin-bottom:7px;font-weight:800;">{label}</div>
464
- <div style="color:#2d1f4e;font-size:16px;font-weight:800;">{value}</div>
465
- </div>"""
466
-
467
- kpi_config = [
468
- ("n_titles", "📚", "Book Titles", "#a48de8"),
469
- ("n_months", "📅", "Time Periods", "#7aa6f8"),
470
- ("total_units_sold", "📦", "Units Sold", "#6ee7c7"),
471
- ("total_revenue", "💰", "Revenue", "#3dcba8"),
472
- ]
473
-
474
- html = (
475
- '<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(140px,1fr));'
476
- 'gap:12px;margin-bottom:24px;">'
477
- )
478
- for key, icon, label, colour in kpi_config:
479
- val = kpis.get(key)
480
- if val is None:
481
- continue
482
- if isinstance(val, (int, float)) and val > 100:
483
- val = f"{val:,.0f}"
484
- html += card(icon, label, str(val), colour)
485
- # Extra KPIs not in config
486
- known = {k for k, *_ in kpi_config}
487
- for key, val in kpis.items():
488
- if key not in known:
489
- label = key.replace("_", " ").title()
490
- if isinstance(val, (int, float)) and val > 100:
491
- val = f"{val:,.0f}"
492
- html += card("📈", label, str(val), "#8fa8f8")
493
- html += "</div>"
494
- return html
495
-
496
-
497
- # =========================================================
498
- # INTERACTIVE PLOTLY CHARTS (BubbleBusters style)
499
- # =========================================================
500
-
501
- CHART_PALETTE = ["#7c5cbf", "#2ec4a0", "#e8537a", "#e8a230", "#5e8fef",
502
- "#c45ea8", "#3dbacc", "#a0522d", "#6aaa3a", "#d46060"]
503
-
504
- def _styled_layout(**kwargs) -> dict:
505
- defaults = dict(
506
- template="plotly_white",
507
- paper_bgcolor="rgba(255,255,255,0.95)",
508
- plot_bgcolor="rgba(255,255,255,0.98)",
509
- font=dict(family="system-ui, sans-serif", color="#2d1f4e", size=12),
510
- margin=dict(l=60, r=20, t=70, b=70),
511
- legend=dict(
512
- orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1,
513
- bgcolor="rgba(255,255,255,0.92)",
514
- bordercolor="rgba(124,92,191,0.35)", borderwidth=1,
515
- ),
516
- title=dict(font=dict(size=15, color="#4b2d8a")),
517
- )
518
- defaults.update(kwargs)
519
- return defaults
520
-
521
-
522
- def _empty_chart(title: str) -> go.Figure:
523
- fig = go.Figure()
524
- fig.update_layout(
525
- title=title, height=420, template="plotly_white",
526
- paper_bgcolor="rgba(255,255,255,0.95)",
527
- annotations=[dict(text="Run the pipeline to generate data",
528
- x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False,
529
- font=dict(size=14, color="rgba(124,92,191,0.5)"))],
530
- )
531
- return fig
532
-
533
-
534
- def build_sales_chart() -> go.Figure:
535
- path = PY_TAB_DIR / "df_dashboard.csv"
536
- if not path.exists():
537
- return _empty_chart("Sales Trends — run the pipeline first")
538
- df = pd.read_csv(path)
539
- date_col = next((c for c in df.columns if "month" in c.lower() or "date" in c.lower()), None)
540
- val_cols = [c for c in df.columns if c != date_col and df[c].dtype in ("float64", "int64")]
541
- if not date_col or not val_cols:
542
- return _empty_chart("Could not auto-detect columns in df_dashboard.csv")
543
- df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
544
- fig = go.Figure()
545
- for i, col in enumerate(val_cols):
546
- fig.add_trace(go.Scatter(
547
- x=df[date_col], y=df[col], name=col.replace("_", " ").title(),
548
- mode="lines+markers", line=dict(color=CHART_PALETTE[i % len(CHART_PALETTE)], width=2),
549
- marker=dict(size=4),
550
- hovertemplate=f"<b>{col.replace('_',' ').title()}</b><br>%{{x|%b %Y}}: %{{y:,.0f}}<extra></extra>",
551
- ))
552
- fig.update_layout(**_styled_layout(height=450, hovermode="x unified",
553
- title=dict(text="Monthly Overview")))
554
- fig.update_xaxes(gridcolor="rgba(124,92,191,0.15)", showgrid=True)
555
- fig.update_yaxes(gridcolor="rgba(124,92,191,0.15)", showgrid=True)
556
- return fig
557
-
558
-
559
- def build_sentiment_chart() -> go.Figure:
560
- path = PY_TAB_DIR / "sentiment_counts_sampled.csv"
561
- if not path.exists():
562
- return _empty_chart("Sentiment Distribution — run the pipeline first")
563
- df = pd.read_csv(path)
564
- title_col = df.columns[0]
565
- sent_cols = [c for c in ["negative", "neutral", "positive"] if c in df.columns]
566
- if not sent_cols:
567
- return _empty_chart("No sentiment columns found in CSV")
568
- colors = {"negative": "#e8537a", "neutral": "#5e8fef", "positive": "#2ec4a0"}
569
- fig = go.Figure()
570
- for col in sent_cols:
571
- fig.add_trace(go.Bar(
572
- name=col.title(), y=df[title_col], x=df[col],
573
- orientation="h", marker_color=colors.get(col, "#888"),
574
- hovertemplate=f"<b>{col.title()}</b>: %{{x}}<extra></extra>",
575
- ))
576
- fig.update_layout(**_styled_layout(
577
- height=max(400, len(df) * 28), barmode="stack",
578
- title=dict(text="Sentiment Distribution by Book"),
579
- ))
580
- fig.update_xaxes(title="Number of Reviews")
581
- fig.update_yaxes(autorange="reversed")
582
- return fig
583
-
584
-
585
- def build_top_sellers_chart() -> go.Figure:
586
- path = PY_TAB_DIR / "top_titles_by_units_sold.csv"
587
- if not path.exists():
588
- return _empty_chart("Top Sellers — run the pipeline first")
589
- df = pd.read_csv(path).head(15)
590
- title_col = next((c for c in df.columns if "title" in c.lower()), df.columns[0])
591
- val_col = next((c for c in df.columns if "unit" in c.lower() or "sold" in c.lower()), df.columns[-1])
592
- fig = go.Figure(go.Bar(
593
- y=df[title_col], x=df[val_col], orientation="h",
594
- marker=dict(color=df[val_col], colorscale=[[0, "#c5b4f0"], [1, "#7c5cbf"]]),
595
- hovertemplate="<b>%{y}</b><br>Units: %{x:,.0f}<extra></extra>",
596
- ))
597
- fig.update_layout(**_styled_layout(
598
- height=max(400, len(df) * 30),
599
- title=dict(text="Top Selling Titles"), showlegend=False,
600
- ))
601
- fig.update_yaxes(autorange="reversed")
602
- fig.update_xaxes(title="Total Units Sold")
603
- return fig
604
-
605
-
606
- def refresh_dashboard():
607
- return render_kpi_cards(), build_sales_chart(), build_sentiment_chart(), build_top_sellers_chart()
608
-
609
-
610
- # =========================================================
611
- # UI
612
- # =========================================================
613
 
614
  ensure_dirs()
 
615
 
616
- def load_css() -> str:
617
- css_path = BASE_DIR / "style.css"
618
- return css_path.read_text(encoding="utf-8") if css_path.exists() else ""
619
 
620
-
621
- with gr.Blocks(title="AIBDM 2026 Workshop App") as demo:
622
-
623
- gr.Markdown(
624
- "# SE21 App Template\n"
625
- "*This is an app template for SE21 students*",
626
- elem_id="escp_title",
627
- )
628
-
629
- # ===========================================================
630
- # TAB 1 -- Pipeline Runner
631
- # ===========================================================
632
  with gr.Tab("Pipeline Runner"):
633
- gr.Markdown()
634
-
635
- with gr.Row():
636
- with gr.Column(scale=1):
637
- btn_nb1 = gr.Button("Step 1: Data Creation", variant="secondary")
638
- with gr.Column(scale=1):
639
- btn_nb2 = gr.Button("Step 2: Python Analysis", variant="secondary")
640
-
641
  with gr.Row():
642
- btn_all = gr.Button("Run Full Pipeline (Both Steps)", variant="primary")
643
-
644
- run_log = gr.Textbox(
645
- label="Execution Log",
646
- lines=18,
647
- max_lines=30,
648
- interactive=False,
649
- )
650
-
651
- btn_nb1.click(run_datacreation, outputs=[run_log])
652
- btn_nb2.click(run_pythonanalysis, outputs=[run_log])
653
- btn_all.click(run_full_pipeline, outputs=[run_log])
654
-
655
- # ===========================================================
656
- # TAB 2 -- Dashboard (KPIs + Interactive Charts + Gallery)
657
- # ===========================================================
658
  with gr.Tab("Dashboard"):
659
- kpi_html = gr.HTML(value=render_kpi_cards)
660
-
661
- refresh_btn = gr.Button("Refresh Dashboard", variant="primary")
662
-
663
- gr.Markdown("#### Interactive Charts")
664
- chart_sales = gr.Plot(label="Monthly Overview")
665
- chart_sentiment = gr.Plot(label="Sentiment Distribution")
666
- chart_top = gr.Plot(label="Top Sellers")
667
-
668
- gr.Markdown("#### Static Figures (from notebooks)")
669
- gallery = gr.Gallery(
670
- label="Generated Figures",
671
- columns=2,
672
- height=480,
673
- object_fit="contain",
674
- )
675
-
676
- gr.Markdown("#### Data Tables")
677
- table_dropdown = gr.Dropdown(
678
- label="Select a table to view",
679
- choices=[],
680
- interactive=True,
681
- )
682
- table_display = gr.Dataframe(
683
- label="Table Preview",
684
- interactive=False,
685
- )
686
-
687
- def _on_refresh():
688
- kpi, c1, c2, c3 = refresh_dashboard()
689
- figs, dd, df = refresh_gallery()
690
- return kpi, c1, c2, c3, figs, dd, df
691
-
692
- refresh_btn.click(
693
- _on_refresh,
694
- outputs=[kpi_html, chart_sales, chart_sentiment, chart_top,
695
- gallery, table_dropdown, table_display],
696
- )
697
- table_dropdown.change(
698
- on_table_select,
699
- inputs=[table_dropdown],
700
- outputs=[table_display],
701
- )
702
-
703
- # ===========================================================
704
- # TAB 3 -- AI Dashboard
705
- # ===========================================================
706
  with gr.Tab('"AI" Dashboard'):
707
- _ai_status = (
708
- "Connected to your **n8n workflow**." if N8N_WEBHOOK_URL
709
- else "**LLM active.**" if LLM_ENABLED
710
- else "Using **keyword matching**. Upgrade options: "
711
- "set `N8N_WEBHOOK_URL` to connect your n8n workflow, "
712
- "or set `HF_API_KEY` for direct LLM access."
713
- )
714
- gr.Markdown(
715
- "### Ask questions, get interactive visualisations\n\n"
716
- f"Type a question and the system will pick the right interactive chart or table. {_ai_status}"
717
- )
718
-
719
- with gr.Row(equal_height=True):
720
  with gr.Column(scale=1):
721
- chatbot = gr.Chatbot(
722
- label="Conversation",
723
- height=380,
724
- )
725
- user_input = gr.Textbox(
726
- label="Ask about your data",
727
- placeholder="e.g. Show me sales trends / What are the top sellers? / Sentiment analysis",
728
- lines=1,
729
- )
730
- gr.Examples(
731
- examples=[
732
- "Show me the sales trends",
733
- "What does the sentiment look like?",
734
- "Which titles sell the most?",
735
- "Show the ARIMA forecasts",
736
- "What are the pricing decisions?",
737
- "Give me a dashboard overview",
738
- ],
739
- inputs=user_input,
740
- )
741
-
742
  with gr.Column(scale=1):
743
- ai_figure = gr.Plot(
744
- label="Interactive Chart",
745
- )
746
- ai_table = gr.Dataframe(
747
- label="Data Table",
748
- interactive=False,
749
- )
750
-
751
- user_input.submit(
752
- ai_chat,
753
- inputs=[user_input, chatbot],
754
- outputs=[chatbot, user_input, ai_figure, ai_table],
755
- )
756
-
757
 
758
- demo.launch(css=load_css(), allowed_paths=[str(BASE_DIR)])
 
1
+ import os, re, json, time, traceback
 
 
 
 
2
  from pathlib import Path
3
  from typing import Dict, Any, List, Tuple
 
4
  import pandas as pd
5
  import gradio as gr
6
  import papermill as pm
7
  import plotly.graph_objects as go
8
 
 
9
  try:
10
  from huggingface_hub import InferenceClient
11
  except Exception:
12
  InferenceClient = None
13
 
 
 
 
 
14
# --- Locations: everything is resolved relative to this file. ---
BASE_DIR = Path(__file__).resolve().parent

# --- Notebook file names, overridable through the NB1 / NB2 env vars. ---
NB1 = os.environ.get("NB1", "datacreation.ipynb").strip()
NB2 = os.environ.get("NB2", "pythonanalysis.ipynb").strip()

RUNS_DIR = BASE_DIR / "runs"
ART_DIR = BASE_DIR / "artifacts"
PY_FIG_DIR = ART_DIR / "py" / "figures"
PY_TAB_DIR = ART_DIR / "py" / "tables"

# --- Limits. ---
PAPERMILL_TIMEOUT = int(os.environ.get("PAPERMILL_TIMEOUT", "1800"))
# NOTE: the environment variable is MAX_FILE_PREVIEW_ROWS, not MAX_PREVIEW_ROWS.
MAX_PREVIEW_ROWS = int(os.environ.get("MAX_FILE_PREVIEW_ROWS", "50"))

# --- Credentials and endpoints; an empty string means "not configured". ---
HF_API_KEY = os.environ.get("HF_API_KEY", "").strip()
MODEL_NAME = os.environ.get("MODEL_NAME", "deepseek-ai/DeepSeek-R1").strip()
HF_PROVIDER = os.environ.get("HF_PROVIDER", "novita").strip()
N8N_WEBHOOK_URL = os.environ.get("N8N_WEBHOOK_URL", "").strip()
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY", "").strip()

# The Hugging Face client is built only when a key is set AND the optional
# huggingface_hub import succeeded (InferenceClient is None otherwise).
LLM_ENABLED = bool(HF_API_KEY) and InferenceClient is not None
llm_client = InferenceClient(provider=HF_PROVIDER, api_key=HF_API_KEY) if LLM_ENABLED else None
 
 
 
 
 
 
 
 
30
 
31
def ensure_dirs() -> None:
    """Create the run and artifact directories if they do not exist yet."""
    for directory in (RUNS_DIR, ART_DIR, PY_FIG_DIR, PY_TAB_DIR):
        # parents=True / exist_ok=True make repeated calls harmless.
        directory.mkdir(parents=True, exist_ok=True)
34
 
35
def _ls(d, exts):
    """Return the sorted names of the regular files in ``d`` whose suffix is in ``exts``.

    Args:
        d: Directory to list, as a ``Path`` or a string path.
        exts: One suffix or an iterable of suffixes, leading dot included
            (e.g. ``".png"``). Matching ignores case on both sides.

    Returns:
        A sorted list of file names (not full paths). Sub-directories are
        skipped. A missing or non-directory ``d`` yields ``[]``.
    """
    directory = Path(d)
    if not directory.is_dir():
        return []
    # A bare string would be searched by substring ("" in ".png" is True,
    # so extensionless files would match); wrap it before normalising.
    if isinstance(exts, str):
        exts = (exts,)
    wanted = {ext.lower() for ext in exts}
    return sorted(
        entry.name
        for entry in directory.iterdir()
        if entry.is_file() and entry.suffix.lower() in wanted
    )
37
 
38
def _read_csv(p):
    """Load at most ``MAX_PREVIEW_ROWS`` rows of the CSV file at ``p`` into a DataFrame."""
    return pd.read_csv(p, nrows=MAX_PREVIEW_ROWS)
39
def _read_json(p):
    """Parse the JSON file at ``p`` and return the decoded object.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if the content is not valid UTF-8 or not valid JSON
            (``json.JSONDecodeError`` and ``UnicodeDecodeError`` are both
            subclasses of ``ValueError``).
    """
    # "utf-8-sig" reads plain UTF-8 unchanged and drops a leading byte-order
    # mark if one is present; json.load refuses text that starts with a BOM.
    with open(p, encoding="utf-8-sig") as fh:
        return json.load(fh)
41
 
42
def artifacts_index():
    """Return the figure and table file names currently in the artifact folders.

    Returns:
        ``{"python": {"figures": [...], "tables": [...]}}`` where each list
        holds the sorted file names found in ``PY_FIG_DIR`` / ``PY_TAB_DIR``.
    """
    return {
        "python": {
            # ".jpeg" is listed next to ".jpg": _ls compares the whole
            # suffix, so "plot.jpeg" would otherwise never be indexed.
            "figures": _ls(PY_FIG_DIR, (".png", ".jpg", ".jpeg")),
            "tables": _ls(PY_TAB_DIR, (".csv", ".json")),
        }
    }
 
 
44
 
45
def load_kpis():
    """Return the KPI mapping stored in ``PY_TAB_DIR / "kpis.json"``.

    Best-effort by design: a missing, unreadable, or malformed file, or a
    file whose top-level JSON value is not an object, yields ``{}``.
    Callers use ``kpis.get(...)``, so a dict is always returned.
    """
    try:
        data = _read_json(PY_TAB_DIR / "kpis.json")
    except (OSError, ValueError):
        # OSError: file missing or unreadable.
        # ValueError: invalid JSON or invalid UTF-8.
        return {}
    return data if isinstance(data, dict) else {}
 
 
 
 
 
 
 
 
51
 
52
def _load_table_safe(p):
    """Load the table file at ``p`` into a DataFrame without ever raising.

    ``.json`` files (suffix compared case-insensitively, matching how the
    artifact index lists files) are parsed as JSON: a top-level object
    becomes a one-row frame, anything else is handed to ``pd.DataFrame``.
    Every other file is read as CSV through ``_read_csv``.

    Returns:
        The loaded DataFrame, or a one-row frame with an ``"error"`` column
        holding the exception text if loading failed.
    """
    try:
        if p.suffix.lower() == ".json":
            obj = _read_json(p)
            return pd.DataFrame([obj] if isinstance(obj, dict) else obj)
        return _read_csv(p)
    except Exception as e:
        # Deliberately broad: the result is shown in the UI, so any failure
        # is reported as table content instead of propagating.
        return pd.DataFrame([{"error": str(e)}])
60
 
61
def run_notebook(nb):
    """Execute the notebook ``nb`` (a path relative to ``BASE_DIR``) with papermill.

    The executed copy is written to ``RUNS_DIR`` as
    ``run_<YYYYmmdd-HHMMSS>_<notebook file name>``.

    Returns:
        ``"Executed <nb>"`` on success, or ``"ERROR: <nb> not found."`` when
        the input notebook does not exist. Exceptions raised by papermill
        propagate to the caller.
    """
    ensure_dirs()
    nb_in = BASE_DIR / nb
    if not nb_in.exists():
        return f"ERROR: {nb} not found."
    # Only the file name goes into the output name: if nb carries a
    # directory part ("notebooks/x.ipynb"), embedding it verbatim would
    # point into a sub-folder of RUNS_DIR that nothing creates.
    stamp = time.strftime('%Y%m%d-%H%M%S')
    nb_out = RUNS_DIR / f"run_{stamp}_{Path(nb).name}"
    pm.execute_notebook(
        str(nb_in),
        str(nb_out),
        cwd=str(BASE_DIR),
        log_output=True,
        progress_bar=False,
        execution_timeout=PAPERMILL_TIMEOUT,
    )
    return f"Executed {nb}"
69
+
70
def run_datacreation():
    """Run the data-creation notebook (``NB1``) and list the CSV files in ``BASE_DIR``.

    Returns:
        A log string that starts with ``"OK "`` on success and with
        ``"FAILED "`` otherwise (missing notebook, or an exception during
        execution, in which case the tail of the traceback is appended).
    """
    try:
        log = run_notebook(NB1)
        if log.startswith("ERROR"):
            # run_notebook signals a missing notebook through its return
            # value; do not report that as "OK".
            return f"FAILED {log}"
        csvs = sorted(f.name for f in BASE_DIR.glob("*.csv"))
        return f"OK {log}\n\nCSVs:\n" + "\n".join(f" - {c}" for c in csvs)
    except Exception as e:
        return f"FAILED {e}\n\n{traceback.format_exc()[-2000:]}"
77
 
78
def run_pythonanalysis():
    """Run the analysis notebook (``NB2``) and list the artifacts it left behind.

    Returns:
        A log string that starts with ``"OK "`` followed by the figure and
        table file names on success, and with ``"FAILED "`` otherwise
        (missing notebook, or an exception during execution, in which case
        the tail of the traceback is appended).
    """
    try:
        log = run_notebook(NB2)
        if log.startswith("ERROR"):
            # run_notebook signals a missing notebook through its return
            # value; do not report that as "OK".
            return f"FAILED {log}"
        found = artifacts_index()["python"]
        figures = ', '.join(found['figures']) or '(none)'
        tables = ', '.join(found['tables']) or '(none)'
        return f"OK {log}\n\nFigures: {figures}\nTables: {tables}"
    except Exception as e:
        return f"FAILED {e}\n\n{traceback.format_exc()[-2000:]}"
86
 
87
def run_full_pipeline():
    """Run data creation, then analysis, and return both logs as one string.

    The second step is run regardless of the outcome of the first; each
    step reports its own success or failure inside its log text.
    """
    rule = "=" * 50
    step1_log = run_datacreation()
    step2_log = run_pythonanalysis()
    return "\n".join([
        rule, "STEP 1/2: Data Collection", rule, step1_log,
        "",
        rule, "STEP 2/2: Analysis", rule, step2_log,
    ])
90
+
91
def _call_anthropic(system, messages, max_tokens=700, *,
                    model="claude-sonnet-4-20250514", timeout=30):
    """POST a chat request to the Anthropic Messages endpoint and return the reply text.

    Args:
        system: System prompt string.
        messages: List of ``{"role": ..., "content": ...}`` dicts.
        max_tokens: Upper bound on generated tokens.
        model: Model identifier sent in the request (keyword-only).
        timeout: Socket timeout in seconds for the HTTP call (keyword-only).

    Returns:
        ``None`` when ``ANTHROPIC_API_KEY`` is not set; the ``text`` of the
        first content block on success; otherwise a string starting with
        ``"Anthropic error: "``. Failures are returned, not raised.
    """
    if not ANTHROPIC_API_KEY:
        return None
    import urllib.error
    import urllib.request

    payload = json.dumps({
        "model": model,
        "max_tokens": max_tokens,
        "system": system,
        "messages": messages,
    }).encode()
    req = urllib.request.Request(
        "https://api.anthropic.com/v1/messages",
        data=payload,
        headers={
            "Content-Type": "application/json",
            "x-api-key": ANTHROPIC_API_KEY,
            "anthropic-version": "2023-06-01",
        },
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout) as r:
            return json.loads(r.read())["content"][0]["text"]
    except urllib.error.HTTPError as e:
        # str(e) is only "HTTP Error 4xx: ..."; the response body carries
        # the actual explanation, so append a bounded excerpt of it.
        detail = e.read().decode("utf-8", "replace")[:500]
        return f"Anthropic error: {e} {detail}".strip()
    except Exception as e:
        return f"Anthropic error: {e}"
103
+
104
# System-prompt template for the chat assistant. It is filled in with
# str.format(artifacts_json=..., kpis_json=...), which is why the literal
# braces of the JSON example are doubled. The fenced ```json ... ``` block
# the model is asked to end with is the display directive.
SYSTEM = """You are an AI portfolio analyst for an emerging market investment fund.
Help fund managers decide which countries to overweight or underweight during geopolitical stress.
You have access to World Bank macro data, synthetic geopolitical risk scores, VADER news sentiment, and Random Forest investment signals.
AVAILABLE ARTIFACTS: {artifacts_json}
KPI SUMMARY: {kpis_json}
Answer in 2-4 sentences. End with: ```json {{"show": "figure"|"table"|"none", "scope":"python","filename":"..."}} ```
ROUTING: GDP/trend->df_dashboard.csv or gdp_heatmap.png | risk->geo_risk_heatmap.png | sentiment->vader_by_country.csv | signal->investment_signal_summary.csv | arima->arima_gdp_forecast.csv | rf->rf_feature_importance.png | predictions->country_predictions_latest.csv | overview->kpis.json"""
111
 
112
+ JSON_RE = re.compile(r"```json\s*(\{.*?\})\s*```", re.DOTALL)
113
+ FB_RE = re.compile(r"\{[^{}]*\"show\"[^{}]*\}", re.DOTALL)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
+ def _parse_directive(text):
116
+ m = JSON_RE.search(text)
 
 
 
 
117
  if m:
118
+ try: return json.loads(m.group(1))
119
+ except: pass
120
+ m = FB_RE.search(text)
 
 
121
  if m:
122
+ try: return json.loads(m.group(0))
123
+ except: pass
124
+ return {"show":"none"}
 
 
125
 
126
+ def _clean(text): return JSON_RE.sub("", text).strip()
127
 
128
+ def _keyword(msg, idx, kpis):
129
+ ml = msg.lower()
130
+ kpi_txt = f"Summary: {kpis.get('Countries Analysed','?')} countries, avg GDP {kpis.get('Avg GDP Growth','?')}, RF accuracy {kpis.get('RF Accuracy','?')}." if kpis else ""
131
+ if not idx["python"]["figures"] and not idx["python"]["tables"]:
132
+ return "No data yet — run the pipeline first.", {"show":"none"}
133
+ if any(w in ml for w in ["gdp","growth","macro","trend"]): return f"Here is the GDP growth trend. {kpi_txt}", {"show":"figure","filename":"gdp_heatmap.png"}
134
+ if any(w in ml for w in ["risk","geopolit","conflict","stress"]): return f"Here is the geo risk heatmap. {kpi_txt}", {"show":"figure","filename":"geo_risk_heatmap.png"}
135
+ if any(w in ml for w in ["sentiment","vader","news","headline"]): return f"Here is the VADER sentiment. {kpi_txt}", {"show":"figure","filename":"vader_sentiment.png"}
136
+ if any(w in ml for w in ["overweight","underweight","signal","invest","portfolio","allocat"]): return f"Here are investment signals. {kpi_txt}", {"show":"figure","filename":"investment_signal.png"}
137
+ if any(w in ml for w in ["arima","forecast","predict","future"]): return f"Here is the ARIMA forecast. {kpi_txt}", {"show":"figure","filename":"arima_gdp_forecast.png"}
138
+ if any(w in ml for w in ["random forest","rf","feature","importance","classif"]): return f"Here are RF importances. {kpi_txt}", {"show":"figure","filename":"rf_feature_importance.png"}
139
+ if any(w in ml for w in ["country","which","rank","top","best","worst"]): return f"Here are country predictions. {kpi_txt}", {"show":"table","scope":"python","filename":"country_predictions_latest.csv"}
140
+ if any(w in ml for w in ["dashboard","overview","summary","kpi"]): return f"Dashboard. {kpi_txt}", {"show":"table","scope":"python","filename":"kpis.json"}
141
+ return f"Ask about: GDP trends, geo risk, sentiment, investment signals, ARIMA forecasts, RF features, or country predictions. {kpi_txt}", {"show":"none"}
142
+
143
def _recent_turns(history, limit=6):
    """Return the last *limit* chat turns reduced to plain role/content pairs.

    Extra keys that the chat UI may attach to history entries are dropped and
    entries without string content are skipped, so the result can be sent to
    a chat-completion API as-is.
    """
    turns = []
    for turn in list(history or [])[-limit:]:
        if not isinstance(turn, dict):
            continue
        role, content = turn.get("role"), turn.get("content")
        if role in ("user", "assistant") and isinstance(content, str):
            turns.append({"role": role, "content": content})
    return turns


def ai_chat(user_msg, history):
    """Answer one chat message and pick a chart or table to show beside it.

    Backends are tried by priority: n8n webhook, Anthropic API, Hugging Face
    inference client, then plain keyword routing. Any backend failure falls
    back to keyword routing with the error prepended to the reply.

    Args:
        user_msg: Text typed by the user.
        history: Existing conversation as a list of role/content dicts
            (may be ``None``).

    Returns:
        ``(new_history, "", chart, table)``: the updated history, an empty
        string to clear the input box, and a chart and/or table (either may
        be ``None``). Blank input returns ``history`` unchanged.
    """
    if not user_msg or not user_msg.strip():
        return history, "", None, None
    idx = artifacts_index()
    kpis = load_kpis()

    def keyword_fallback(prefix):
        kw_reply, kw_directive = _keyword(user_msg, idx, kpis)
        return prefix + "\n\n" + kw_reply, kw_directive

    def system_prompt():
        return SYSTEM.format(
            artifacts_json=json.dumps(idx, indent=2),
            kpis_json=json.dumps(kpis, indent=2) if kpis else "(run pipeline first)",
        )

    user_turn = {"role": "user", "content": user_msg}

    if N8N_WEBHOOK_URL:
        try:
            import requests as req
            r = req.post(N8N_WEBHOOK_URL, json={"question": user_msg}, timeout=20)
            r.raise_for_status()  # an HTTP error page is not an answer
            d = r.json()
            reply = str(d.get("answer") or "No n8n response.")
            chart = d.get("chart", "none")
            if chart and chart != "none":
                # "filename" is what the display logic below reads; without
                # it the chart hint from the workflow would be ignored.
                directive = {"show": "figure", "chart": chart, "filename": chart}
            else:
                directive = {"show": "none"}
        except Exception as e:
            reply, directive = keyword_fallback(f"n8n error: {e}")
    elif ANTHROPIC_API_KEY:
        raw = _call_anthropic(system_prompt(), _recent_turns(history) + [user_turn])
        # _call_anthropic reports failures in-band with this exact prefix;
        # matching it precisely avoids misreading a real answer that merely
        # mentions "error" early on.
        if raw and not raw.startswith("Anthropic error:"):
            directive = _parse_directive(raw)
            reply = _clean(raw)
        else:
            reply, directive = keyword_fallback(raw or "API unavailable.")
    elif LLM_ENABLED:
        try:
            r = llm_client.chat_completion(
                model=MODEL_NAME,
                messages=[{"role": "system", "content": system_prompt()}]
                + _recent_turns(history) + [user_turn],
                temperature=0.3, max_tokens=600, stream=False)
            raw = r["choices"][0]["message"]["content"] if isinstance(r, dict) else r.choices[0].message.content
            directive = _parse_directive(raw)
            reply = _clean(raw)
        except Exception as e:
            reply, directive = keyword_fallback(f"LLM error: {e}.")
    else:
        reply, directive = _keyword(user_msg, idx, kpis)

    show = directive.get("show", "none")
    # The filename comes from model/webhook output: coerce to text and keep
    # only the final path component so it cannot escape the tables folder.
    fname = Path(str(directive.get("filename") or "")).name
    chart_out = tab_out = None
    # Keyed by file stem so "investment_signal" and "investment_signal.png"
    # resolve alike; table stems cover replies that request a figure but name
    # the CSV listed in the SYSTEM routing hints.
    chart_builders = {
        "investment_signal": build_signal_chart,
        "investment_signal_summary": build_signal_chart,
        "vader_sentiment": build_sentiment_chart,
        "vader_by_country": build_sentiment_chart,
        "gdp_heatmap": build_gdp_trend_chart,
        "df_dashboard": build_gdp_trend_chart,
    }
    if show == "figure":
        builder = chart_builders.get(Path(fname).stem)
        if builder:
            chart_out = builder()
        elif "arima" in fname or "forecast" in fname:
            chart_out = build_arima_chart()
        else:
            chart_out = _empty_chart(fname or "Chart")
    elif show == "table" and fname:
        fp = PY_TAB_DIR / fname
        if fp.is_file():
            tab_out = _load_table_safe(fp)
        else:
            reply += f"\n\n*(Table not found: {fname})*"
    new_hist = (history or []) + [user_turn, {"role": "assistant", "content": reply}]
    return new_hist, "", chart_out, tab_out
189
+
190
def render_kpi_cards():
    """Render the KPI mapping from ``load_kpis()`` as a grid of HTML cards.

    Returns:
        An HTML string: a "No data yet" placeholder when there are no KPIs,
        otherwise one card per KPI. Values longer than 18 characters are cut
        to 16 and marked with an ellipsis. Labels and values are HTML-escaped
        before being placed in the markup.
    """
    from html import escape

    kpis = load_kpis()
    if not kpis:
        return '<div style="text-align:center;padding:28px;background:rgba(255,255,255,.65);border-radius:20px"><div style="font-size:36px">🌍</div><div style="color:#a48de8;font-weight:800">No data yet</div><div style="color:#9d8fc4;font-size:12px">Run the pipeline first.</div></div>'
    icons = {"Countries Analysed": "🌍", "Years Covered": "📅", "Avg GDP Growth": "📈", "Avg Geo Risk": "⚠️", "Top Overweight": "✅", "Top Underweight": "🔻", "RF Accuracy": "🤖", "Headlines Analysed": "📰"}
    colours = {"Countries Analysed": "#a48de8", "Years Covered": "#7aa6f8", "Avg GDP Growth": "#3dcba8", "Avg Geo Risk": "#e8a230", "Top Overweight": "#2ec4a0", "Top Underweight": "#e8537a", "RF Accuracy": "#7c5cbf", "Headlines Analysed": "#5e8fef"}

    def card(icon, label, value, colour):
        if isinstance(value, str) and len(value) > 18:
            # Show that the value was shortened instead of cutting it silently.
            value = value[:16] + "…"
        return (f'<div style="background:rgba(255,255,255,.72);border-radius:20px;padding:18px 14px 16px;text-align:center;border-top:3px solid {colour};box-shadow:0 4px 16px rgba(40,9,109,.08)">'
                f'<div style="font-size:26px;margin-bottom:7px">{icon}</div>'
                f'<div style="color:#9d8fc4;font-size:9.5px;text-transform:uppercase;letter-spacing:1.8px;margin-bottom:7px;font-weight:800">{escape(label)}</div>'
                f'<div style="color:#2d1f4e;font-size:16px;font-weight:800">{escape(value)}</div></div>')

    grid_open = '<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(130px,1fr));gap:12px;margin-bottom:24px">'
    cards = [
        card(icons.get(key, "📊"), key.replace("_", " ").title(), str(val), colours.get(key, "#8fa8f8"))
        for key, val in kpis.items()
    ]
    return grid_open + "".join(cards) + "</div>"
205
+
206
+ PAL=["#28096D","#2ec4a0","#e8537a","#F2C637","#5e8fef","#c45ea8"]
207
+ def _styled(**kw):
208
+ d=dict(template="plotly_white",paper_bgcolor="rgba(255,255,255,0.95)",plot_bgcolor="rgba(255,255,255,0.98)",
209
+ font=dict(family="system-ui,sans-serif",color="#2d1f4e",size=12),margin=dict(l=60,r=20,t=70,b=70),
210
+ legend=dict(orientation="h",yanchor="bottom",y=1.02,xanchor="right",x=1),
211
+ title=dict(font=dict(size=15,color="#28096D")))
212
+ d.update(kw); return d
213
def _empty_chart(title):
    """Return a blank figure titled *title* with a centred "run the pipeline" note."""
    notice = dict(
        text="Run the pipeline to generate data",
        x=0.5,
        y=0.5,
        xref="paper",
        yref="paper",
        showarrow=False,
        font=dict(size=14, color="rgba(40,9,109,0.4)"),
    )
    fig = go.Figure()
    fig.update_layout(
        title=title,
        height=420,
        template="plotly_white",
        paper_bgcolor="rgba(255,255,255,0.95)",
        annotations=[notice],
    )
    return fig
215
def build_gdp_trend_chart():
    """Plot each float64/int64 column of ``df_dashboard.csv`` against its year column.

    The x-axis is the first column whose name contains "year" (or the first
    column when none does). Returns a placeholder figure when the CSV is
    missing or has no other float64/int64 column.
    """
    source = PY_TAB_DIR / "df_dashboard.csv"
    if not source.exists():
        return _empty_chart("GDP Trend — run pipeline first")

    df = pd.read_csv(source)
    year_col = next((name for name in df.columns if "year" in name.lower()), df.columns[0])
    series_cols = [
        name for name in df.columns
        if name != year_col and df[name].dtype in ("float64", "int64")
    ]
    if not series_cols:
        return _empty_chart("No numeric columns")

    fig = go.Figure()
    for position, name in enumerate(series_cols):
        # Cycle through the palette when there are more series than colours.
        stroke = dict(color=PAL[position % len(PAL)], width=2.5)
        trace = go.Scatter(
            x=df[year_col],
            y=df[name],
            name=name.replace("_", " ").title(),
            mode="lines+markers",
            line=stroke,
            fill="tozeroy",
            fillcolor="rgba(40,9,109,0.06)",
        )
        fig.add_trace(trace)

    layout = _styled(
        height=450,
        hovermode="x unified",
        title=dict(text="Average EM GDP Growth Rate (2000–2023)"),
    )
    fig.update_layout(**layout)
    fig.update_yaxes(ticksuffix="%")
    return fig
224
def build_signal_chart():
    """Horizontal bar chart of ``pct_overweight`` per country.

    Reads ``investment_signal_summary.csv`` (columns ``country`` and
    ``pct_overweight`` are used), sorts ascending, colours bars at or above
    50 green and the rest red, and draws a dashed reference line at 50.
    Returns a placeholder figure when the CSV is missing.
    """
    source = PY_TAB_DIR / "investment_signal_summary.csv"
    if not source.exists():
        return _empty_chart("Investment Signal — run pipeline first")

    df = pd.read_csv(source).sort_values("pct_overweight", ascending=True)
    bar_colours = []
    for share in df["pct_overweight"]:
        bar_colours.append("#2ec4a0" if share >= 50 else "#e8537a")

    bars = go.Bar(
        y=df["country"],
        x=df["pct_overweight"],
        orientation="h",
        marker_color=bar_colours,
        hovertemplate="<b>%{y}</b><br>%{x:.1f}% years overweight<extra></extra>",
    )
    fig = go.Figure(bars)
    fig.add_vline(x=50, line_dash="dash", line_color="gray", line_width=1)
    layout = _styled(
        height=550,
        showlegend=False,
        title=dict(text="Investment Signal by Country (% Years Overweight)"),
    )
    fig.update_layout(**layout)
    fig.update_xaxes(title="% Years Overweight", ticksuffix="%")
    return fig
232
def build_sentiment_chart():
    """Horizontal bar chart of ``avg_vader_score`` per country.

    Reads ``vader_by_country.csv`` (columns ``country`` and
    ``avg_vader_score`` are used) and sorts by score. Bars are green for
    scores >= 0.05, red for scores <= -0.05 and blue otherwise. Returns a
    placeholder figure when the CSV is missing.
    """
    source = PY_TAB_DIR / "vader_by_country.csv"
    if not source.exists():
        return _empty_chart("Sentiment — run pipeline first")

    def tone(score):
        if score >= 0.05:
            return "#2ec4a0"
        if score <= -0.05:
            return "#e8537a"
        return "#5e8fef"

    df = pd.read_csv(source).sort_values("avg_vader_score")
    bar_colours = [tone(score) for score in df["avg_vader_score"]]
    bars = go.Bar(
        y=df["country"],
        x=df["avg_vader_score"],
        orientation="h",
        marker_color=bar_colours,
        hovertemplate="<b>%{y}</b><br>VADER: %{x:.3f}<extra></extra>",
    )
    fig = go.Figure(bars)
    fig.add_vline(x=0, line_dash="dot", line_color="gray")
    layout = _styled(
        height=550,
        showlegend=False,
        title=dict(text="Average VADER News Sentiment by Country"),
    )
    fig.update_layout(**layout)
    fig.update_xaxes(title="Avg VADER Compound Score")
    return fig
240
def build_arima_chart():
    """Plot the forecast from ``arima_gdp_forecast.csv``, with history when available.

    The historical line is drawn only if ``df_dashboard.csv`` exists. Columns
    are located by name fragments ("year", "gdp"/"avg", "forecast") with
    positional fallbacks. A shaded band is added when both ``lower_ci`` and
    ``upper_ci`` columns are present. Returns a placeholder figure when the
    forecast CSV is missing.
    """
    history_path = PY_TAB_DIR / "df_dashboard.csv"
    forecast_path = PY_TAB_DIR / "arima_gdp_forecast.csv"
    if not forecast_path.exists():
        return _empty_chart("ARIMA Forecast — run pipeline first")

    def first_match(columns, accepts, default):
        # First column name accepted by the predicate, else the given default.
        return next((name for name in columns if accepts(name.lower())), default)

    fc = pd.read_csv(forecast_path)
    fig = go.Figure()

    if history_path.exists():
        past = pd.read_csv(history_path)
        past_year = first_match(past.columns, lambda n: "year" in n, past.columns[0])
        past_value = first_match(past.columns, lambda n: "gdp" in n or "avg" in n, past.columns[-1])
        fig.add_trace(go.Scatter(
            x=past[past_year],
            y=past[past_value],
            name="Historical",
            mode="lines+markers",
            line=dict(color="#28096D", width=2.5),
        ))

    fc_year = first_match(fc.columns, lambda n: "year" in n, fc.columns[0])
    fc_value = first_match(fc.columns, lambda n: "forecast" in n, fc.columns[1])
    fig.add_trace(go.Scatter(
        x=fc[fc_year],
        y=fc[fc_value],
        name="Forecast",
        mode="lines+markers",
        line=dict(color="#e8537a", width=2.5, dash="dash"),
    ))

    if "lower_ci" in fc.columns and "upper_ci" in fc.columns:
        # Closed polygon: upper bound left-to-right, then lower bound back.
        years = list(fc[fc_year])
        band_x = years + years[::-1]
        band_y = list(fc["upper_ci"]) + list(fc["lower_ci"])[::-1]
        fig.add_trace(go.Scatter(
            x=band_x,
            y=band_y,
            fill="toself",
            fillcolor="rgba(232,83,122,0.12)",
            line=dict(color="rgba(255,255,255,0)"),
            name="95% CI",
        ))

    layout = _styled(
        height=450,
        hovermode="x unified",
        title=dict(text="ARIMA GDP Growth Forecast (2024–2028)"),
    )
    fig.update_layout(**layout)
    fig.update_yaxes(ticksuffix="%")
    return fig
252
+
253
def refresh_dashboard():
    """Rebuild the KPI cards and the GDP, signal and sentiment charts, in that order."""
    cards = render_kpi_cards()
    gdp_chart = build_gdp_trend_chart()
    signal_chart = build_signal_chart()
    sentiment_chart = build_sentiment_chart()
    return cards, gdp_chart, signal_chart, sentiment_chart
254
def refresh_gallery():
    """Collect figure thumbnails and table choices for the dashboard.

    Returns:
        ``(figures, dropdown_update, preview)``: a list of
        ``(path, caption)`` pairs for every PNG in ``PY_FIG_DIR``, a
        ``gr.update`` carrying the table names with the first one selected,
        and a preview of that first table (an empty DataFrame when there are
        no tables).
    """
    gallery_items = []
    for image in sorted(PY_FIG_DIR.glob("*.png")):
        caption = image.stem.replace("_", " ").title()
        gallery_items.append((str(image), caption))

    table_names = artifacts_index()["python"]["tables"]
    if table_names:
        selected = table_names[0]
        preview = _load_table_safe(PY_TAB_DIR / selected)
    else:
        selected = None
        preview = pd.DataFrame()
    return gallery_items, gr.update(choices=table_names, value=selected), preview
259
def on_table_select(choice):
    """Load the table chosen in the dropdown for preview.

    Returns a one-row hint frame when nothing is selected and a one-row error
    frame when the file does not exist under ``PY_TAB_DIR``.
    """
    if not choice:
        return pd.DataFrame([{"hint": "Select a table."}])
    target = PY_TAB_DIR / choice
    if target.exists():
        return _load_table_safe(target)
    return pd.DataFrame([{"error": f"Not found: {choice}"}])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
  ensure_dirs()
264
+ _ai_status=("Connected to **n8n workflow**." if N8N_WEBHOOK_URL else "Powered by **Claude** (Anthropic API)." if ANTHROPIC_API_KEY else "**LLM active**." if LLM_ENABLED else "Using **keyword matching**.")
265
 
266
def load_css():
    """Return the contents of ``style.css`` next to the app, or ``""`` if it is absent."""
    stylesheet = BASE_DIR / "style.css"
    if stylesheet.exists():
        return stylesheet.read_text(encoding="utf-8")
    return ""
 
268
 
269
+ with gr.Blocks(title="EM Geopolitical Analytics — ESCP SE21", css=load_css()) as demo:
270
+ gr.Markdown("# 🌍 Emerging Market Geopolitical Analytics\n*How can an EM investment fund use news sentiment & macro forecasting to allocate during geopolitical stress?*\n**Team:** Amaryllis · Kuang · Logan · Tommaso · Achille — ESCP SE21",elem_id="escp_title")
 
 
 
 
 
 
 
 
 
 
271
  with gr.Tab("Pipeline Runner"):
272
+ gr.Markdown("**Step 1** fetches World Bank macro data (20 countries, 2000–2023) and generates synthetic geopolitical risk scores, news headlines, and fund flows.\n\n**Step 2** runs VADER sentiment, ARIMA GDP forecasting, and Random Forest investment signal classification.")
 
 
 
 
 
 
 
273
  with gr.Row():
274
+ btn1=gr.Button(" Step 1: Data Collection",variant="secondary"); btn2=gr.Button("▶ Step 2: Analysis",variant="secondary")
275
+ btn_all=gr.Button("⚡ Run Full Pipeline",variant="primary")
276
+ log=gr.Textbox(label="Execution Log",lines=22,max_lines=40,interactive=False)
277
+ btn1.click(run_datacreation,outputs=[log]); btn2.click(run_pythonanalysis,outputs=[log]); btn_all.click(run_full_pipeline,outputs=[log])
 
 
 
 
 
 
 
 
 
 
 
 
278
  with gr.Tab("Dashboard"):
279
+ kpi_html=gr.HTML(value=render_kpi_cards); refresh_btn=gr.Button("🔄 Refresh Dashboard",variant="primary")
280
+ gr.Markdown("#### 📈 Interactive Charts")
281
+ chart_gdp=gr.Plot(label="GDP Growth Trend"); chart_signal=gr.Plot(label="Investment Signals"); chart_sent=gr.Plot(label="News Sentiment")
282
+ gr.Markdown("#### 🔮 ARIMA Forecast"); chart_arima=gr.Plot(label="GDP Forecast 2024–2028")
283
+ gr.Markdown("#### 🖼️ Figures"); gallery=gr.Gallery(label="Figures",columns=2,height=520)
284
+ gr.Markdown("#### 📋 Tables"); tbl_dd=gr.Dropdown(label="Select table",choices=[],interactive=True); tbl_df=gr.Dataframe(label="Preview",interactive=False)
285
+ def _refresh():
286
+ kpi,c1,c2,c3=refresh_dashboard(); c4=build_arima_chart(); figs,dd,df=refresh_gallery(); return kpi,c1,c2,c3,c4,figs,dd,df
287
+ refresh_btn.click(_refresh,outputs=[kpi_html,chart_gdp,chart_signal,chart_sent,chart_arima,gallery,tbl_dd,tbl_df])
288
+ tbl_dd.change(on_table_select,inputs=[tbl_dd],outputs=[tbl_df])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  with gr.Tab('"AI" Dashboard'):
290
+ gr.Markdown(f"### 💬 Ask questions, get interactive visualisations\n\nAsk about country allocation, macro trends, sentiment, or forecasts. {_ai_status}")
291
+ with gr.Row():
 
 
 
 
 
 
 
 
 
 
 
292
  with gr.Column(scale=1):
293
+ chatbot=gr.Chatbot(label="Conversation",height=420,type="messages")
294
+ user_input=gr.Textbox(label="Ask about your data",placeholder="e.g. Which countries to overweight? / ARIMA forecast / Turkey sentiment")
295
+ gr.Examples(examples=["Which countries should we overweight?","Show me the geopolitical risk heatmap","What does the ARIMA forecast say?","Which countries have negative news sentiment?","What features drive the investment signal?","Give me a dashboard overview"],inputs=user_input)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
  with gr.Column(scale=1):
297
+ ai_chart=gr.Plot(label="Interactive Chart"); ai_table=gr.Dataframe(label="Data Table",interactive=False)
298
+ user_input.submit(ai_chat,inputs=[user_input,chatbot],outputs=[chatbot,user_input,ai_chart,ai_table])
 
 
 
 
 
 
 
 
 
 
 
 
299
 
300
+ demo.launch(allowed_paths=[str(BASE_DIR)])
pythonanalysis.ipynb ADDED
@@ -0,0 +1,955 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "ce179b8c",
6
+ "metadata": {},
7
+ "source": [
8
+ "# 🌍 Notebook 2 — Data Analysis & Visualization\n",
9
+ "**Project:** EM Portfolio Risk Advisor\n",
10
+ "\n",
11
+ "**Research Question:** *How can an emerging market investment fund use news sentiment analysis and macroeconomic forecasting to identify which countries to overweight or underweight during periods of geopolitical stress?*\n",
12
+ "\n",
13
+ "**Team:** Amaryllis (PM) · Kuang (UX) · Tommaso (Data Analyst) · Logan (UX) · Achille (Content)\n",
14
+ "**Course:** AI for Big Data Management — ESCP SE21\n",
15
+ "\n",
16
+ "---\n",
17
+ "**Analyses performed in this notebook:**\n",
18
+ "| # | Type | Method |\n",
19
+ "|---|------|--------|\n",
20
+ "| 1 | Qualitative | VADER sentiment scoring of synthetic analyst reports |\n",
21
+ "| 2 | Quantitative | GDP growth heatmap (2000–2023) |\n",
22
+ "| 3 | Quantitative | Geopolitical risk heatmap |\n",
23
+ "| 4 | Quantitative | FDI & Inflation trend analysis |\n",
24
+ "| 5 | Quantitative | Random Forest investment signal classifier |\n",
25
+ "| 6 | Quantitative | ARIMA GDP growth forecasting (2024–2028) |\n",
26
+ "| 7 | Mixed | Country-level investment signal + sentiment composite |"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "markdown",
31
+ "id": "fd142dab",
32
+ "metadata": {},
33
+ "source": [
34
+ "## 1. Install & Import"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": null,
40
+ "id": "47a823f2",
41
+ "metadata": {},
42
+ "outputs": [],
43
+ "source": [
44
+ "!pip install -q pandas numpy matplotlib seaborn vaderSentiment statsmodels scikit-learn plotly"
45
+ ]
46
+ },
47
+ {
48
+ "cell_type": "code",
49
+ "execution_count": null,
50
+ "id": "0717b85c",
51
+ "metadata": {},
52
+ "outputs": [],
53
+ "source": [
54
+ "import pandas as pd\n",
55
+ "import numpy as np\n",
56
+ "import matplotlib.pyplot as plt\n",
57
+ "import matplotlib.colors as mcolors\n",
58
+ "import seaborn as sns\n",
59
+ "import warnings, os, json\n",
60
+ "from pathlib import Path\n",
61
+ "from itertools import product\n",
62
+ "\n",
63
+ "warnings.filterwarnings('ignore')\n",
64
+ "np.random.seed(42)\n",
65
+ "\n",
66
+ "print('✅ Packages loaded')"
67
+ ]
68
+ },
69
+ {
70
+ "cell_type": "markdown",
71
+ "id": "2aa18937",
72
+ "metadata": {},
73
+ "source": [
74
+ "## 2. Output Directory Setup\n",
75
+ "\n",
76
+ "All figures and tables are saved to `artifacts/` so the Hugging Face app can load them."
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": null,
82
+ "id": "89cb932e",
83
+ "metadata": {},
84
+ "outputs": [],
85
+ "source": [
86
+ "BASE_DIR = Path('.')\n",
87
+ "ART_DIR = BASE_DIR / 'artifacts'\n",
88
+ "PY_FIG = ART_DIR / 'py' / 'figures'\n",
89
+ "PY_TAB = ART_DIR / 'py' / 'tables'\n",
90
+ "\n",
91
+ "for p in [PY_FIG, PY_TAB]:\n",
92
+ " p.mkdir(parents=True, exist_ok=True)\n",
93
+ "\n",
94
+ "print('✅ Output folders:')\n",
95
+ "print(' -', PY_FIG.resolve())\n",
96
+ "print(' -', PY_TAB.resolve())"
97
+ ]
98
+ },
99
+ {
100
+ "cell_type": "markdown",
101
+ "id": "59fccaec",
102
+ "metadata": {},
103
+ "source": [
104
+ "## 3. Load Datasets\n",
105
+ "\n",
106
+ "All five files were produced by Notebook 1 under the data contract."
107
+ ]
108
+ },
109
+ {
110
+ "cell_type": "code",
111
+ "execution_count": null,
112
+ "id": "ea5ed4b3",
113
+ "metadata": {},
114
+ "outputs": [],
115
+ "source": [
116
+ "df_macro = pd.read_csv('world_bank_macro.csv')\n",
117
+ "df_risk = pd.read_csv('synthetic_risk_scores.csv')\n",
118
+ "df_sentiment = pd.read_csv('synthetic_news_sentiment.csv')\n",
119
+ "df_master = pd.read_csv('title_level_features.csv')\n",
120
+ "df_monthly = pd.read_csv('monthly_gdp_series.csv')\n",
121
+ "\n",
122
+ "print('world_bank_macro :', df_macro.shape)\n",
123
+ "print('synthetic_risk_scores :', df_risk.shape)\n",
124
+ "print('synthetic_news_sent. :', df_sentiment.shape)\n",
125
+ "print('title_level_features :', df_master.shape)\n",
126
+ "print('monthly_gdp_series :', df_monthly.shape)"
127
+ ]
128
+ },
129
+ {
130
+ "cell_type": "markdown",
131
+ "id": "57b70ed2",
132
+ "metadata": {},
133
+ "source": [
134
+ "## 4. Data Quality Check"
135
+ ]
136
+ },
137
+ {
138
+ "cell_type": "code",
139
+ "execution_count": null,
140
+ "id": "90a068f1",
141
+ "metadata": {},
142
+ "outputs": [],
143
+ "source": [
144
+ "def quality_check(df, name):\n",
145
+ " print(f'\\n🔍 {name}')\n",
146
+ " print(f' Shape : {df.shape}')\n",
147
+ " nulls = df.isnull().sum()\n",
148
+ " nulls = nulls[nulls > 0]\n",
149
+ " print(f' Nulls : {dict(nulls) if len(nulls) else \"none\"}')\n",
150
+ " print(f' Dtypes: {dict(df.dtypes)}')\n",
151
+ " return df\n",
152
+ "\n",
153
+ "for df, nm in [\n",
154
+ " (df_macro, 'world_bank_macro'),\n",
155
+ " (df_risk, 'synthetic_risk_scores'),\n",
156
+ " (df_sentiment, 'synthetic_news_sentiment'),\n",
157
+ " (df_master, 'title_level_features'),\n",
158
+ " (df_monthly, 'monthly_gdp_series'),\n",
159
+ "]:\n",
160
+ " quality_check(df, nm)"
161
+ ]
162
+ },
163
+ {
164
+ "cell_type": "markdown",
165
+ "id": "3d974373",
166
+ "metadata": {},
167
+ "source": [
168
+ "## 5. Qualitative Analysis — VADER Sentiment Scoring\n",
169
+ "\n",
170
+ "We apply the **VADER** (Valence Aware Dictionary and sEntiment Reasoner) lexicon to every\n",
171
+ "synthetic analyst report headline. VADER returns a compound score in **[−1, +1]**:\n",
172
+ "\n",
173
+ "| Range | Label |\n",
174
+ "|-------|-------|\n",
175
+ "| ≥ 0.05 | bullish |\n",
176
+ "| ≤ −0.05 | bearish |\n",
177
+ "| otherwise | neutral |\n",
178
+ "\n",
179
+ "This mirrors how a portfolio analyst would extract sentiment from news feeds programmatically."
180
+ ]
181
+ },
182
+ {
183
+ "cell_type": "code",
184
+ "execution_count": null,
185
+ "id": "a0537f27",
186
+ "metadata": {},
187
+ "outputs": [],
188
+ "source": [
189
+ "from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer\n",
190
+ "\n",
191
+ "analyzer = SentimentIntensityAnalyzer()\n",
192
+ "\n",
193
+ "def vader_compound(text):\n",
194
+ " return analyzer.polarity_scores(str(text))['compound']\n",
195
+ "\n",
196
+ "def vader_label(score):\n",
197
+ " if score >= 0.05: return 'bullish'\n",
198
+ " if score <= -0.05: return 'bearish'\n",
199
+ " return 'neutral'\n",
200
+ "\n",
201
+ "df_sentiment['vader_compound'] = df_sentiment['report_text'].apply(vader_compound)\n",
202
+ "df_sentiment['vader_label'] = df_sentiment['vader_compound'].apply(vader_label)\n",
203
+ "\n",
204
+ "print('VADER label distribution:')\n",
205
+ "print(df_sentiment['vader_label'].value_counts())\n",
206
+ "print()\n",
207
+ "print(df_sentiment[['country','year','sentiment_label','vader_compound','vader_label']].head(10))"
208
+ ]
209
+ },
210
+ {
211
+ "cell_type": "code",
212
+ "execution_count": null,
213
+ "id": "1822af7e",
214
+ "metadata": {},
215
+ "outputs": [],
216
+ "source": [
217
+ "# Aggregate VADER scores per country\n",
218
+ "vader_by_country = (\n",
219
+ " df_sentiment\n",
220
+ " .groupby(['iso3','country'])\n",
221
+ " .agg(avg_vader_score=('vader_compound','mean'),\n",
222
+ " report_count=('vader_compound','count'))\n",
223
+ " .reset_index()\n",
224
+ " .sort_values('avg_vader_score')\n",
225
+ ")\n",
226
+ "vader_by_country.to_csv(PY_TAB / 'vader_by_country.csv', index=False)\n",
227
+ "print('✅ vader_by_country.csv saved')\n",
228
+ "print(vader_by_country)"
229
+ ]
230
+ },
231
+ {
232
+ "cell_type": "code",
233
+ "execution_count": null,
234
+ "id": "902ea1c0",
235
+ "metadata": {},
236
+ "outputs": [],
237
+ "source": [
238
+ "# Aggregate VADER scores per country-year (for merging into master)\n",
239
+ "vader_agg = (\n",
240
+ " df_sentiment\n",
241
+ " .groupby(['iso3','year'])['vader_compound']\n",
242
+ " .mean()\n",
243
+ " .reset_index()\n",
244
+ " .rename(columns={'vader_compound': 'vader_score'})\n",
245
+ ")"
246
+ ]
247
+ },
248
+ {
249
+ "cell_type": "markdown",
250
+ "id": "3ae2b5ee",
251
+ "metadata": {},
252
+ "source": [
253
+ "## 6. Merge VADER Scores into Master Dataset"
254
+ ]
255
+ },
256
+ {
257
+ "cell_type": "code",
258
+ "execution_count": null,
259
+ "id": "4bbfc012",
260
+ "metadata": {},
261
+ "outputs": [],
262
+ "source": [
263
+ "# Also merge risk scores into master (if not already present)\n",
264
+ "risk_cols = ['iso3','year','geopolitical_risk_score']\n",
265
+ "if 'geopolitical_risk_score' not in df_master.columns:\n",
266
+ " df_master = df_master.merge(\n",
267
+ " df_risk[risk_cols], on=['iso3','year'], how='left'\n",
268
+ " )\n",
269
+ "\n",
270
+ "# Merge VADER\n",
271
+ "if 'vader_score' not in df_master.columns:\n",
272
+ " df_master = df_master.merge(vader_agg, on=['iso3','year'], how='left')\n",
273
+ "\n",
274
+ "# Clean numerics\n",
275
+ "for col in ['gdp_growth','fdi_pct_gdp','inflation','geopolitical_risk_score','vader_score']:\n",
276
+ " if col in df_master.columns:\n",
277
+ " df_master[col] = pd.to_numeric(df_master[col], errors='coerce')\n",
278
+ "\n",
279
+ "print('Master dataset shape after merge:', df_master.shape)\n",
280
+ "print(df_master[['country','year','gdp_growth','geopolitical_risk_score','vader_score']].head(10))"
281
+ ]
282
+ },
283
+ {
284
+ "cell_type": "markdown",
285
+ "id": "6f506667",
286
+ "metadata": {},
287
+ "source": [
288
+ "## 7. Quantitative Analysis — GDP Growth Heatmap\n",
289
+ "\n",
290
+ "A heatmap gives fund managers an at-a-glance view of **which countries experienced growth\n",
291
+ "shocks vs. booms** over the 2000–2023 period. Red cells correspond to known crises\n",
292
+ "(Argentina 2001–02, the 2009 global financial crisis, COVID 2020)."
293
+ ]
294
+ },
295
+ {
296
+ "cell_type": "code",
297
+ "execution_count": null,
298
+ "id": "d33ff313",
299
+ "metadata": {},
300
+ "outputs": [],
301
+ "source": [
302
+ "PALETTE = ['#28096D','#2ec4a0','#e8537a','#F2C637','#5e8fef',\n",
303
+ " '#c45ea8','#3dbacc','#a0522d','#6aaa3a','#d46060']\n",
304
+ "\n",
305
+ "# Build pivot\n",
306
+ "pivot_gdp = df_macro.pivot_table(index='country', columns='year', values='gdp_growth')\n",
307
+ "\n",
308
+ "fig, ax = plt.subplots(figsize=(18, 7))\n",
309
+ "sns.heatmap(\n",
310
+ " pivot_gdp,\n",
311
+ " cmap='RdYlGn',\n",
312
+ " center=0,\n",
313
+ " linewidths=0.4,\n",
314
+ " linecolor='white',\n",
315
+ " annot=False,\n",
316
+ " fmt='.1f',\n",
317
+ " ax=ax,\n",
318
+ " cbar_kws={'label': 'GDP Growth (%)', 'shrink': 0.8}\n",
319
+ ")\n",
320
+ "ax.set_title('EM GDP Growth Rate (%) — 2000 to 2023', fontsize=15,\n",
321
+ " fontweight='bold', color='#28096D', pad=16)\n",
322
+ "ax.set_xlabel('Year', fontsize=11, color='#28096D')\n",
323
+ "ax.set_ylabel('Country', fontsize=11, color='#28096D')\n",
324
+ "ax.tick_params(axis='x', labelsize=8, rotation=45)\n",
325
+ "ax.tick_params(axis='y', labelsize=10)\n",
326
+ "plt.tight_layout()\n",
327
+ "plt.savefig(PY_FIG / 'gdp_heatmap.png', dpi=150, bbox_inches='tight')\n",
328
+ "plt.show()\n",
329
+ "print('✅ gdp_heatmap.png saved')"
330
+ ]
331
+ },
332
+ {
333
+ "cell_type": "markdown",
334
+ "id": "01b2971c",
335
+ "metadata": {},
336
+ "source": [
337
+ "## 8. Quantitative Analysis — Geopolitical Risk Heatmap"
338
+ ]
339
+ },
340
+ {
341
+ "cell_type": "code",
342
+ "execution_count": null,
343
+ "id": "c9dc1711",
344
+ "metadata": {},
345
+ "outputs": [],
346
+ "source": [
347
+ "pivot_risk = df_risk.pivot_table(\n",
348
+ " index='country', columns='year', values='geopolitical_risk_score'\n",
349
+ ")\n",
350
+ "\n",
351
+ "fig, ax = plt.subplots(figsize=(18, 7))\n",
352
+ "sns.heatmap(\n",
353
+ " pivot_risk,\n",
354
+ " cmap='YlOrRd',\n",
355
+ " linewidths=0.4,\n",
356
+ " linecolor='white',\n",
357
+ " annot=False,\n",
358
+ " ax=ax,\n",
359
+ " cbar_kws={'label': 'Risk Score (0–10)', 'shrink': 0.8}\n",
360
+ ")\n",
361
+ "ax.set_title('Synthetic Geopolitical Risk Score — 2000 to 2023', fontsize=15,\n",
362
+ " fontweight='bold', color='#28096D', pad=16)\n",
363
+ "ax.set_xlabel('Year', fontsize=11, color='#28096D')\n",
364
+ "ax.set_ylabel('Country', fontsize=11, color='#28096D')\n",
365
+ "ax.tick_params(axis='x', labelsize=8, rotation=45)\n",
366
+ "ax.tick_params(axis='y', labelsize=10)\n",
367
+ "plt.tight_layout()\n",
368
+ "plt.savefig(PY_FIG / 'geo_risk_heatmap.png', dpi=150, bbox_inches='tight')\n",
369
+ "plt.show()\n",
370
+ "print('✅ geo_risk_heatmap.png saved')"
371
+ ]
372
+ },
373
+ {
374
+ "cell_type": "markdown",
375
+ "id": "30066fb5",
376
+ "metadata": {},
377
+ "source": [
378
+ "## 9. Qualitative Analysis — VADER Sentiment by Country\n",
379
+ "\n",
380
+ "We compare the average VADER compound score to the ground-truth `sentiment_label` assigned during synthetic data generation to validate alignment."
381
+ ]
382
+ },
383
+ {
384
+ "cell_type": "code",
385
+ "execution_count": null,
386
+ "id": "03c36db4",
387
+ "metadata": {},
388
+ "outputs": [],
389
+ "source": [
390
+ "fig, axes = plt.subplots(1, 2, figsize=(16, 6))\n",
391
+ "\n",
392
+ "# Left: VADER compound score (bar)\n",
393
+ "colors = ['#2ec4a0' if v >= 0.05 else ('#e8537a' if v <= -0.05 else '#5e8fef')\n",
394
+ " for v in vader_by_country['avg_vader_score']]\n",
395
+ "axes[0].barh(vader_by_country['country'], vader_by_country['avg_vader_score'],\n",
396
+ " color=colors, edgecolor='white', linewidth=0.6)\n",
397
+ "axes[0].axvline(0, color='gray', linewidth=0.8, linestyle='--')\n",
398
+ "axes[0].set_title('Average VADER Compound Score by Country', fontweight='bold', color='#28096D')\n",
399
+ "axes[0].set_xlabel('Compound Score (−1 to +1)')\n",
400
+ "axes[0].tick_params(labelsize=10)\n",
401
+ "\n",
402
+ "# Right: Sentiment distribution stacked bar\n",
403
+ "sent_dist = (\n",
404
+ " df_sentiment.groupby(['country','vader_label'])\n",
405
+ " .size().unstack(fill_value=0)\n",
406
+ ")\n",
407
+ "for lbl in ['bullish','neutral','bearish']:\n",
408
+ " if lbl not in sent_dist.columns:\n",
409
+ " sent_dist[lbl] = 0\n",
410
+ "sent_dist = sent_dist.reindex(columns=['bullish','neutral','bearish'])\n",
411
+ "sent_colors = {'bullish': '#2ec4a0', 'neutral': '#5e8fef', 'bearish': '#e8537a'}\n",
412
+ "sent_dist.plot(kind='barh', stacked=True, ax=axes[1],\n",
413
+ " color=[sent_colors[c] for c in sent_dist.columns],\n",
414
+ " edgecolor='white', linewidth=0.4)\n",
415
+ "axes[1].set_title('Report Sentiment Distribution by Country', fontweight='bold', color='#28096D')\n",
416
+ "axes[1].set_xlabel('Number of Reports')\n",
417
+ "axes[1].legend(title='VADER Label', loc='lower right')\n",
418
+ "axes[1].tick_params(labelsize=10)\n",
419
+ "\n",
420
+ "plt.suptitle('Qualitative Analysis: News Sentiment Across Emerging Markets',\n",
421
+ " fontsize=13, fontweight='bold', color='#28096D', y=1.02)\n",
422
+ "plt.tight_layout()\n",
423
+ "plt.savefig(PY_FIG / 'vader_sentiment.png', dpi=150, bbox_inches='tight')\n",
424
+ "plt.show()\n",
425
+ "print('✅ vader_sentiment.png saved')"
426
+ ]
427
+ },
428
+ {
429
+ "cell_type": "markdown",
430
+ "id": "67044680",
431
+ "metadata": {},
432
+ "source": [
433
+ "## 10. Quantitative Analysis — FDI & Inflation Trends"
434
+ ]
435
+ },
436
+ {
437
+ "cell_type": "code",
438
+ "execution_count": null,
439
+ "id": "c624d67d",
440
+ "metadata": {},
441
+ "outputs": [],
442
+ "source": [
443
+ "fig, axes = plt.subplots(1, 2, figsize=(16, 6))\n",
444
+ "\n",
445
+ "# FDI line chart\n",
446
+ "for i, (iso3, cname) in enumerate(df_macro.groupby(['iso3','country']).size().index):\n",
447
+ " sub = df_macro[df_macro['iso3'] == iso3]\n",
448
+ " axes[0].plot(sub['year'], sub['fdi_pct_gdp'],\n",
449
+ " label=cname, color=PALETTE[i % len(PALETTE)],\n",
450
+ " linewidth=1.8, alpha=0.85)\n",
451
+ "axes[0].set_title('FDI Inflows (% of GDP)', fontweight='bold', color='#28096D')\n",
452
+ "axes[0].set_xlabel('Year'); axes[0].set_ylabel('% of GDP')\n",
453
+ "axes[0].legend(fontsize=7, ncol=2)\n",
454
+ "axes[0].axhline(0, color='gray', linewidth=0.6, linestyle='--')\n",
455
+ "\n",
456
+ "# Inflation boxplot by country (rows with |inflation| >= 200% are excluded first)\n",
457
+ "df_macro_clean = df_macro[df_macro['inflation'].abs() < 200]\n",
458
+ "axes[1].boxplot(\n",
459
+ " [df_macro_clean[df_macro_clean['country'] == c]['inflation'].dropna()\n",
460
+ " for c in df_macro_clean['country'].unique()],\n",
461
+ " labels=df_macro_clean['country'].unique(),\n",
462
+ " patch_artist=True,\n",
463
+ " boxprops=dict(facecolor='#a48de8', alpha=0.7),\n",
464
+ " medianprops=dict(color='#28096D', linewidth=2)\n",
465
+ ")\n",
466
+ "axes[1].set_title('Inflation Distribution by Country', fontweight='bold', color='#28096D')\n",
467
+ "axes[1].set_ylabel('Inflation (%)')\n",
468
+ "axes[1].tick_params(axis='x', rotation=45, labelsize=9)\n",
469
+ "\n",
470
+ "plt.tight_layout()\n",
471
+ "plt.savefig(PY_FIG / 'fdi_inflation.png', dpi=150, bbox_inches='tight')\n",
472
+ "plt.show()\n",
473
+ "print('✅ fdi_inflation.png saved')"
474
+ ]
475
+ },
476
+ {
477
+ "cell_type": "markdown",
478
+ "id": "69b4bbad",
479
+ "metadata": {},
480
+ "source": [
481
+ "## 11. Quantitative Analysis — Random Forest Investment Signal Classifier\n",
482
+ "\n",
483
+ "We train a **Random Forest** on macro + risk + sentiment features to learn which\n",
484
+ "country-years should be tagged *overweight*, *neutral*, or *underweight*.\n",
485
+ "\n",
486
+ "The investment signal target is defined by a rule:\n",
487
+ "- **Overweight**: GDP growth above median AND geopolitical risk below median\n",
488
+ "- **Underweight**: GDP growth in bottom tercile\n",
489
+ "- **Neutral**: all others\n",
490
+ "\n",
491
+ "This rule is used to create labelled training data. The RF then learns non-linear\n",
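492
+ "feature interactions that a simple rule would miss. Caveat: the labels are a deterministic function of two of the model's own inputs (`gdp_growth` and `geopolitical_risk_score`), so the reported accuracy measures how well the forest reproduces the rule, not independent predictive skill."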
493
+ ]
494
+ },
495
+ {
496
+ "cell_type": "code",
497
+ "execution_count": null,
498
+ "id": "cbec8543",
499
+ "metadata": {},
500
+ "outputs": [],
501
+ "source": [
502
+ "from sklearn.ensemble import RandomForestClassifier\n",
503
+ "from sklearn.model_selection import train_test_split\n",
504
+ "from sklearn.metrics import classification_report, accuracy_score\n",
505
+ "from sklearn.preprocessing import LabelEncoder\n",
506
+ "\n",
507
+ "FEATURES = ['gdp_growth', 'fdi_pct_gdp', 'inflation',\n",
508
+ " 'geopolitical_risk_score', 'vader_score']\n",
509
+ "\n",
510
+ "# Build target labels\n",
511
+ "df_ml = df_master.dropna(subset=[c for c in FEATURES if c in df_master.columns]).copy()\n",
512
+ "\n",
513
+ "gdp_med = df_ml['gdp_growth'].median()\n",
514
+ "risk_med = df_ml['geopolitical_risk_score'].median() if 'geopolitical_risk_score' in df_ml.columns else 5\n",
515
+ "gdp_q33 = df_ml['gdp_growth'].quantile(0.33)\n",
516
+ "\n",
517
+ "def label_signal(row):\n",
518
+ " try:\n",
519
+ " if row['gdp_growth'] > gdp_med and row['geopolitical_risk_score'] < risk_med:\n",
520
+ " return 'overweight'\n",
521
+ " if row['gdp_growth'] < gdp_q33:\n",
522
+ " return 'underweight'\n",
523
+ " return 'neutral'\n",
524
+ " except Exception:\n",
525
+ " return 'neutral'\n",
526
+ "\n",
527
+ "df_ml['investment_signal'] = df_ml.apply(label_signal, axis=1)\n",
528
+ "print('Signal distribution:')\n",
529
+ "print(df_ml['investment_signal'].value_counts())"
530
+ ]
531
+ },
532
+ {
533
+ "cell_type": "code",
534
+ "execution_count": null,
535
+ "id": "eb472edc",
536
+ "metadata": {},
537
+ "outputs": [],
538
+ "source": [
539
+ "# Filter to available features\n",
540
+ "avail_features = [f for f in FEATURES if f in df_ml.columns]\n",
541
+ "X = df_ml[avail_features].fillna(df_ml[avail_features].median())\n",
542
+ "y = df_ml['investment_signal']\n",
543
+ "\n",
544
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,\n",
545
+ " random_state=42, stratify=y)\n",
546
+ "\n",
547
+ "rf = RandomForestClassifier(n_estimators=200, max_depth=8,\n",
548
+ " class_weight='balanced', random_state=42)\n",
549
+ "rf.fit(X_train, y_train)\n",
550
+ "y_pred = rf.predict(X_test)\n",
551
+ "\n",
552
+ "print(classification_report(y_test, y_pred))\n",
553
+ "print(f'Accuracy: {accuracy_score(y_test, y_pred):.3f}')"
554
+ ]
555
+ },
556
+ {
557
+ "cell_type": "code",
558
+ "execution_count": null,
559
+ "id": "726e9575",
560
+ "metadata": {},
561
+ "outputs": [],
562
+ "source": [
563
+ "# Feature importance chart\n",
564
+ "feat_imp = pd.DataFrame({\n",
565
+ " 'feature': avail_features,\n",
566
+ " 'importance': rf.feature_importances_\n",
567
+ "}).sort_values('importance')\n",
568
+ "\n",
569
+ "fig, ax = plt.subplots(figsize=(9, 5))\n",
570
+ "colors_imp = ['#28096D' if f == feat_imp.iloc[-1]['feature'] else '#a48de8'\n",
571
+ " for f in feat_imp['feature']]\n",
572
+ "ax.barh(feat_imp['feature'], feat_imp['importance'],\n",
573
+ " color=colors_imp, edgecolor='white')\n",
574
+ "ax.set_title('Random Forest — Feature Importances', fontweight='bold',\n",
575
+ " color='#28096D', fontsize=13)\n",
576
+ "ax.set_xlabel('Importance (Gini)', fontsize=11)\n",
577
+ "ax.tick_params(labelsize=11)\n",
578
+ "for v, f in zip(feat_imp['importance'], feat_imp['feature']):\n",
579
+ " ax.text(v + 0.002, f, f'{v:.3f}', va='center', fontsize=10)\n",
580
+ "plt.tight_layout()\n",
581
+ "plt.savefig(PY_FIG / 'rf_feature_importance.png', dpi=150, bbox_inches='tight')\n",
582
+ "plt.show()\n",
583
+ "print('✅ rf_feature_importance.png saved')"
584
+ ]
585
+ },
586
+ {
587
+ "cell_type": "code",
588
+ "execution_count": null,
589
+ "id": "176186e1",
590
+ "metadata": {},
591
+ "outputs": [],
592
+ "source": [
593
+ "# Investment signal summary\n",
594
+ "df_ml['rf_prediction'] = rf.predict(X)\n",
595
+ "signal_summary = (\n",
596
+ " df_ml.groupby('country')\n",
597
+ " .apply(lambda x: (x['rf_prediction'] == 'overweight').mean() * 100)\n",
598
+ " .reset_index()\n",
599
+ ")\n",
600
+ "signal_summary.columns = ['country', 'pct_overweight']\n",
601
+ "signal_summary = signal_summary.sort_values('pct_overweight', ascending=False)\n",
602
+ "signal_summary.to_csv(PY_TAB / 'investment_signal_summary.csv', index=False)\n",
603
+ "print('✅ investment_signal_summary.csv saved')\n",
604
+ "print(signal_summary)"
605
+ ]
606
+ },
607
+ {
608
+ "cell_type": "code",
609
+ "execution_count": null,
610
+ "id": "fed3baa1",
611
+ "metadata": {},
612
+ "outputs": [],
613
+ "source": [
614
+ "# Investment signal chart\n",
615
+ "fig, ax = plt.subplots(figsize=(10, 6))\n",
616
+ "colors_sig = ['#2ec4a0' if v >= 50 else '#e8537a'\n",
617
+ " for v in signal_summary['pct_overweight']]\n",
618
+ "ax.barh(signal_summary['country'], signal_summary['pct_overweight'],\n",
619
+ " color=colors_sig, edgecolor='white', linewidth=0.5)\n",
620
+ "ax.axvline(50, color='gray', linewidth=1.2, linestyle='--', label='50% threshold')\n",
621
+ "ax.set_title('Investment Signal: % of Years Classified as Overweight',\n",
622
+ " fontweight='bold', color='#28096D', fontsize=13)\n",
623
+ "ax.set_xlabel('% Years Overweight', fontsize=11)\n",
624
+ "ax.set_xlim(0, 100)\n",
625
+ "for v, c in zip(signal_summary['pct_overweight'], signal_summary['country']):\n",
626
+ " ax.text(v + 1, c, f'{v:.0f}%', va='center', fontsize=10)\n",
627
+ "ax.legend(fontsize=10)\n",
628
+ "plt.tight_layout()\n",
629
+ "plt.savefig(PY_FIG / 'investment_signal.png', dpi=150, bbox_inches='tight')\n",
630
+ "plt.show()\n",
631
+ "print('✅ investment_signal.png saved')"
632
+ ]
633
+ },
634
+ {
635
+ "cell_type": "markdown",
636
+ "id": "a9005249",
637
+ "metadata": {},
638
+ "source": [
639
+ "## 12. Quantitative Analysis — ARIMA GDP Growth Forecasting\n",
640
+ "\n",
641
+ "We use **ARIMA** (Auto-Regressive Integrated Moving Average) to forecast the average\n",
642
+ "EM GDP growth rate for 2024–2028. ARIMA is fit on the *average* monthly series across\n",
643
+ "all 10 countries, as this gives the fund a macro-level view of EM momentum.\n",
644
+ "\n",
645
+ "The `find_best_arima` helper searches the integer grid p ∈ {0,…,3}, d ∈ {0,1}, q ∈ {0,1},\n",
646
+ "skips any order whose fit raises an error, and selects the order minimising **AIC**."
647
+ ]
648
+ },
649
+ {
650
+ "cell_type": "code",
651
+ "execution_count": null,
652
+ "id": "44eff2cc",
653
+ "metadata": {},
654
+ "outputs": [],
655
+ "source": [
656
+ "from statsmodels.tsa.arima.model import ARIMA\n",
657
+ "\n",
658
+ "def find_best_arima(series, p_range=(0, 3), d_range=(0, 1), q_range=(0, 1)):\n",
659
+ " best_aic, best_order, best_model = float('inf'), None, None\n",
660
+ " for p, d, q in product(range(p_range[0], p_range[1] + 1),\n",
661
+ " range(d_range[0], d_range[1] + 1),\n",
662
+ " range(q_range[0], q_range[1] + 1)):\n",
663
+ " try:\n",
664
+ " m = ARIMA(series, order=(p, d, q)).fit()\n",
665
+ " if m.aic < best_aic:\n",
666
+ " best_aic, best_order, best_model = m.aic, (p, d, q), m\n",
667
+ " except Exception:\n",
668
+ " pass\n",
669
+ " return best_order, best_model"
670
+ ]
671
+ },
672
+ {
673
+ "cell_type": "code",
674
+ "execution_count": null,
675
+ "id": "6ace7aa0",
676
+ "metadata": {},
677
+ "outputs": [],
678
+ "source": [
679
+ "# Prepare monthly series: average across countries\n",
680
+ "df_monthly['month'] = pd.to_datetime(df_monthly['month'], errors='coerce')\n",
681
+ "monthly_avg = (\n",
682
+ " df_monthly.dropna(subset=['month'])\n",
683
+ " .groupby('month')['gdp_growth_monthly']\n",
684
+ " .mean()\n",
685
+ " .sort_index()\n",
686
+ ")\n",
687
+ "\n",
688
+ "# Fit best ARIMA\n",
689
+ "best_order, best_fit = find_best_arima(monthly_avg)\n",
690
+ "print(f'Best ARIMA order: {best_order}')\n",
691
+ "print(f'AIC: {best_fit.aic:.2f}')"
692
+ ]
693
+ },
694
+ {
695
+ "cell_type": "code",
696
+ "execution_count": null,
697
+ "id": "9ab3a37c",
698
+ "metadata": {},
699
+ "outputs": [],
700
+ "source": [
701
+ "# Forecast 60 months ahead (5 years)\n",
702
+ "n_forecast = 60\n",
703
+ "forecast_result = best_fit.get_forecast(steps=n_forecast)\n",
704
+ "forecast_mean = forecast_result.predicted_mean\n",
705
+ "forecast_ci = forecast_result.conf_int()\n",
706
+ "\n",
707
+ "forecast_index = pd.date_range(\n",
708
+ " start=monthly_avg.index[-1] + pd.DateOffset(months=1),\n",
709
+ " periods=n_forecast, freq='MS'\n",
710
+ ")\n",
711
+ "forecast_mean.index = forecast_index\n",
712
+ "forecast_ci.index = forecast_index\n",
713
+ "\n",
714
+ "# Annual roll-up for app\n",
715
+ "arima_annual = (\n",
716
+ " pd.DataFrame({'forecast': forecast_mean,\n",
717
+ " 'lower_ci': forecast_ci.iloc[:, 0],\n",
718
+ " 'upper_ci': forecast_ci.iloc[:, 1]})\n",
719
+ " .resample('YE').mean()\n",
720
+ ")\n",
721
+ "arima_annual.index = arima_annual.index.year\n",
722
+ "arima_annual.index.name = 'year'\n",
723
+ "arima_annual.reset_index().to_csv(PY_TAB / 'arima_gdp_forecast.csv', index=False)\n",
724
+ "print('✅ arima_gdp_forecast.csv saved')\n",
725
+ "print(arima_annual.round(3))"
726
+ ]
727
+ },
728
+ {
729
+ "cell_type": "code",
730
+ "execution_count": null,
731
+ "id": "3b8d4a55",
732
+ "metadata": {},
733
+ "outputs": [],
734
+ "source": [
735
+ "# Plot historical + forecast\n",
736
+ "fig, ax = plt.subplots(figsize=(14, 6))\n",
737
+ "\n",
738
+ "# Historical (annual average for readability)\n",
739
+ "hist_annual = monthly_avg.resample('YE').mean()\n",
740
+ "hist_annual.index = hist_annual.index.year\n",
741
+ "ax.plot(hist_annual.index, hist_annual.values,\n",
742
+ " color='#28096D', linewidth=2.5, marker='o', markersize=5, label='Historical avg')\n",
743
+ "\n",
744
+ "# Forecast\n",
745
+ "ax.plot(arima_annual.index, arima_annual['forecast'],\n",
746
+ " color='#e8537a', linewidth=2.5, linestyle='--', marker='s', markersize=5, label='ARIMA Forecast')\n",
747
+ "ax.fill_between(arima_annual.index, arima_annual['lower_ci'], arima_annual['upper_ci'],\n",
748
+ " color='#e8537a', alpha=0.15, label='95% CI')\n",
749
+ "\n",
750
+ "ax.axvline(2023.5, color='gray', linewidth=1, linestyle=':', label='Forecast start')\n",
751
+ "ax.axhline(0, color='black', linewidth=0.7, linestyle='-', alpha=0.4)\n",
752
+ "ax.set_title('ARIMA GDP Growth Forecast — Average EM Basket (2024–2028)',\n",
753
+ " fontweight='bold', color='#28096D', fontsize=13)\n",
754
+ "ax.set_xlabel('Year', fontsize=11); ax.set_ylabel('Avg GDP Growth (%)', fontsize=11)\n",
755
+ "ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda v, _: f'{v:.1f}%'))\n",
756
+ "ax.legend(fontsize=10)\n",
757
+ "plt.tight_layout()\n",
758
+ "plt.savefig(PY_FIG / 'arima_gdp_forecast.png', dpi=150, bbox_inches='tight')\n",
759
+ "plt.show()\n",
760
+ "print('✅ arima_gdp_forecast.png saved')"
761
+ ]
762
+ },
763
+ {
764
+ "cell_type": "markdown",
765
+ "id": "6856f8f5",
766
+ "metadata": {},
767
+ "source": [
768
+ "## 13. Country Predictions Summary Table\n",
769
+ "\n",
770
+ "Final table merging the Random Forest signal, VADER score, and latest macro values into one actionable view for fund managers."
771
+ ]
772
+ },
773
+ {
774
+ "cell_type": "code",
775
+ "execution_count": null,
776
+ "id": "07aac448",
777
+ "metadata": {},
778
+ "outputs": [],
779
+ "source": [
780
+ "# Latest year for each country\n",
781
+ "latest = df_ml.sort_values('year').groupby('country').last().reset_index()\n",
782
+ "\n",
783
+ "latest_out = latest[['country', 'iso3', 'year',\n",
784
+ " 'gdp_growth', 'fdi_pct_gdp', 'inflation',\n",
785
+ " 'geopolitical_risk_score', 'vader_score',\n",
786
+ " 'investment_signal', 'rf_prediction']].copy()\n",
787
+ "\n",
788
+ "# Readable labels\n",
789
+ "latest_out['recommendation'] = latest_out['rf_prediction'].map({\n",
790
+ " 'overweight': 'OVERWEIGHT ✅',\n",
791
+ " 'underweight': 'UNDERWEIGHT 🔻',\n",
792
+ " 'neutral': 'NEUTRAL ➡️'\n",
793
+ "})\n",
794
+ "\n",
795
+ "latest_out.to_csv(PY_TAB / 'country_predictions_latest.csv', index=False)\n",
796
+ "print('✅ country_predictions_latest.csv saved')\n",
797
+ "print(latest_out[['country','gdp_growth','geopolitical_risk_score',\n",
798
+ " 'vader_score','recommendation']].to_string(index=False))"
799
+ ]
800
+ },
801
+ {
802
+ "cell_type": "markdown",
803
+ "id": "0a01cf38",
804
+ "metadata": {},
805
+ "source": [
806
+ "## 14. Dashboard Data Exports\n",
807
+ "\n",
808
+ "The Hugging Face app (`app.py`) reads `df_dashboard.csv` and `kpis.json`."
809
+ ]
810
+ },
811
+ {
812
+ "cell_type": "code",
813
+ "execution_count": null,
814
+ "id": "1999318c",
815
+ "metadata": {},
816
+ "outputs": [],
817
+ "source": [
818
+ "# df_dashboard: annual average GDP growth (used by the GDP trend chart in the app)\n",
819
+ "df_dashboard = (\n",
820
+ " df_macro.groupby('year')['gdp_growth']\n",
821
+ " .mean()\n",
822
+ " .reset_index()\n",
823
+ " .rename(columns={'gdp_growth': 'avg_gdp_growth'})\n",
824
+ ")\n",
825
+ "df_dashboard.to_csv(PY_TAB / 'df_dashboard.csv', index=False)\n",
826
+ "print('✅ df_dashboard.csv saved')"
827
+ ]
828
+ },
829
+ {
830
+ "cell_type": "code",
831
+ "execution_count": null,
832
+ "id": "fd440bbd",
833
+ "metadata": {},
834
+ "outputs": [],
835
+ "source": [
836
+ "# KPI summary\n",
837
+ "top_ow = signal_summary.iloc[0]['country']\n",
838
+ "top_uw = signal_summary.iloc[-1]['country']\n",
839
+ "\n",
840
+ "kpis = {\n",
841
+ " 'Countries Analysed': int(df_macro['country'].nunique()),\n",
842
+ " 'Years Covered': f\"{int(df_macro['year'].min())}–{int(df_macro['year'].max())}\",\n",
843
+ " 'Avg GDP Growth': f\"{df_macro['gdp_growth'].mean():.2f}%\",\n",
844
+ " 'Avg Geo Risk': f\"{df_risk['geopolitical_risk_score'].mean():.1f}/10\",\n",
845
+ " 'Top Overweight': str(top_ow),\n",
846
+ " 'Top Underweight': str(top_uw),\n",
847
+ " 'RF Accuracy': f\"{accuracy_score(y_test, y_pred)*100:.1f}%\",\n",
848
+ " 'Headlines Analysed': int(len(df_sentiment)),\n",
849
+ "}\n",
850
+ "\n",
851
+ "with open(PY_TAB / 'kpis.json', 'w') as f:\n",
852
+ " json.dump(kpis, f, indent=2)\n",
853
+ "\n",
854
+ "print('✅ kpis.json saved')\n",
855
+ "print(json.dumps(kpis, indent=2))"
856
+ ]
857
+ },
858
+ {
859
+ "cell_type": "markdown",
860
+ "id": "00a6bb3e",
861
+ "metadata": {},
862
+ "source": [
863
+ "## 15. Final Summary — 4-Panel Overview Chart"
864
+ ]
865
+ },
866
+ {
867
+ "cell_type": "code",
868
+ "execution_count": null,
869
+ "id": "59a6cc07",
870
+ "metadata": {},
871
+ "outputs": [],
872
+ "source": [
873
+ "fig, axes = plt.subplots(2, 2, figsize=(16, 12))\n",
874
+ "fig.suptitle('EM Portfolio Risk Advisor — Analysis Overview',\n",
875
+ " fontsize=16, fontweight='bold', color='#28096D')\n",
876
+ "\n",
877
+ "# Panel A: Average GDP growth by country\n",
878
+ "avg_gdp = df_macro.groupby('country')['gdp_growth'].mean().sort_values()\n",
879
+ "clrs = ['#e8537a' if v < 0 else '#2ec4a0' for v in avg_gdp]\n",
880
+ "axes[0,0].barh(avg_gdp.index, avg_gdp.values, color=clrs, edgecolor='white')\n",
881
+ "axes[0,0].axvline(0, color='gray', linewidth=0.8, linestyle='--')\n",
882
+ "axes[0,0].set_title('A. Avg GDP Growth 2000–2023', fontweight='bold', color='#28096D')\n",
883
+ "axes[0,0].set_xlabel('% per year')\n",
884
+ "\n",
885
+ "# Panel B: Average geo risk by country\n",
886
+ "avg_risk = df_risk.groupby('country')['geopolitical_risk_score'].mean().sort_values()\n",
887
+ "axes[0,1].barh(avg_risk.index, avg_risk.values,\n",
888
+ " color='#F2C637', edgecolor='white')\n",
889
+ "axes[0,1].set_title('B. Avg Geopolitical Risk Score', fontweight='bold', color='#28096D')\n",
890
+ "axes[0,1].set_xlabel('Score (0–10)')\n",
891
+ "\n",
892
+ "# Panel C: VADER compound score\n",
893
+ "axes[1,0].barh(vader_by_country['country'], vader_by_country['avg_vader_score'],\n",
894
+ " color=['#2ec4a0' if v>=0.05 else('#e8537a' if v<=-0.05 else '#5e8fef')\n",
895
+ " for v in vader_by_country['avg_vader_score']],\n",
896
+ " edgecolor='white')\n",
897
+ "axes[1,0].axvline(0, color='gray', linewidth=0.8, linestyle='--')\n",
898
+ "axes[1,0].set_title('C. Avg VADER Sentiment Score', fontweight='bold', color='#28096D')\n",
899
+ "axes[1,0].set_xlabel('Compound Score')\n",
900
+ "\n",
901
+ "# Panel D: % years overweight (RF signal)\n",
902
+ "sig_sorted = signal_summary.sort_values('pct_overweight')\n",
903
+ "axes[1,1].barh(sig_sorted['country'], sig_sorted['pct_overweight'],\n",
904
+ " color=['#2ec4a0' if v>=50 else '#e8537a' for v in sig_sorted['pct_overweight']],\n",
905
+ " edgecolor='white')\n",
906
+ "axes[1,1].axvline(50, color='gray', linewidth=1, linestyle='--')\n",
907
+ "axes[1,1].set_title('D. RF Investment Signal (% yrs Overweight)', fontweight='bold', color='#28096D')\n",
908
+ "axes[1,1].set_xlabel('% Years')\n",
909
+ "axes[1,1].set_xlim(0, 100)\n",
910
+ "\n",
911
+ "plt.tight_layout()\n",
912
+ "plt.savefig(PY_FIG / 'analysis_overview.png', dpi=150, bbox_inches='tight')\n",
913
+ "plt.show()\n",
914
+ "print('✅ analysis_overview.png saved')"
915
+ ]
916
+ },
917
+ {
918
+ "cell_type": "code",
919
+ "execution_count": null,
920
+ "id": "0a4810ef",
921
+ "metadata": {},
922
+ "outputs": [],
923
+ "source": [
924
+ "print()\n",
925
+ "print('=' * 55)\n",
926
+ "print(' NOTEBOOK 2 COMPLETE — EM PORTFOLIO RISK ADVISOR')\n",
927
+ "print('=' * 55)\n",
928
+ "\n",
929
+ "import os\n",
930
+ "figures = list(PY_FIG.glob('*.png'))\n",
931
+ "tables = list(PY_TAB.glob('*.csv')) + list(PY_TAB.glob('*.json'))\n",
932
+ "\n",
933
+ "print(f' Figures saved : {len(figures)}')\n",
934
+ "for f in sorted(figures): print(f' • {f.name}')\n",
935
+ "print(f' Tables saved : {len(tables)}')\n",
936
+ "for t in sorted(tables): print(f' • {t.name}')\n",
937
+ "print()\n",
938
+ "print(' Handoff to Hugging Face Space app.py ✅')"
939
+ ]
940
+ }
941
+ ],
942
+ "metadata": {
943
+ "kernelspec": {
944
+ "display_name": "Python 3",
945
+ "language": "python",
946
+ "name": "python3"
947
+ },
948
+ "language_info": {
949
+ "name": "python",
950
+ "version": "3.10.0"
951
+ }
952
+ },
953
+ "nbformat": 4,
954
+ "nbformat_minor": 5
955
+ }
requirements.txt CHANGED
@@ -1,17 +1,12 @@
1
- gradio==6.0.0
2
- pandas>=2.0.0
3
- numpy>=1.24.0
4
- matplotlib>=3.7.0
5
- seaborn>=0.13.0
6
- statsmodels>=0.14.0
7
- scikit-learn>=1.3.0
8
- papermill>=2.5.0
9
- nbformat>=5.9.0
10
- pillow>=10.0.0
11
- requests>=2.31.0
12
- beautifulsoup4>=4.12.0
13
- vaderSentiment>=3.3.2
14
- huggingface_hub>=0.20.0
15
- textblob>=0.18.0
16
- faker>=20.0.0
17
- plotly>=5.18.0
 
1
+ gradio==5.25.0
2
+ papermill
3
+ pandas
4
+ numpy
5
+ matplotlib
6
+ seaborn
7
+ vaderSentiment
8
+ statsmodels
9
+ scikit-learn
10
+ plotly
11
+ wbgapi
12
+ ipykernel
 
 
 
 
 
style.css CHANGED
@@ -1,326 +1,276 @@
1
- /* --- Target the Gradio app wrapper for backgrounds --- */
2
- gradio-app,
3
- .gradio-app,
4
- .main,
5
- #app,
6
- [data-testid="app"] {
7
- background-color: rgb(40,9,109) !important;
8
- background-image:
9
- url('https://huggingface.co/spaces/atascioglu/SE21AppTemplate/resolve/main/background_top.png'),
10
- url('https://huggingface.co/spaces/atascioglu/SE21AppTemplate/resolve/main/background_mid.png'),
11
- url('https://huggingface.co/spaces/atascioglu/SE21AppTemplate/resolve/main/background_bottom.png') !important;
12
- background-position:
13
- top center,
14
- 0 913px,
15
- bottom center !important;
16
- background-repeat:
17
- no-repeat,
18
- repeat-y,
19
- no-repeat !important;
20
- background-size:
21
- 100% auto,
22
- 100% auto,
23
- 100% auto !important;
24
- min-height: 100vh !important;
25
- }
26
-
27
- /* --- Fallback on html/body --- */
28
- html, body {
29
- background-color: rgb(40,9,109) !important;
30
- margin: 0 !important;
31
- padding: 0 !important;
32
- min-height: 100vh !important;
33
- }
34
-
35
- /* Bottom image is now part of the main background layers (positioned at bottom center) */
36
-
37
- /* --- Main container --- */
38
  .gradio-container {
39
- max-width: 1400px !important;
40
- width: 94vw !important;
41
- margin: 0 auto !important;
42
- padding-top: 220px !important;
43
- padding-bottom: 150px !important;
44
- background: transparent !important;
45
  }
46
 
47
- /* --- Title in ESCP gold --- */
48
- #escp_title h1 {
49
- color: rgb(242,198,55) !important;
50
- font-size: 3rem !important;
51
- font-weight: 800 !important;
52
- text-align: center !important;
53
- margin: 0 0 12px 0 !important;
54
- }
55
-
56
- /* --- Subtitle --- */
57
- #escp_title p, #escp_title em {
58
- color: rgba(255,255,255,0.85) !important;
59
- text-align: center !important;
60
- }
61
-
62
- /* --- Tab bar background --- */
63
- .tabs > .tab-nav,
64
- .tab-nav,
65
- div[role="tablist"],
66
- .svelte-tabs > .tab-nav {
67
- background: rgba(40,9,109,0.6) !important;
68
- border-radius: 10px 10px 0 0 !important;
69
- padding: 4px !important;
70
  }
71
 
72
- /* --- ALL tab buttons: force white text --- */
73
- .tabs > .tab-nav button,
74
- .tab-nav button,
75
- div[role="tablist"] button,
76
- button[role="tab"],
77
- .svelte-tabs button,
78
- .tab-nav > button,
79
- .tabs button {
80
- color: #ffffff !important;
81
- font-weight: 600 !important;
82
- border: none !important;
83
- background: transparent !important;
84
- padding: 10px 20px !important;
85
- border-radius: 8px 8px 0 0 !important;
86
- opacity: 1 !important;
87
- }
88
-
89
- /* --- Selected tab: ESCP gold --- */
90
- .tabs > .tab-nav button.selected,
91
- .tab-nav button.selected,
92
- button[role="tab"][aria-selected="true"],
93
- button[role="tab"].selected,
94
- div[role="tablist"] button[aria-selected="true"],
95
- .svelte-tabs button.selected {
96
- color: rgb(242,198,55) !important;
97
- background: rgba(255,255,255,0.12) !important;
98
  }
99
 
100
- /* --- Unselected tabs: ensure visibility --- */
101
- .tabs > .tab-nav button:not(.selected),
102
- .tab-nav button:not(.selected),
103
- button[role="tab"][aria-selected="false"],
104
- button[role="tab"]:not(.selected),
105
- div[role="tablist"] button:not([aria-selected="true"]) {
106
  color: #ffffff !important;
107
- opacity: 1 !important;
 
 
108
  }
109
 
110
- /* --- White card panels --- */
111
- .gradio-container .gr-block,
112
- .gradio-container .gr-box,
113
- .gradio-container .gr-panel,
114
- .gradio-container .gr-group {
115
- background: #ffffff !important;
116
- border-radius: 10px !important;
117
  }
118
 
119
- /* --- Tab content area --- */
120
- .tabitem {
121
- background: rgba(255,255,255,0.95) !important;
122
- border-radius: 0 0 10px 10px !important;
123
- padding: 16px !important;
124
  }
125
 
126
- /* --- Inputs --- */
127
- .gradio-container input,
128
- .gradio-container textarea,
129
- .gradio-container select {
130
- background: #ffffff !important;
131
- border: 1px solid #d1d5db !important;
132
- border-radius: 8px !important;
133
  }
134
 
135
- /* --- Buttons: ESCP purple primary --- */
136
- .gradio-container button:not([role="tab"]) {
137
- font-weight: 600 !important;
138
- padding: 10px 16px !important;
139
- border-radius: 10px !important;
 
 
140
  }
141
 
142
- button.primary {
143
- background-color: rgb(40,9,109) !important;
144
  color: #ffffff !important;
145
- border: none !important;
146
- }
147
-
148
- button.primary:hover {
149
- background-color: rgb(60,20,140) !important;
150
  }
151
 
152
- button.secondary {
153
- background-color: #ffffff !important;
154
- color: rgb(40,9,109) !important;
155
- border: 2px solid rgb(40,9,109) !important;
156
- }
157
-
158
- button.secondary:hover {
159
- background-color: rgb(240,238,250) !important;
160
  }
161
 
162
- /* --- Dataframes --- */
163
- [data-testid="dataframe"] {
164
- background-color: #ffffff !important;
165
- border-radius: 10px !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  }
167
 
168
- table {
169
- font-size: 0.85rem !important;
 
 
170
  }
171
 
172
- /* --- Chatbot (AI Dashboard tab) --- */
173
- .gr-chatbot {
174
- min-height: 380px !important;
175
- background-color: #ffffff !important;
 
176
  border-radius: 12px !important;
 
 
 
177
  }
178
 
179
- .gr-chatbot .message.user {
180
- background-color: rgb(232,225,250) !important;
181
- border-radius: 12px !important;
182
  }
183
 
184
- .gr-chatbot .message.bot {
185
- background-color: #f3f4f6 !important;
 
 
 
 
186
  border-radius: 12px !important;
 
 
187
  }
188
 
189
- /* --- Gallery --- */
190
- .gallery {
191
- background: #ffffff !important;
 
 
 
192
  border-radius: 10px !important;
 
193
  }
194
 
195
- /* --- Log textbox --- */
196
- textarea {
197
- font-family: monospace !important;
198
- font-size: 0.8rem !important;
199
  }
200
 
201
- /* --- Markdown headings inside tabs --- */
202
- .tabitem h3 {
203
- color: rgb(40,9,109) !important;
204
- font-weight: 700 !important;
 
 
 
205
  }
206
 
207
- .tabitem h4 {
208
- color: #374151 !important;
209
  }
210
 
211
- /* --- Examples row (AI Dashboard) --- */
212
- .examples-row button {
213
- background: rgb(240,238,250) !important;
214
- color: rgb(40,9,109) !important;
215
- border: 1px solid rgb(40,9,109) !important;
216
- border-radius: 8px !important;
217
- font-size: 0.85rem !important;
218
  }
219
 
220
- .examples-row button:hover {
221
- background: rgb(232,225,250) !important;
 
 
 
 
 
 
 
 
222
  }
223
 
224
- /* --- Header / footer: transparent over banner --- */
225
- header, header *,
226
- footer, footer * {
227
- background: transparent !important;
228
- box-shadow: none !important;
229
- }
230
 
231
- footer a, footer button,
232
- header a, header button {
233
- background: transparent !important;
234
- border: none !important;
235
- box-shadow: none !important;
236
  }
237
 
238
- #footer, #footer *,
239
- [class*="footer"], [class*="footer"] *,
240
- [class*="chip"], [class*="pill"], [class*="chip"] *, [class*="pill"] * {
241
- background: transparent !important;
242
- border: none !important;
243
- box-shadow: none !important;
244
  }
245
 
246
- [data-testid*="api"], [data-testid*="settings"],
247
- [id*="api"], [id*="settings"],
248
- [class*="api"], [class*="settings"],
249
- [class*="bottom"], [class*="toolbar"], [class*="controls"] {
250
- background: transparent !important;
251
- box-shadow: none !important;
252
  }
253
 
254
- [data-testid*="api"] *, [data-testid*="settings"] *,
255
- [id*="api"] *, [id*="settings"] *,
256
- [class*="api"] *, [class*="settings"] * {
257
- background: transparent !important;
258
- box-shadow: none !important;
259
  }
260
 
261
- section footer {
262
- background: transparent !important;
 
 
 
 
263
  }
264
 
265
- section footer button,
266
- section footer a {
267
- background: transparent !important;
268
- background-color: transparent !important;
269
- border: none !important;
270
- box-shadow: none !important;
271
- color: white !important;
272
  }
273
 
274
- section footer button:hover,
275
- section footer button:focus,
276
- section footer a:hover,
277
- section footer a:focus {
278
- background: transparent !important;
279
- background-color: transparent !important;
280
- box-shadow: none !important;
281
- }
282
 
283
- section footer button,
284
- section footer button * {
285
- background: transparent !important;
286
- background-color: transparent !important;
287
- background-image: none !important;
288
- box-shadow: none !important;
289
- filter: none !important;
290
  }
291
 
292
- section footer button::before,
293
- section footer button::after {
294
- background: transparent !important;
295
- background-color: transparent !important;
296
- background-image: none !important;
297
- box-shadow: none !important;
298
- filter: none !important;
299
  }
300
 
301
- section footer a,
302
- section footer a * {
303
- background: transparent !important;
304
- background-color: transparent !important;
305
- box-shadow: none !important;
 
 
 
306
  }
307
 
308
- .gradio-container footer button,
309
- .gradio-container footer button *,
310
- .gradio-container .footer button,
311
- .gradio-container .footer button * {
312
- background: transparent !important;
313
- background-color: transparent !important;
314
- background-image: none !important;
315
- box-shadow: none !important;
316
  }
317
 
318
- .gradio-container footer button::before,
319
- .gradio-container footer button::after,
320
- .gradio-container .footer button::before,
321
- .gradio-container .footer button::after {
322
- background: transparent !important;
323
- background-color: transparent !important;
324
- background-image: none !important;
325
- box-shadow: none !important;
326
- }
 
1
+ /* =========================================================
2
+ EM Portfolio Risk Advisor — Hugging Face Space Stylesheet
3
+ ESCP SE21 · Amaryllis · Kuang · Tommaso · Logan · Achille
4
+ ========================================================= */
5
+
6
+ /* ── Root Variables ────────────────────────────────────── */
7
+ :root {
8
+ --escp-deep: #28096D; /* ESCP signature purple */
9
+ --escp-mid: #4a2a9e;
10
+ --escp-light: #a48de8;
11
+ --escp-pale: #ede8fc;
12
+ --teal: #2ec4a0;
13
+ --coral: #e8537a;
14
+ --gold: #F2C637;
15
+ --blue: #5e8fef;
16
+ --bg: #f0ecfa;
17
+ --card-bg: rgba(255,255,255,0.80);
18
+ --radius: 18px;
19
+ --shadow: 0 6px 28px rgba(40,9,109,0.12);
20
+ }
21
+
22
+ /* ── Page Background ───────────────────────────────────── */
23
+ body,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  .gradio-container {
25
+ background: linear-gradient(140deg, #1a0550 0%, #28096D 40%, #3d1a8a 70%, #1a0550 100%) !important;
26
+ min-height: 100vh;
27
+ font-family: "Inter", "Segoe UI", system-ui, -apple-system, sans-serif;
 
 
 
28
  }
29
 
30
+ /* ── Main Wrapper ──────────────────────────────────────── */
31
+ .main.svelte-1kyws56,
32
+ .wrap.svelte-1kyws56 {
33
+ background: transparent !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  }
35
 
36
+ /* ── Title / Header ────────────────────────────────────── */
37
+ #escp_title {
38
+ background: rgba(255,255,255,0.08);
39
+ border: 1px solid rgba(255,255,255,0.18);
40
+ border-radius: var(--radius);
41
+ padding: 28px 32px 22px;
42
+ margin-bottom: 20px;
43
+ backdrop-filter: blur(10px);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  }
45
 
46
+ #escp_title h1 {
 
 
 
 
 
47
  color: #ffffff !important;
48
+ font-size: 1.8rem;
49
+ font-weight: 800;
50
+ margin-bottom: 6px;
51
  }
52
 
53
+ #escp_title p,
54
+ #escp_title em {
55
+ color: rgba(255,255,255,0.80) !important;
56
+ font-size: 0.95rem;
 
 
 
57
  }
58
 
59
+ #escp_title strong {
60
+ color: var(--teal) !important;
 
 
 
61
  }
62
 
63
+ /* ── Tabs ──────────────────────────────────────────────── */
64
+ .tabs > .tab-nav {
65
+ background: rgba(255,255,255,0.07) !important;
66
+ border-radius: 14px 14px 0 0 !important;
67
+ border-bottom: 1px solid rgba(255,255,255,0.14) !important;
68
+ padding: 6px 8px 0 !important;
 
69
  }
70
 
71
+ .tabs > .tab-nav button {
72
+ color: rgba(255,255,255,0.65) !important;
73
+ font-weight: 600;
74
+ font-size: 0.88rem;
75
+ padding: 9px 18px;
76
+ border-radius: 10px 10px 0 0 !important;
77
+ transition: all 0.2s ease;
78
  }
79
 
80
+ .tabs > .tab-nav button.selected {
81
+ background: rgba(255,255,255,0.18) !important;
82
  color: #ffffff !important;
83
+ border-bottom: 2px solid var(--teal) !important;
 
 
 
 
84
  }
85
 
86
+ .tabs > .tab-nav button:hover:not(.selected) {
87
+ background: rgba(255,255,255,0.10) !important;
88
+ color: #ffffff !important;
 
 
 
 
 
89
  }
90
 
91
+ /* ── Tab Content Panels ────────────────────────────────── */
92
+ .tabitem {
93
+ background: rgba(255,255,255,0.06) !important;
94
+ border-radius: 0 0 var(--radius) var(--radius) !important;
95
+ border: 1px solid rgba(255,255,255,0.12) !important;
96
+ border-top: none !important;
97
+ padding: 24px !important;
98
+ }
99
+
100
+ /* ── Buttons ───────────────────────────────────────────── */
101
+ button.primary,
102
+ .gr-button-primary {
103
+ background: linear-gradient(135deg, var(--teal) 0%, #1da889 100%) !important;
104
+ color: #fff !important;
105
+ border: none !important;
106
+ border-radius: 12px !important;
107
+ font-weight: 700 !important;
108
+ padding: 11px 24px !important;
109
+ font-size: 0.92rem !important;
110
+ box-shadow: 0 4px 16px rgba(46,196,160,0.35) !important;
111
+ transition: transform 0.15s, box-shadow 0.15s !important;
112
  }
113
 
114
+ button.primary:hover,
115
+ .gr-button-primary:hover {
116
+ transform: translateY(-2px) !important;
117
+ box-shadow: 0 8px 24px rgba(46,196,160,0.45) !important;
118
  }
119
 
120
+ button.secondary,
121
+ .gr-button-secondary {
122
+ background: rgba(255,255,255,0.12) !important;
123
+ color: #fff !important;
124
+ border: 1px solid rgba(255,255,255,0.25) !important;
125
  border-radius: 12px !important;
126
+ font-weight: 600 !important;
127
+ padding: 11px 22px !important;
128
+ transition: background 0.15s !important;
129
  }
130
 
131
+ button.secondary:hover,
132
+ .gr-button-secondary:hover {
133
+ background: rgba(255,255,255,0.22) !important;
134
  }
135
 
136
+ /* ── Text Areas / Logs ─────────────────────────────────── */
137
+ textarea,
138
+ .scroll-hide {
139
+ background: rgba(10,3,30,0.55) !important;
140
+ color: #d4c8f8 !important;
141
+ border: 1px solid rgba(255,255,255,0.14) !important;
142
  border-radius: 12px !important;
143
+ font-family: "JetBrains Mono", "Fira Code", monospace !important;
144
+ font-size: 0.83rem !important;
145
  }
146
 
147
+ /* ── Textbox Inputs ────────────────────────────────────── */
148
+ input[type="text"],
149
+ .gr-text-input {
150
+ background: rgba(255,255,255,0.10) !important;
151
+ color: #fff !important;
152
+ border: 1px solid rgba(255,255,255,0.20) !important;
153
  border-radius: 10px !important;
154
+ padding: 10px 14px !important;
155
  }
156
 
157
+ input[type="text"]::placeholder {
158
+ color: rgba(255,255,255,0.40) !important;
 
 
159
  }
160
 
161
+ /* ── Labels & Markdown Text ────────────────────────────── */
162
+ label, .label-wrap span {
163
+ color: rgba(255,255,255,0.75) !important;
164
+ font-size: 0.82rem !important;
165
+ font-weight: 600 !important;
166
+ text-transform: uppercase;
167
+ letter-spacing: 0.06em;
168
  }
169
 
170
+ .prose p, .prose li, .markdown-body p, .markdown-body li {
171
+ color: rgba(255,255,255,0.85) !important;
172
  }
173
 
174
+ .prose h3, .prose h4, .markdown-body h3, .markdown-body h4 {
175
+ color: var(--teal) !important;
 
 
 
 
 
176
  }
177
 
178
+ /* ── KPI Cards ─────────────────────────────────────────── */
179
+ .kpi-card {
180
+ background: var(--card-bg);
181
+ border-radius: 18px;
182
+ padding: 18px 14px 16px;
183
+ text-align: center;
184
+ border-top: 3px solid var(--teal);
185
+ box-shadow: var(--shadow);
186
+ backdrop-filter: blur(8px);
187
+ transition: transform 0.2s;
188
  }
189
 
190
+ .kpi-card:hover { transform: translateY(-3px); }
 
 
 
 
 
191
 
192
+ /* ── Chatbot ───────────────────────────────────────────── */
193
+ .chatbot .message.user {
194
+ background: linear-gradient(135deg, var(--escp-mid), var(--escp-deep)) !important;
195
+ color: #fff !important;
196
+ border-radius: 16px 16px 4px 16px !important;
197
  }
198
 
199
+ .chatbot .message.bot {
200
+ background: rgba(255,255,255,0.88) !important;
201
+ color: var(--escp-deep) !important;
202
+ border-radius: 4px 16px 16px 16px !important;
203
+ box-shadow: 0 2px 12px rgba(40,9,109,0.10) !important;
 
204
  }
205
 
206
+ /* ── Plots / Gallery ───────────────────────────────────── */
207
+ .gr-plot, .plot-container {
208
+ background: rgba(255,255,255,0.95) !important;
209
+ border-radius: var(--radius) !important;
210
+ box-shadow: var(--shadow) !important;
211
+ padding: 4px !important;
212
  }
213
 
214
+ .gallery-item {
215
+ border-radius: 12px !important;
216
+ overflow: hidden !important;
217
+ box-shadow: var(--shadow) !important;
 
218
  }
219
 
220
+ /* ── Dataframe ─────────────────────────────────────────── */
221
+ .gr-dataframe, table {
222
+ background: rgba(255,255,255,0.93) !important;
223
+ border-radius: var(--radius) !important;
224
+ overflow: hidden !important;
225
+ box-shadow: var(--shadow) !important;
226
  }
227
 
228
+ th {
229
+ background: var(--escp-deep) !important;
230
+ color: #fff !important;
231
+ font-weight: 700 !important;
232
+ text-transform: uppercase;
233
+ font-size: 0.78rem;
234
+ letter-spacing: 0.05em;
235
  }
236
 
237
+ tr:nth-child(even) { background: var(--escp-pale) !important; }
238
+ tr:hover { background: rgba(164,141,232,0.18) !important; }
 
 
 
 
 
 
239
 
240
+ /* ── Dropdown ──────────────────────────────────────────── */
241
+ .gr-dropdown select {
242
+ background: rgba(255,255,255,0.12) !important;
243
+ color: #fff !important;
244
+ border: 1px solid rgba(255,255,255,0.22) !important;
245
+ border-radius: 10px !important;
 
246
  }
247
 
248
+ /* ── Scrollbars ────────────────────────────────────────── */
249
+ ::-webkit-scrollbar { width: 6px; height: 6px; }
250
+ ::-webkit-scrollbar-track { background: rgba(255,255,255,0.05); }
251
+ ::-webkit-scrollbar-thumb {
252
+ background: var(--escp-light);
253
+ border-radius: 3px;
 
254
  }
255
 
256
+ /* ── Examples (chat suggestions) ──────────────────────── */
257
+ .examples-list button {
258
+ background: rgba(255,255,255,0.08) !important;
259
+ color: rgba(255,255,255,0.80) !important;
260
+ border: 1px solid rgba(255,255,255,0.16) !important;
261
+ border-radius: 8px !important;
262
+ font-size: 0.82rem !important;
263
+ transition: background 0.15s !important;
264
  }
265
 
266
+ .examples-list button:hover {
267
+ background: rgba(255,255,255,0.18) !important;
268
+ color: #fff !important;
 
 
 
 
 
269
  }
270
 
271
+ /* ── Responsive ────────────────────────────────────────── */
272
+ @media (max-width: 768px) {
273
+ #escp_title { padding: 18px 16px; }
274
+ #escp_title h1 { font-size: 1.3rem; }
275
+ .tabitem { padding: 14px !important; }
276
+ }