# ╔══════════════════════════════════════════════════════════════════════╗
# ║ BubbleBusters — AI Bubble Sentiment Analytics                        ║
# ║ RX12 Group Project · ESCP Europe                                     ║
# ║ app.py — Three-notebook pipeline + live dashboard                    ║
# ╚══════════════════════════════════════════════════════════════════════╝
import os
import re
import json
import time
import traceback
import sys
import subprocess
from pathlib import Path
from typing import Dict, Any, List, Tuple, Optional

import pandas as pd
import gradio as gr
import papermill as pm
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# ── Optional dependencies ───────────────────────────────────────────────
try:
    import yfinance as yf
    YFINANCE_AVAILABLE = True
except ImportError:
    YFINANCE_AVAILABLE = False

try:
    from huggingface_hub import InferenceClient
except Exception:
    InferenceClient = None

# ══════════════════════════════════════════════════════════════════════
# CONFIG
# ══════════════════════════════════════════════════════════════════════
BASE_DIR = Path(__file__).resolve().parent

# Notebook file names, overridable via environment variables.
NB1 = os.environ.get("NB1", "datacreation_bubblebusters.ipynb").strip()
NB2 = os.environ.get("NB2", "pythonanalysis_bubblebusters.ipynb").strip()
NB3 = os.environ.get("NB3", "ranalysis_bubblebusters.ipynb").strip()

RUNS_DIR = BASE_DIR / "runs"
ART_DIR = BASE_DIR / "artifacts"
PY_FIG_DIR = ART_DIR / "py" / "figures"
PY_TAB_DIR = ART_DIR / "py" / "tables"
R_FIG_DIR = ART_DIR / "r" / "figures"
R_TAB_DIR = ART_DIR / "r" / "tables"

PAPERMILL_TIMEOUT = int(os.environ.get("PAPERMILL_TIMEOUT", "1800"))
MAX_PREVIEW_ROWS = int(os.environ.get("MAX_FILE_PREVIEW_ROWS", "50"))
HF_API_KEY = os.environ.get("HF_API_KEY", "").strip()
MODEL_NAME = os.environ.get("MODEL_NAME", "deepseek-ai/DeepSeek-R1").strip()
HF_PROVIDER = os.environ.get("HF_PROVIDER", "novita").strip()

# Colour palette
ESCP_PURPLE = "#00d2be"
BULLISH = "#2ec4a0"  # deep mint-teal
NEUTRAL = "#5e8fef"  # medium periwinkle-blue
BEARISH = "#e8537a"
# deep blush-rose AMBER = "#e8a230" # rich amber # LLM client LLM_ENABLED = bool(HF_API_KEY) and InferenceClient is not None llm_client = ( InferenceClient(provider=HF_PROVIDER, api_key=HF_API_KEY) if LLM_ENABLED else None ) # AI-related tickers shown in the prices section AI_TICKERS_DEFAULT = "NVDA, MSFT, GOOGL, META, AMD" AI_PRESET_MEGA = "NVDA, MSFT, GOOGL, META, AMZN, AAPL" AI_PRESET_SEMI = "NVDA, AMD, TSM, INTC, QCOM, SMCI" AI_PRESET_PURE = "AI, PLTR, SOUN, PATH, BBAI, GFAI" # ══════════════════════════════════════════════════════════════════════ # KERNEL SETUP (for papermill) # ══════════════════════════════════════════════════════════════════════ def ensure_python_kernelspec() -> str: from jupyter_client.kernelspec import KernelSpecManager ksm = KernelSpecManager() specs = ksm.find_kernel_specs() if not specs: try: import ipykernel # noqa: F401 except Exception as e: raise RuntimeError( "ipykernel is not installed. " "Add 'ipykernel' to requirements.txt and rebuild the Space.\n" f"Original error: {e}" ) subprocess.check_call([ sys.executable, "-m", "ipykernel", "install", "--user", "--name", "python3", "--display-name", "Python 3 (Space)" ]) specs = ksm.find_kernel_specs() if "python3" in specs: return "python3" for name in specs: if "python" in name.lower(): return name raise RuntimeError(f"No Python kernel found. 
Available: {list(specs.keys())}") try: PY_KERNEL = ensure_python_kernelspec() KERNEL_INIT_ERROR = "" except Exception as e: PY_KERNEL = None KERNEL_INIT_ERROR = str(e) # ══════════════════════════════════════════════════════════════════════ # HELPERS # ══════════════════════════════════════════════════════════════════════ def ensure_dirs(): for p in [RUNS_DIR, ART_DIR, PY_FIG_DIR, PY_TAB_DIR, R_FIG_DIR, R_TAB_DIR]: p.mkdir(parents=True, exist_ok=True) def stamp(): return time.strftime("%Y%m%d-%H%M%S") def _ls(dir_path: Path, exts: Tuple[str, ...]) -> List[str]: if not dir_path.is_dir(): return [] return sorted(p.name for p in dir_path.iterdir() if p.is_file() and p.suffix.lower() in exts) def _read_csv(path: Path) -> pd.DataFrame: return pd.read_csv(path, nrows=MAX_PREVIEW_ROWS) def _read_json(path: Path): with path.open(encoding="utf-8") as f: return json.load(f) def artifacts_index() -> Dict[str, Any]: return { "python": { "figures": _ls(PY_FIG_DIR, (".png", ".jpg", ".jpeg")), "tables": _ls(PY_TAB_DIR, (".csv", ".json")), }, "r": { "figures": _ls(R_FIG_DIR, (".png", ".jpg", ".jpeg")), "tables": _ls(R_TAB_DIR, (".csv", ".json")), }, } # ══════════════════════════════════════════════════════════════════════ # PIPELINE STATUS # ══════════════════════════════════════════════════════════════════════ def get_pipeline_status() -> Dict[str, Any]: clean_csv = BASE_DIR / "ai_bubble_clean.csv" monthly_csv = BASE_DIR / "ai_bubble_monthly.csv" data_ok = clean_csv.exists() and monthly_csv.exists() py_figs = _ls(PY_FIG_DIR, (".png",)) py_tabs = _ls(PY_TAB_DIR, (".csv", ".json")) py_ok = len(py_figs) >= 5 and len(py_tabs) >= 4 r_figs = _ls(R_FIG_DIR, (".png",)) r_tabs = _ls(R_TAB_DIR, (".csv", ".json")) r_ok = len(r_figs) >= 3 and len(r_tabs) >= 2 return { "data": { "ok": data_ok, "detail": ( f"ai_bubble_clean.csv: {'✅' if clean_csv.exists() else '❌'} | " f"ai_bubble_monthly.csv: {'✅' if monthly_csv.exists() else '❌'}" ), }, "python": { "ok": py_ok, "detail": f"{len(py_figs)} 
figures · {len(py_tabs)} tables", }, "r": { "ok": r_ok, "detail": f"{len(r_figs)} figures · {len(r_tabs)} tables", }, } def render_status_html() -> str: status = get_pipeline_status() def badge(ok: bool, label: str, detail: str, icon: str) -> str: colour = "#3dcba8" if ok else "#ff6b8a" bg = "rgba(61,203,168,.10)" if ok else "rgba(255,107,138,.08)" border = "rgba(61,203,168,.30)" if ok else "rgba(255,107,138,.25)" pill = "READY" if ok else "PENDING" return f"""
{icon}
{label} {pill}
{detail}
""" html = """
🔍 Pipeline Status
""" html += badge(status["data"]["ok"], "Data Creation", status["data"]["detail"], "📦") html += badge(status["python"]["ok"], "Python Analysis", status["python"]["detail"], "🐍") html += badge(status["r"]["ok"], "R Analysis", status["r"]["detail"], "📊") html += "
" return html # ══════════════════════════════════════════════════════════════════════ # PIPELINE RUNNERS # ══════════════════════════════════════════════════════════════════════ def run_notebook(nb_name: str, kernel_name: str) -> str: ensure_dirs() nb_in = BASE_DIR / nb_name if not nb_in.exists(): return f"ERROR: {nb_name} not found at {nb_in}" nb_out = RUNS_DIR / f"run_{stamp()}_{nb_name}" pm.execute_notebook( input_path=str(nb_in), output_path=str(nb_out), cwd=str(BASE_DIR), log_output=True, progress_bar=False, request_save_on_cell_execute=True, execution_timeout=PAPERMILL_TIMEOUT, kernel_name=kernel_name, ) return f"✅ Executed {nb_name}" def run_datacreation() -> str: try: if not PY_KERNEL: return f"❌ Kernel unavailable:\n{KERNEL_INIT_ERROR}" return run_notebook(NB1, kernel_name=PY_KERNEL) except Exception as e: return f"❌ FAILED: {e}\n\n{traceback.format_exc()[-2000:]}" def run_pythonanalysis() -> str: try: if not PY_KERNEL: return f"❌ Kernel unavailable:\n{KERNEL_INIT_ERROR}" return run_notebook(NB2, kernel_name=PY_KERNEL) except Exception as e: return f"❌ FAILED: {e}\n\n{traceback.format_exc()[-2000:]}" def run_r() -> str: """Run the R analysis notebook via papermill + IRkernel.""" try: # Check IRkernel is registered from jupyter_client.kernelspec import KernelSpecManager specs = KernelSpecManager().find_kernel_specs() if "ir" not in specs: return ( "❌ IRkernel not found in this environment.\n\n" "If you are running locally, install it with:\n" " Rscript -e \"install.packages('IRkernel')\"\n" " Rscript -e \"IRkernel::installspec()\"\n\n" "On the Hugging Face Space (Docker), this is pre-installed — " "if you see this message, try rebuilding the Space." 
) return run_notebook(NB3, kernel_name="ir") except Exception as e: return f"❌ FAILED: {e}\n\n{traceback.format_exc()[-2000:]}" def run_full_pipeline() -> str: logs = [] for label, fn in [ ("📦 STEP 1/3 — Data Creation", run_datacreation), ("🐍 STEP 2a/3 — Python Analysis", run_pythonanalysis), ("📊 STEP 2b/3 — R Analysis", run_r), ]: logs.append(f"\n{'─'*52}\n{label}\n{'─'*52}") logs.append(fn()) return "\n".join(logs) # ══════════════════════════════════════════════════════════════════════ # ASSET PRICES # ══════════════════════════════════════════════════════════════════════ def fetch_asset_prices( tickers_str: str, period: str = "6mo", ) -> Tuple[go.Figure, str]: """Fetch prices via yfinance and return normalised Plotly chart + summary.""" def _empty(msg: str) -> Tuple[go.Figure, str]: fig = go.Figure() fig.update_layout( title=msg, template="plotly_white", paper_bgcolor="rgba(247,244,255,0.85)", plot_bgcolor="rgba(255,255,255,0.95)", height=420, ) return fig, msg if not YFINANCE_AVAILABLE: return _empty("⚠️ yfinance not installed — add it to requirements.txt") tickers = [t.strip().upper() for t in tickers_str.split(",") if t.strip()] if not tickers: return _empty("Please enter at least one ticker symbol.") try: raw = yf.download(tickers, period=period, auto_adjust=True, progress=False) if raw.empty: return _empty("No price data returned. Check ticker symbols.") # Flatten: single ticker → single column if len(tickers) == 1: close = raw[["Close"]].rename(columns={"Close": tickers[0]}) else: close = raw["Close"] # Normalise to base 100 norm = close / close.iloc[0] * 100 palette = [ "#7c5cbf", "#2ec4a0", "#e8537a", "#e8a230", "#5e8fef", "#c45ea8", "#3dbacc", "#a0522d", "#6aaa3a", "#d46060", "#4a7fc1", "#8e6abf", ] fig = go.Figure() for i, col in enumerate(norm.columns): fig.add_trace(go.Scatter( x=norm.index, y=norm[col].round(2), name=str(col), mode="lines", line=dict(color=palette[i % len(palette)], width=2), hovertemplate=( f"{col}
%{{x|%d %b %Y}}
" "Index: %{y:.1f}" ), )) fig.add_hline( y=100, line_dash="dot", line_color="rgba(124,92,191,0.4)", annotation_text="Base (100)", annotation_position="bottom right", ) fig.update_layout( title=dict( text="AI-Related Asset Prices — Normalised (base = 100 at start of period)", font=dict(size=15, color="#4b2d8a", family="Syne, sans-serif"), ), template="plotly_white", paper_bgcolor="rgba(247,244,255,0.85)", plot_bgcolor="rgba(255,255,255,0.95)", font=dict(color="#2d1f4e", family="Lato, sans-serif"), height=460, margin=dict(l=60, r=20, t=70, b=70), legend=dict( orientation="h", yanchor="bottom", y=-0.22, xanchor="center", x=0.5, bgcolor="rgba(255,255,255,0.92)", bordercolor="rgba(124,92,191,0.35)", borderwidth=1, ), hovermode="x unified", ) fig.update_xaxes(gridcolor="rgba(124,92,191,0.18)", showgrid=True) fig.update_yaxes(gridcolor="rgba(124,92,191,0.18)", showgrid=True, title="Index (base 100)") # Summary markdown latest = close.iloc[-1] first = close.iloc[0] rows = [] for t in close.columns: try: chg = ((float(latest[t]) - float(first[t])) / float(first[t])) * 100 sign = "+" if chg >= 0 else "" col = BULLISH if chg >= 0 else BEARISH rows.append( f"| **{t}** | ${float(latest[t]):.2f} " f"| {sign}{chg:.1f}% |" ) except Exception: pass summary = ( "| Ticker | Latest Price | Period Return |\n" "|--------|:------------:|:-------------:|\n" + "\n".join(rows) ) if rows else "*(no data)*" return fig, summary except Exception as e: return _empty(f"Error fetching prices: {e}") # ══════════════════════════════════════════════════════════════════════ # SENTIMENT CHARTS (interactive Plotly) # ══════════════════════════════════════════════════════════════════════ def _dark_layout(**kwargs) -> dict: defaults = dict( template="plotly_white", paper_bgcolor="rgba(247,244,255,0.85)", plot_bgcolor="rgba(255,255,255,0.95)", font=dict(family="Lato, sans-serif", color="#2d1f4e", size=12), margin=dict(l=60, r=20, t=70, b=70), legend=dict( orientation="h", yanchor="bottom", y=1.02, 
xanchor="right", x=1, bgcolor="rgba(255,255,255,0.92)", bordercolor="rgba(124,92,191,0.35)", borderwidth=1, ), title=dict(font=dict(family="Syne, sans-serif", size=15, color="#4b2d8a")), ) defaults.update(kwargs) return defaults def _grid_axes(fig: go.Figure, **kwargs): fig.update_xaxes(gridcolor="rgba(124,92,191,0.18)", showgrid=True, **kwargs) fig.update_yaxes(gridcolor="rgba(124,92,191,0.18)", showgrid=True) return fig def _empty_chart(title: str) -> go.Figure: fig = go.Figure() fig.update_layout( title=title, template="plotly_white", paper_bgcolor="rgba(247,244,255,0.85)", plot_bgcolor="rgba(255,255,255,0.95)", font=dict(family="Lato, sans-serif", color="#2d1f4e"), height=420, annotations=[dict( text="Run the pipeline to generate data", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False, font=dict(size=14, color="rgba(124,92,191,0.5)", family="Syne, sans-serif"), )], ) return fig # ── KPI Cards ────────────────────────────────────────────────────────── def load_kpis() -> Dict[str, Any]: for candidate in [PY_TAB_DIR / "kpis.json"]: if candidate.exists(): try: return _read_json(candidate) except Exception: pass return {} def render_kpi_cards() -> str: kpis = load_kpis() if not kpis: return ( '
' '
🫧
' '
No data yet
' '
' 'Run the Python analysis pipeline to populate these cards.
' '
' ) def card(icon, label, value, colour): return f"""
{icon}
{label}
{value}
""" cards = [ ("💬", "Comments", f"{kpis.get('total_comments','—'):,}", "#a48de8"), ("📅", "Date Range", kpis.get("date_range","—"), "#7aa6f8"), ("🌐", "Platforms", str(kpis.get("n_platforms","—")), "#6ee7c7"), ("🏷️", "Topics", str(kpis.get("n_topics","—")), "#3dcba8"), ("🐂", "Bullish", f"{kpis.get('pct_bullish','—')}%", "#3dcba8"), ("🐻", "Bearish", f"{kpis.get('pct_bearish','—')}%", "#ff6b8a"), ("⚠️", "Bubble Risk", f"{kpis.get('latest_bubble_risk','—')}", "#ffb347"), ("🔬", "Chi² p-value", f"{kpis.get('chi2_p_value','—')}", "#8fa8f8"), ] html = ( '
' ) for icon, label, value, colour in cards: html += card(icon, label, value, colour) html += "
" return html # ── Overview chart (monthly sentiment over time) ──────────────────── def build_overview_chart() -> go.Figure: path = PY_TAB_DIR / "monthly_sentiment.csv" if not path.exists(): return _empty_chart("Sentiment Over Time — data not yet available") df = pd.read_csv(path) df["month"] = pd.to_datetime(df["month"]) fig = make_subplots( rows=2, cols=1, shared_xaxes=True, subplot_titles=( "Monthly Comment Volume by Sentiment", "3-Month Rolling Average Sentiment Score", ), vertical_spacing=0.14, row_heights=[0.62, 0.38], ) for sentiment, colour in [("bullish", BULLISH), ("neutral", NEUTRAL), ("bearish", BEARISH)]: if sentiment not in df.columns: continue r, g, b = int(colour[1:3], 16), int(colour[3:5], 16), int(colour[5:7], 16) fig.add_trace(go.Scatter( x=df["month"], y=df[sentiment], name=sentiment.title(), mode="lines", stackgroup="one", line=dict(color=colour, width=0.5), fillcolor=f"rgba({r},{g},{b},0.7)", hovertemplate=f"{sentiment.title()}: %{{y}}", ), row=1, col=1) if "avg_score" in df.columns: rolling = df["avg_score"].rolling(3, min_periods=1).mean() fig.add_trace(go.Scatter( x=df["month"], y=rolling.round(3), name="3-mo avg score", mode="lines", line=dict(color="#7c5cbf", width=2.5), hovertemplate="Score: %{y:.2f}", ), row=2, col=1) fig.add_hline( y=0, line_dash="dot", line_color="rgba(124,92,191,0.35)", row=2, col=1, ) fig.update_layout(**_dark_layout(height=580, hovermode="x unified")) _grid_axes(fig) return fig # ── Topic breakdown chart ────────────────────────────────────────── def build_topic_chart() -> go.Figure: path = PY_TAB_DIR / "sentiment_by_topic.csv" if not path.exists(): return _empty_chart("Sentiment by Topic — data not yet available") df = pd.read_csv(path) if "Topic" not in df.columns: return _empty_chart("Unexpected CSV format for sentiment_by_topic.csv") cols = [c for c in ["bullish", "neutral", "bearish"] if c in df.columns] totals = df[cols].sum(axis=1).replace(0, 1) fig = go.Figure() for sentiment, colour in [("bullish", 
BULLISH), ("neutral", NEUTRAL), ("bearish", BEARISH)]: if sentiment not in df.columns: continue pct = (df[sentiment] / totals * 100).round(1) fig.add_trace(go.Bar( name=sentiment.title(), x=df["Topic"], y=pct, marker_color=colour, hovertemplate=f"{sentiment.title()}
%{{x}}: %{{y:.1f}}%", )) fig.update_layout( **_dark_layout( barmode="stack", title="Sentiment Distribution by Topic (%)", height=420, yaxis_title="% of Comments", ) ) _grid_axes(fig) return fig # ── Platform breakdown chart ─────────────────────────────────────── def build_platform_chart() -> go.Figure: path = PY_TAB_DIR / "sentiment_by_platform.csv" if not path.exists(): return _empty_chart("Sentiment by Platform — data not yet available") df = pd.read_csv(path) if "Platform" not in df.columns: return _empty_chart("Unexpected CSV format for sentiment_by_platform.csv") cols = [c for c in ["bullish", "neutral", "bearish"] if c in df.columns] totals = df[cols].sum(axis=1).replace(0, 1) fig = go.Figure() for sentiment, colour in [("bullish", BULLISH), ("neutral", NEUTRAL), ("bearish", BEARISH)]: if sentiment not in df.columns: continue pct = (df[sentiment] / totals * 100).round(1) fig.add_trace(go.Bar( name=sentiment.title(), x=df["Platform"], y=pct, marker_color=colour, hovertemplate=f"{sentiment.title()}
%{{x}}: %{{y:.1f}}%", )) fig.update_layout( **_dark_layout( barmode="stack", title="Sentiment Distribution by Platform (%)", height=420, yaxis_title="% of Comments", ) ) _grid_axes(fig) return fig # ── Bubble risk chart ────────────────────────────────────────────── def build_bubble_risk_chart() -> go.Figure: path = PY_TAB_DIR / "bubble_risk_score.csv" if not path.exists(): return _empty_chart("Bubble Risk Score — data not yet available") df = pd.read_csv(path) if "month" not in df.columns or "bubble_risk_score" not in df.columns: return _empty_chart("Unexpected CSV format for bubble_risk_score.csv") df["month"] = pd.to_datetime(df["month"]) score = df["bubble_risk_score"] fig = go.Figure() # Shaded area: bullish zone (score < 0.5) fig.add_trace(go.Scatter( x=df["month"], y=score.clip(upper=0.5), mode="none", fill="tozeroy", fillcolor=f"rgba({int(BULLISH[1:3],16)},{int(BULLISH[3:5],16)},{int(BULLISH[5:7],16)},0.15)", name="Bullish zone", showlegend=False, hoverinfo="skip", )) # Shaded area: bearish zone (score > 0.5) base = pd.Series([0.5] * len(df), index=df.index) fig.add_trace(go.Scatter( x=df["month"], y=score.clip(lower=0.5), mode="none", fill="tonexty", fillcolor=f"rgba({int(BEARISH[1:3],16)},{int(BEARISH[3:5],16)},{int(BEARISH[5:7],16)},0.15)", name="Bearish zone", showlegend=False, hoverinfo="skip", )) # Main line fig.add_trace(go.Scatter( x=df["month"], y=score.round(3), name="Bubble Risk Score", mode="lines+markers", line=dict(color="#7c5cbf", width=2.5), marker=dict(size=5), hovertemplate="Risk: %{y:.3f}", )) fig.add_hline( y=0.5, line_dash="dot", line_color="rgba(124,92,191,0.5)", annotation_text="Neutral threshold", annotation_position="top right", annotation_font_color="#7c5cbf", ) fig.update_layout( **_dark_layout( title="AI Bubble Risk Score (0 = all bullish · 1 = all bearish)", height=420, hovermode="x unified", yaxis=dict(range=[0, 1], title="Risk Score"), ) ) _grid_axes(fig) return fig # ── Yearly chart 
─────────────────────────────────────────────────── def build_yearly_chart() -> go.Figure: path = PY_TAB_DIR / "yearly_sentiment.csv" if not path.exists(): return _empty_chart("Yearly Sentiment — data not yet available") df = pd.read_csv(path) year_col = [c for c in ["Year", "year", "Year_num"] if c in df.columns] if not year_col: return _empty_chart("No year column found") year_col = year_col[0] cols = [c for c in ["bullish", "neutral", "bearish"] if c in df.columns] totals = df[cols].sum(axis=1).replace(0, 1) fig = go.Figure() for sentiment, colour in [("bullish", BULLISH), ("neutral", NEUTRAL), ("bearish", BEARISH)]: if sentiment not in df.columns: continue pct = (df[sentiment] / totals * 100).round(1) fig.add_trace(go.Bar( name=sentiment.title(), x=df[year_col].astype(str), y=pct, marker_color=colour, hovertemplate=f"{sentiment.title()}
%{{x}}: %{{y:.1f}}%", )) fig.update_layout( **_dark_layout( barmode="stack", title="Sentiment Share by Year (%)", height=400, yaxis_title="% of Comments", ) ) _grid_axes(fig) return fig # ── Static R figures ─────────────────────────────────────────────── def _r_fig(name: str) -> Optional[str]: p = R_FIG_DIR / name return str(p) if p.exists() else None # ── Full sentiment refresh ───────────────────────────────────────── def refresh_sentiment(): return ( render_kpi_cards(), build_overview_chart(), build_topic_chart(), build_platform_chart(), build_bubble_risk_chart(), build_yearly_chart(), _r_fig("r01_monthly_sentiment_trend.png"), _r_fig("r02_rolling_sentiment_score.png"), _r_fig("r03_chi_square_residuals.png"), _r_fig("r04_regression_coefficients.png"), _r_fig("r05_yearly_grouped_bars.png"), ) # ══════════════════════════════════════════════════════════════════════ # AI CHAT # ══════════════════════════════════════════════════════════════════════ DASHBOARD_SYSTEM = """You are a sharp, concise analytics assistant for **BubbleBusters** — an AI Bubble Sentiment Analytics dashboard built for ESCP Europe (RX12). The dataset contains online comments about whether AI is a "bubble", scraped from platforms like HackerNews, Twitter/X, and Reddit. Each comment is classified as: - **bullish** (AI is real / valuable / here to stay) - **neutral** (balanced / uncertain) - **bearish** (AI is overhyped / a bubble / will crash) Topics covered: hype, investment, productivity, skepticism. AVAILABLE ARTIFACTS: {artifacts_json} KEY METRICS: {kpis_json} INSTRUCTIONS: 1. Answer in 2-4 concise sentences. 2. 
At the END of every response, output exactly one JSON block specifying what chart to show: ```json {{"show": "chart", "chart_type": "overview"}} ``` chart_type must be one of: "overview", "topic", "platform", "risk", "yearly", "none" ROUTING RULES: - Trends over time / monthly / rolling → "overview" - Topics / hype / investment / skepticism / productivity → "topic" - Platforms / HackerNews / Twitter / Reddit → "platform" - Bubble risk / danger / fear score → "risk" - Year-over-year / annual → "yearly" - General / unclear → "none" """ _JSON_RE = re.compile(r"```json\s*(\{.*?\})\s*```", re.DOTALL) def _parse_directive(text: str) -> Dict[str, str]: m = _JSON_RE.search(text) if m: try: return json.loads(m.group(1)) except Exception: pass return {"show": "none"} def _clean(text: str) -> str: return _JSON_RE.sub("", text).strip() def _keyword_chat(msg: str, idx: Dict, kpis: Dict) -> Tuple[str, Dict]: has_data = any( idx[s]["figures"] or idx[s]["tables"] for s in ("python", "r") ) if not has_data: return ( "No analysis data found yet. Please run the pipeline first (⚙️ Pipeline tab).", {"show": "none"}, ) ml = msg.lower() kpi_line = "" if kpis: total = kpis.get("total_comments", 0) kpi_line = ( f" The dataset contains **{total:,}** comments" f" spanning {kpis.get('date_range', 'various dates')}." ) if any(w in ml for w in ["risk", "bubble risk", "danger", "score"]): return ( f"Here's the AI Bubble Risk Score over time.{kpi_line}", {"show": "chart", "chart_type": "risk"}, ) if any(w in ml for w in ["year", "annual", "over year"]): return ( f"Here's the year-over-year sentiment breakdown.{kpi_line}", {"show": "chart", "chart_type": "yearly"}, ) if any(w in ml for w in ["topic", "hype", "investment", "productivity", "skepticism"]): mb = kpis.get("most_bearish_topic", "") mbu = kpis.get("most_bullish_topic", "") extra = f" The most bearish topic is **{mb}** and the most bullish is **{mbu}**." 
if mb else "" return ( f"Here's sentiment broken down by topic.{extra}{kpi_line}", {"show": "chart", "chart_type": "topic"}, ) if any(w in ml for w in ["platform", "hackernews", "twitter", "reddit", "source"]): dom = kpis.get("dominant_platform", "") extra = f" The dominant platform is **{dom}**." if dom else "" return ( f"Here's sentiment broken down by platform.{extra}{kpi_line}", {"show": "chart", "chart_type": "platform"}, ) if any(w in ml for w in ["trend", "time", "monthly", "over time", "evolution", "sentiment"]): risk = kpis.get("latest_bubble_risk", "") extra = f" The latest 3-month bubble risk score is **{risk}**." if risk else "" return ( f"Here are sentiment trends over time.{extra}{kpi_line}", {"show": "chart", "chart_type": "overview"}, ) bearish = kpis.get("pct_bearish", "?") bullish = kpis.get("pct_bullish", "?") neutral = kpis.get("pct_neutral", "?") return ( f"Overall: **{bullish}%** bullish · **{neutral}%** neutral · **{bearish}%** bearish.{kpi_line}\n\n" "Try: *'Show sentiment trends'*, *'Which topics are most bearish?'*, " "*'Compare platforms'*, *'What's the bubble risk?'*", {"show": "none"}, ) def _directive_to_chart(directive: Dict) -> Optional[go.Figure]: ct = directive.get("chart_type", "none") if directive.get("show") != "chart" or ct == "none": return None return { "overview": build_overview_chart, "topic": build_topic_chart, "platform": build_platform_chart, "risk": build_bubble_risk_chart, "yearly": build_yearly_chart, }.get(ct, lambda: None)() def ai_chat(user_msg: str, history: list): if not user_msg or not user_msg.strip(): return history, "", None idx = artifacts_index() kpis = load_kpis() if not LLM_ENABLED: reply, directive = _keyword_chat(user_msg, idx, kpis) else: system = DASHBOARD_SYSTEM.format( artifacts_json=json.dumps(idx, indent=2), kpis_json=json.dumps(kpis, indent=2) if kpis else "(no KPIs — run pipeline first)", ) msgs = [{"role": "system", "content": system}] for entry in (history or [])[-6:]: if isinstance(entry, 
dict) and "role" in entry: msgs.append(entry) msgs.append({"role": "user", "content": user_msg}) try: r = llm_client.chat_completion( model=MODEL_NAME, messages=msgs, temperature=0.3, max_tokens=600, stream=False, ) raw = ( r["choices"][0]["message"]["content"] if isinstance(r, dict) else r.choices[0].message.content ) directive = _parse_directive(raw) reply = _clean(raw) except Exception as e: fallback_reply, directive = _keyword_chat(user_msg, idx, kpis) reply = f"*(LLM error: {e})*\n\n{fallback_reply}" chart_out = _directive_to_chart(directive) new_history = list(history or []) + [ {"role": "user", "content": user_msg}, {"role": "assistant", "content": reply}, ] return new_history, "", chart_out # ══════════════════════════════════════════════════════════════════════ # CSS — Bloomberg-terminal-inspired dark theme # ══════════════════════════════════════════════════════════════════════ CSS = (BASE_DIR / "style.css").read_text(encoding="utf-8") # ══════════════════════════════════════════════════════════════════════ # GRADIO APP # ══════════════════════════════════════════════════════════════════════ ensure_dirs() with gr.Blocks(title="BubbleBusters — AI Bubble Analytics") as demo: # ── Master header ────────────────────────────────────────────── gr.HTML("""
🫧

BubbleBusters

AI Bubble Sentiment Analytics  ·  RX12  ·  ESCP Europe

🎓 Group Project
🐍 Python  ·  📊 R  ·  🤖 AI
""") with gr.Tabs(): # ══════════════════════════════════════════════════════════ # TAB 1 — PIPELINE # ══════════════════════════════════════════════════════════ with gr.Tab("⚙️ Pipeline"): with gr.Row(equal_height=False): # Status column with gr.Column(scale=1, min_width=280): gr.HTML('
System Status
') status_html = gr.HTML(value=render_status_html) refresh_status_btn = gr.Button( "🔄 Refresh", elem_classes=["btn-secondary"] ) # Runner column with gr.Column(scale=2): gr.HTML('
Run Pipeline
') if PY_KERNEL: gr.HTML( f'
' f'✦ Notebook kernel ready  ·  ' f'' f'{PY_KERNEL}
' ) else: gr.HTML( f'
' f'✖ Kernel unavailable — add ' f'' f'ipykernel to requirements.txt
' f'' f'{KERNEL_INIT_ERROR[:180]}' f'
' ) with gr.Row(): btn_nb1 = gr.Button("📦 Step 1: Data", elem_classes=["btn-secondary"]) btn_nb2 = gr.Button("🐍 Step 2a: Python", elem_classes=["btn-secondary"]) btn_r = gr.Button("📊 Step 2b: R", elem_classes=["btn-secondary"]) btn_all = gr.Button("🚀 Run Full Pipeline", elem_classes=["btn-primary"]) run_log = gr.Textbox( label="Execution Log", lines=18, max_lines=18, interactive=False, elem_id="pipeline-log", autoscroll=True, ) refresh_status_btn.click(fn=render_status_html, outputs=status_html) btn_nb1.click(fn=run_datacreation, outputs=run_log) btn_nb2.click(fn=run_pythonanalysis, outputs=run_log) btn_r.click(fn=run_r, outputs=run_log) btn_all.click(fn=run_full_pipeline, outputs=run_log) # ══════════════════════════════════════════════════════════ # TAB 2 — ASSET PRICES # ══════════════════════════════════════════════════════════ with gr.Tab("📈 Asset Prices"): gr.HTML( '
' 'Track AI-related stocks in real time. Select tickers and a period ' 'to compare normalised performance (base = 100).
' ) with gr.Row(): with gr.Column(scale=3): ticker_box = gr.Textbox( label="Tickers (comma-separated)", value=AI_TICKERS_DEFAULT, placeholder="e.g. NVDA, MSFT, GOOGL, META", ) with gr.Column(scale=1): period_radio = gr.Radio( choices=["1mo", "3mo", "6mo", "1y", "2y", "5y"], value="6mo", label="Period", ) with gr.Column(scale=1): fetch_btn = gr.Button("📡 Fetch Prices", elem_classes=["btn-primary"]) gr.HTML('
Quick presets:
') with gr.Row(): preset_mega = gr.Button("🏦 Mega-Cap AI", elem_classes=["btn-secondary"]) preset_semi = gr.Button("🔧 Semiconductors", elem_classes=["btn-secondary"]) preset_pure = gr.Button("🤖 Pure-Play AI", elem_classes=["btn-secondary"]) with gr.Row(equal_height=False): with gr.Column(scale=3): price_chart = gr.Plot(label="", container=False) with gr.Column(scale=1, min_width=220): price_summary = gr.Markdown() fetch_btn.click( fn=lambda t, p: fetch_asset_prices(t, p), inputs=[ticker_box, period_radio], outputs=[price_chart, price_summary], ) preset_mega.click( fn=lambda p: fetch_asset_prices(AI_PRESET_MEGA, p), inputs=period_radio, outputs=[price_chart, price_summary], ) preset_semi.click( fn=lambda p: fetch_asset_prices(AI_PRESET_SEMI, p), inputs=period_radio, outputs=[price_chart, price_summary], ) preset_pure.click( fn=lambda p: fetch_asset_prices(AI_PRESET_PURE, p), inputs=period_radio, outputs=[price_chart, price_summary], ) # ══════════════════════════════════════════════════════════ # TAB 3 — SENTIMENT ANALYSIS # ══════════════════════════════════════════════════════════ with gr.Tab("🎭 Sentiment Analysis"): gr.HTML( '
' 'Interactive charts and R figures from the full analysis pipeline. ' 'Run the pipeline first if charts are empty.
' ) with gr.Row(): refresh_sent_btn = gr.Button("🔄 Refresh All Charts", elem_classes=["btn-secondary"]) # KPI cards kpi_html_comp = gr.HTML(value=render_kpi_cards) # Main interactive charts overview_chart_comp = gr.Plot(label="Sentiment Over Time", container=False) with gr.Row(): with gr.Column(): topic_chart_comp = gr.Plot(label="By Topic", container=False) with gr.Column(): platform_chart_comp = gr.Plot(label="By Platform", container=False) with gr.Row(): with gr.Column(): risk_chart_comp = gr.Plot(label="Bubble Risk Score", container=False) with gr.Column(): yearly_chart_comp = gr.Plot(label="Year-over-Year", container=False) # R figures (static) inside accordion with gr.Accordion("📊 R Analysis Figures (static)", open=False): gr.HTML( '
' 'Generated by ranalysis_bubblebusters.ipynb (run locally).' '
' ) with gr.Row(): r1 = gr.Image(label="R01 · Monthly Trend", show_label=True) r2 = gr.Image(label="R02 · Rolling Score", show_label=True) with gr.Row(): r3 = gr.Image(label="R03 · Chi-Square Residuals", show_label=True) r4 = gr.Image(label="R04 · Regression Coefficients", show_label=True) with gr.Row(): r5 = gr.Image(label="R05 · Yearly Grouped Bars", show_label=True) SENT_OUTPUTS = [ kpi_html_comp, overview_chart_comp, topic_chart_comp, platform_chart_comp, risk_chart_comp, yearly_chart_comp, r1, r2, r3, r4, r5, ] refresh_sent_btn.click(fn=refresh_sentiment, outputs=SENT_OUTPUTS) # ══════════════════════════════════════════════════════════ # TAB 4 — AI CHAT # ══════════════════════════════════════════════════════════ with gr.Tab("🤖 AI Chat"): llm_badge = ( f'' f'✦ LLM active — {MODEL_NAME}' if LLM_ENABLED else f'' f'◈ Keyword mode — set HF_API_KEY secret for full AI support' ) gr.HTML( f'
' f'Ask questions about the AI bubble data. {llm_badge}
' ) with gr.Row(equal_height=True): with gr.Column(scale=1): chatbot = gr.Chatbot( label="Conversation", height=430 ) user_msg = gr.Textbox( label="Ask about the data", placeholder=( "e.g. Show me sentiment trends · " "Which topics are most bearish? · " "What's the current bubble risk?" ), lines=1, ) gr.Examples( examples=[ "Show me sentiment trends over time", "Which topics are most bearish about AI?", "Compare sentiment across platforms", "What is the latest bubble risk score?", "Is sentiment getting more bullish or bearish recently?", "Give me an overview of the dataset", ], inputs=user_msg, ) with gr.Column(scale=1): chat_chart = gr.Plot(label="Data Visualisation", container=False) user_msg.submit( fn=ai_chat, inputs=[user_msg, chatbot], outputs=[chatbot, user_msg, chat_chart], ) # On page load, populate sentiment charts if data is available demo.load(fn=refresh_sentiment, outputs=SENT_OUTPUTS) demo.launch( allowed_paths=[str(BASE_DIR)], css=CSS, theme=gr.themes.Base( primary_hue=gr.themes.colors.teal, neutral_hue=gr.themes.colors.slate, ), )