# NOTE: banner text captured along with this listing: "Spaces: Runtime error".
import json
import subprocess
import sys
import traceback
from datetime import datetime
from pathlib import Path

import gradio as gr
import numpy as np
import pandas as pd
import requests as req
# Output folders (same structure Notebook 2 writes to).
ART_DIR = Path("artifacts")
FIG_DIR = ART_DIR / "figures"
TAB_DIR = ART_DIR / "tables"

# Create the folders at import time so later readers/writers can assume they
# exist; parents=True also creates ART_DIR itself.
for p in [FIG_DIR, TAB_DIR]:
    p.mkdir(parents=True, exist_ok=True)
# ────────────────────────────────────────────
# PIPELINE RUNNER
# ────────────────────────────────────────────
def run_notebook(path: str) -> str:
    """Execute a Jupyter notebook in place and return a human-readable status.

    The notebook is run through ``jupyter nbconvert --execute --inplace`` using
    the current Python interpreter, so its outputs are written back into the
    same ``.ipynb`` file.

    Args:
        path: Path of the notebook to execute.

    Returns:
        A success message, or an error message containing the last 2000
        characters of nbconvert's stderr (or a traceback if the subprocess
        could not be started at all). This function never raises.
    """
    # NOTE(review): the leading "β" glyphs in the messages below look like
    # mis-decoded status emoji; confirm against the original file.

    # Fail fast with a readable message instead of nbconvert's long traceback.
    if not Path(path).is_file():
        return f"β Error running {path}:\nNotebook file not found."

    try:
        result = subprocess.run(
            [sys.executable, "-m", "jupyter", "nbconvert",
             "--to", "notebook", "--execute",
             "--ExecutePreprocessor.timeout=600",
             "--inplace", path],
            capture_output=True, text=True,
        )
    except Exception:
        # Boundary handler: the caller shows this string in the UI log box.
        return f"β Exception: {traceback.format_exc()}"

    if result.returncode != 0:
        # Keep only the tail of stderr; that is where the failing cell appears.
        return f"β Error running {path}:\n{result.stderr[-2000:]}"
    return f"β {path} completed successfully."
def run_data_creation():
    """Run Notebook 1 (``datacreation.ipynb``) and return the execution log text."""
    header = "βΆ Running Notebook 1 β Data Collection & Creation...\n"
    return header + run_notebook("datacreation.ipynb")
def run_analysis():
    """Run Notebook 2 (``pythonanalysis.ipynb``) and return the execution log text."""
    header = "βΆ Running Notebook 2 β Data Analysis & Modelling...\n"
    return header + run_notebook("pythonanalysis.ipynb")
def run_full_pipeline():
    """Run both notebooks in order and return the combined execution log.

    Step 2 is started regardless of whether step 1 reported an error.
    NOTE(review): consider skipping step 2 when step 1 fails, since it would
    then run on stale or missing data.
    """
    step1 = run_notebook("datacreation.ipynb")
    step2 = run_notebook("pythonanalysis.ipynb")
    return "".join([
        "βΆ Running full pipeline...\n\n",
        "Step 1 β Data Collection & Creation\n",
        step1,
        "\n\n",
        "Step 2 β Data Analysis & Modelling\n",
        step2,
    ])
# ────────────────────────────────────────────
# DASHBOARD HELPERS
# ────────────────────────────────────────────
def load_kpis():
    """Load the KPI dictionary from ``TAB_DIR / "kpis.json"``.

    Returns:
        The parsed JSON content, or ``None`` when the file does not exist
        (i.e. the pipeline has not produced it yet).

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    kpi_path = TAB_DIR / "kpis.json"
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(kpi_path, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return None
def load_shows():
    """Load the shows table, preferring the final table over the master file.

    Looks for ``TAB_DIR / "shows_final.csv"`` first and falls back to
    ``ART_DIR / "shows_master.csv"``.

    Returns:
        A ``pandas.DataFrame``, or ``None`` when neither file exists.
    """
    candidates = (TAB_DIR / "shows_final.csv", ART_DIR / "shows_master.csv")
    for path in candidates:
        if path.exists():
            return pd.read_csv(path)
    return None
def load_recommendations():
    """Load ``TAB_DIR / "renewal_recommendations.csv"``.

    Returns:
        A ``pandas.DataFrame``, or ``None`` when the file does not exist.
    """
    path = TAB_DIR / "renewal_recommendations.csv"
    if not path.exists():
        return None
    return pd.read_csv(path)
def load_monthly():
    """Load ``ART_DIR / "monthly_platform_totals.csv"`` with ``month`` parsed as datetime.

    Returns:
        A ``pandas.DataFrame`` whose ``month`` column has been converted with
        ``pd.to_datetime``, or ``None`` when the file does not exist.
    """
    path = ART_DIR / "monthly_platform_totals.csv"
    if not path.exists():
        return None
    df = pd.read_csv(path)
    df["month"] = pd.to_datetime(df["month"])
    return df
def _kpi_percent(kpis, key):
    """Format the fraction stored under *key* as a one-decimal percentage.

    Returns the same placeholder the other cards use when the value is missing
    or not numeric, instead of raising or showing a misleading ``0%``.
    """
    value = kpis.get(key)
    if isinstance(value, (int, float)):
        return f"{round(value * 100, 1)}%"
    return "β"


def kpi_html(kpis):
    """Render the KPI dictionary as a row of HTML cards.

    Args:
        kpis: Mapping of KPI names to values, or a falsy value when no KPIs
            are available yet.

    Returns:
        An HTML string: a "run the pipeline first" hint when *kpis* is falsy,
        otherwise a flex container holding eight ``kpi-card`` blocks.
    """
    if not kpis:
        return "<p style='color:#888;text-align:center;padding:40px'>Run the pipeline first to populate the dashboard.</p>"

    # NOTE(review): the "β" placeholder looks like a mis-decoded dash; confirm
    # against the original file.
    # (CSS modifier class, displayed value, label) for each card, in order.
    cards = [
        ("kpi-total", kpis.get('total_shows', 'β'), "Total Shows"),
        ("kpi-renew", kpis.get('shows_to_renew', 'β'), "Renew"),
        ("kpi-invest", kpis.get('shows_invest_more', 'β'), "Invest More"),
        ("kpi-cancel", kpis.get('shows_to_cancel', 'β'), "Cancel"),
        ("kpi-roi", f"{kpis.get('avg_platform_roi', 'β')}%", "Avg Platform ROI"),
        ("kpi-completion", _kpi_percent(kpis, 'avg_completion_rate'), "Avg Completion Rate"),
        ("kpi-rating", kpis.get('avg_imdb_rating', 'β'), "Avg IMDb Rating"),
        ("kpi-sentiment", _kpi_percent(kpis, 'sentiment_alignment'), "Sentiment Alignment"),
    ]
    cards_html = "\n".join(
        f'<div class="kpi-card {css_class}">\n'
        f'<div class="kpi-value">{value}</div>\n'
        f'<div class="kpi-label">{label}</div>\n'
        f'</div>'
        for css_class, value, label in cards
    )
    return (
        '\n<div style="display:flex;gap:16px;flex-wrap:wrap;justify-content:center;padding:16px 0">\n'
        f"{cards_html}\n"
        "</div>\n"
    )
# Columns shown in each of the per-decision dashboard tables.
_DASHBOARD_TABLE_COLUMNS = [
    "title", "primary_genre", "imdb_rating", "num_seasons",
    "avg_monthly_streams_k", "platform_roi_pct", "avg_vader_score",
]

# Figure base names looked up under FIG_DIR as "<name>.png".
_DASHBOARD_FIGURES = [
    "vader_sentiment_analysis", "viewership_trends_sampled",
    "arima_forecasts", "random_forest_results",
    "decision_analysis", "platform_overview",
]


def _decision_table(shows, decision):
    """Return up to 20 rows of *shows* with the given renewal decision, rounded to 2 dp."""
    if shows is None:
        return pd.DataFrame()
    matching = shows[shows["renewal_decision"] == decision]
    return matching[_DASHBOARD_TABLE_COLUMNS].round(2).head(20)


def refresh_dashboard():
    """Collect everything the Dashboard tab displays.

    Returns:
        A 10-tuple in the order the UI outputs expect: the KPI HTML block, six
        figure paths (platform overview, viewership trends, VADER sentiment,
        ARIMA forecasts, random forest results, decision analysis; each
        ``None`` when the PNG is missing), then the "Renew", "Cancel" and
        "Invest More" tables (empty DataFrames when no recommendations file
        exists).

    Raises:
        KeyError: If the recommendations file lacks one of the expected columns.
    """
    kpis = load_kpis()
    shows = load_recommendations()
    kpi_block = kpi_html(kpis)

    figs = {}
    for name in _DASHBOARD_FIGURES:
        fig_path = FIG_DIR / f"{name}.png"
        figs[name] = str(fig_path) if fig_path.exists() else None

    return (
        kpi_block,
        figs.get("platform_overview"),
        figs.get("viewership_trends_sampled"),
        figs.get("vader_sentiment_analysis"),
        figs.get("arima_forecasts"),
        figs.get("random_forest_results"),
        figs.get("decision_analysis"),
        _decision_table(shows, "Renew"),
        _decision_table(shows, "Cancel"),
        _decision_table(shows, "Invest More"),
    )
# ────────────────────────────────────────────
# SEARCH
# ────────────────────────────────────────────
def search_shows(query, decision_filter):
    """Filter the recommendations table by title substring and renewal decision.

    Args:
        query: Text to look for in the ``title`` column (case-insensitive,
            matched literally). Blank or ``None`` means no title filter.
        decision_filter: A ``renewal_decision`` value to keep, or ``"All"``
            (or ``None``) for no decision filter.

    Returns:
        Up to 50 matching rows with numeric columns rounded to 2 decimals, or
        a one-row "Run the pipeline first." DataFrame when no recommendations
        file exists.

    Raises:
        KeyError: If the recommendations file lacks one of the expected columns.
    """
    shows = load_recommendations()
    if shows is None:
        return pd.DataFrame({"message": ["Run the pipeline first."]})

    df = shows
    needle = (query or "").strip()
    if needle:
        # regex=False: the query is user input, so characters such as "(" or
        # "*" must be matched literally rather than raising re.error.
        mask = df["title"].str.contains(needle, case=False, na=False, regex=False)
        df = df[mask]
    if decision_filter and decision_filter != "All":
        df = df[df["renewal_decision"] == decision_filter]

    cols = ["title", "primary_genre", "imdb_rating", "num_seasons",
            "avg_monthly_streams_k", "platform_roi_pct",
            "avg_vader_score", "renewal_decision"]
    return df[cols].round(2).head(50)
# ────────────────────────────────────────────
# AI DASHBOARD — n8n webhook
# ────────────────────────────────────────────
# Endpoint of the n8n workflow that answers questions in the AI Dashboard tab.
# (`requests` is imported as `req` with the other imports at the top of the file.)
N8N_WEBHOOK = "https://jimkaufmann.app.n8n.cloud/webhook/ai-analyst"
def ask_ai(question, history):
    """Send a question plus platform context to the n8n webhook and extend the chat.

    Args:
        question: The user's message. Blank or ``None`` is ignored.
        history: Existing chat history as a list of ``{"role", "content"}``
            dicts, or ``None``.

    Returns:
        ``(history, "")``: the updated history and an empty string that clears
        the input box. For a blank question the history is returned unchanged.
        Network and decoding failures are reported as the assistant's answer
        rather than raised.
    """
    if not question or not question.strip():
        return history, ""

    shows = load_shows()
    kpis = load_kpis()

    # Build a compact text context: all KPIs plus the first 30 shows.
    context = ""
    if kpis:
        context += f"Platform KPIs: {json.dumps(kpis)}\n"
    if shows is not None:
        try:
            summary = shows[["title", "renewal_decision", "imdb_rating",
                             "platform_roi_pct", "avg_monthly_streams_k"]] \
                .head(30).to_dict(orient="records")
            context += f"Sample shows data: {json.dumps(summary)}\n"
        except (KeyError, TypeError, ValueError):
            # KeyError: the loaded table lacks one of the columns (possible
            # when load_shows() fell back to the master file).
            # TypeError/ValueError: json.dumps could not serialise a value.
            context += "Show data available but could not be serialised.\n"

    try:
        response = req.post(
            N8N_WEBHOOK,
            json={"question": question, "context": context},
            timeout=30,
        )
        if response.status_code == 200:
            data = response.json()
            if isinstance(data, dict):
                answer = data.get("answer") or data.get("text") or str(data)
            else:
                # e.g. a JSON list; show it instead of failing on .get().
                answer = str(data)
        else:
            answer = f"Webhook returned status {response.status_code}. Make sure your n8n workflow is active and published."
    except (req.RequestException, ValueError) as e:
        # RequestException: connection/timeout problems; ValueError: body was
        # not valid JSON.
        answer = f"Could not reach the n8n workflow: {e}"

    # Copy so the list object passed in by the caller is not mutated.
    # NOTE(review): dict-style messages require the Chatbot component to use
    # the "messages" format; confirm this matches the installed Gradio version.
    history = list(history or [])
    history.append({"role": "user", "content": question})
    history.append({"role": "assistant", "content": answer})
    return history, ""
# ────────────────────────────────────────────
# BUILD UI
# ────────────────────────────────────────────
# Optional custom stylesheet; an empty string keeps Gradio's default styling.
# Path.read_text closes the file, unlike a bare open(...).read().
css_string = Path("style.css").read_text(encoding="utf-8") if Path("style.css").exists() else ""

# NOTE(review): indentation was reconstructed from the statement order; which
# widgets sit inside each gr.Row() is a best guess and should be confirmed.
# NOTE(review): the "β"/"π" glyphs in labels below look like mis-decoded
# emoji/dashes; they are reproduced as found and should be restored from the
# original file.
with gr.Blocks(title="Streaming Cancellation Risk Predictor", css=css_string) as demo:
    # ── HEADER
    gr.HTML("""
    <div class="header-wrap">
    <img src="/file=background_top.png" class="bg-top"/>
    <div class="header-content">
    <h1 class="app-title">π¬ Streaming Cancellation Risk Predictor</h1>
    <p class="app-subtitle">Which shows should we Renew, Cancel, or Invest More in?</p>
    </div>
    </div>
    """)

    with gr.Tabs():
        # ── TAB 1: PIPELINE RUNNER
        with gr.Tab("βΆ Pipeline Runner"):
            gr.Markdown("""
            Run the two notebooks to collect IMDb data, generate synthetic viewership and reviews,
            run VADER sentiment analysis, ARIMA forecasting, and train the Random Forest classifier.
            Results are saved automatically and populate the Dashboard tab.
            """)
            with gr.Row():
                btn_nb1 = gr.Button("Step 1 β Data Collection & Creation", variant="secondary", size="lg")
                btn_nb2 = gr.Button("Step 2 β Data Analysis & Modelling", variant="secondary", size="lg")
            btn_full = gr.Button("π Run Full Pipeline (Both Steps)", variant="primary", size="lg")
            log_box = gr.Textbox(label="Execution Log", lines=12, interactive=False)
            btn_nb1.click(run_data_creation, outputs=log_box)
            btn_nb2.click(run_analysis, outputs=log_box)
            btn_full.click(run_full_pipeline, outputs=log_box)

        # ── TAB 2: DASHBOARD
        with gr.Tab("π Dashboard"):
            btn_refresh = gr.Button("π Refresh Dashboard", variant="primary")
            kpi_display = gr.HTML(label="KPIs")
            gr.Markdown("### Platform Overview")
            img_platform = gr.Image(label="Total Monthly Streams", show_label=False)
            gr.Markdown("### Viewership Trends")
            img_trends = gr.Image(label="Viewership Trends", show_label=False)
            gr.Markdown("### Sentiment Analysis")
            img_vader = gr.Image(label="VADER Sentiment", show_label=False)
            gr.Markdown("### ARIMA Forecasts")
            img_arima = gr.Image(label="ARIMA Forecasts", show_label=False)
            gr.Markdown("### Random Forest Results")
            img_rf = gr.Image(label="Random Forest", show_label=False)
            gr.Markdown("### Decision Analysis")
            img_decisions = gr.Image(label="Decision Analysis", show_label=False)
            gr.Markdown("### π’ Shows to Renew")
            tbl_renew = gr.DataFrame(label="Renew")
            gr.Markdown("### π΄ Shows to Cancel")
            tbl_cancel = gr.DataFrame(label="Cancel")
            gr.Markdown("### π‘ Shows to Invest More In")
            tbl_invest = gr.DataFrame(label="Invest More")

            # Order must match the tuple returned by refresh_dashboard().
            all_outputs = [
                kpi_display,
                img_platform, img_trends, img_vader,
                img_arima, img_rf, img_decisions,
                tbl_renew, tbl_cancel, tbl_invest,
            ]
            btn_refresh.click(refresh_dashboard, outputs=all_outputs)
            # Also populate the dashboard once when the page first loads.
            demo.load(refresh_dashboard, outputs=all_outputs)

        # ── TAB 3: SEARCH
        with gr.Tab("π Show Search"):
            gr.Markdown("""
            Search across all shows in the dataset. Filter by renewal decision to quickly find
            the platform's top renewal candidates or shows flagged for cancellation.
            """)
            with gr.Row():
                search_box = gr.Textbox(placeholder="Search by show title...", label="", scale=3)
                decision_drop = gr.Dropdown(
                    choices=["All", "Renew", "Invest More", "Cancel"],
                    value="All", label="Filter by decision", scale=1
                )
                search_btn = gr.Button("Search", variant="primary")
            search_table = gr.DataFrame(label="Results")
            # The button and pressing Enter in the box trigger the same search.
            search_btn.click(search_shows,
                             inputs=[search_box, decision_drop],
                             outputs=search_table)
            search_box.submit(search_shows,
                              inputs=[search_box, decision_drop],
                              outputs=search_table)

        # ── TAB 4: AI DASHBOARD
        with gr.Tab("π€ AI Dashboard"):
            gr.Markdown("""
            Ask questions about the platform's content portfolio and get AI-powered answers.
            Connected to our n8n workflow which has access to the full show dataset and KPIs.
            *Examples: "Which drama shows should we prioritise for renewal?", "What genres have the best ROI?",
            "Which shows have high viewership but negative sentiment?"*
            """)
            chatbot = gr.Chatbot(value=[], height=420, label="")
            with gr.Row():
                msg_box = gr.Textbox(value="", placeholder="Ask a question about the data...",
                                     label="", scale=4)
                send_btn = gr.Button("Send", variant="primary", scale=1)
            # ask_ai returns (history, "") so the second output clears the box.
            send_btn.click(ask_ai, inputs=[msg_box, chatbot], outputs=[chatbot, msg_box])
            msg_box.submit(ask_ai, inputs=[msg_box, chatbot], outputs=[chatbot, msg_box])

    # ── FOOTER
    gr.HTML("""
    <div class="footer">
    <img src="/file=background_bottom.png" class="bg-bottom"/>
    <p>ESCP Business School β AI for Big Data Management β Group Project 2026</p>
    </div>
    """)

demo.launch()