Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| import time | |
| import traceback | |
| from pathlib import Path | |
| import pandas as pd | |
| import gradio as gr | |
| import papermill as pm | |
| import plotly.graph_objects as go | |
| import requests | |
# Folder containing this script; notebooks, CSV outputs and style.css are
# all resolved relative to it.
BASE_DIR = Path(__file__).resolve().parent
RUNS_DIR = BASE_DIR / "runs"

# Notebook file names, overridable through the NB1 / NB2 environment variables.
NB1 = os.getenv("NB1", "datacreation.ipynb").strip()
NB2 = os.getenv("NB2", "pythonanalysis.ipynb").strip()

# Webhook endpoint; an empty string means "not configured".
N8N_WEBHOOK_URL = os.getenv("N8N_WEBHOOK_URL", "").strip()

# Numeric settings, read from the environment with string defaults.
PAPERMILL_TIMEOUT = int(os.getenv("PAPERMILL_TIMEOUT", "1800"))
MAX_PREVIEW_ROWS = int(os.getenv("MAX_FILE_PREVIEW_ROWS", "50"))
def ensure_dirs():
    """Create the notebook-run output directory (and parents) if missing."""
    os.makedirs(RUNS_DIR, exist_ok=True)
def stamp():
    """Return the current local time formatted as ``YYYYMMDD-HHMMSS``."""
    now = time.localtime()
    return time.strftime("%Y%m%d-%H%M%S", now)
def load_css():
    """Return the text of ``style.css`` next to this script, or "" if absent."""
    stylesheet = BASE_DIR / "style.css"
    if not stylesheet.exists():
        return ""
    return stylesheet.read_text(encoding="utf-8")
def safe_read_csv(filename, nrows=None):
    """Read a CSV that lives under ``BASE_DIR``, never raising.

    Args:
        filename: Path of the CSV relative to ``BASE_DIR``.
        nrows: Optional maximum number of rows to read (passed to pandas).

    Returns:
        The parsed ``DataFrame``. An empty ``DataFrame`` is returned when the
        name resolves outside ``BASE_DIR``, when the file is missing, or when
        both parse attempts fail.
    """
    # The name can originate from a UI selection, so refuse anything that
    # escapes the app directory (absolute paths, ".." components).
    path = (BASE_DIR / filename).resolve()
    try:
        path.relative_to(BASE_DIR)
    except ValueError:
        return pd.DataFrame()

    if not path.exists():
        return pd.DataFrame()

    # First attempt: latin1 with the python engine; second: pandas defaults.
    attempts = (
        {"encoding": "latin1", "engine": "python"},
        {},
    )
    for options in attempts:
        try:
            return pd.read_csv(path, nrows=nrows, **options)
        except Exception:
            # Deliberate best-effort: fall through to the next attempt.
            continue
    return pd.DataFrame()
def run_notebook(nb_name: str) -> str:
    """Execute a notebook from ``BASE_DIR`` with papermill.

    The executed copy is written to ``RUNS_DIR`` as
    ``run_<timestamp>_<notebook file name>``.

    Args:
        nb_name: Notebook path relative to ``BASE_DIR``.

    Returns:
        ``"Executed <nb_name>"`` on success, or an ``"ERROR: ..."`` message
        when the notebook file does not exist.

    Raises:
        Whatever ``pm.execute_notebook`` raises; callers are expected to
        catch it.
    """
    ensure_dirs()
    nb_in = BASE_DIR / nb_name
    # is_file() also rejects a directory, e.g. when nb_name is empty and the
    # joined path is BASE_DIR itself.
    if not nb_in.is_file():
        return f"ERROR: {nb_name} not found."

    # Keep only the final path component so a name such as "sub/a.ipynb"
    # still yields an output file directly inside RUNS_DIR.
    nb_out = RUNS_DIR / f"run_{stamp()}_{Path(nb_name).name}"
    pm.execute_notebook(
        input_path=str(nb_in),
        output_path=str(nb_out),
        cwd=str(BASE_DIR),
        log_output=True,
        progress_bar=False,
        execution_timeout=PAPERMILL_TIMEOUT,
    )
    return f"Executed {nb_name}"
def run_datacreation():
    """Run the data-creation notebook (``NB1``) and return a log message.

    Returns:
        A string starting with ``"OK - "`` followed by the CSV files found in
        ``BASE_DIR``, or a string starting with ``"FAILED - "`` when the
        notebook is missing or execution raised.
    """
    try:
        log = run_notebook(NB1)
        # run_notebook signals a missing notebook with an "ERROR: ..." string
        # instead of raising; report that as a failure, not as "OK".
        if log.startswith("ERROR"):
            return f"FAILED - {log}"
        csvs = sorted(f.name for f in BASE_DIR.glob("*.csv"))
        return f"OK - {log}\n\nGenerated CSV files:\n" + "\n".join(f"- {c}" for c in csvs)
    except Exception as e:
        # Keep only the tail of the traceback so the log box stays readable.
        return f"FAILED - {e}\n\n{traceback.format_exc()[-2000:]}"
def run_pythonanalysis():
    """Run the analysis notebook (``NB2``) and return a log message.

    Returns:
        A string starting with ``"OK - "`` on success, or a string starting
        with ``"FAILED - "`` when the notebook is missing or execution raised.
    """
    try:
        log = run_notebook(NB2)
        # run_notebook signals a missing notebook with an "ERROR: ..." string
        # instead of raising; report that as a failure, not as "OK".
        if log.startswith("ERROR"):
            return f"FAILED - {log}"
        return f"OK - {log}\n\nAnalysis notebook completed."
    except Exception as e:
        # Keep only the tail of the traceback so the log box stays readable.
        return f"FAILED - {e}\n\n{traceback.format_exc()[-2000:]}"
def run_full_pipeline():
    """Run both pipeline steps in order and return their combined log text."""
    # List elements are evaluated left to right, so step 1 runs before step 2.
    sections = [
        "STEP 1/2 - Data Creation",
        run_datacreation(),
        "",
        "STEP 2/2 - Python Analysis",
        run_pythonanalysis(),
    ]
    return "\n".join(sections)
def load_kpis_html():
    """Build the KPI HTML snippet shown on the Dashboard tab.

    Reads the title-level features CSV and the monthly views CSV through
    ``safe_read_csv``. If the title-level table is empty, a "No data yet"
    placeholder is returned. Otherwise a grid of cards is returned: row count
    of the title table, row count of the monthly table, mean completion rate,
    mean views and summed views. A metric whose column is absent is shown as
    "N/A".
    """
    titles = safe_read_csv("netflix_title_level_features.csv")
    monthly = safe_read_csv("netflix_monthly_views_series.csv")

    if titles.empty:
        return """
    <div style="padding:20px;border-radius:16px;background:rgba(255,255,255,.7);text-align:center;">
    <h3>No data yet</h3>
    <p>Run the pipeline first to populate the dashboard.</p>
    </div>
    """

    present = titles.columns
    n_titles = len(titles)

    avg_completion = "N/A"
    if "completion_rate" in present:
        avg_completion = round(titles["completion_rate"].mean(), 2)

    avg_views = "N/A"
    if "avg_views" in present:
        avg_views = f"{titles['avg_views'].mean():,.0f}"

    total_views = "N/A"
    if "total_views" in present:
        total_views = f"{titles['total_views'].sum():,.0f}"

    n_months = "N/A" if monthly.empty else len(monthly)

    def card(label, value):
        # One KPI tile: small uppercase label above a large value.
        return f"""
    <div style="background:rgba(255,255,255,.78);padding:18px;border-radius:18px;text-align:center;
    border:1px solid rgba(255,255,255,.8);box-shadow:0 4px 14px rgba(0,0,0,.08);">
    <div style="font-size:12px;font-weight:700;letter-spacing:1px;color:#6b5ca5;text-transform:uppercase;">{label}</div>
    <div style="font-size:24px;font-weight:800;color:#2d1f4e;margin-top:8px;">{value}</div>
    </div>
    """

    return f"""
    <div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(160px,1fr));gap:12px;">
    {card("Titles", n_titles)}
    {card("Months", n_months)}
    {card("Avg Completion", avg_completion)}
    {card("Avg Views", avg_views)}
    {card("Total Views", total_views)}
    </div>
    """
def empty_chart(title):
    """Return a blank figure with ``title`` and a centred "Run the pipeline first" note."""
    # Paper coordinates place the text relative to the plot area, not the data.
    placeholder = dict(
        text="Run the pipeline first",
        x=0.5,
        y=0.5,
        xref="paper",
        yref="paper",
        showarrow=False,
        font=dict(size=16),
    )
    fig = go.Figure()
    fig.update_layout(
        title=title,
        template="plotly_white",
        height=420,
        annotations=[placeholder],
    )
    return fig
def build_views_chart():
    """Build the monthly-views line chart from the monthly views CSV.

    Returns:
        A line-and-marker figure of ``total_views`` against ``month``, or the
        ``empty_chart`` placeholder when the table is empty or lacks either
        column.
    """
    df = safe_read_csv("netflix_monthly_views_series.csv")
    if df.empty or "month" not in df.columns or "total_views" not in df.columns:
        return empty_chart("Monthly Views")

    # Unparseable month values become NaT and are dropped.
    df["month"] = pd.to_datetime(df["month"], errors="coerce")
    # A line trace connects points in row order, so sort chronologically to
    # avoid a zigzag when the file is not already ordered by month.
    df = df.dropna(subset=["month"]).sort_values("month")

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=df["month"],
        y=df["total_views"],
        mode="lines+markers",
        name="Total Views",
    ))
    fig.update_layout(
        title="Netflix Monthly Views Over Time",
        template="plotly_white",
        height=430,
    )
    return fig
def build_success_chart():
    """Build a bar chart of how many titles carry each ``success_label`` value.

    Falls back to the ``empty_chart`` placeholder when the title-level table
    is empty or has no ``success_label`` column.
    """
    features = safe_read_csv("netflix_title_level_features.csv")
    if features.empty or "success_label" not in features.columns:
        return empty_chart("Success Label Distribution")

    tally = features["success_label"].value_counts().reset_index()
    # Name the columns explicitly rather than rely on what reset_index yields.
    tally.columns = ["success_label", "count"]

    bars = go.Bar(
        x=tally["success_label"],
        y=tally["count"],
        name="Titles",
    )
    fig = go.Figure()
    fig.add_trace(bars)
    fig.update_layout(
        title="Success Label Distribution",
        template="plotly_white",
        height=430,
    )
    return fig
def build_sentiment_chart():
    """Build a bar chart of the mean positive / neutral / negative shares.

    Falls back to the ``empty_chart`` placeholder when the title-level table
    is empty or any of the three share columns is missing.
    """
    features = safe_read_csv("netflix_title_level_features.csv")
    share_columns = ("share_positive", "share_neutral", "share_negative")
    if features.empty or not set(share_columns).issubset(features.columns):
        return empty_chart("Average Sentiment Mix")

    # Column order matches the bar labels below.
    averages = [features[column].mean() for column in share_columns]

    bars = go.Bar(
        x=["Positive", "Neutral", "Negative"],
        y=averages,
        name="Average Share",
    )
    fig = go.Figure()
    fig.add_trace(bars)
    fig.update_layout(
        title="Average Sentiment Mix",
        template="plotly_white",
        height=430,
    )
    return fig
def load_table_preview(choice):
    """Return up to ``MAX_PREVIEW_ROWS`` rows of the chosen CSV.

    A falsy ``choice`` yields a one-row frame prompting the user to pick a
    table.
    """
    if choice:
        return safe_read_csv(choice, nrows=MAX_PREVIEW_ROWS)
    return pd.DataFrame([{"info": "Select a table"}])
def refresh_dashboard():
    """Recompute every Dashboard output.

    Returns:
        A 6-tuple: KPI HTML, views chart, success chart, sentiment chart, a
        dropdown update listing the known CSVs that exist in ``BASE_DIR``, and
        a preview frame of the first such CSV (empty frame if none exist).
    """
    known_tables = (
        "netflix_title_level_features.csv",
        "netflix_monthly_views_series.csv",
        "synthetic_netflix_reviews.csv",
        "synthetic_views_data.csv",
    )
    table_choices = [name for name in known_tables if (BASE_DIR / name).exists()]

    if table_choices:
        selected = table_choices[0]
        default_df = safe_read_csv(selected, nrows=MAX_PREVIEW_ROWS)
    else:
        selected = None
        default_df = pd.DataFrame()

    return (
        load_kpis_html(),
        build_views_chart(),
        build_success_chart(),
        build_sentiment_chart(),
        gr.update(choices=table_choices, value=selected),
        default_df,
    )
def call_n8n(payload):
    """POST ``payload`` as JSON to the configured webhook and return text.

    Returns:
        - a fixed notice when ``N8N_WEBHOOK_URL`` is empty;
        - for a JSON dict response, the first truthy value among the keys
          ``result``, ``answer``, ``response``, ``output`` (as ``str``);
        - for any other JSON response, the pretty-printed JSON;
        - for a non-JSON response, the stripped body or a fixed "no response"
          notice when it is empty;
        - ``"Webhook error: ..."`` if anything raises along the way.
    """
    if not N8N_WEBHOOK_URL:
        return "N8N_WEBHOOK_URL is not set in Space variables."

    try:
        response = requests.post(N8N_WEBHOOK_URL, json=payload, timeout=30)
        response.raise_for_status()

        content_type = response.headers.get("content-type", "").lower()
        if "application/json" not in content_type:
            return response.text.strip() or "No response returned from webhook."

        data = response.json()
        if isinstance(data, dict):
            for key in ("result", "answer", "response", "output"):
                if data.get(key):
                    return str(data[key])
        return json.dumps(data, indent=2)
    except Exception as e:
        # UI boundary: surface the failure as text rather than raising.
        return f"Webhook error: {e}"
def evaluate_show(
    show_type,
    release_year,
    completion_rate,
    avg_views,
    total_views,
    share_positive,
    share_neutral,
    share_negative,
):
    """Send the show's attributes to the webhook and return its text reply.

    Each argument is forwarded unchanged under a payload key of the same name.
    """
    payload = dict(
        show_type=show_type,
        release_year=release_year,
        completion_rate=completion_rate,
        avg_views=avg_views,
        total_views=total_views,
        share_positive=share_positive,
        share_neutral=share_neutral,
        share_negative=share_negative,
    )
    return call_n8n(payload)
def ask_ai(question, history):
    """Send a chat question to the webhook and append the exchange to history.

    Args:
        question: Text typed by the user; blank or empty input is ignored.
        history: Existing list of ``{"role", "content"}`` messages, or a
            falsy value for a fresh conversation.

    Returns:
        ``(history, "")`` — the updated message list and an empty string.
    """
    if not (question and question.strip()):
        return history, ""

    reply = call_n8n({"question": question})

    messages = history or []
    messages.extend(
        [
            {"role": "user", "content": question},
            {"role": "assistant", "content": reply},
        ]
    )
    return messages, ""
# Create the runs directory up front, before any notebook run is triggered.
ensure_dirs()

# NOTE(review): container nesting below was reconstructed from a listing whose
# indentation was lost — confirm which components belong inside each Row.
with gr.Blocks(title="Netflix Success Advisor") as demo:
    gr.Markdown(
        "# Netflix Success Advisor\n"
        "*AI app for evaluating Netflix show success*",
        elem_id="escp_title",
    )

    # ---- Tab 1: run the notebooks --------------------------------------
    with gr.Tab("Pipeline Runner"):
        gr.Markdown("Run the notebooks used in the project pipeline.")
        with gr.Row():
            btn_nb1 = gr.Button("Step 1: Data Creation", variant="secondary")
            btn_nb2 = gr.Button("Step 2: Python Analysis", variant="secondary")
        btn_all = gr.Button("Run Netflix Success Assessment Pipeline", variant="primary")
        run_log = gr.Textbox(
            label="Execution Log",
            lines=18,
            max_lines=30,
            interactive=False,
        )
        # All three buttons write their result string into the same log box.
        btn_nb1.click(run_datacreation, outputs=run_log)
        btn_nb2.click(run_pythonanalysis, outputs=run_log)
        btn_all.click(run_full_pipeline, outputs=run_log)

    # ---- Tab 2: KPIs, charts and table preview -------------------------
    with gr.Tab("Dashboard"):
        gr.Markdown("Explore the key outputs from the Netflix analysis pipeline.")
        # KPI HTML is computed once while the UI is being built.
        kpi_html = gr.HTML(value=load_kpis_html())
        refresh_btn = gr.Button("Refresh Dashboard", variant="primary")
        with gr.Row():
            chart_views = gr.Plot(label="Monthly Views")
            chart_success = gr.Plot(label="Success Labels")
        chart_sentiment = gr.Plot(label="Average Sentiment Mix")
        table_dropdown = gr.Dropdown(
            label="Select a table",
            choices=[],
            interactive=True,
        )
        table_display = gr.Dataframe(
            label="Table Preview",
            interactive=False,
        )

        # Order must match the tuple returned by refresh_dashboard.
        dashboard_outputs = [
            kpi_html,
            chart_views,
            chart_success,
            chart_sentiment,
            table_dropdown,
            table_display,
        ]
        refresh_btn.click(refresh_dashboard, outputs=dashboard_outputs)
        table_dropdown.change(load_table_preview, inputs=table_dropdown, outputs=table_display)

    # ---- Tab 3: webhook-backed evaluation and chat ---------------------
    with gr.Tab("AI Dashboard"):
        if N8N_WEBHOOK_URL:
            status = "Connected to n8n webhook."
        else:
            status = "Set N8N_WEBHOOK_URL in your Space variables."
        gr.Markdown(
            "### Netflix Content Strategy Assistant\n"
            f"{status}"
        )
        with gr.Row():
            with gr.Column(scale=1):
                show_type = gr.Dropdown(
                    choices=["Movie", "TV Show"],
                    label="Show Type",
                    value="TV Show",
                )
                release_year = gr.Number(label="Release Year", value=2024)
                completion_rate = gr.Slider(0, 100, value=75, label="Completion Rate (%)")
                avg_views = gr.Number(label="Average Views", value=1200000)
                total_views = gr.Number(label="Total Views", value=18000000)
                share_positive = gr.Slider(0, 100, value=60, label="Positive Sentiment (%)")
                share_neutral = gr.Slider(0, 100, value=25, label="Neutral Sentiment (%)")
                share_negative = gr.Slider(0, 100, value=15, label="Negative Sentiment (%)")
                eval_btn = gr.Button("Evaluate Show", variant="primary")
            with gr.Column(scale=1):
                result_box = gr.Textbox(
                    label="Assessment Result",
                    lines=20,
                    max_lines=30,
                    interactive=False,
                )

        # Order must match the positional parameters of evaluate_show.
        evaluation_inputs = [
            show_type,
            release_year,
            completion_rate,
            avg_views,
            total_views,
            share_positive,
            share_neutral,
            share_negative,
        ]
        eval_btn.click(
            evaluate_show,
            inputs=evaluation_inputs,
            outputs=result_box,
        )

        gr.Markdown("### Ask a follow-up question")
        chatbot = gr.Chatbot(height=350)
        user_input = gr.Textbox(
            label="Ask about the Netflix data or strategy output",
            placeholder="Example: Compare this title to a stronger one, or explain why the recommendation is Maintain."
        )
        gr.Examples(
            examples=[
                "Explain why strong completion rate matters for Netflix.",
                "What does high views but low completion suggest?",
                "How should Netflix react to mixed sentiment and flat view growth?",
                "Summarize the main drivers of success in business language.",
            ],
            inputs=user_input,
        )
        # ask_ai returns (history, "") so the textbox is cleared after submit.
        user_input.submit(
            ask_ai,
            inputs=[user_input, chatbot],
            outputs=[chatbot, user_input],
        )

demo.launch(css=load_css(), allowed_paths=[str(BASE_DIR)])