Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| from plotly.subplots import make_subplots | |
| import numpy as np | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # PAGE CONFIG | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
# Page-level settings: browser-tab title and icon, wide layout, sidebar open.
_PAGE_SETTINGS = {
    "page_title": "Netflix Analytics 2.0",
    "page_icon": "π¬",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # THEME CONSTANTS | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
# Brand red plus the dark surface shades used for page, cards and borders.
NF_RED = "#E50914"
NF_DARK = "#0a0a0a"
NF_CARD = "#181818"
NF_GRAY = "#222222"
NF_BORDER = "#2f2f2f"

# Accent colours.
PURPLE = "#6C5CE7"
TEAL = "#00B4D8"
GOLD = "#F39C12"
GREEN = "#00B894"
PINK = "#E84393"

# Text colours: primary, muted, dim.
TEXT_PRI = "#FFFFFF"
TEXT_MUT = "#9e9e9e"
TEXT_DIM = "#555555"

# Default colour cycle for charts: the named accents first, then three extras.
CHART_COLORS = [
    NF_RED,
    PURPLE,
    TEAL,
    GOLD,
    GREEN,
    PINK,
    "#A29BFE",
    "#55EFC4",
    "#FD79A8",
]
def hex_rgba(h, a=0.15):
    """Convert a hex colour string to a CSS ``rgba(r,g,b,a)`` string.

    Args:
        h: Colour as ``"#RRGGBB"`` or ``"RRGGBB"``. The 3-digit shorthand
            (``"#RGB"``) is expanded, and for an 8-digit ``"#RRGGBBAA"``
            value only the first six digits are used.
        a: Alpha component, interpolated into the result unchanged.

    Returns:
        A string such as ``"rgba(229,9,20,0.15)"``.

    Raises:
        ValueError: If ``h`` does not have 3, 6 or 8 hex digits, or contains
            non-hexadecimal characters.
    """
    digits = h.lstrip("#")
    if len(digits) == 3:
        # CSS shorthand: every digit is doubled ("f0a" -> "ff00aa").
        digits = "".join(ch * 2 for ch in digits)
    elif len(digits) not in (6, 8):
        # Slicing a 4- or 5-digit value would silently yield a wrong blue channel.
        raise ValueError(f"expected a 3-, 6- or 8-digit hex colour, got {h!r}")
    r, g, b = (int(digits[i:i + 2], 16) for i in (0, 2, 4))
    return f"rgba({r},{g},{b},{a})"
# Layout defaults merged into every figure by apply_theme().
_TRANSPARENT = "rgba(0,0,0,0)"
_AXIS_STYLE = {"gridcolor": NF_BORDER, "linecolor": NF_BORDER, "tickcolor": TEXT_MUT}

PLOTLY_BASE = {
    "paper_bgcolor": _TRANSPARENT,
    "plot_bgcolor": _TRANSPARENT,
    "font": {"family": "DM Sans, sans-serif", "color": TEXT_PRI, "size": 12},
    # Each axis gets its own copy so the two entries never share one dict.
    "xaxis": dict(_AXIS_STYLE),
    "yaxis": dict(_AXIS_STYLE),
    "colorway": CHART_COLORS,
    "legend": {"bgcolor": _TRANSPARENT, "font": {"color": TEXT_PRI}},
    "margin": {"l": 10, "r": 10, "t": 40, "b": 10},
    "title": {"font": {"size": 14, "color": TEXT_PRI}},
}
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # CSS | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
# Global stylesheet, injected once as a <style> element.
# Every translucent colour is derived from a theme constant through hex_rgba()
# instead of a repeated rgba() literal, so the pills, badges and deltas cannot
# drift from the palette (the old .sp-tmdb literal no longer matched TEAL).
st.markdown(f"""
<style>
@import url('https://fonts.googleapis.com/css2?family=DM+Sans:wght@300;400;500;600;700&family=Bebas+Neue&family=Space+Grotesk:wght@400;600&display=swap');
html, body, [data-testid="stAppViewContainer"] {{
  background-color: {NF_DARK};
  color: {TEXT_PRI};
  font-family: 'DM Sans', sans-serif;
}}
[data-testid="stAppViewContainer"] {{
  background: radial-gradient(ellipse 120% 60% at 10% 0%, #1a0505 0%, {NF_DARK} 55%);
}}
[data-testid="stSidebar"] {{
  background: linear-gradient(180deg, #0d0d0d 0%, {NF_DARK} 100%) !important;
  border-right: 1px solid {NF_BORDER};
}}
[data-testid="stSidebar"] * {{ color: {TEXT_PRI} !important; }}
[data-testid="stMetricLabel"] {{ color: {TEXT_MUT} !important; font-size:11px !important; }}
[data-testid="stMetricValue"] {{ color: {TEXT_PRI} !important; font-size:22px !important; font-weight:700; }}
hr {{ border-color: {NF_BORDER} !important; margin: 1.5rem 0; }}
/* Tabs */
[data-testid="stTabs"] [role="tab"] {{
  color: {TEXT_MUT}; font-weight:500; font-family:'DM Sans',sans-serif;
  padding: 8px 16px; border-radius: 6px 6px 0 0;
  transition: all 0.2s;
}}
[data-testid="stTabs"] [role="tab"][aria-selected="true"] {{
  color: {TEXT_PRI}; border-bottom: 2px solid {NF_RED};
  background: {hex_rgba(NF_RED, 0.08)};
}}
[data-testid="stTabs"] [role="tablist"] {{
  border-bottom: 1px solid {NF_BORDER};
  gap: 4px;
}}
/* Expander */
[data-testid="stExpander"] {{
  background: {NF_CARD}; border: 1px solid {NF_BORDER}; border-radius:10px;
}}
/* Multiselect / Input */
[data-testid="stMultiSelect"] > div > div,
[data-testid="stSelectbox"] > div > div,
.stTextInput > div > div {{
  background: {NF_GRAY} !important; border-color: {NF_BORDER} !important;
  border-radius: 8px !important; color: {TEXT_PRI} !important;
}}
.stSlider [data-testid="stSliderThumb"] {{ background: {NF_RED}; }}
.stCheckbox label {{ color: {TEXT_PRI} !important; }}
/* Plotly */
.js-plotly-plot .plotly {{ border-radius: 12px; }}
/* Components */
.hero-title {{
  font-family: 'Bebas Neue', sans-serif;
  font-size: 56px; letter-spacing: 4px; color: {TEXT_PRI}; line-height:1; margin:0;
}}
.hero-title span {{ color: {NF_RED}; }}
.hero-sub {{
  font-size: 12px; color: {TEXT_MUT}; letter-spacing: 2.5px;
  text-transform: uppercase; margin-top: 6px;
}}
.section-hdr {{
  font-family: 'Bebas Neue', sans-serif; font-size: 24px;
  letter-spacing: 2px; color: {TEXT_PRI};
  padding-bottom: 6px; border-bottom: 2px solid {NF_RED};
  display: inline-block; margin-bottom: 16px;
}}
.kpi-card {{
  background: linear-gradient(135deg, {NF_CARD} 0%, #1e1e1e 100%);
  border: 1px solid {NF_BORDER}; border-radius:12px;
  padding: 18px 16px; text-align:center; position:relative; overflow:hidden;
  transition: transform 0.2s, border-color 0.2s;
}}
.kpi-card::before {{
  content:''; position:absolute; top:0; left:0; right:0; height:3px;
  background: var(--accent, linear-gradient(90deg, {NF_RED}, {PURPLE}));
}}
.kpi-card:hover {{ transform:translateY(-3px); border-color:{NF_RED}; }}
.kpi-icon {{ font-size:26px; margin-bottom:5px; }}
.kpi-val {{ font-size:26px; font-weight:700; color:{TEXT_PRI}; line-height:1; }}
.kpi-lbl {{ font-size:10px; font-weight:600; color:{TEXT_MUT};
  letter-spacing:1.5px; text-transform:uppercase; margin-top:4px; }}
.kpi-delta {{
  font-size:11px; margin-top:7px; padding:2px 8px; border-radius:20px; display:inline-block;
}}
.delta-pos {{ background:{hex_rgba(GREEN, 0.2)}; color:{GREEN}; }}
.delta-neg {{ background:{hex_rgba(NF_RED, 0.2)}; color:{NF_RED}; }}
.delta-neu {{ background:{hex_rgba(TEXT_MUT, 0.15)}; color:{TEXT_MUT}; }}
.insight-card {{
  background: linear-gradient(135deg, #111 0%, #161616 100%);
  border: 1px solid {NF_BORDER}; border-left: 3px solid {PURPLE};
  border-radius:10px; padding:13px 15px; margin-bottom:9px;
}}
.insight-card.red {{ border-left-color:{NF_RED}; background:linear-gradient(135deg,#150505,#1a0808); }}
.insight-card.teal {{ border-left-color:{TEAL}; background:linear-gradient(135deg,#03121a,#051a24); }}
.insight-card.gold {{ border-left-color:{GOLD}; background:linear-gradient(135deg,#141004,#1a1508); }}
.insight-card.green {{ border-left-color:{GREEN}; background:linear-gradient(135deg,#031410,#051a14); }}
.insight-text {{ font-size:12.5px; color:{TEXT_MUT}; line-height:1.55; }}
.insight-text strong {{ color:{TEXT_PRI}; }}
/* Poster card grid */
.poster-grid {{ display:flex; flex-wrap:wrap; gap:12px; padding:8px 0; }}
.poster-card {{
  background:{NF_CARD}; border:1px solid {NF_BORDER}; border-radius:10px;
  overflow:hidden; width:150px; flex-shrink:0;
  transition:transform 0.2s, border-color 0.2s; cursor:pointer;
}}
.poster-card:hover {{ transform:translateY(-5px) scale(1.02); border-color:{NF_RED}; }}
.poster-card img {{ width:100%; height:220px; object-fit:cover; display:block; }}
.poster-card .pc-info {{ padding:8px; }}
.poster-card .pc-title {{
  font-size:11px; font-weight:600; color:{TEXT_PRI}; line-height:1.3;
  overflow:hidden; display:-webkit-box; -webkit-line-clamp:2; -webkit-box-orient:vertical;
}}
.poster-card .pc-meta {{ font-size:10px; color:{TEXT_MUT}; margin-top:3px; }}
.poster-card .pc-badge {{
  font-size:9px; font-weight:700; padding:2px 5px; border-radius:4px;
  display:inline-block; margin-top:4px;
}}
.badge-rank {{ background:{hex_rgba(NF_RED, 0.25)}; color:{NF_RED}; }}
.badge-score {{ background:{hex_rgba(GOLD, 0.25)}; color:{GOLD}; }}
.badge-trend {{ background:{hex_rgba(TEAL, 0.25)}; color:{TEAL}; }}
/* Score pill */
.score-pill {{
  display:inline-flex; align-items:center; gap:5px;
  padding:3px 10px; border-radius:20px; font-size:12px; font-weight:600;
  margin:2px;
}}
.sp-tmdb {{ background:{hex_rgba(TEAL, 0.15)}; color:{TEAL}; border:1px solid {hex_rgba(TEAL, 0.3)}; }}
.sp-imdb {{ background:{hex_rgba(GOLD, 0.15)}; color:{GOLD}; border:1px solid {hex_rgba(GOLD, 0.3)}; }}
.sp-rt {{ background:{hex_rgba(NF_RED, 0.15)}; color:{NF_RED}; border:1px solid {hex_rgba(NF_RED, 0.3)}; }}
.sp-mc {{ background:{hex_rgba(PURPLE, 0.15)}; color:{PURPLE}; border:1px solid {hex_rgba(PURPLE, 0.3)}; }}
/* Content detail card */
.detail-card {{
  background:{NF_CARD}; border:1px solid {NF_BORDER}; border-radius:14px;
  padding:20px; margin-bottom:16px;
}}
.detail-title {{ font-size:22px; font-weight:700; color:{TEXT_PRI}; margin-bottom:6px; }}
.detail-meta {{ font-size:12px; color:{TEXT_MUT}; }}
.tag {{
  display:inline-block; padding:3px 10px; border-radius:20px;
  font-size:11px; font-weight:600; margin:3px;
}}
.tag-genre {{ background:{hex_rgba(PURPLE, 0.2)}; color:{PURPLE}; border:1px solid {hex_rgba(PURPLE, 0.3)}; }}
.tag-lang {{ background:{hex_rgba(TEAL, 0.2)}; color:{TEAL}; border:1px solid {hex_rgba(TEAL, 0.3)}; }}
.tag-status {{ background:{hex_rgba(GREEN, 0.2)}; color:{GREEN}; border:1px solid {hex_rgba(GREEN, 0.3)}; }}
.tag-award {{ background:{hex_rgba(GOLD, 0.2)}; color:{GOLD}; border:1px solid {hex_rgba(GOLD, 0.3)}; }}
/* Talent card */
.talent-card {{
  background:{NF_CARD}; border:1px solid {NF_BORDER}; border-radius:10px;
  text-align:center; padding:14px 10px; overflow:hidden;
  transition:transform 0.2s, border-color 0.2s;
}}
.talent-card:hover {{ transform:translateY(-4px); border-color:{PURPLE}; }}
.talent-card img {{
  width:72px; height:72px; border-radius:50%; object-fit:cover;
  border:2px solid {NF_BORDER}; margin-bottom:8px;
}}
.talent-name {{ font-size:12px; font-weight:600; color:{TEXT_PRI}; }}
.talent-meta {{ font-size:10px; color:{TEXT_MUT}; margin-top:2px; }}
</style>
""", unsafe_allow_html=True)
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # HELPERS | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def apply_theme(fig, h=380):
    """Apply the shared PLOTLY_BASE layout and a height to ``fig``.

    Args:
        fig: Figure object exposing ``update_layout``; it is modified in place.
        h: Figure height passed to the layout.

    Returns:
        The same ``fig`` object, so the call can be chained.
    """
    layout_kwargs = {**PLOTLY_BASE, "height": h}
    fig.update_layout(**layout_kwargs)
    return fig
def sec(label):
    """Render ``label`` as a section header (CSS class ``section-hdr``)."""
    header_html = f'<div class="section-hdr">{label}</div>'
    st.markdown(header_html, unsafe_allow_html=True)
def insight(text, style=""):
    """Render an insight card.

    Args:
        text: HTML placed inside the card body; it is inserted as-is.
        style: Extra CSS class appended after ``insight-card``.
    """
    card_html = (
        f'<div class="insight-card {style}">'
        f'<div class="insight-text">{text}</div>'
        "</div>"
    )
    st.markdown(card_html, unsafe_allow_html=True)
def kpi(icon, val, lbl, delta=None, dt="neu", accent=None):
    """Render one KPI card.

    Args:
        icon: Text shown in the ``kpi-icon`` element.
        val: Headline value.
        lbl: Caption under the value.
        delta: Optional secondary text; the delta pill is omitted when falsy.
        dt: Suffix of the delta pill's CSS class (``delta-<dt>``).
        accent: Optional colour string; when given it sets the card's
            ``--accent`` gradient, using ``accent`` and ``accent`` + ``66``.
    """
    style = ""
    if accent:
        style = f"--accent: linear-gradient(90deg,{accent},{accent}66);"

    dhtml = ""
    if delta:
        dhtml = f'<div class="kpi-delta delta-{dt}">{delta}</div>'

    # The leading empty item reproduces the newline the markup starts with.
    card_lines = [
        "",
        f'<div class="kpi-card" style="{style}">',
        f'<div class="kpi-icon">{icon}</div>',
        f'<div class="kpi-val">{val}</div>',
        f'<div class="kpi-lbl">{lbl}</div>',
        dhtml,
        "</div>",
    ]
    st.markdown("\n".join(card_lines), unsafe_allow_html=True)
def score_pills(tmdb=None, imdb=None, rt=None, mc=None):
    """Render the available review scores as coloured pills.

    Scores that are ``None`` or NaN are skipped. When no score is available
    nothing is rendered.

    Args:
        tmdb: First score, shown with one decimal.
        imdb: IMDb rating, shown with one decimal.
        rt: Score shown as a whole-number percentage.
        mc: Score shown as a whole number after the ``MC`` label.
    """
    def _present(value):
        return value is not None and not pd.isna(value)

    def _whole(value):
        # Numerically coerced columns arrive as floats ("85.0"); print them
        # without the decimal, and fall back to str() for anything else.
        try:
            return f"{float(value):.0f}"
        except (TypeError, ValueError):
            return str(value)

    pills = []
    if _present(tmdb):
        pills.append(f'<span class="score-pill sp-tmdb">β {tmdb:.1f}</span>')
    if _present(imdb):
        pills.append(f'<span class="score-pill sp-imdb">π¬ IMDb {imdb:.1f}</span>')
    if _present(rt):
        pills.append(f'<span class="score-pill sp-rt">π {_whole(rt)}%</span>')
    if _present(mc):
        pills.append(f'<span class="score-pill sp-mc">π― MC {_whole(mc)}</span>')

    if pills:
        st.markdown("".join(pills), unsafe_allow_html=True)
def poster_gallery(df, title_col="title", year_col="release_year", rating_col="vote_average",
                   poster_col="poster_url", badge_col=None, badge_label="", max_cards=20,
                   extra_col=None, extra_label=""):
    """Render the first ``max_cards`` rows of ``df`` as a wrapping grid of poster cards.

    Values taken from ``df`` are HTML-escaped before being placed in the
    markup, and missing values (``None``/NaN) are rendered as empty text
    rather than as ``"nan"``.

    Args:
        df: DataFrame holding one title per row.
        title_col: Column with the title; truncated to 40 characters.
        year_col: Column with the year shown next to the rating.
        rating_col: Column with a numeric rating, shown with one decimal.
        poster_col: Column with the poster URL; a placeholder image is used
            when it is missing, empty or the string ``"None"``.
        badge_col: Optional column whose value is shown as ``<label> #<value>``.
        badge_label: Text placed before the badge value.
        max_cards: Maximum number of cards to render.
        extra_col: Optional column shown as a second badge when truthy.
        extra_label: Text placed before the extra badge value.
    """
    from html import escape  # local import keeps this helper self-contained

    placeholder = "https://via.placeholder.com/150x220/181818/555555?text=No+Image"

    def _missing(value):
        # True for None and scalar NaN/NA; non-scalar values count as present.
        if value is None:
            return True
        try:
            return bool(pd.isna(value))
        except (TypeError, ValueError):
            return False

    def _num_text(value):
        # Whole-valued floats (2019.0, 3.0) are printed without the ".0".
        if isinstance(value, float):
            return str(int(value)) if value.is_integer() else f"{value:.1f}"
        return str(value)

    cards = []
    for row in df.head(max_cards).to_dict("records"):
        poster = row.get(poster_col)
        if _missing(poster) or str(poster) in ("", "None"):
            poster = placeholder
        poster = escape(str(poster), quote=True)

        raw_title = row.get(title_col)
        title = "" if _missing(raw_title) else escape(str(raw_title)[:40], quote=True)

        year = row.get(year_col)
        year_text = "" if _missing(year) else escape(_num_text(year))

        rating = row.get(rating_col)
        rating_str = ""
        if isinstance(rating, (int, float)) and not pd.isna(rating):
            rating_str = f"β {rating:.1f}"

        badge_html = ""
        if badge_col and not _missing(row.get(badge_col)):
            bval = escape(_num_text(row[badge_col]))
            badge_html = f'<span class="pc-badge badge-rank">{badge_label} #{bval}</span>'

        extra_html = ""
        if extra_col and not _missing(row.get(extra_col)) and row[extra_col]:
            extra_val = escape(str(row[extra_col]))
            extra_html = f'<span class="pc-badge badge-trend">{extra_label} {extra_val}</span>'

        # Clearing onerror first stops the handler from firing again if the
        # placeholder itself fails to load.
        cards.append("\n".join([
            "",
            '<div class="poster-card">',
            f'<img src="{poster}" alt="{title}" '
            f'onerror="this.onerror=null;this.src=\'{placeholder}\'"/>',
            '<div class="pc-info">',
            f'<div class="pc-title">{title}</div>',
            f'<div class="pc-meta">{year_text} {rating_str}</div>',
            f"{badge_html}{extra_html}",
            "</div>",
            "</div>",
        ]))

    cards_html = '<div class="poster-grid">' + "".join(cards) + "</div>"
    st.markdown(cards_html, unsafe_allow_html=True)
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # HELPER: safe column selector | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def safe_cols(df, cols):
    """Filter ``cols`` down to the names present in ``df.columns``.

    The order of ``cols`` is preserved; names missing from ``df`` are dropped.
    """
    available = df.columns
    return [name for name in cols if name in available]
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # DATA LOADING | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
BASE = "hf://datasets/ihhereanth/netflix_dataset/"


# Streamlit re-runs the whole script on every widget interaction; caching keeps
# the eleven remote parquet reads from being repeated each time. The outer
# st.spinner at the call site already reports progress, hence show_spinner=False.
@st.cache_data(ttl=3600, show_spinner=False)
def load_data():
    """Load every dataset table under ``BASE`` and return them as DataFrames.

    A table that cannot be read is reported with ``st.warning`` and replaced by
    an empty DataFrame, so a single bad file does not abort the load.

    Returns:
        An 11-tuple ``(movies, tv, credits, keywords, content_perf,
        genre_perf, lang_summary, top_talent, yoy_trend, franchises,
        tv_seasons)``. The listed numeric columns of ``movies``, ``tv`` and
        ``content_perf`` are coerced with ``pd.to_numeric(errors="coerce")``;
        ``movies``/``tv`` gain a ``decade`` label column when absent, and
        ``credits["gender"]`` codes 0/1/2 are mapped to labels.
    """

    def safe_load(filename):
        """Read one parquet file; fall back to pyarrow, then to an empty frame."""
        path = BASE + filename
        try:
            return pd.read_parquet(path)
        except Exception as first_err:
            # Second attempt: read through pyarrow directly.
            try:
                import pyarrow.parquet as pq
                return pq.read_table(path).to_pandas(strings_to_categorical=False)
            except Exception as e2:
                # Report both failures; the first one used to be discarded.
                st.warning(f"β οΈ ΰΉΰΈ‘ΰΉΰΈΰΈ {filename}: {e2} (pandas: {first_err})")
                return pd.DataFrame()

    def to_num(df, cols):
        """Coerce the columns of ``cols`` that exist in ``df`` to numeric."""
        for c in cols:
            if c in df.columns:
                df[c] = pd.to_numeric(df[c], errors="coerce")
        return df

    def add_decade(df, year_col):
        """Add a ``decade`` label ("1990s") from ``year_col`` unless already present."""
        if year_col in df.columns and "decade" not in df.columns:
            df["decade"] = (df[year_col] // 10 * 10).astype("Int64").astype(str) + "s"
        return df

    movies = safe_load("movies.parquet")
    tv = safe_load("tv_shows.parquet")
    credits = safe_load("credits.parquet")
    keywords = safe_load("keywords.parquet")

    m_num = ["vote_count", "vote_average", "runtime_min", "budget_usd", "revenue_usd", "popularity",
             "release_year", "release_month", "roi", "imdb_rating", "imdb_votes", "rt_score",
             "metacritic_score", "audience_engagement_score", "profit_usd", "omdb_box_office",
             "best_weekly_rank", "critic_audience_gap", "release_decade"]
    tv_num = ["vote_count", "vote_average", "popularity", "number_of_seasons", "number_of_episodes",
              "first_air_year", "last_air_year", "imdb_rating", "imdb_votes", "rt_score",
              "metacritic_score", "audience_engagement_score", "votes_per_episode",
              "total_content_hours", "years_on_air", "best_weekly_rank", "first_air_decade"]
    movies = to_num(movies, m_num)
    tv = to_num(tv, tv_num)

    # Derived for old pipeline compatibility
    movies = add_decade(movies, "release_year")
    tv = add_decade(tv, "first_air_year")
    if "gender" in credits.columns:
        credits["gender"] = credits["gender"].map({0: "Unknown", 1: "Female", 2: "Male"}).fillna("Unknown")

    # New analytics tables
    content_perf = safe_load("content_performance.parquet")
    genre_perf = safe_load("genre_performance.parquet")
    lang_summary = safe_load("language_summary.parquet")
    top_talent = safe_load("top_talent.parquet")
    yoy_trend = safe_load("yoy_trend.parquet")
    franchises = safe_load("franchises.parquet")
    tv_seasons = safe_load("tv_seasons.parquet")

    num_cols_cp = ["vote_average", "vote_count", "popularity", "imdb_rating", "imdb_votes",
                   "rt_score", "metacritic_score", "audience_engagement_score",
                   "budget_usd", "revenue_usd", "roi", "best_weekly_rank"]
    content_perf = to_num(content_perf, num_cols_cp)

    return (movies, tv, credits, keywords,
            content_perf, genre_perf, lang_summary, top_talent, yoy_trend, franchises, tv_seasons)
# Load all tables behind a spinner; on any failure show the error and halt the script.
with st.spinner("ΰΈΰΈ³ΰΈ₯ΰΈ±ΰΈΰΉΰΈ«ΰΈ₯ΰΈΰΈΰΉΰΈΰΈ‘ΰΈΉΰΈ₯ Netflix..."):
    try:
        _loaded_tables = load_data()
        (
            movies,
            tv,
            credits,
            keywords,
            content_perf,
            genre_perf,
            lang_summary,
            top_talent,
            yoy_trend,
            franchises,
            tv_seasons,
        ) = _loaded_tables
    except Exception as e:
        st.error(f"ΰΉΰΈ«ΰΈ₯ΰΈΰΈΰΉΰΈΰΈ‘ΰΈΉΰΈ₯ΰΉΰΈ‘ΰΉΰΈͺΰΈ³ΰΉΰΈ£ΰΉΰΈ: {e}")
        st.stop()
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # SIDEBAR FILTERS | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def _genre_names(cell):
    """Return the genre names stored in one ``genres`` cell as a list.

    Accepts any non-string iterable (list, tuple, numpy array); ``None``,
    strings and non-iterable scalars such as NaN yield an empty list.
    """
    if cell is None or isinstance(cell, (str, bytes)):
        return []
    try:
        return list(cell)
    except TypeError:
        return []


# Sidebar: branding block followed by every filter widget. The selections are
# plain module-level names read later by the filter functions.
with st.sidebar:
    st.markdown("""
<div style="text-align:center; padding:14px 0 22px 0;">
<div style="font-family:'Bebas Neue',sans-serif;font-size:30px;letter-spacing:3px;color:#E50914;">
NETFLIX
</div>
<div style="font-size:9px;letter-spacing:2.5px;color:#555;text-transform:uppercase;">
Analytics Dashboard 2.0
</div>
</div>""", unsafe_allow_html=True)
    st.markdown("### ποΈ Filters")

    # Media type
    media_type_sel = st.radio("π½οΈ Media Type", ["All", "Movies Only", "TV Only"], horizontal=True)

    # Genres from movies. Cells are not assumed to be Python lists: list
    # columns read from parquet may arrive as numpy arrays.
    if "genres" in movies.columns:
        all_genres = sorted({
            g
            for cell in movies["genres"]
            for g in _genre_names(cell)
            if isinstance(g, str)
        })
    else:
        all_genres = []
    selected_genres = st.multiselect("π Genre", all_genres, default=[])

    # Year range: bounds come from the data when any year is known.
    if "release_year" in movies.columns:
        known_years = movies["release_year"].dropna()
    else:
        known_years = pd.Series(dtype="float64")
    if known_years.empty:
        y_min, y_max = 1990, 2024
    else:
        y_min, y_max = int(known_years.min()), int(known_years.max())
    # Clamp the preferred start year (2010) into the available range so the
    # slider default is always valid.
    default_start = min(max(2010, y_min), y_max)
    year_range = st.slider("π Release Year (Movies)", y_min, y_max, (default_start, y_max))

    # Language
    all_langs = []
    if "original_language" in movies.columns:
        all_langs = sorted(movies["original_language"].dropna().unique().tolist())
    selected_langs = st.multiselect("π Language", all_langs, default=[])

    # Rating range
    rating_min, rating_max = st.slider("β Rating Range", 0.0, 10.0, (5.0, 10.0), step=0.5)

    # Min votes
    min_votes = st.slider("π³οΈ Min Vote Count", 0, 5000, 100, step=50)

    st.markdown("---")
    st.markdown("### π¬ Advanced Filters")
    trending_only = st.checkbox("π₯ Trending Top 50 Only")
    awards_only = st.checkbox("π Awards Titles Only")
    has_poster = st.checkbox("πΌοΈ With Poster Image", value=False)
    if "budget_tier" in movies.columns:
        budget_tiers = ["All"] + sorted(movies["budget_tier"].dropna().unique().tolist())
        sel_budget = st.selectbox("π° Budget Tier", budget_tiers)
    else:
        sel_budget = "All"

    st.markdown("---")
    st.markdown("""
<div style="font-size:10px;color:#333;text-align:center;">
TMDB β Airflow β PySpark β HuggingFace<br>
Pipeline updated weekly
</div>""", unsafe_allow_html=True)
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # APPLY FILTERS | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def _genre_set(cell):
    """Return the genres of one ``genres`` cell as a set.

    Accepts any non-string iterable (list, tuple, numpy array); ``None``,
    strings and non-iterable scalars such as NaN yield an empty set.
    """
    if cell is None or isinstance(cell, (str, bytes)):
        return set()
    try:
        return set(cell)
    except TypeError:
        return set()


def _apply_shared_filters(df):
    """Apply the sidebar filters that movies and TV shows have in common.

    Reads the module-level selections ``selected_langs``, ``rating_min``,
    ``rating_max``, ``min_votes``, ``trending_only``, ``awards_only`` and
    ``has_poster``. A filter is skipped when its column is absent.
    """
    if selected_langs and "original_language" in df.columns:
        df = df[df["original_language"].isin(selected_langs)]
    if "vote_average" in df.columns:
        df = df[df["vote_average"].between(rating_min, rating_max)]
    if "vote_count" in df.columns:
        df = df[df["vote_count"] >= min_votes]
    if trending_only and "is_trending_top50" in df.columns:
        df = df[df["is_trending_top50"].eq(True)]
    if awards_only and "has_awards" in df.columns:
        df = df[df["has_awards"].eq(True)]
    if has_poster and "poster_url" in df.columns:
        df = df[df["poster_url"].notna() & (df["poster_url"] != "None")]
    return df


def apply_movie_filters(df):
    """Return the rows of ``df`` that match the sidebar's movie filters.

    On top of the shared filters this applies the genre selection (a row is
    kept when it has at least one selected genre), the release-year range and
    the budget tier. An empty ``df`` is returned unchanged.
    """
    if df.empty:
        return df
    if selected_genres and "genres" in df.columns:
        wanted = set(selected_genres)
        # An explicit boolean Series also works when cells are numpy arrays,
        # whose truth value is ambiguous.
        keep = pd.Series(
            [not wanted.isdisjoint(_genre_set(cell)) for cell in df["genres"]],
            index=df.index,
            dtype=bool,
        )
        df = df[keep]
    if "release_year" in df.columns:
        df = df[df["release_year"].between(*year_range)]
    df = _apply_shared_filters(df)
    if sel_budget != "All" and "budget_tier" in df.columns:
        df = df[df["budget_tier"] == sel_budget]
    return df


def apply_tv_filters(df):
    """Return the rows of ``df`` that match the sidebar's shared filters.

    Genre, release-year and budget-tier selections are not applied to TV
    shows. An empty ``df`` is returned unchanged.
    """
    if df.empty:
        return df
    return _apply_shared_filters(df)
# Filter copies so the loaded tables themselves are left as they were.
_movies_copy = movies.copy()
_tv_copy = tv.copy()
movies_f = apply_movie_filters(_movies_copy)
tv_f = apply_tv_filters(_tv_copy)

# Visibility flags derived from the media-type radio.
show_movies = not (media_type_sel == "TV Only")
show_tv = not (media_type_sel == "Movies Only")
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # HERO HEADER | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
# Page title and subtitle, followed by a horizontal rule.
_HERO_HTML = """
<div style="padding:24px 0 12px 0;">
<div class="hero-title">NETFLIX <span>ANALYTICS</span> <span style="color:#6C5CE7;">2.0</span></div>
<div class="hero-sub">Content Intelligence Dashboard Β· TMDB + OMDB + Trending Pipeline</div>
</div>"""
st.markdown(_HERO_HTML, unsafe_allow_html=True)
st.markdown("---")
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # MAIN TABS | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
# One tab per dashboard section, in display order.
_TAB_LABELS = [
    "π Overview",
    "π Explorer",
    "π Engagement",
    "π Genres",
    "π¬ Movies",
    "πΊ TV Shows",
    "π Talent",
    "π Trends",
]
(
    tab_overview,
    tab_explorer,
    tab_engage,
    tab_genre,
    tab_movies,
    tab_tv,
    tab_talent,
    tab_trends,
) = st.tabs(_TAB_LABELS)
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # TAB 1: OVERVIEW | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
# Overview tab: KPI strip, trending galleries, highlight cards and two charts.
# Every column access is guarded because tables may load empty or without
# optional columns.
with tab_overview:
    from html import escape  # dataset titles are interpolated into raw HTML below

    def _mean_or_zero(frame, col):
        """Mean of ``frame[col]``; 0 when the column is absent, empty or all-NaN."""
        if col not in frame.columns or frame.empty:
            return 0
        mean = frame[col].mean()
        return 0 if pd.isna(mean) else mean

    def _sum_or_zero(frame, col):
        """Sum of ``frame[col]``; 0 when the column is absent."""
        return frame[col].sum() if col in frame.columns else 0

    def _safe_title(row):
        """HTML-escaped ``title`` of a row."""
        return escape(str(row.get("title", "")))

    sec("π KPI OVERVIEW")
    avg_m = _mean_or_zero(movies_f, "vote_average")
    avg_tv = _mean_or_zero(tv_f, "vote_average")
    avg_eng = _mean_or_zero(movies_f, "audience_engagement_score")
    total_r = _sum_or_zero(movies_f, "revenue_usd")
    total_b = _sum_or_zero(movies_f, "budget_usd")
    n_trend = int(_sum_or_zero(movies_f, "is_trending_top50"))
    n_award = int(_sum_or_zero(movies_f, "has_awards"))
    n_oscar = int(_sum_or_zero(movies_f, "won_oscar"))

    c1, c2, c3, c4, c5, c6, c7, c8 = st.columns(8)
    with c1:
        kpi("π¬", f"{len(movies_f):,}", "Movies", accent=NF_RED)
    with c2:
        kpi("πΊ", f"{len(tv_f):,}", "TV Shows", accent=PURPLE)
    with c3:
        kpi("β", f"{avg_m:.2f}", "Avg Movie Rating",
            delta=f"TV: {avg_tv:.2f}",
            dt="pos" if avg_m >= avg_tv else "neg", accent=GOLD)
    with c4:
        kpi("π₯", f"{avg_eng:.1f}", "Avg Engagement", accent=NF_RED)
    with c5:
        # The multiple is only shown when a budget total exists to divide by.
        kpi("π°", f"${total_r/1e9:.1f}B", "Total Revenue",
            delta=f"ROI {total_r/total_b:.1f}x" if total_b > 0 else None,
            dt="pos", accent=GREEN)
    with c6:
        kpi("π", f"{n_trend:,}", "Trending Movies", accent=TEAL)
    with c7:
        kpi("π", f"{n_award:,}", "Award Winners", accent=GOLD)
    with c8:
        kpi("π", f"{n_oscar:,}", "Oscar Winners", accent=GOLD)
    st.markdown("---")

    # Trending gallery with posters
    sec("π₯ TRENDING NOW β TOP PICKS WITH POSTERS")
    col_tl, col_tr = st.columns([3, 1], gap="large")
    with col_tl:
        trending_movies = pd.DataFrame()
        if "best_weekly_rank" in movies_f.columns:
            trending_movies = (movies_f[movies_f["best_weekly_rank"].notna()]
                               .sort_values("best_weekly_rank")
                               .head(20))
        if not trending_movies.empty:
            st.markdown("**π¬ Trending Movies (Weekly Rank)**")
            poster_gallery(trending_movies, title_col="title", year_col="release_year",
                           rating_col="vote_average", poster_col="poster_url",
                           badge_col="best_weekly_rank", badge_label="Rank", max_cards=12)
        else:
            # No trending data: fall back to the best-rated titles. Prefer
            # titles with at least 200 votes, unless that leaves nothing.
            st.markdown("**π¬ Top Rated Movies**")
            top_pool = movies_f
            if "vote_count" in movies_f.columns:
                well_voted = movies_f[movies_f["vote_count"] >= 200]
                if not well_voted.empty:
                    top_pool = well_voted
            if "vote_average" in top_pool.columns:
                top_m = top_pool.nlargest(12, "vote_average")
            else:
                top_m = top_pool.head(12)
            poster_gallery(top_m.reset_index(drop=True), max_cards=12)

    with col_tr:
        sec("π HIGHLIGHTS")
        has_rating = "vote_average" in movies_f.columns

        if not movies_f.empty and has_rating:
            rated_pool = movies_f
            if "vote_count" in movies_f.columns:
                rated_pool = movies_f[movies_f["vote_count"] >= 200]
            best = rated_pool.nlargest(1, "vote_average")
            if not best.empty:
                best = best.iloc[0]
                insight(f"π₯ Best Rated: <strong>{_safe_title(best)}</strong><br>β {best.get('vote_average',0):.1f}/10", "red")

        if "audience_engagement_score" in movies_f.columns and not movies_f.empty:
            top_eng = movies_f.nlargest(1, "audience_engagement_score")
            if not top_eng.empty:
                te = top_eng.iloc[0]
                eng_score = te.get("audience_engagement_score", 0)
                if pd.notna(eng_score):
                    insight(f"π₯ Highest Engagement: <strong>{_safe_title(te)}</strong><br>Score: {eng_score:.1f}", "teal")

        if "won_oscar" in movies_f.columns:
            oscars = movies_f[movies_f["won_oscar"].eq(True)]
            if not oscars.empty:
                if has_rating:
                    o = oscars.nlargest(1, "vote_average").iloc[0]
                    o_rating = o.get("vote_average", 0)
                else:
                    o = oscars.iloc[0]
                    o_rating = 0
                if pd.isna(o_rating):
                    o_rating = 0
                insight(f"π Oscar Winner: <strong>{_safe_title(o)}</strong><br>β {o_rating:.1f}", "gold")

        if "imdb_votes" in movies_f.columns and not movies_f.empty:
            most_voted = movies_f.nlargest(1, "imdb_votes")
            if not most_voted.empty:
                mv = most_voted.iloc[0]
                votes = mv.get("imdb_votes", 0)
                if pd.notna(votes):
                    insight(f"π₯ Most Watched (IMDb Votes):<br><strong>{_safe_title(mv)}</strong><br>{votes/1e6:.1f}M votes", "green")
    st.markdown("---")

    # Trending TV gallery
    if show_tv and "best_weekly_rank" in tv_f.columns:
        trending_tv = (tv_f[tv_f["best_weekly_rank"].notna()]
                       .sort_values("best_weekly_rank").head(12))
        if not trending_tv.empty:
            st.markdown("**πΊ Trending TV Shows**")
            poster_gallery(trending_tv, title_col="name", year_col="first_air_year",
                           rating_col="vote_average", poster_col="poster_url",
                           badge_col="best_weekly_rank", badge_label="Rank", max_cards=12)
            st.markdown("---")

    # Overview charts
    col_ov1, col_ov2 = st.columns(2, gap="large")
    with col_ov1:
        sec("π CONTENT OVER TIME")
        # The year-over-year table is used only when all three columns the
        # chart reads are present; otherwise fall back to counting movies.
        yoy_ready = (not yoy_trend.empty
                     and {"year", "media_type", "title_count"} <= set(yoy_trend.columns))
        if yoy_ready:
            yoy_m = yoy_trend[yoy_trend["media_type"] == "movie"].sort_values("year")
            yoy_tv = yoy_trend[yoy_trend["media_type"] == "tv"].sort_values("year")
            fig = go.Figure()
            if not yoy_m.empty:
                fig.add_trace(go.Scatter(x=yoy_m["year"], y=yoy_m["title_count"],
                                         name="Movies", mode="lines+markers", fill="tozeroy",
                                         line=dict(color=NF_RED, width=2),
                                         fillcolor=hex_rgba(NF_RED, 0.12)))
            if not yoy_tv.empty:
                fig.add_trace(go.Scatter(x=yoy_tv["year"], y=yoy_tv["title_count"],
                                         name="TV Shows", mode="lines+markers", fill="tozeroy",
                                         line=dict(color=PURPLE, width=2),
                                         fillcolor=hex_rgba(PURPLE, 0.12)))
            fig.update_layout(xaxis_title="Year", yaxis_title="Titles Added")
            apply_theme(fig)
            st.plotly_chart(fig, use_container_width=True)
        elif "release_year" in movies_f.columns:
            by_year = movies_f.groupby("release_year").size().reset_index(name="count")
            fig = px.area(by_year, x="release_year", y="count",
                          color_discrete_sequence=[NF_RED])
            apply_theme(fig)
            st.plotly_chart(fig, use_container_width=True)

    with col_ov2:
        sec("π RATING DISTRIBUTION")
        fig2 = go.Figure()
        if show_movies and "vote_average" in movies_f.columns and not movies_f.empty:
            fig2.add_trace(go.Histogram(x=movies_f["vote_average"].dropna(),
                                        name="Movies", nbinsx=25, marker_color=NF_RED,
                                        opacity=0.7, histnorm="percent"))
        if show_tv and "vote_average" in tv_f.columns and not tv_f.empty:
            fig2.add_trace(go.Histogram(x=tv_f["vote_average"].dropna(),
                                        name="TV Shows", nbinsx=25, marker_color=PURPLE,
                                        opacity=0.7, histnorm="percent"))
        fig2.update_layout(barmode="overlay", xaxis_title="Rating", yaxis_title="% of Titles")
        apply_theme(fig2)
        st.plotly_chart(fig2, use_container_width=True)
# ──────────────────────────────────────────────────────────────────────────────
# TAB 2: CONTENT EXPLORER
# ──────────────────────────────────────────────────────────────────────────────
# Search / sort / browse the filtered movie or TV frame in one of three views:
# a poster grid, a tabular list, or a detail card for a single title.
with tab_explorer:
    # Local import: dataset text below is interpolated into raw HTML.
    from html import escape as _dc_escape

    def _dc_text(value):
        """Return *value* when it is a usable string, else "" (None, NaN, "None")."""
        return value if isinstance(value, str) and value != "None" else ""

    def _dc_flag(value):
        """Return True only for a present, truthy scalar; NaN/NA/None are False."""
        if value is None or value is pd.NA:
            return False
        if isinstance(value, float) and pd.isna(value):
            return False  # NaN is truthy in Python, so it must be rejected explicitly
        return bool(value)

    sec("π CONTENT EXPLORER")
    st.markdown("ΰΈΰΉΰΈΰΈ«ΰΈ²ΰΉΰΈ₯ΰΈ°ΰΈͺΰΈ³ΰΈ£ΰΈ§ΰΈ Content ΰΈΰΈ£ΰΉΰΈΰΈ‘ΰΈ£ΰΈΉΰΈΰΈΰΈ ΰΉΰΈ₯ΰΈ°ΰΈΰΈ°ΰΉΰΈΰΈΰΈΰΈ²ΰΈΰΈΰΈΈΰΈΰΉΰΈ«ΰΈ₯ΰΉΰΈ")

    col_ex1, col_ex2, col_ex3, col_ex4 = st.columns([2,1,1,1])
    with col_ex1:
        search_q = st.text_input("π ΰΈΰΉΰΈΰΈ«ΰΈ²ΰΈΰΈ·ΰΉΰΈ", placeholder="ΰΈΰΈ΄ΰΈ‘ΰΈΰΉΰΈΰΈ·ΰΉΰΈΰΈ«ΰΈΰΈ±ΰΈ / ΰΈΰΈ΅ΰΈ£ΰΈ΅ΰΈͺΰΉ...")
    with col_ex2:
        ex_type = st.selectbox("Type", ["Movies","TV Shows"])
    with col_ex3:
        ex_sort = st.selectbox("Sort by", [
            "vote_average","audience_engagement_score","imdb_votes",
            "popularity","release_year","revenue_usd","rt_score"
        ])
    with col_ex4:
        ex_limit = st.selectbox("Show", [24, 48, 96], index=0)

    # Select the working frame and the column names that differ between media types.
    ex_df = movies_f.copy() if ex_type == "Movies" else tv_f.copy()
    name_col = "title" if ex_type == "Movies" else "name"
    year_col = "release_year" if ex_type == "Movies" else "first_air_year"

    if search_q and name_col in ex_df.columns:
        # regex=False: the query is literal user text; as a regex, input such as
        # "(" or "[" would raise re.error and crash the page.
        ex_df = ex_df[ex_df[name_col].fillna("").str.contains(
            search_q, case=False, na=False, regex=False)]

    # Only sort by columns that exist. "release_year" falls back to the frame's
    # own year column so the option is not silently ignored for TV shows.
    sort_col = ex_sort
    if sort_col not in ex_df.columns and sort_col == "release_year" and year_col in ex_df.columns:
        sort_col = year_col
    if sort_col in ex_df.columns:
        ex_df = ex_df.sort_values(sort_col, ascending=False)

    st.caption(f"ΰΈΰΈ {len(ex_df):,} ΰΈ£ΰΈ²ΰΈ’ΰΈΰΈ²ΰΈ£")

    # View selector. Labels are bound once so the comparisons below cannot drift
    # from the options offered to the user.
    view_grid = "πΌοΈ Poster Grid"
    view_list = "π List View"
    view_detail = "ποΈ Detail Card"
    view_mode = st.radio("π View", [view_grid, view_list, view_detail], horizontal=True)

    if view_mode == view_grid:
        poster_gallery(ex_df.reset_index(drop=True), title_col=name_col, year_col=year_col,
                       rating_col="vote_average", poster_col="poster_url", max_cards=ex_limit)
    elif view_mode == view_list:
        show_cols = safe_cols(ex_df, [name_col, year_col,
                                      "vote_average","imdb_rating","rt_score","metacritic_score",
                                      "audience_engagement_score","imdb_votes","popularity",
                                      "original_language","rating_bucket","imdb_votes_tier"])
        st.dataframe(ex_df[show_cols].head(ex_limit).reset_index(drop=True),
                     use_container_width=True, height=500)
    else:  # Detail Card
        detail_titles = ex_df[name_col].dropna().head(200).tolist() if name_col in ex_df.columns else []
        detail_sel = st.selectbox("ΰΉΰΈ₯ΰΈ·ΰΈΰΈ Title", detail_titles)
        if detail_sel and name_col in ex_df.columns:
            # First match wins when several rows share the same title.
            row = ex_df[ex_df[name_col]==detail_sel].iloc[0]
            col_dc1, col_dc2 = st.columns([1,3], gap="large")
            with col_dc1:
                poster = _dc_text(row.get("poster_url",""))
                if poster:
                    st.image(poster, width=220)
                else:
                    st.markdown('<div style="width:220px;height:320px;background:#1a1a1a;border-radius:10px;display:flex;align-items:center;justify-content:center;color:#555;font-size:12px;">No Image</div>', unsafe_allow_html=True)
                backdrop = _dc_text(row.get("backdrop_url"))
                if backdrop:
                    with st.expander("πΌοΈ Backdrop"):
                        st.image(backdrop, use_container_width=True)
            with col_dc2:
                title_display = row.get(name_col,"")
                yr = row.get(year_col,"")
                yr_text = "" if yr is None or pd.isna(yr) else yr
                lang = _dc_text(row.get("original_language",""))
                st.markdown(f'<div class="detail-title">{_dc_escape(str(title_display))}</div>', unsafe_allow_html=True)
                st.markdown(f'<div class="detail-meta">π {yr_text} | π {_dc_escape(lang.upper())}</div>', unsafe_allow_html=True)
                st.markdown("**Multi-Source Scores:**")
                score_pills(
                    tmdb=row.get("vote_average"),
                    imdb=row.get("imdb_rating"),
                    rt=row.get("rt_score"),
                    mc=row.get("metacritic_score")
                )

                # Tags
                tag_parts = []
                genres = row.get("genres")
                # NOTE(review): genres is assumed to be list-like; an ndarray is
                # accepted too in case the frame was loaded from parquet — confirm.
                if isinstance(genres, (list, tuple, np.ndarray)):
                    for g in list(genres)[:5]:
                        tag_parts.append(f'<span class="tag tag-genre">{_dc_escape(str(g))}</span>')
                for status_field in ["status","renewal_signal","critic_audience_verdict"]:
                    status_val = _dc_text(row.get(status_field))
                    if status_val:
                        tag_parts.append(f'<span class="tag tag-status">{_dc_escape(status_val)}</span>')
                if _dc_flag(row.get("has_awards")):
                    tag_parts.append('<span class="tag tag-award">π Award Winner</span>')
                if _dc_flag(row.get("won_oscar")):
                    tag_parts.append('<span class="tag tag-award">π Oscar Winner</span>')
                tag_html = "".join(tag_parts)
                if tag_html:
                    st.markdown(tag_html, unsafe_allow_html=True)

                # Overview
                overview = _dc_text(row.get("overview",""))
                if overview:
                    st.markdown("---")
                    st.markdown(f"**π Overview**")
                    st.markdown(f'<div style="color:{TEXT_MUT};font-size:13px;line-height:1.6;">{_dc_escape(overview)}</div>',
                                unsafe_allow_html=True)

                # Stats
                st.markdown("---")
                col_s1, col_s2, col_s3, col_s4 = st.columns(4)
                with col_s1:
                    votes = row.get("vote_count",0) or 0
                    st.metric("TMDB Votes", f"{int(votes):,}" if pd.notna(votes) else "N/A")
                with col_s2:
                    iv = row.get("imdb_votes",0) or 0
                    st.metric("IMDb Votes", f"{int(iv):,}" if pd.notna(iv) else "N/A")
                with col_s3:
                    eng = row.get("audience_engagement_score")
                    st.metric("Engagement", f"{eng:.1f}" if pd.notna(eng) else "N/A")
                with col_s4:
                    pop = row.get("popularity")
                    st.metric("Popularity", f"{pop:.0f}" if pd.notna(pop) else "N/A")

                # Media-type specific metrics
                col_m1, col_m2, col_m3 = st.columns(3)
                if ex_type == "Movies":
                    with col_m1:
                        rev = row.get("revenue_usd",0) or 0
                        st.metric("Revenue", f"${rev/1e6:.0f}M" if rev>0 else "N/A")
                    with col_m2:
                        roi = row.get("roi")
                        st.metric("ROI", f"{roi:.1f}x" if pd.notna(roi) else "N/A")
                    with col_m3:
                        rt = row.get("runtime_min")
                        st.metric("Runtime", f"{int(rt)} min" if pd.notna(rt) else "N/A")
                else:
                    with col_m1:
                        s = row.get("number_of_seasons")
                        st.metric("Seasons", f"{int(s)}" if pd.notna(s) else "N/A")
                    with col_m2:
                        e = row.get("number_of_episodes")
                        st.metric("Episodes", f"{int(e)}" if pd.notna(e) else "N/A")
                    with col_m3:
                        h = row.get("total_content_hours")
                        st.metric("Total Hours", f"{h:.0f}h" if pd.notna(h) else "N/A")

                # Trailer
                yk = _dc_text(row.get("trailer_youtube_key"))
                if yk:
                    st.markdown("---")
                    st.markdown(f"**π¬ Trailer**")
                    st.video(f"https://www.youtube.com/watch?v={yk}")
# ──────────────────────────────────────────────────────────────────────────────
# TAB 3: ENGAGEMENT & VIEWERSHIP
# ──────────────────────────────────────────────────────────────────────────────
# Engagement-score distributions, IMDb vote tiers, an engagement-vs-votes
# scatter, and a critic-vs-audience comparison for the filtered movies.
with tab_engage:
    sec("π ENGAGEMENT & VIEWERSHIP ANALYTICS")
    col_e1, col_e2 = st.columns(2, gap="large")

    with col_e1:
        st.markdown("**Audience Engagement Score Distribution**")
        if "audience_engagement_score" in movies_f.columns and not movies_f.empty:
            fig_eng = go.Figure()
            if show_movies:
                fig_eng.add_trace(go.Histogram(
                    x=movies_f["audience_engagement_score"].dropna(),
                    name="Movies", nbinsx=30, marker_color=NF_RED, opacity=0.75, histnorm="percent"))
            if show_tv and "audience_engagement_score" in tv_f.columns:
                fig_eng.add_trace(go.Histogram(
                    x=tv_f["audience_engagement_score"].dropna(),
                    name="TV Shows", nbinsx=30, marker_color=PURPLE, opacity=0.75, histnorm="percent"))
            fig_eng.update_layout(barmode="overlay", xaxis_title="Engagement Score", yaxis_title="%")
            apply_theme(fig_eng, 340)
            st.plotly_chart(fig_eng, use_container_width=True)

    with col_e2:
        st.markdown("**IMDb Votes Tier (Viewership Proxy)**")
        if "imdb_votes_tier" in movies_f.columns and not movies_f.empty:
            tier_order = ["Mega (1M+)","Hit (500K+)","Popular (100K+)","Moderate (10K+)","Niche (<10K)"]
            # reindex keeps every tier on the chart, with 0 for tiers that have no titles.
            tier_counts = (movies_f["imdb_votes_tier"].value_counts()
                           .reindex(tier_order, fill_value=0).reset_index())
            tier_counts.columns = ["tier","count"]
            fig_tier = px.bar(tier_counts, x="count", y="tier", orientation="h",
                              color="count",
                              color_continuous_scale=["#1a0505", NF_RED],
                              text="count", labels={"count":"Movies","tier":""})
            fig_tier.update_traces(texttemplate="%{text:,}", textposition="outside")
            # Reversed order so the first tier is drawn at the top of the horizontal bar chart.
            fig_tier.update_layout(yaxis={"categoryorder":"array","categoryarray":tier_order[::-1]},
                                   coloraxis_showscale=False)
            apply_theme(fig_tier, 340)
            st.plotly_chart(fig_tier, use_container_width=True)

    st.markdown("---")

    # Scatter: Engagement vs IMDb Votes
    col_e3, col_e4 = st.columns([2,1], gap="large")
    with col_e3:
        st.markdown("**Engagement Score vs IMDb Votes (Viewership)**")
        if all(c in movies_f.columns for c in ["audience_engagement_score","imdb_votes","title"]):
            # The x axis is logarithmic, so only strictly positive vote counts can be
            # placed on it; non-positive values are dropped here explicitly.
            sc_df = movies_f[
                movies_f["imdb_votes"].notna()
                & movies_f["audience_engagement_score"].notna()
                & (movies_f["imdb_votes"] > 0)
            ].copy()
            if not sc_df.empty:
                # Marker size must be free of NaN; clip keeps extreme titles from dominating.
                if "vote_count" in sc_df.columns:
                    sc_df["size_norm"] = sc_df["vote_count"].fillna(100).clip(100, 50000)
                else:
                    sc_df["size_norm"] = 100
                fig_sc = px.scatter(
                    sc_df, x="imdb_votes", y="audience_engagement_score",
                    color="vote_average" if "vote_average" in sc_df.columns else "imdb_votes",
                    hover_name="title",
                    size="size_norm", size_max=25,
                    log_x=True,
                    color_continuous_scale=["#6C1F1F", NF_RED, GOLD, GREEN],
                    labels={"imdb_votes":"IMDb Votes (log)","audience_engagement_score":"Engagement Score"},
                )
                apply_theme(fig_sc, 400)
                st.plotly_chart(fig_sc, use_container_width=True)

    with col_e4:
        st.markdown("**π Engagement Insights**")
        if "audience_engagement_score" in movies_f.columns and not movies_f.empty:
            avg_eng = movies_f["audience_engagement_score"].mean()
            top10 = movies_f.nlargest(10,"audience_engagement_score")["audience_engagement_score"].mean()
            insight(f"Avg Engagement Score: <strong>{avg_eng:.1f}/10</strong><br>Top 10 avg: <strong>{top10:.1f}</strong>", "red")
        if "imdb_votes" in movies_f.columns and not movies_f.empty:
            mega = (movies_f["imdb_votes"] >= 1_000_000).sum()
            insight(f"Movies ΰΈ£ΰΈ°ΰΈΰΈ±ΰΈ Mega (1M+ IMDb Votes): <strong>{mega} ΰΉΰΈ£ΰΈ·ΰΉΰΈΰΈ</strong><br>= ΰΈΰΈ²ΰΈΰΈΰΈΉΰΉΰΈΰΈ‘ΰΈΰΈΰΈ²ΰΈΰΉΰΈ«ΰΈΰΉΰΈ‘ΰΈ²ΰΈ", "teal")
        if "critic_audience_verdict" in movies_f.columns and not movies_f.empty:
            verdict_vc = movies_f["critic_audience_verdict"].value_counts()
            for v, cnt in verdict_vc.items():
                insight(f"<strong>{v}:</strong> {cnt:,} ΰΉΰΈ£ΰΈ·ΰΉΰΈΰΈ")

    # Critic vs Audience Divergence
    st.markdown("---")
    sec("π― CRITIC VS AUDIENCE DIVERGENCE")
    col_div1, col_div2 = st.columns([3,1], gap="large")
    with col_div1:
        if all(c in movies_f.columns for c in ["rt_score","vote_average","title"]):
            div_df = movies_f[
                movies_f["rt_score"].notna() & movies_f["vote_average"].notna()
            ].copy()
            if not div_df.empty:
                # rt_score is divided by 10 to compare against vote_average; the diagonal
                # drawn below pairs 0-10 on x with 0-100 on y.
                div_df["critic_gap"] = (div_df["rt_score"]/10) - div_df["vote_average"]
                # Fixed random_state: Streamlit reruns the script on every widget
                # interaction, and an unseeded sample would redraw different points each time.
                div_sample = div_df.sample(min(500,len(div_df)), random_state=42)
                fig_div = px.scatter(
                    div_sample,
                    x="vote_average", y="rt_score",
                    color="critic_audience_verdict" if "critic_audience_verdict" in div_df.columns else "vote_average",
                    hover_name="title",
                    color_discrete_map={"Critics Favorite":PURPLE,"Audience Favorite":NF_RED,"Consensus":TEAL},
                    labels={"vote_average":"TMDB Rating","rt_score":"Rotten Tomatoes (%)"},
                )
                fig_div.add_shape(type="line", x0=0, y0=0, x1=10, y1=100,
                                  line=dict(color=NF_BORDER, dash="dash", width=1))
                apply_theme(fig_div, 380)
                st.plotly_chart(fig_div, use_container_width=True)
    with col_div2:
        if "critic_audience_verdict" in movies_f.columns and not movies_f.empty:
            vc = movies_f["critic_audience_verdict"].value_counts()
            for v, cnt in vc.items():
                style = "teal" if "Critics" in str(v) else ("red" if "Audience" in str(v) else "")
                icon = "π¬" if "Critics" in str(v) else ("πΏ" if "Audience" in str(v) else "β ")
                insight(f"{icon} <strong>{v}</strong>: {cnt:,} movies", style)
# ──────────────────────────────────────────────────────────────────────────────
# TAB 4: GENRE INTELLIGENCE
# ──────────────────────────────────────────────────────────────────────────────
# Uses the pre-aggregated genre_perf frame when it has rows; otherwise derives
# genre volume/quality from movies_f. A genre-by-decade heatmap is always drawn
# from movies_f.
with tab_genre:
    sec("π GENRE INTELLIGENCE")
    if not genre_perf.empty:
        col_gp1, col_gp2 = st.columns(2, gap="large")
        has_media_type = "media_type" in genre_perf.columns
        gp_movies = genre_perf[genre_perf["media_type"]=="movie"].copy() if has_media_type else genre_perf.copy()
        gp_tv = genre_perf[genre_perf["media_type"]=="tv"].copy() if has_media_type else pd.DataFrame()

        with col_gp1:
            st.markdown("**π¬ Movie Genres β Viewership (Total IMDb Votes)**")
            if not gp_movies.empty and all(c in gp_movies.columns for c in ["total_imdb_votes","genre"]):
                top_gm = gp_movies.nlargest(15,"total_imdb_votes")
                # Colour by average rating when available; fall back to the bar value so a
                # missing optional column does not make px.bar raise.
                has_rating = "avg_vote_average" in top_gm.columns
                fig_gv = px.bar(top_gm, x="total_imdb_votes", y="genre", orientation="h",
                                color="avg_vote_average" if has_rating else "total_imdb_votes",
                                color_continuous_scale=["#3D0000",NF_RED,GOLD,GREEN],
                                text="total_imdb_votes",
                                labels={"total_imdb_votes":"Total IMDb Votes","genre":""})
                fig_gv.update_traces(texttemplate="%{text:,.0f}", textposition="outside")
                if has_rating:
                    fig_gv.update_layout(yaxis={"categoryorder":"total ascending"}, coloraxis_showscale=True,
                                         coloraxis_colorbar=dict(title="Avg Rating", len=0.6))
                else:
                    # The "Avg Rating" colour bar would be mislabelled without the rating column.
                    fig_gv.update_layout(yaxis={"categoryorder":"total ascending"}, coloraxis_showscale=False)
                apply_theme(fig_gv, 450)
                st.plotly_chart(fig_gv, use_container_width=True)

        with col_gp2:
            st.markdown("**πΊ TV Genres β Engagement Score**")
            if not gp_tv.empty and all(c in gp_tv.columns for c in ["avg_engagement","genre"]):
                top_gt = gp_tv.nlargest(15,"avg_engagement")
                fig_ge = px.bar(top_gt, x="avg_engagement", y="genre", orientation="h",
                                color="avg_engagement",
                                color_continuous_scale=["#1a0040",PURPLE,"#A29BFE"],
                                text="avg_engagement",
                                labels={"avg_engagement":"Avg Engagement Score","genre":""})
                fig_ge.update_traces(texttemplate="%{text:.2f}", textposition="outside")
                fig_ge.update_layout(yaxis={"categoryorder":"total ascending"}, coloraxis_showscale=False)
                apply_theme(fig_ge, 450)
                st.plotly_chart(fig_ge, use_container_width=True)

        st.markdown("---")
        # Bubble chart: Volume vs Rating vs Viewership
        st.markdown("**Genre Bubble: Volume Γ Rating Γ Viewership**")
        if not gp_movies.empty and all(c in gp_movies.columns for c in ["genre","title_count","avg_vote_average","avg_imdb_votes"]):
            # Bubble size cannot be NaN, so genres without vote data are dropped first.
            gp_bubble = gp_movies.dropna(subset=["avg_imdb_votes"]).head(20)
            fig_bub = px.scatter(
                gp_bubble, x="title_count", y="avg_vote_average",
                size="avg_imdb_votes", color="genre",
                hover_name="genre",
                color_discrete_sequence=CHART_COLORS,
                size_max=60,
                labels={"title_count":"Number of Titles","avg_vote_average":"Avg Rating"},
                text="genre"
            )
            fig_bub.update_traces(textposition="top center", textfont_size=10)
            apply_theme(fig_bub, 420)
            st.plotly_chart(fig_bub, use_container_width=True)
    else:
        # Fallback to genres computed directly from the movie frame
        col_g1, col_g2 = st.columns(2, gap="large")
        with col_g1:
            if "genres" in movies_f.columns and not movies_f.empty:
                # size() counts rows per genre without depending on a "title" column.
                gc = (movies_f.explode("genres").groupby("genres").size()
                      .reset_index(name="count").rename(columns={"genres":"genre"})
                      .sort_values("count",ascending=False).head(15))
                fig_gc = px.bar(gc, x="count", y="genre", orientation="h",
                                color="count", color_continuous_scale=["#3D0000",NF_RED],
                                text="count", title="Volume by Genre")
                fig_gc.update_traces(texttemplate="%{text:,}", textposition="outside")
                fig_gc.update_layout(yaxis={"categoryorder":"total ascending"}, coloraxis_showscale=False)
                apply_theme(fig_gc, 420)
                st.plotly_chart(fig_gc, use_container_width=True)
        with col_g2:
            if "genres" in movies_f.columns and "vote_average" in movies_f.columns and not movies_f.empty:
                # Genres with fewer than 10 rated titles are excluded from the ranking.
                gr = (movies_f.explode("genres").groupby("genres")["vote_average"]
                      .agg(["mean","count"]).reset_index()
                      .rename(columns={"genres":"genre","mean":"avg_rating"})
                      .query("count >= 10").sort_values("avg_rating",ascending=False).head(15))
                fig_gr = px.bar(gr, x="avg_rating", y="genre", orientation="h",
                                color="avg_rating",
                                color_continuous_scale=[NF_RED,GOLD,GREEN],
                                text="avg_rating", title="Quality by Genre")
                fig_gr.update_traces(texttemplate="%{text:.2f}", textposition="outside")
                fig_gr.update_layout(yaxis={"categoryorder":"total ascending"}, coloraxis_showscale=False)
                apply_theme(fig_gr, 420)
                st.plotly_chart(fig_gr, use_container_width=True)

    # Genre × Decade Heatmap
    st.markdown("---")
    sec("ποΈ GENRE POPULARITY BY DECADE")
    if "genres" in movies_f.columns and "release_year" in movies_f.columns and not movies_f.empty:
        # Rows without a release year would otherwise become a bogus "<NA>s" decade column.
        movies_f_dec = movies_f.dropna(subset=["release_year"]).copy()
        movies_f_dec["decade_str"] = (movies_f_dec["release_year"] // 10 * 10).astype("Int64").astype(str) + "s"
        hmap = (movies_f_dec.explode("genres")
                .groupby(["decade_str","genres"]).size().reset_index(name="count"))
        if not hmap.empty:
            # Keep the 12 genres with the most titles overall so the heatmap stays readable.
            top_genres_hmap = hmap.groupby("genres")["count"].sum().nlargest(12).index.tolist()
            hmap_top = hmap[hmap["genres"].isin(top_genres_hmap)]
            pivot = hmap_top.pivot(index="genres", columns="decade_str", values="count").fillna(0)
            fig_hm = px.imshow(pivot, color_continuous_scale=["#0a0a0a",hex_rgba(NF_RED,0.4),NF_RED],
                               labels={"color":"Titles"}, aspect="auto")
            fig_hm.update_layout(xaxis_title="Decade", yaxis_title="")
            apply_theme(fig_hm, 380)
            st.plotly_chart(fig_hm, use_container_width=True)
# ──────────────────────────────────────────────────────────────────────────────
# TAB 5: MOVIES ANALYSIS
# ──────────────────────────────────────────────────────────────────────────────
# Four sub-tabs over the filtered movie frame: top-rated titles, box office
# (budget vs revenue, budget tiers), multi-source score comparison, franchises.
with tab_movies:
    sec("π¬ MOVIES DEEP-DIVE")
    tab_m1, tab_m2, tab_m3, tab_m4 = st.tabs(["π Top Rated","π° Box Office","π Multi-Score","π’ Franchises"])

    with tab_m1:
        col_m1, col_m2 = st.columns([3,1], gap="large")
        with col_m1:
            # "title" is the bar-chart y axis, so it is required alongside vote_average.
            if all(c in movies_f.columns for c in ["vote_average","title"]) and not movies_f.empty:
                # Guard optional columns: keep only those actually present.
                _m1_want = ["title","vote_average","vote_count","release_year","imdb_rating","has_awards"]
                _m1_cols = safe_cols(movies_f, _m1_want)
                # Without vote_count every row passes the >= 200 threshold (placeholder 999).
                vc_series = movies_f["vote_count"] if "vote_count" in movies_f.columns else pd.Series([999]*len(movies_f), index=movies_f.index)
                top_r = (movies_f[vc_series >= 200]
                         .nlargest(12, "vote_average")
                         [_m1_cols]
                         .reset_index(drop=True))
                fig_tr = px.bar(top_r, x="vote_average", y="title", orientation="h",
                                color="vote_average",
                                color_continuous_scale=["#6C1F1F",NF_RED,"#FF8C8C"],
                                text="vote_average",
                                custom_data=safe_cols(top_r, ["vote_count","release_year","imdb_rating"]))
                fig_tr.update_traces(
                    texttemplate="%{text:.2f}", textposition="outside",
                    hovertemplate="<b>%{y}</b><br>Rating: %{x:.2f}<extra></extra>")
                fig_tr.update_layout(yaxis={"categoryorder":"total ascending"}, coloraxis_showscale=False)
                apply_theme(fig_tr, 450)
                st.plotly_chart(fig_tr, use_container_width=True)
        with col_m2:
            st.markdown("#### πΌοΈ Top Picks")
            vc_series2 = movies_f["vote_count"] if "vote_count" in movies_f.columns else pd.Series([999]*len(movies_f), index=movies_f.index)
            top_imgs = (movies_f[vc_series2 >= 200].nlargest(4,"vote_average")
                        if "vote_average" in movies_f.columns else movies_f.head(4))
            for _, row in top_imgs.iterrows():
                p = row.get("poster_url","")
                # isinstance check: a missing poster stored as NaN is truthy and must be skipped.
                if isinstance(p, str) and p and p != "None":
                    st.image(p, width=150, caption=str(row.get("title",""))[:30])

    with tab_m2:
        if all(c in movies_f.columns for c in ["budget_usd","revenue_usd","title"]):
            # Restrict to titles with more than $1M on both axes (both are log-scaled).
            sc_bo = movies_f[(movies_f["budget_usd"]>1e6)&(movies_f["revenue_usd"]>1e6)].copy()
            if not sc_bo.empty:
                col_b1, col_b2 = st.columns([3,1], gap="large")
                with col_b1:
                    has_votes = "vote_count" in sc_bo.columns
                    if has_votes:
                        # Plotly Express rejects NaN in the size column; sc_bo is a private copy.
                        sc_bo["vote_count"] = sc_bo["vote_count"].fillna(0)
                    fig_bo = px.scatter(
                        sc_bo, x="budget_usd", y="revenue_usd",
                        color="roi" if "roi" in sc_bo.columns else "vote_average",
                        size="vote_count" if has_votes else None,
                        hover_name="title",
                        color_continuous_scale=["#6C1F1F",NF_RED,GOLD,GREEN],
                        log_x=True, log_y=True,
                        labels={"budget_usd":"Budget (USD)","revenue_usd":"Revenue (USD)"},
                    )
                    # Break-even diagonal: points above it earned more than their budget.
                    mx = max(sc_bo["budget_usd"].max(), sc_bo["revenue_usd"].max())
                    fig_bo.add_shape(type="line",x0=1e6,y0=1e6,x1=mx,y1=mx,
                                     line=dict(color="#444",dash="dash",width=1))
                    apply_theme(fig_bo, 450)
                    st.plotly_chart(fig_bo, use_container_width=True)
                with col_b2:
                    st.markdown("#### π° Box Office")
                    top_rev = sc_bo.nlargest(5,"revenue_usd")
                    for _, r in top_rev.iterrows():
                        p = r.get("poster_url","")
                        col_pi, col_ti = st.columns([1,2])
                        with col_pi:
                            if isinstance(p, str) and p and p != "None":
                                st.image(p, width=60)
                        with col_ti:
                            st.markdown(f"**{str(r.get('title',''))[:20]}**")
                            st.caption(f"${r.get('revenue_usd',0)/1e9:.1f}B")

        # Budget tier breakdown
        if "budget_tier" in movies_f.columns:
            st.markdown("---")
            bt = movies_f["budget_tier"].value_counts().reset_index()
            bt.columns = ["tier","count"]
            col_bt1, col_bt2 = st.columns(2)
            with col_bt1:
                fig_bt = px.pie(bt, names="tier", values="count", hole=0.5,
                                color_discrete_sequence=CHART_COLORS, title="Movies by Budget Tier")
                fig_bt.update_traces(textinfo="percent+label")
                apply_theme(fig_bt, 300)
                st.plotly_chart(fig_bt, use_container_width=True)
            with col_bt2:
                if "roi" in movies_f.columns:
                    roi_by_tier = (movies_f.groupby("budget_tier")["roi"]
                                   .mean().reset_index().rename(columns={"roi":"avg_roi"}))
                    fig_rt = px.bar(roi_by_tier, x="budget_tier", y="avg_roi",
                                    color="avg_roi",
                                    color_continuous_scale=["#3D0000",NF_RED,GREEN],
                                    text="avg_roi", title="Avg ROI by Budget Tier")
                    fig_rt.update_traces(texttemplate="%{text:.1f}x", textposition="outside")
                    fig_rt.update_layout(coloraxis_showscale=False)
                    apply_theme(fig_rt, 300)
                    st.plotly_chart(fig_rt, use_container_width=True)

    with tab_m3:
        st.markdown("**Multi-Score Comparison: TMDB vs IMDb vs RT vs Metacritic**")
        # "title" labels the x axis, so it is required together with both score columns.
        if all(c in movies_f.columns for c in ["vote_average","imdb_rating","title"]):
            multi_df = movies_f.dropna(subset=["vote_average","imdb_rating"]).copy()
            if not multi_df.empty:
                multi_sample = multi_df.nlargest(50,"vote_count") if "vote_count" in multi_df.columns else multi_df.head(50)
                fig_ms = go.Figure()
                x_titles = multi_sample["title"].str[:25].tolist()
                # vote_average and imdb_rating are multiplied by 10 to share the 0-100 axis with RT.
                for col_name, color, label in [
                    ("vote_average", TEAL, "TMDB (Γ10)"),
                    ("imdb_rating", GOLD, "IMDb (Γ10)"),
                ]:
                    if col_name in multi_sample.columns:
                        fig_ms.add_trace(go.Bar(
                            name=label, x=x_titles,
                            y=multi_sample[col_name]*10,
                            marker_color=color, opacity=0.8
                        ))
                if "rt_score" in multi_sample.columns:
                    # NOTE(review): missing RT scores are drawn as 0, which is
                    # indistinguishable from a real 0% score — confirm this is intended.
                    fig_ms.add_trace(go.Bar(
                        name="RT Score", x=x_titles,
                        y=multi_sample["rt_score"].fillna(0),
                        marker_color=NF_RED, opacity=0.8
                    ))
                fig_ms.update_layout(barmode="group", xaxis_tickangle=-45,
                                     xaxis_title="", yaxis_title="Score (normalized to 100)")
                apply_theme(fig_ms, 450)
                st.plotly_chart(fig_ms, use_container_width=True)
        else:
            st.info("Multi-score comparison ΰΈΰΉΰΈΰΈΰΈΰΈ²ΰΈ£ΰΈΰΉΰΈΰΈ‘ΰΈΉΰΈ₯ OMDB β ΰΈ£ΰΈ±ΰΈ fetch_omdb_enrichment ΰΉΰΈ pipeline ΰΈΰΉΰΈΰΈ")

    with tab_m4:
        if not franchises.empty:
            st.markdown("**π’ Top Franchises by Total Revenue**")
            col_f1, col_f2 = st.columns([2,1], gap="large")
            with col_f1:
                # The chart plots total_revenue_usd against collection_name, so both must
                # exist; a head(15) fallback without them could only make px.bar raise.
                if all(c in franchises.columns for c in ["total_revenue_usd","collection_name"]):
                    top_fr = franchises.nlargest(15,"total_revenue_usd")
                    fig_fr = px.bar(top_fr, x="total_revenue_usd", y="collection_name",
                                    orientation="h",
                                    color="movie_count" if "movie_count" in top_fr.columns else "total_revenue_usd",
                                    color_continuous_scale=["#1a0010",PINK],
                                    text="total_revenue_usd",
                                    labels={"total_revenue_usd":"Total Revenue (USD)","collection_name":""})
                    fig_fr.update_traces(texttemplate="$%{text:,.0f}", textposition="outside")
                    fig_fr.update_layout(yaxis={"categoryorder":"total ascending"}, coloraxis_showscale=False)
                    apply_theme(fig_fr, 450)
                    st.plotly_chart(fig_fr, use_container_width=True)
            with col_f2:
                if all(c in franchises.columns for c in ["franchise_roi","collection_name"]):
                    top_roi_fr = franchises.dropna(subset=["franchise_roi"]).nlargest(8,"franchise_roi")
                    fig_froi = px.bar(top_roi_fr, x="franchise_roi", y="collection_name",
                                      orientation="h", color="franchise_roi",
                                      color_continuous_scale=[NF_RED,GOLD,GREEN],
                                      text="franchise_roi", title="Best ROI Franchises")
                    fig_froi.update_traces(texttemplate="%{text:.1f}x", textposition="outside")
                    fig_froi.update_layout(yaxis={"categoryorder":"total ascending"}, coloraxis_showscale=False)
                    apply_theme(fig_froi, 450)
                    st.plotly_chart(fig_froi, use_container_width=True)
        else:
            st.info("ΰΉΰΈ‘ΰΉΰΈ‘ΰΈ΅ΰΈΰΉΰΈΰΈ‘ΰΈΉΰΈ₯ Franchise β ΰΈ£ΰΈ±ΰΈ Pipeline ΰΉΰΈ«ΰΈ‘ΰΉΰΉΰΈΰΈ·ΰΉΰΈΰΈͺΰΈ£ΰΉΰΈ²ΰΈ franchises.parquet")
# ──────────────────────────────────────────────────────────────────────────────
# TAB 6: TV SHOWS
# ──────────────────────────────────────────────────────────────────────────────
# Three sub-tabs over the filtered TV frame: top-rated shows, structural
# breakdowns (status, seasons, episode format, hours, per-season detail), and
# the renewal-signal summary.
with tab_tv:
    sec("πΊ TV SHOWS DEEP-DIVE")
    tab_tv1, tab_tv2, tab_tv3 = st.tabs(["π Top Rated","π Structure","π± Renewal Signal"])

    with tab_tv1:
        col_tv1, col_tv2 = st.columns([3,1], gap="large")

        # Vote-count series shared by both columns; when the column is absent a
        # placeholder of 999 lets every row pass the >= 100 threshold.
        if "vote_count" in tv_f.columns:
            vc_tv = tv_f["vote_count"]
        else:
            vc_tv = pd.Series([999]*len(tv_f), index=tv_f.index)
        vc_tv2 = vc_tv
        enough_votes = tv_f[vc_tv >= 100]

        with col_tv1:
            if "vote_average" in tv_f.columns and "name" in tv_f.columns and not tv_f.empty:
                # Optional columns are kept only when present in the frame.
                _tv1_want = ["name","vote_average","vote_count","number_of_seasons","imdb_rating"]
                _tv1_cols = safe_cols(tv_f, _tv1_want)
                top_tv_r = enough_votes.nlargest(12, "vote_average")[_tv1_cols].reset_index(drop=True)
                fig_tvr = px.bar(
                    top_tv_r, x="vote_average", y="name", orientation="h",
                    color="vote_average",
                    color_continuous_scale=["#1a0040",PURPLE,"#A29BFE"],
                    text="vote_average",
                )
                fig_tvr.update_traces(
                    texttemplate="%{text:.2f}", textposition="outside",
                    hovertemplate="<b>%{y}</b><br>Rating: %{x:.2f}<extra></extra>",
                )
                fig_tvr.update_layout(yaxis={"categoryorder":"total ascending"}, coloraxis_showscale=False)
                apply_theme(fig_tvr, 450)
                st.plotly_chart(fig_tvr, use_container_width=True)

        with col_tv2:
            st.markdown("#### πΌοΈ Top Picks")
            if "vote_average" in tv_f.columns:
                tv_top_imgs = enough_votes.nlargest(4,"vote_average")
            else:
                tv_top_imgs = tv_f.head(4)
            for _, row in tv_top_imgs.iterrows():
                p = row.get("poster_url","")
                if not p or p == "None":
                    continue
                st.image(p, width=150, caption=str(row.get("name",""))[:25])

    with tab_tv2:
        col_ts1, col_ts2, col_ts3 = st.columns(3, gap="large")
        tv_has_rows = not tv_f.empty

        with col_ts1:
            if "status" in tv_f.columns and tv_has_rows:
                sc = tv_f["status"].value_counts().reset_index()
                sc.columns = ["status","count"]
                colors_map = {
                    "Returning Series":GREEN,
                    "Ended":NF_RED,
                    "Canceled":"#E17055",
                    "In Production":TEAL,
                    "Planned":PURPLE,
                }
                fig_st = px.pie(sc, names="status", values="count", hole=0.55,
                                color="status", color_discrete_map=colors_map, title="TV Status")
                fig_st.update_traces(textinfo="percent+label", textfont_size=10)
                apply_theme(fig_st, 320)
                st.plotly_chart(fig_st, use_container_width=True)

        with col_ts2:
            if "number_of_seasons" in tv_f.columns and tv_has_rows:
                season_counts = tv_f["number_of_seasons"].dropna().astype(int).value_counts()
                sd = season_counts.sort_index().reset_index()
                sd.columns = ["seasons","count"]
                # Shows with more than 20 seasons are left off the distribution chart.
                sd = sd[sd["seasons"] <= 20]
                fig_sd = px.bar(sd, x="seasons", y="count",
                                color="count", color_continuous_scale=["#1a0040",PURPLE],
                                text="count", title="Seasons Distribution")
                fig_sd.update_traces(texttemplate="%{text}", textposition="outside")
                fig_sd.update_layout(coloraxis_showscale=False, bargap=0.3)
                apply_theme(fig_sd, 320)
                st.plotly_chart(fig_sd, use_container_width=True)

        with col_ts3:
            if "episode_format" in tv_f.columns and tv_has_rows:
                ef = tv_f["episode_format"].value_counts().reset_index()
                ef.columns = ["format","count"]
                fig_ef = px.pie(ef, names="format", values="count", hole=0.5,
                                color_discrete_sequence=CHART_COLORS, title="Episode Format")
                fig_ef.update_traces(textinfo="percent+label", textfont_size=10)
                apply_theme(fig_ef, 320)
                st.plotly_chart(fig_ef, use_container_width=True)

        # Total content hours
        if "total_content_hours" in tv_f.columns and tv_has_rows:
            st.markdown("---")
            st.markdown("**Total Content Hours (Top 20 shows)**")
            longest = tv_f.nlargest(20,"total_content_hours")
            hours_cols = safe_cols(tv_f,["name","total_content_hours","number_of_seasons"])
            top_hours = longest[hours_cols].dropna()
            if "number_of_seasons" in top_hours.columns:
                hours_color = "number_of_seasons"
            else:
                hours_color = "total_content_hours"
            fig_hrs = px.bar(top_hours, x="total_content_hours", y="name", orientation="h",
                             color=hours_color,
                             color_continuous_scale=["#1a0040",PURPLE],
                             text="total_content_hours",
                             labels={"total_content_hours":"Total Watch Hours","name":""})
            fig_hrs.update_traces(texttemplate="%{text:.0f}h", textposition="outside")
            fig_hrs.update_layout(yaxis={"categoryorder":"total ascending"})
            apply_theme(fig_hrs, 400)
            st.plotly_chart(fig_hrs, use_container_width=True)

        # TV Seasons detail
        if not tv_seasons.empty and "season_number" in tv_seasons.columns:
            st.markdown("---")
            sec("ποΈ SEASON-LEVEL ANALYSIS")
            if "name" in tv_seasons.columns:
                # The picker offers at most the first 100 distinct show names.
                show_names = tv_seasons["name"].dropna().unique().tolist()[:100]
                sel_show = st.selectbox("ΰΉΰΈ₯ΰΈ·ΰΈΰΈ TV Show", show_names)
                show_seasons = tv_seasons[tv_seasons["name"]==sel_show].sort_values("season_number")
                if not show_seasons.empty:
                    col_ss1, col_ss2 = st.columns(2)
                    with col_ss1:
                        if "episode_count" in show_seasons.columns:
                            if "vote_average" in show_seasons.columns:
                                season_color = "vote_average"
                            else:
                                season_color = "season_number"
                            fig_ssn = px.bar(show_seasons, x="season_number", y="episode_count",
                                             color=season_color,
                                             color_continuous_scale=[PURPLE,"#A29BFE"],
                                             text="episode_count",
                                             labels={"season_number":"Season","episode_count":"Episodes"})
                            fig_ssn.update_traces(texttemplate="%{text}", textposition="outside")
                            fig_ssn.update_layout(coloraxis_showscale=False)
                            apply_theme(fig_ssn, 280)
                            st.plotly_chart(fig_ssn, use_container_width=True)
                    with col_ss2:
                        disp_cols = safe_cols(show_seasons, ["season_number","season_name","air_date","episode_count","vote_average","season_position"])
                        st.dataframe(show_seasons[disp_cols].reset_index(drop=True),
                                     use_container_width=True, height=280)

    with tab_tv3:
        if "renewal_signal" in tv_f.columns and not tv_f.empty:
            rs = tv_f["renewal_signal"].value_counts().reset_index()
            rs.columns = ["signal","count"]
            colors_rs = {
                "Renewed/Ongoing":GREEN,
                "Strong Candidate":TEAL,
                "Possible":GOLD,
                "Unlikely / Ended":NF_RED,
            }
            col_rs1, col_rs2 = st.columns([2,1], gap="large")
            with col_rs1:
                fig_rs = px.bar(rs, x="count", y="signal", orientation="h",
                                color="signal", color_discrete_map=colors_rs,
                                text="count", title="TV Shows Renewal Likelihood")
                fig_rs.update_traces(texttemplate="%{text:,}", textposition="outside")
                fig_rs.update_layout(yaxis={"categoryorder":"total ascending"}, showlegend=False)
                apply_theme(fig_rs, 320)
                st.plotly_chart(fig_rs, use_container_width=True)

                st.markdown("**πΊ Strong Candidates β Shows Likely to Return**")
                candidates = tv_f[tv_f["renewal_signal"]=="Strong Candidate"]
                if "vote_average" in tv_f.columns:
                    strong = candidates.nlargest(12,"vote_average")
                else:
                    strong = candidates.head(12)
                if not strong.empty:
                    poster_gallery(strong, title_col="name", year_col="first_air_year",
                                   rating_col="vote_average", poster_col="poster_url", max_cards=10)
            with col_rs2:
                # One insight card per signal, in a fixed order, with a matching colour style.
                signal_styles = [
                    ("Renewed/Ongoing","green"),
                    ("Strong Candidate","teal"),
                    ("Possible","gold"),
                    ("Unlikely / Ended","red"),
                ]
                for signal, style in signal_styles:
                    cnt = tv_f[tv_f["renewal_signal"]==signal].shape[0]
                    insight(f"<strong>{signal}</strong>: {cnt:,} shows", style)
        else:
            st.info("ΰΉΰΈ‘ΰΉΰΈ‘ΰΈ΅ΰΈΰΉΰΈΰΈ‘ΰΈΉΰΈ₯ renewal_signal β ΰΈΰΉΰΈΰΈΰΈ£ΰΈ±ΰΈ PySpark pipeline ΰΉΰΈ«ΰΈ‘ΰΉ")
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # TAB 7: TALENT | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
with tab_talent:
    # Talent tab: cast/crew engagement charts and profile cards from
    # `top_talent`, a credits-based fallback when `top_talent` is empty,
    # and a keyword treemap underneath.
    import html  # stdlib; escapes data-derived text before it is injected as raw HTML

    sec("π TALENT & CREDITS")
    if not top_talent.empty:
        col_tt1, col_tt2 = st.columns([3, 1], gap="large")
        # Without a "role" column every row is treated as cast and there is no crew split.
        if "role" in top_talent.columns:
            cast_talent = top_talent[top_talent["role"] == "cast"].copy()
            crew_talent = top_talent[top_talent["role"] == "crew"].copy()
        else:
            cast_talent = top_talent.copy()
            crew_talent = pd.DataFrame()
        # True when the cast frame can be ranked by engagement.
        cast_ranked = (not cast_talent.empty
                       and "avg_content_engagement" in cast_talent.columns)

        with col_tt1:
            st.markdown("**π Most Impactful Cast β Avg Engagement Score**")
            if cast_ranked:
                top_cast_t = cast_talent.nlargest(15, "avg_content_engagement")
                color_col = ("total_imdb_votes_across_titles"
                             if "total_imdb_votes_across_titles" in top_cast_t.columns
                             else "avg_content_engagement")
                fig_ct = px.bar(top_cast_t, x="avg_content_engagement", y="name",
                                orientation="h", color=color_col,
                                color_continuous_scale=["#001433", TEAL],
                                text="avg_content_engagement",
                                labels={"avg_content_engagement": "Avg Engagement", "name": ""})
                fig_ct.update_traces(texttemplate="%{text:.2f}", textposition="outside")
                fig_ct.update_layout(yaxis={"categoryorder": "total ascending"},
                                     coloraxis_showscale=False)
                apply_theme(fig_ct, 460)
                st.plotly_chart(fig_ct, use_container_width=True)

        with col_tt2:
            st.markdown("**πΌοΈ Top Talent Profiles**")
            featured_cast = (cast_talent.nlargest(8, "avg_content_engagement")
                             if cast_ranked else cast_talent.head(8))
            placeholder = "https://via.placeholder.com/72x72/181818/555?text=π€"
            onerror = f"this.src='{placeholder}'"
            cards = []
            for _, row in featured_cast.iterrows():
                profile = row.get("profile_url", "")
                # Missing values arrive as NaN/None (not ""), so anything that is
                # not a non-empty string falls back to the placeholder image.
                if not isinstance(profile, str) or not profile or profile == "None":
                    profile = placeholder
                # Truncate first, then escape, so an entity is never cut in half.
                name = html.escape(str(row.get("name", ""))[:20])
                eng = row.get("avg_content_engagement", 0)
                tc = row.get("title_count", 0)
                # Markup lines are kept unindented (Markdown renderers can treat
                # indented lines as code blocks; presumably relevant here).
                cards.append(
                    "\n"
                    '<div class="talent-card" style="width:calc(50% - 4px);">\n'
                    f'<img src="{html.escape(profile)}" onerror="{onerror}"/>\n'
                    f'<div class="talent-name">{name}</div>\n'
                    f'<div class="talent-meta">Score: {eng:.1f} Β· {tc} titles</div>\n'
                    "</div>"
                )
            cards_html = ('<div style="display:flex;flex-wrap:wrap;gap:8px;">'
                          + "".join(cards)
                          + "</div>")
            st.markdown(cards_html, unsafe_allow_html=True)

        st.markdown("---")
        if not crew_talent.empty:
            st.markdown("**π¬ Top Directors & Producers**")
            col_cr1, col_cr2 = st.columns([2, 2], gap="large")
            with col_cr1:
                # The chart plots the engagement column on its x-axis, so it can
                # only be drawn when that column exists.
                if "avg_content_engagement" in crew_talent.columns:
                    top_crew = crew_talent.nlargest(12, "avg_content_engagement")
                    crew_color = ("title_count" if "title_count" in top_crew.columns
                                  else "avg_content_engagement")
                    fig_crew = px.bar(top_crew, x="avg_content_engagement", y="name",
                                      orientation="h", color=crew_color,
                                      color_continuous_scale=["#1a0a20", PURPLE],
                                      text="avg_content_engagement",
                                      labels={"avg_content_engagement": "Avg Engagement", "name": ""})
                    fig_crew.update_traces(texttemplate="%{text:.2f}", textposition="outside")
                    fig_crew.update_layout(yaxis={"categoryorder": "total ascending"},
                                           coloraxis_showscale=False)
                    apply_theme(fig_crew, 400)
                    st.plotly_chart(fig_crew, use_container_width=True)
            with col_cr2:
                if "gender" in cast_talent.columns and not cast_talent.empty:
                    gen_dist = cast_talent["gender"].value_counts().reset_index()
                    gen_dist.columns = ["gender", "count"]
                    fig_gen = px.pie(gen_dist, names="gender", values="count",
                                     hole=0.55, title="Cast Gender Distribution",
                                     color="gender",
                                     color_discrete_map={"Female": TEAL, "Male": PURPLE,
                                                         "Unknown": NF_BORDER})
                    fig_gen.update_traces(textinfo="percent+label", textfont_size=12)
                    apply_theme(fig_gen, 380)
                    st.plotly_chart(fig_gen, use_container_width=True)
    else:
        # Fallback to credits
        col_c1, col_c2 = st.columns([3, 1], gap="large")
        # Both fallback charts filter on credits["role"], so both need the column.
        credits_has_role = not credits.empty and "role" in credits.columns
        with col_c1:
            if credits_has_role:
                top_cast = (credits[credits["role"] == "cast"]
                            .groupby("name").size().reset_index(name="appearances")
                            .nlargest(15, "appearances"))
                fig_cast = px.bar(top_cast, x="appearances", y="name", orientation="h",
                                  color="appearances",
                                  color_continuous_scale=["#001433", TEAL],
                                  text="appearances",
                                  labels={"appearances": "Appearances", "name": ""})
                fig_cast.update_traces(texttemplate="%{text}", textposition="outside")
                fig_cast.update_layout(yaxis={"categoryorder": "total ascending"},
                                       coloraxis_showscale=False)
                apply_theme(fig_cast, 450)
                st.plotly_chart(fig_cast, use_container_width=True)
        with col_c2:
            if credits_has_role and "gender" in credits.columns:
                gd = credits[credits["role"] == "cast"]["gender"].value_counts().reset_index()
                gd.columns = ["gender", "count"]
                fig_g = px.pie(gd, names="gender", values="count", hole=0.55,
                               color="gender",
                               color_discrete_map={"Female": TEAL, "Male": PURPLE,
                                                   "Unknown": "#333"},
                               title="Gender Distribution")
                fig_g.update_traces(textinfo="percent+label")
                apply_theme(fig_g, 300)
                st.plotly_chart(fig_g, use_container_width=True)

    # Keywords treemap
    st.markdown("---")
    sec("π TRENDING THEMES & KEYWORDS")
    if not keywords.empty and "keyword" in keywords.columns:
        # Computed once; feeds both the treemap and the top-5 insight list.
        top_kw = (keywords.groupby("keyword").size()
                  .reset_index(name="count").nlargest(30, "count"))
        col_kw1, col_kw2 = st.columns([3, 1], gap="large")
        with col_kw1:
            fig_kw = px.treemap(top_kw, path=["keyword"], values="count",
                                color="count",
                                color_continuous_scale=["#200000", "#6C1F1F", NF_RED],
                                title="Top 30 Content Themes")
            fig_kw.update_traces(textfont=dict(size=12, family="DM Sans"))
            apply_theme(fig_kw, 400)
            st.plotly_chart(fig_kw, use_container_width=True)
        with col_kw2:
            for _, r in top_kw.head(5).iterrows():
                insight(f"π <strong>'{r['keyword']}'</strong> β {r['count']:,} titles", "red")
# ─────────────────────────────────────────────────────────────────────────────
# TAB 8: YEAR-OVER-YEAR TRENDS
# ─────────────────────────────────────────────────────────────────────────────
with tab_trends:
    # Trends tab: year-over-year line/area/bar charts from `yoy_trend` (with a
    # fallback built from the filtered movies), followed by language analytics
    # from `lang_summary` (with a fallback built from the filtered movies).
    sec("π YEAR-OVER-YEAR TRENDS")
    if not yoy_trend.empty and "year" in yoy_trend.columns:
        # Without a "media_type" column all rows are plotted as the movie series.
        if "media_type" in yoy_trend.columns:
            yoy_m = yoy_trend[yoy_trend["media_type"] == "movie"].sort_values("year")
            yoy_tv = yoy_trend[yoy_trend["media_type"] == "tv"].sort_values("year")
        else:
            yoy_m = yoy_trend.sort_values("year")
            yoy_tv = pd.DataFrame()
        # (legend name, frame, line colour) — Movies is always added before TV Shows.
        series = [("Movies", yoy_m, NF_RED), ("TV Shows", yoy_tv, PURPLE)]

        col_yr1, col_yr2 = st.columns(2, gap="large")
        with col_yr1:
            st.markdown("**Avg Rating Over Time**")
            fig_yr1 = go.Figure()
            for label, frame, color in series:
                if not frame.empty and "avg_rating" in frame.columns:
                    fig_yr1.add_trace(go.Scatter(x=frame["year"], y=frame["avg_rating"],
                                                 name=label, mode="lines+markers",
                                                 line=dict(color=color, width=2.5),
                                                 marker_size=5))
            fig_yr1.update_layout(xaxis_title="Year", yaxis_title="Avg Rating")
            apply_theme(fig_yr1, 320)
            st.plotly_chart(fig_yr1, use_container_width=True)
        with col_yr2:
            st.markdown("**Avg Engagement Over Time**")
            fig_yr2 = go.Figure()
            for label, frame, color in series:
                if not frame.empty and "avg_engagement" in frame.columns:
                    fig_yr2.add_trace(go.Scatter(x=frame["year"], y=frame["avg_engagement"],
                                                 name=label, mode="lines", fill="tozeroy",
                                                 line=dict(color=color, width=2),
                                                 fillcolor=hex_rgba(color, 0.12)))
            fig_yr2.update_layout(xaxis_title="Year", yaxis_title="Avg Engagement Score")
            apply_theme(fig_yr2, 320)
            st.plotly_chart(fig_yr2, use_container_width=True)

        st.markdown("---")
        col_yr3, col_yr4 = st.columns(2, gap="large")
        # (column, metric, heading, axis label, accent colour) for the two
        # per-year bar charts, both drawn from the movie series.
        bar_specs = [
            (col_yr3, "trending_titles", "**Trending Titles by Year**", "Trending Titles", TEAL),
            (col_yr4, "awarded_titles", "**Award Winners by Year**", "Award Winners", GOLD),
        ]
        for column, metric, heading, axis_label, accent in bar_specs:
            with column:
                if metric in yoy_m.columns and not yoy_m.empty:
                    st.markdown(heading)
                    fig_bar = px.bar(yoy_m, x="year", y=metric,
                                     color=metric,
                                     color_continuous_scale=[hex_rgba(accent, 0.3), accent],
                                     labels={metric: axis_label, "year": "Year"})
                    fig_bar.update_layout(coloraxis_showscale=False)
                    apply_theme(fig_bar, 280)
                    st.plotly_chart(fig_bar, use_container_width=True)

        if "avg_imdb_votes" in yoy_m.columns and not yoy_m.empty:
            st.markdown("---")
            st.markdown("**Avg IMDb Votes (Viewership Proxy) Over Time**")
            fig_iv = go.Figure()
            fig_iv.add_trace(go.Scatter(x=yoy_m["year"], y=yoy_m["avg_imdb_votes"],
                                        name="Movies", mode="lines+markers",
                                        line=dict(color=GOLD, width=2.5), marker_size=5,
                                        fill="tozeroy", fillcolor=hex_rgba(GOLD, 0.1)))
            if not yoy_tv.empty and "avg_imdb_votes" in yoy_tv.columns:
                fig_iv.add_trace(go.Scatter(x=yoy_tv["year"], y=yoy_tv["avg_imdb_votes"],
                                            name="TV Shows", mode="lines+markers",
                                            line=dict(color=PURPLE, width=2.5), marker_size=5))
            fig_iv.update_layout(xaxis_title="Year", yaxis_title="Avg IMDb Votes")
            apply_theme(fig_iv, 320)
            st.plotly_chart(fig_iv, use_container_width=True)
    else:
        st.info("ΰΉΰΈ‘ΰΉΰΈ‘ΰΈ΅ΰΈΰΉΰΈΰΈ‘ΰΈΉΰΈ₯ yoy_trend.parquet β ΰΉΰΈͺΰΈΰΈΰΈΰΈ²ΰΈΰΈΰΉΰΈΰΈ‘ΰΈΉΰΈ₯ filtered ΰΉΰΈΰΈ")
        # The aggregation below reads all three columns, so require all of them.
        fallback_cols = {"release_year", "title", "vote_average"}
        if fallback_cols.issubset(movies_f.columns) and not movies_f.empty:
            yr_data = movies_f.groupby("release_year").agg(
                count=("title", "count"),
                avg_rating=("vote_average", "mean"),
            ).reset_index()
            # Title count on the primary axis, mean rating on the secondary axis.
            fig_fb = make_subplots(specs=[[{"secondary_y": True}]])
            fig_fb.add_trace(go.Bar(x=yr_data["release_year"], y=yr_data["count"],
                                    name="Count", marker_color=hex_rgba(NF_RED, 0.5)),
                             secondary_y=False)
            fig_fb.add_trace(go.Scatter(x=yr_data["release_year"], y=yr_data["avg_rating"],
                                        name="Avg Rating", line=dict(color=GOLD, width=2.5)),
                             secondary_y=True)
            fig_fb.update_layout(**PLOTLY_BASE, height=380)
            st.plotly_chart(fig_fb, use_container_width=True)

    # Language Summary
    st.markdown("---")
    sec("π LANGUAGE & REGION ANALYTICS")
    if not lang_summary.empty:
        col_ls1, col_ls2 = st.columns(2, gap="large")
        with col_ls1:
            if all(c in lang_summary.columns for c in ["original_language", "title_count"]):
                top_lang = lang_summary.nlargest(15, "title_count").copy()
                lang_map = {"en": "English", "ja": "Japanese", "ko": "Korean", "fr": "French",
                            "es": "Spanish", "de": "German", "it": "Italian", "pt": "Portuguese",
                            "zh": "Chinese", "hi": "Hindi", "ru": "Russian", "th": "Thai",
                            "ar": "Arabic", "nl": "Dutch", "sv": "Swedish"}
                # Codes missing from the map keep their raw language code as the label.
                top_lang["lang_name"] = (top_lang["original_language"].map(lang_map)
                                         .fillna(top_lang["original_language"]))
                fig_ls = px.bar(top_lang, x="title_count", y="lang_name", orientation="h",
                                color="avg_popularity" if "avg_popularity" in top_lang.columns else "title_count",
                                color_continuous_scale=["#001a33", TEAL],
                                text="title_count",
                                labels={"title_count": "Titles", "lang_name": ""})
                fig_ls.update_traces(texttemplate="%{text:,}", textposition="outside")
                fig_ls.update_layout(yaxis={"categoryorder": "total ascending"},
                                     coloraxis_showscale=False)
                apply_theme(fig_ls, 380)
                st.plotly_chart(fig_ls, use_container_width=True)
        with col_ls2:
            # The pie sums "title_count" per group, so both columns must exist.
            if all(c in lang_summary.columns for c in ["language_group", "title_count"]):
                lg_grp = lang_summary.groupby("language_group")["title_count"].sum().reset_index()
                fig_lg = px.pie(lg_grp, names="language_group", values="title_count",
                                hole=0.55, color_discrete_sequence=CHART_COLORS,
                                title="Content by Language Group")
                fig_lg.update_traces(textinfo="percent+label", textfont_size=11)
                apply_theme(fig_lg, 380)
                st.plotly_chart(fig_lg, use_container_width=True)
    else:
        if "original_language" in movies_f.columns and not movies_f.empty:
            lang_cnt = movies_f["original_language"].value_counts().head(12).reset_index()
            lang_cnt.columns = ["language", "count"]
            fig_lf = px.bar(lang_cnt, x="count", y="language", orientation="h",
                            color="count", color_continuous_scale=["#001a33", TEAL],
                            text="count")
            fig_lf.update_traces(texttemplate="%{text:,}", textposition="outside")
            fig_lf.update_layout(yaxis={"categoryorder": "total ascending"},
                                 coloraxis_showscale=False)
            apply_theme(fig_lf, 380)
            st.plotly_chart(fig_lf, use_container_width=True)
# ─────────────────────────────────────────────────────────────────────────────
# RAW DATA EXPLORER
# ─────────────────────────────────────────────────────────────────────────────
st.markdown("---")
with st.expander("ποΈ Raw Data Explorer", expanded=False):
    # One tab per dataset: a record count plus a preview of the first 200 rows.
    raw_sources = [
        ("π¬ Movies", movies_f),
        ("πΊ TV", tv_f),
        ("π Credits", credits),
        ("π Keywords", keywords),
        ("π Content Perf", content_perf),
        ("π Genre Perf", genre_perf),
        ("π Talent", top_talent),
        ("π YoY", yoy_trend),
    ]
    tabs_raw = st.tabs([label for label, _ in raw_sources])
    for raw_tab, (_, frame) in zip(tabs_raw, raw_sources):
        with raw_tab:
            st.caption(f"{len(frame):,} records")
            if frame.empty:
                continue
            st.dataframe(frame.head(200), use_container_width=True, height=320)
# ─────────────────────────────────────────────────────────────────────────────
# FOOTER
# ─────────────────────────────────────────────────────────────────────────────
# Page footer: centred credits line plus the list of newly added tables,
# coloured with the dim-text and border theme constants.
footer_html = f"""
<div style="text-align:center; padding:30px 0 10px 0; color:{TEXT_DIM}; font-size:10px; letter-spacing:1.5px;">
NETFLIX ANALYTICS 2.0 Β· TMDB API + OMDB API Β· AIRFLOW β PYSPARK β HUGGINGFACE
<br><span style="color:{NF_BORDER};">New Tables: content_performance Β· genre_performance Β· top_talent Β· yoy_trend Β· tv_seasons Β· language_summary</span>
</div>
"""
st.markdown(footer_html, unsafe_allow_html=True)