roblox-dashboard / src /streamlit_app.py
ihhereanth's picture
Update src/streamlit_app.py
2792c26 verified
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
# ─────────────────────────────────────────────────────────────────────────────
# PAGE CONFIG
# ─────────────────────────────────────────────────────────────────────────────
# Page-level settings: browser-tab title and icon, wide layout, and a sidebar
# that starts expanded.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="expanded",
    page_title="Netflix Analytics 2.0",
    page_icon="🎬",
)
# ─────────────────────────────────────────────────────────────────────────────
# THEME CONSTANTS
# ─────────────────────────────────────────────────────────────────────────────
# Brand / surface colours used for backgrounds, cards and borders.
NF_RED = "#E50914"
NF_DARK = "#0a0a0a"
NF_CARD = "#181818"
NF_GRAY = "#222222"
NF_BORDER = "#2f2f2f"

# Accent colours for charts, tags and KPI stripes.
PURPLE = "#6C5CE7"
TEAL = "#00B4D8"
GOLD = "#F39C12"
GREEN = "#00B894"
PINK = "#E84393"

# Text colours: primary, muted and dim.
TEXT_PRI = "#FFFFFF"
TEXT_MUT = "#9e9e9e"
TEXT_DIM = "#555555"

# Default colour cycle for Plotly traces: the named accents first, then three
# extra pastel tones.
CHART_COLORS = [
    NF_RED,
    PURPLE,
    TEAL,
    GOLD,
    GREEN,
    PINK,
    "#A29BFE",
    "#55EFC4",
    "#FD79A8",
]
def hex_rgba(h, a=0.15):
    """Convert a hex colour string into a CSS ``rgba(r,g,b,a)`` string.

    Args:
        h: Colour as ``"#RRGGBB"`` or CSS shorthand ``"#RGB"``; the leading
            ``#`` is optional. Digits beyond the first six are ignored.
        a: Alpha value, inserted into the result verbatim.

    Returns:
        A string of the form ``"rgba(r,g,b,a)"`` with decimal channel values.

    Raises:
        ValueError: If fewer than six hex digits remain after shorthand
            expansion, or a channel is not valid hexadecimal.
    """
    digits = h.lstrip("#")
    if len(digits) == 3:
        # CSS shorthand: "#abc" means "#aabbcc".
        digits = "".join(ch * 2 for ch in digits)
    if len(digits) < 6:
        raise ValueError(f"expected a 3- or 6-digit hex colour, got {h!r}")
    r, g, b = (int(digits[i:i + 2], 16) for i in (0, 2, 4))
    return f"rgba({r},{g},{b},{a})"
# Layout defaults shared by every Plotly figure: transparent backgrounds,
# themed fonts/axes/legend, tight margins and the dashboard colour cycle.
PLOTLY_BASE = {
    "paper_bgcolor": "rgba(0,0,0,0)",
    "plot_bgcolor": "rgba(0,0,0,0)",
    "font": {"family": "DM Sans, sans-serif", "color": TEXT_PRI, "size": 12},
    "xaxis": {"gridcolor": NF_BORDER, "linecolor": NF_BORDER, "tickcolor": TEXT_MUT},
    "yaxis": {"gridcolor": NF_BORDER, "linecolor": NF_BORDER, "tickcolor": TEXT_MUT},
    "colorway": CHART_COLORS,
    "legend": {"bgcolor": "rgba(0,0,0,0)", "font": {"color": TEXT_PRI}},
    "margin": {"l": 10, "r": 10, "t": 40, "b": 10},
    "title": {"font": {"size": 14, "color": TEXT_PRI}},
}
# ─────────────────────────────────────────────────────────────────────────────
# CSS
# ─────────────────────────────────────────────────────────────────────────────
# Inject the dashboard's global stylesheet as raw HTML.
# The literal is an f-string: CSS braces are written doubled ({{ }}) so they
# survive formatting, while single-brace fields interpolate the theme
# constants and hex_rgba() results defined above.
st.markdown(f"""
<style>
@import url('https://fonts.googleapis.com/css2?family=DM+Sans:wght@300;400;500;600;700&family=Bebas+Neue&family=Space+Grotesk:wght@400;600&display=swap');
html, body, [data-testid="stAppViewContainer"] {{
background-color: {NF_DARK};
color: {TEXT_PRI};
font-family: 'DM Sans', sans-serif;
}}
[data-testid="stAppViewContainer"] {{
background: radial-gradient(ellipse 120% 60% at 10% 0%, #1a0505 0%, {NF_DARK} 55%);
}}
[data-testid="stSidebar"] {{
background: linear-gradient(180deg, #0d0d0d 0%, #0a0a0a 100%) !important;
border-right: 1px solid {NF_BORDER};
}}
[data-testid="stSidebar"] * {{ color: {TEXT_PRI} !important; }}
[data-testid="stMetricLabel"] {{ color: {TEXT_MUT} !important; font-size:11px !important; }}
[data-testid="stMetricValue"] {{ color: {TEXT_PRI} !important; font-size:22px !important; font-weight:700; }}
hr {{ border-color: {NF_BORDER} !important; margin: 1.5rem 0; }}
/* Tabs */
[data-testid="stTabs"] [role="tab"] {{
color: {TEXT_MUT}; font-weight:500; font-family:'DM Sans',sans-serif;
padding: 8px 16px; border-radius: 6px 6px 0 0;
transition: all 0.2s;
}}
[data-testid="stTabs"] [role="tab"][aria-selected="true"] {{
color: {TEXT_PRI}; border-bottom: 2px solid {NF_RED};
background: {hex_rgba(NF_RED, 0.08)};
}}
[data-testid="stTabs"] [role="tablist"] {{
border-bottom: 1px solid {NF_BORDER};
gap: 4px;
}}
/* Expander */
[data-testid="stExpander"] {{
background: {NF_CARD}; border: 1px solid {NF_BORDER}; border-radius:10px;
}}
/* Multiselect / Input */
[data-testid="stMultiSelect"] > div > div,
[data-testid="stSelectbox"] > div > div,
.stTextInput > div > div {{
background: {NF_GRAY} !important; border-color: {NF_BORDER} !important;
border-radius: 8px !important; color: {TEXT_PRI} !important;
}}
.stSlider [data-testid="stSliderThumb"] {{ background: {NF_RED}; }}
.stCheckbox label {{ color: {TEXT_PRI} !important; }}
/* Plotly */
.js-plotly-plot .plotly {{ border-radius: 12px; }}
/* Components */
.hero-title {{
font-family: 'Bebas Neue', sans-serif;
font-size: 56px; letter-spacing: 4px; color: {TEXT_PRI}; line-height:1; margin:0;
}}
.hero-title span {{ color: {NF_RED}; }}
.hero-sub {{
font-size: 12px; color: {TEXT_MUT}; letter-spacing: 2.5px;
text-transform: uppercase; margin-top: 6px;
}}
.section-hdr {{
font-family: 'Bebas Neue', sans-serif; font-size: 24px;
letter-spacing: 2px; color: {TEXT_PRI};
padding-bottom: 6px; border-bottom: 2px solid {NF_RED};
display: inline-block; margin-bottom: 16px;
}}
.kpi-card {{
background: linear-gradient(135deg, {NF_CARD} 0%, #1e1e1e 100%);
border: 1px solid {NF_BORDER}; border-radius:12px;
padding: 18px 16px; text-align:center; position:relative; overflow:hidden;
transition: transform 0.2s, border-color 0.2s;
}}
.kpi-card::before {{
content:''; position:absolute; top:0; left:0; right:0; height:3px;
background: var(--accent, linear-gradient(90deg, {NF_RED}, {PURPLE}));
}}
.kpi-card:hover {{ transform:translateY(-3px); border-color:{NF_RED}; }}
.kpi-icon {{ font-size:26px; margin-bottom:5px; }}
.kpi-val {{ font-size:26px; font-weight:700; color:{TEXT_PRI}; line-height:1; }}
.kpi-lbl {{ font-size:10px; font-weight:600; color:{TEXT_MUT};
letter-spacing:1.5px; text-transform:uppercase; margin-top:4px; }}
.kpi-delta {{
font-size:11px; margin-top:7px; padding:2px 8px; border-radius:20px; display:inline-block;
}}
.delta-pos {{ background:rgba(0,184,148,0.2); color:{GREEN}; }}
.delta-neg {{ background:rgba(229,9,20,0.2); color:{NF_RED}; }}
.delta-neu {{ background:rgba(158,158,158,0.15); color:{TEXT_MUT}; }}
.insight-card {{
background: linear-gradient(135deg, #111 0%, #161616 100%);
border: 1px solid {NF_BORDER}; border-left: 3px solid {PURPLE};
border-radius:10px; padding:13px 15px; margin-bottom:9px;
}}
.insight-card.red {{ border-left-color:{NF_RED}; background:linear-gradient(135deg,#150505,#1a0808); }}
.insight-card.teal {{ border-left-color:{TEAL}; background:linear-gradient(135deg,#03121a,#051a24); }}
.insight-card.gold {{ border-left-color:{GOLD}; background:linear-gradient(135deg,#141004,#1a1508); }}
.insight-card.green {{ border-left-color:{GREEN}; background:linear-gradient(135deg,#031410,#051a14); }}
.insight-text {{ font-size:12.5px; color:{TEXT_MUT}; line-height:1.55; }}
.insight-text strong {{ color:{TEXT_PRI}; }}
/* Poster card grid */
.poster-grid {{ display:flex; flex-wrap:wrap; gap:12px; padding:8px 0; }}
.poster-card {{
background:{NF_CARD}; border:1px solid {NF_BORDER}; border-radius:10px;
overflow:hidden; width:150px; flex-shrink:0;
transition:transform 0.2s, border-color 0.2s; cursor:pointer;
}}
.poster-card:hover {{ transform:translateY(-5px) scale(1.02); border-color:{NF_RED}; }}
.poster-card img {{ width:100%; height:220px; object-fit:cover; display:block; }}
.poster-card .pc-info {{ padding:8px; }}
.poster-card .pc-title {{
font-size:11px; font-weight:600; color:{TEXT_PRI}; line-height:1.3;
overflow:hidden; display:-webkit-box; -webkit-line-clamp:2; -webkit-box-orient:vertical;
}}
.poster-card .pc-meta {{ font-size:10px; color:{TEXT_MUT}; margin-top:3px; }}
.poster-card .pc-badge {{
font-size:9px; font-weight:700; padding:2px 5px; border-radius:4px;
display:inline-block; margin-top:4px;
}}
.badge-rank {{ background:rgba(229,9,20,0.25); color:{NF_RED}; }}
.badge-score {{ background:rgba(243,156,18,0.25); color:{GOLD}; }}
.badge-trend {{ background:rgba(0,180,216,0.25); color:{TEAL}; }}
/* Score pill */
.score-pill {{
display:inline-flex; align-items:center; gap:5px;
padding:3px 10px; border-radius:20px; font-size:12px; font-weight:600;
margin:2px;
}}
.sp-tmdb {{ background:rgba(1,180,200,0.15); color:{TEAL}; border:1px solid {hex_rgba(TEAL,0.3)}; }}
.sp-imdb {{ background:rgba(243,156,18,0.15); color:{GOLD}; border:1px solid {hex_rgba(GOLD,0.3)}; }}
.sp-rt {{ background:rgba(229,9,20,0.15); color:{NF_RED}; border:1px solid {hex_rgba(NF_RED,0.3)};}}
.sp-mc {{ background:rgba(108,92,231,0.15); color:{PURPLE}; border:1px solid {hex_rgba(PURPLE,0.3)};}}
/* Content detail card */
.detail-card {{
background:{NF_CARD}; border:1px solid {NF_BORDER}; border-radius:14px;
padding:20px; margin-bottom:16px;
}}
.detail-title {{ font-size:22px; font-weight:700; color:{TEXT_PRI}; margin-bottom:6px; }}
.detail-meta {{ font-size:12px; color:{TEXT_MUT}; }}
.tag {{
display:inline-block; padding:3px 10px; border-radius:20px;
font-size:11px; font-weight:600; margin:3px;
}}
.tag-genre {{ background:{hex_rgba(PURPLE,0.2)}; color:{PURPLE}; border:1px solid {hex_rgba(PURPLE,0.3)}; }}
.tag-lang {{ background:{hex_rgba(TEAL,0.2)}; color:{TEAL}; border:1px solid {hex_rgba(TEAL,0.3)}; }}
.tag-status {{ background:{hex_rgba(GREEN,0.2)}; color:{GREEN}; border:1px solid {hex_rgba(GREEN,0.3)}; }}
.tag-award {{ background:{hex_rgba(GOLD,0.2)}; color:{GOLD}; border:1px solid {hex_rgba(GOLD,0.3)}; }}
/* Talent card */
.talent-card {{
background:{NF_CARD}; border:1px solid {NF_BORDER}; border-radius:10px;
text-align:center; padding:14px 10px; overflow:hidden;
transition:transform 0.2s, border-color 0.2s;
}}
.talent-card:hover {{ transform:translateY(-4px); border-color:{PURPLE}; }}
.talent-card img {{
width:72px; height:72px; border-radius:50%; object-fit:cover;
border:2px solid {NF_BORDER}; margin-bottom:8px;
}}
.talent-name {{ font-size:12px; font-weight:600; color:{TEXT_PRI}; }}
.talent-meta {{ font-size:10px; color:{TEXT_MUT}; margin-top:2px; }}
</style>
""", unsafe_allow_html=True)
# ─────────────────────────────────────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────────────────────────────────────
def apply_theme(fig, h=380):
    """Apply the shared PLOTLY_BASE layout to *fig* with height *h*.

    The figure is updated in place and also returned so calls can be chained.
    """
    fig.update_layout(height=h, **PLOTLY_BASE)
    return fig
def sec(label):
    """Render *label* as a section header using the ``section-hdr`` CSS class."""
    header_html = f'<div class="section-hdr">{label}</div>'
    st.markdown(header_html, unsafe_allow_html=True)
def insight(text, style=""):
    """Render *text* (raw HTML) inside an insight card.

    *style* is appended to the card's class list; the stylesheet defines the
    colour variants ``red``, ``teal``, ``gold`` and ``green``.
    """
    card_html = (
        f'<div class="insight-card {style}">'
        f'<div class="insight-text">{text}</div></div>'
    )
    st.markdown(card_html, unsafe_allow_html=True)
def kpi(icon, val, lbl, delta=None, dt="neu", accent=None):
    """Render one KPI card.

    Args:
        icon: Text/emoji shown at the top of the card.
        val: Headline value (already formatted).
        lbl: Caption under the value.
        delta: Optional badge text; omitted when falsy.
        dt: Badge variant suffix, used as CSS class ``delta-<dt>``.
        accent: Optional colour for the top stripe, passed through the
            ``--accent`` CSS variable as a gradient fading to ``<accent>66``.
    """
    if accent:
        accent_css = f"--accent: linear-gradient(90deg,{accent},{accent}66);"
    else:
        accent_css = ""

    delta_html = ""
    if delta:
        delta_html = f'<div class="kpi-delta delta-{dt}">{delta}</div>'

    card_html = f"""
<div class="kpi-card" style="{accent_css}">
<div class="kpi-icon">{icon}</div>
<div class="kpi-val">{val}</div>
<div class="kpi-lbl">{lbl}</div>
{delta_html}
</div>"""
    st.markdown(card_html, unsafe_allow_html=True)
def score_pills(tmdb=None, imdb=None, rt=None, mc=None):
    """Render one coloured pill per available review score.

    Args:
        tmdb: TMDB rating, shown with one decimal.
        imdb: IMDb rating, shown with one decimal.
        rt: Rotten Tomatoes score, shown as a whole-number percentage.
        mc: Metacritic score, shown as a whole number.

    A score that is ``None`` or NaN/NA is skipped. Nothing is rendered when
    every score is missing.
    """
    def _available(score):
        return score is not None and not pd.isna(score)

    def _whole(score):
        # Numeric columns arrive as floats; show "85", not "85.0".
        try:
            return f"{float(score):.0f}"
        except (TypeError, ValueError):
            return str(score)

    pills = []
    if _available(tmdb):
        pills.append(f'<span class="score-pill sp-tmdb">⭐ {tmdb:.1f}</span>')
    if _available(imdb):
        pills.append(f'<span class="score-pill sp-imdb">🎬 IMDb {imdb:.1f}</span>')
    if _available(rt):
        pills.append(f'<span class="score-pill sp-rt">πŸ… {_whole(rt)}%</span>')
    if _available(mc):
        pills.append(f'<span class="score-pill sp-mc">🎯 MC {_whole(mc)}</span>')

    if not pills:
        return
    st.markdown("".join(pills), unsafe_allow_html=True)
def poster_gallery(df, title_col="title", year_col="release_year", rating_col="vote_average",
                   poster_col="poster_url", badge_col=None, badge_label="", max_cards=20,
                   extra_col=None, extra_label=""):
    """Render a wrapping grid of poster cards for the first rows of *df*.

    Args:
        df: Source rows; only the first *max_cards* are rendered.
        title_col: Column holding the title (truncated to 40 characters).
        year_col: Column holding the year shown in the meta line.
        rating_col: Column holding the rating; shown only for real numbers.
        poster_col: Column holding the poster image URL.
        badge_col: Optional column rendered as ``"<badge_label> #<value>"``.
        badge_label: Text placed before the badge value.
        max_cards: Maximum number of cards.
        extra_col: Optional column rendered as a second badge when truthy.
        extra_label: Text placed before the extra badge value.

    Missing cells (None, NaN/NA, empty string or the string ``"None"``) are
    rendered as blanks, or as the placeholder image for posters. Text taken
    from the data is HTML-escaped before being placed into the markup.
    """
    import html  # stdlib; local import keeps this helper self-contained

    placeholder = "https://via.placeholder.com/150x220/181818/555555?text=No+Image"

    def _missing(value):
        """True for None, NaN/NA, '' and the literal string 'None'."""
        if value is None:
            return True
        if isinstance(value, str):
            return value in ("", "None")
        try:
            return bool(pd.isna(value))
        except (TypeError, ValueError):
            return False  # array-likes and other non-scalars count as present

    def _cell_text(value):
        """Escaped display text; integral floats lose their trailing '.0'."""
        if _missing(value):
            return ""
        if isinstance(value, float):
            return str(int(value)) if value.is_integer() else f"{value:.1f}"
        return html.escape(str(value))

    cards = []
    for row in df.head(max_cards).to_dict("records"):
        raw_poster = row.get(poster_col)
        # NaN is truthy, so test for missing data explicitly.
        poster = placeholder if _missing(raw_poster) else html.escape(str(raw_poster), quote=True)

        raw_title = row.get(title_col, "")
        title = "" if _missing(raw_title) else html.escape(str(raw_title)[:40], quote=True)
        year = _cell_text(row.get(year_col, ""))

        rating = row.get(rating_col, "")
        is_number = isinstance(rating, (int, float)) and not pd.isna(rating)
        rating_str = f"⭐ {rating:.1f}" if is_number else ""

        badge_html = ""
        if badge_col and not _missing(row.get(badge_col)):
            badge_html = (f'<span class="pc-badge badge-rank">'
                          f'{badge_label} #{_cell_text(row[badge_col])}</span>')

        extra_html = ""
        extra_val = row.get(extra_col) if extra_col else None
        if not _missing(extra_val) and extra_val:
            extra_html = (f'<span class="pc-badge badge-trend">'
                          f'{extra_label} {_cell_text(extra_val)}</span>')

        cards.append(f"""
<div class="poster-card">
<img src="{poster}" alt="{title}" onerror="this.src='{placeholder}'"/>
<div class="pc-info">
<div class="pc-title">{title}</div>
<div class="pc-meta">{year} &nbsp; {rating_str}</div>
{badge_html}{extra_html}
</div>
</div>""")

    cards_html = '<div class="poster-grid">' + "".join(cards) + "</div>"
    st.markdown(cards_html, unsafe_allow_html=True)
# ─────────────────────────────────────────────────────────────────────────────
# HELPER: safe column selector
# ─────────────────────────────────────────────────────────────────────────────
def safe_cols(df, cols):
    """Filter *cols* down to the names present in ``df.columns``, keeping order."""
    present = []
    for name in cols:
        if name in df.columns:
            present.append(name)
    return present
# ─────────────────────────────────────────────────────────────────────────────
# DATA LOADING
# ─────────────────────────────────────────────────────────────────────────────
BASE = "hf://datasets/ihhereanth/netflix_dataset/"


@st.cache_data(ttl=3600)
def load_data():
    """Load every parquet table under BASE and return them as one tuple.

    Results are cached by Streamlit with ``ttl=3600``. A table that cannot be
    read becomes an empty DataFrame and a warning is shown, so one missing
    file does not abort the load.

    Returns:
        ``(movies, tv, credits, keywords, content_perf, genre_perf,
        lang_summary, top_talent, yoy_trend, franchises, tv_seasons)``.
    """
    def safe_load(filename):
        """Read one parquet file; fall back to pyarrow, then to an empty frame."""
        try:
            return pd.read_parquet(BASE + filename)
        except Exception:
            # Fallback: read through pyarrow directly and convert.
            try:
                import pyarrow.parquet as pq
                table = pq.read_table(BASE + filename)
                return table.to_pandas(strings_to_categorical=False)
            except Exception as e2:
                st.warning(f"⚠️ ΰΉ„ΰΈ‘ΰΉˆΰΈžΰΈš {filename}: {e2}")
                return pd.DataFrame()

    def to_num(df, cols):
        """Coerce the listed columns (where present) to numeric, in place."""
        for c in cols:
            if c in df.columns:
                df[c] = pd.to_numeric(df[c], errors="coerce")
        return df

    def decade_label(years):
        """Map years to labels such as '1990s'; missing years stay missing."""
        decades = (years // 10 * 10).astype("Int64").astype(str) + "s"
        # Without this mask a missing year would become the label "<NA>s".
        return decades.where(years.notna())

    movies = safe_load("movies.parquet")
    tv = safe_load("tv_shows.parquet")
    credits = safe_load("credits.parquet")
    keywords = safe_load("keywords.parquet")

    m_num = ["vote_count","vote_average","runtime_min","budget_usd","revenue_usd","popularity",
             "release_year","release_month","roi","imdb_rating","imdb_votes","rt_score",
             "metacritic_score","audience_engagement_score","profit_usd","omdb_box_office",
             "best_weekly_rank","critic_audience_gap","release_decade"]
    tv_num = ["vote_count","vote_average","popularity","number_of_seasons","number_of_episodes",
              "first_air_year","last_air_year","imdb_rating","imdb_votes","rt_score",
              "metacritic_score","audience_engagement_score","votes_per_episode",
              "total_content_hours","years_on_air","best_weekly_rank","first_air_decade"]
    movies = to_num(movies, m_num)
    tv = to_num(tv, tv_num)

    # Derived columns kept for compatibility with the old pipeline.
    if "release_year" in movies.columns and "decade" not in movies.columns:
        movies["decade"] = decade_label(movies["release_year"])
    if "first_air_year" in tv.columns and "decade" not in tv.columns:
        tv["decade"] = decade_label(tv["first_air_year"])
    if "gender" in credits.columns:
        credits["gender"] = credits["gender"].map({0:"Unknown",1:"Female",2:"Male"}).fillna("Unknown")

    # New analytics tables.
    content_perf = safe_load("content_performance.parquet")
    genre_perf = safe_load("genre_performance.parquet")
    lang_summary = safe_load("language_summary.parquet")
    top_talent = safe_load("top_talent.parquet")
    yoy_trend = safe_load("yoy_trend.parquet")
    franchises = safe_load("franchises.parquet")
    tv_seasons = safe_load("tv_seasons.parquet")

    num_cols_cp = ["vote_average","vote_count","popularity","imdb_rating","imdb_votes",
                   "rt_score","metacritic_score","audience_engagement_score",
                   "budget_usd","revenue_usd","roi","best_weekly_rank"]
    content_perf = to_num(content_perf, num_cols_cp)

    return (movies, tv, credits, keywords,
            content_perf, genre_perf, lang_summary, top_talent, yoy_trend, franchises, tv_seasons)
# Load every dataset behind a spinner; on any failure show the error and stop
# this script run.
with st.spinner("กำΰΈ₯ΰΈ±ΰΈ‡ΰΉ‚ΰΈ«ΰΈ₯ΰΈ”ΰΈ‚ΰΉ‰ΰΈ­ΰΈ‘ΰΈΉΰΈ₯ Netflix..."):
    try:
        _loaded_frames = load_data()
        (
            movies,
            tv,
            credits,
            keywords,
            content_perf,
            genre_perf,
            lang_summary,
            top_talent,
            yoy_trend,
            franchises,
            tv_seasons,
        ) = _loaded_frames
    except Exception as e:
        st.error(f"ΰΉ‚ΰΈ«ΰΈ₯ΰΈ”ΰΈ‚ΰΉ‰ΰΈ­ΰΈ‘ΰΈΉΰΈ₯ΰΉ„ΰΈ‘ΰΉˆΰΈͺΰΈ³ΰΉ€ΰΈ£ΰΉ‡ΰΈˆ: {e}")
        st.stop()
# ─────────────────────────────────────────────────────────────────────────────
# SIDEBAR FILTERS
# ─────────────────────────────────────────────────────────────────────────────
with st.sidebar:
    # Branding header.
    st.markdown("""
<div style="text-align:center; padding:14px 0 22px 0;">
<div style="font-family:'Bebas Neue',sans-serif;font-size:30px;letter-spacing:3px;color:#E50914;">
NETFLIX
</div>
<div style="font-size:9px;letter-spacing:2.5px;color:#555;text-transform:uppercase;">
Analytics Dashboard 2.0
</div>
</div>""", unsafe_allow_html=True)
    st.markdown("### πŸŽ›οΈ Filters")

    # Media type
    media_type_sel = st.radio("πŸ“½οΈ Media Type", ["All","Movies Only","TV Only"], horizontal=True)

    # Genres from movies. Parquet list columns may arrive as numpy arrays
    # rather than Python lists, so accept any of the common containers.
    all_genres = []
    if "genres" in movies.columns:
        genre_names = set()
        for genres in movies["genres"].dropna():
            if isinstance(genres, (list, tuple, set, np.ndarray)):
                genre_names.update(g for g in genres if isinstance(g, str) and g)
        all_genres = sorted(genre_names)
    selected_genres = st.multiselect("🎭 Genre", all_genres, default=[])

    # Year range. Fall back to 1990-2024 when no usable year exists.
    y_min, y_max = 1990, 2024
    if "release_year" in movies.columns:
        known_years = movies["release_year"].dropna()
        if not known_years.empty:
            y_min, y_max = int(known_years.min()), int(known_years.max())
    if y_min >= y_max:
        y_max = y_min + 1  # the slider needs a non-empty range
    # Keep the default lower handle (2010) inside the available range.
    default_from = min(max(2010, y_min), y_max)
    year_range = st.slider("πŸ“… Release Year (Movies)", y_min, y_max, (default_from, y_max))

    # Language
    all_langs = []
    if "original_language" in movies.columns:
        all_langs = sorted(movies["original_language"].dropna().unique().tolist())
    selected_langs = st.multiselect("🌍 Language", all_langs, default=[])

    # Rating range
    rating_min, rating_max = st.slider("⭐ Rating Range", 0.0, 10.0, (5.0, 10.0), step=0.5)

    # Min votes
    min_votes = st.slider("πŸ—³οΈ Min Vote Count", 0, 5000, 100, step=50)

    st.markdown("---")
    st.markdown("### πŸ”¬ Advanced Filters")
    trending_only = st.checkbox("πŸ”₯ Trending Top 50 Only")
    awards_only = st.checkbox("πŸ† Awards Titles Only")
    has_poster = st.checkbox("πŸ–ΌοΈ With Poster Image", value=False)
    if "budget_tier" in movies.columns:
        budget_tiers = ["All"] + sorted(movies["budget_tier"].dropna().unique().tolist())
        sel_budget = st.selectbox("πŸ’° Budget Tier", budget_tiers)
    else:
        sel_budget = "All"

    st.markdown("---")
    st.markdown("""
<div style="font-size:10px;color:#333;text-align:center;">
TMDB β†’ Airflow β†’ PySpark β†’ HuggingFace<br>
Pipeline updated weekly
</div>""", unsafe_allow_html=True)
# ─────────────────────────────────────────────────────────────────────────────
# APPLY FILTERS
# ─────────────────────────────────────────────────────────────────────────────
def apply_movie_filters(df):
    """Return the rows of *df* that match the current sidebar selections.

    Reads the module-level sidebar values (``selected_genres``, ``year_range``,
    ``selected_langs``, ``rating_min``/``rating_max``, ``min_votes``,
    ``trending_only``, ``awards_only``, ``has_poster``, ``sel_budget``).
    A filter is skipped when its column is absent from *df*.
    """
    if df.empty:
        return df

    if selected_genres and "genres" in df.columns:
        wanted = set(selected_genres)

        def has_wanted_genre(genres):
            # Missing cells (None/NaN) and scalars never match; list-like
            # cells match when they share at least one selected genre.
            if not isinstance(genres, (list, tuple, set, np.ndarray)):
                return False
            return not wanted.isdisjoint(genres)

        df = df[df["genres"].apply(has_wanted_genre)]
    if "release_year" in df.columns:
        df = df[df["release_year"].between(*year_range)]
    if selected_langs and "original_language" in df.columns:
        df = df[df["original_language"].isin(selected_langs)]
    if "vote_average" in df.columns:
        df = df[df["vote_average"].between(rating_min, rating_max)]
    if "vote_count" in df.columns:
        df = df[df["vote_count"] >= min_votes]
    if trending_only and "is_trending_top50" in df.columns:
        df = df[df["is_trending_top50"].eq(True)]
    if awards_only and "has_awards" in df.columns:
        df = df[df["has_awards"].eq(True)]
    if has_poster and "poster_url" in df.columns:
        df = df[df["poster_url"].notna() & (df["poster_url"] != "None")]
    if sel_budget != "All" and "budget_tier" in df.columns:
        df = df[df["budget_tier"] == sel_budget]
    return df
def apply_tv_filters(df):
    """Return the rows of *df* (TV shows) matching the sidebar selections.

    Applies the language, rating-range, minimum-vote, trending, awards and
    poster filters, each only when its column exists in *df*. The values come
    from the module-level sidebar widgets.
    """
    if df.empty:
        return df

    cols = df.columns  # row filtering never changes the column set

    if selected_langs and "original_language" in cols:
        df = df.loc[df["original_language"].isin(selected_langs)]

    if "vote_average" in cols:
        df = df.loc[df["vote_average"].between(rating_min, rating_max)]

    if "vote_count" in cols:
        df = df.loc[df["vote_count"] >= min_votes]

    if trending_only and "is_trending_top50" in cols:
        df = df.loc[df["is_trending_top50"] == True]

    if awards_only and "has_awards" in cols:
        df = df.loc[df["has_awards"] == True]

    if has_poster and "poster_url" in cols:
        poster_ok = df["poster_url"].notna() & (df["poster_url"] != "None")
        df = df.loc[poster_ok]

    return df
# Section toggles from the media-type radio: "All" enables both.
show_tv = media_type_sel != "Movies Only"
show_movies = media_type_sel != "TV Only"

# Filter copies so the loaded frames themselves are never modified.
tv_f = apply_tv_filters(tv.copy())
movies_f = apply_movie_filters(movies.copy())
# ─────────────────────────────────────────────────────────────────────────────
# HERO HEADER
# ─────────────────────────────────────────────────────────────────────────────
# Page title block followed by a horizontal rule.
_HERO_HTML = """
<div style="padding:24px 0 12px 0;">
<div class="hero-title">NETFLIX <span>ANALYTICS</span> <span style="color:#6C5CE7;">2.0</span></div>
<div class="hero-sub">Content Intelligence Dashboard Β· TMDB + OMDB + Trending Pipeline</div>
</div>"""
st.markdown(_HERO_HTML, unsafe_allow_html=True)
st.markdown("---")
# ─────────────────────────────────────────────────────────────────────────────
# MAIN TABS
# ─────────────────────────────────────────────────────────────────────────────
# Tab labels in display order; the unpacking below must stay in the same order.
_TAB_LABELS = [
    "🏠 Overview",
    "πŸ” Explorer",
    "πŸ“Š Engagement",
    "🎭 Genres",
    "🎬 Movies",
    "πŸ“Ί TV Shows",
    "🌟 Talent",
    "πŸ“ˆ Trends",
]
(
    tab_overview,
    tab_explorer,
    tab_engage,
    tab_genre,
    tab_movies,
    tab_tv,
    tab_talent,
    tab_trends,
) = st.tabs(_TAB_LABELS)
# ══════════════════════════════════════════════════════════════════════════════
# TAB 1: OVERVIEW
# ══════════════════════════════════════════════════════════════════════════════
with tab_overview:
    sec("πŸ“Š KPI OVERVIEW")

    def _ov_mean(frame, col):
        """Mean of ``frame[col]``; 0 when the column is absent or has no numbers."""
        if col not in frame.columns or frame.empty:
            return 0
        value = frame[col].mean()
        return 0 if pd.isna(value) else value  # an all-NaN column would print "nan"

    avg_m = _ov_mean(movies_f, "vote_average")
    avg_tv = _ov_mean(tv_f, "vote_average")
    avg_eng = _ov_mean(movies_f, "audience_engagement_score")
    total_r = movies_f["revenue_usd"].sum() if "revenue_usd" in movies_f.columns else 0
    total_b = movies_f["budget_usd"].sum() if "budget_usd" in movies_f.columns else 0
    n_trend = int(movies_f["is_trending_top50"].sum()) if "is_trending_top50" in movies_f.columns else 0
    n_award = int(movies_f["has_awards"].sum()) if "has_awards" in movies_f.columns else 0
    n_oscar = int(movies_f["won_oscar"].sum()) if "won_oscar" in movies_f.columns else 0

    c1, c2, c3, c4, c5, c6, c7, c8 = st.columns(8)
    with c1:
        kpi("🎬", f"{len(movies_f):,}", "Movies", accent=NF_RED)
    with c2:
        kpi("πŸ“Ί", f"{len(tv_f):,}", "TV Shows", accent=PURPLE)
    with c3:
        kpi("⭐", f"{avg_m:.2f}", "Avg Movie Rating",
            delta=f"TV: {avg_tv:.2f}",
            dt="pos" if avg_m >= avg_tv else "neg", accent=GOLD)
    with c4:
        kpi("πŸ”₯", f"{avg_eng:.1f}", "Avg Engagement", accent=NF_RED)
    with c5:
        kpi("πŸ’°", f"${total_r/1e9:.1f}B", "Total Revenue",
            delta=f"ROI {total_r/max(total_b,1):.1f}x" if total_b > 0 else None,
            dt="pos", accent=GREEN)
    with c6:
        kpi("πŸ“ˆ", f"{n_trend:,}", "Trending Movies", accent=TEAL)
    with c7:
        kpi("πŸ†", f"{n_award:,}", "Award Winners", accent=GOLD)
    with c8:
        kpi("🎭", f"{n_oscar:,}", "Oscar Winners", accent=GOLD)
    st.markdown("---")

    # Trending gallery with posters
    sec("πŸ”₯ TRENDING NOW β€” TOP PICKS WITH POSTERS")
    col_tl, col_tr = st.columns([3, 1], gap="large")
    with col_tl:
        if "best_weekly_rank" in movies_f.columns:
            trending_movies = (movies_f[movies_f["best_weekly_rank"].notna()]
                               .sort_values("best_weekly_rank")
                               .head(20))
            if not trending_movies.empty:
                st.markdown("**🎬 Trending Movies (Weekly Rank)**")
                poster_gallery(trending_movies, title_col="title", year_col="release_year",
                               rating_col="vote_average", poster_col="poster_url",
                               badge_col="best_weekly_rank", badge_label="Rank", max_cards=12)
            else:
                st.markdown("**🎬 Top Rated Movies**")
                # Each step is guarded so a missing column cannot raise KeyError.
                top_m = movies_f
                if "vote_count" in top_m.columns:
                    top_m = top_m[top_m["vote_count"] >= 200]
                if "vote_average" in top_m.columns:
                    top_m = top_m.nlargest(12, "vote_average")
                poster_gallery(top_m.reset_index(drop=True), max_cards=12)
        else:
            st.markdown("**🎬 Top Rated Movies**")
            top_m = (movies_f.nlargest(12, "vote_average") if "vote_average" in movies_f.columns
                     else movies_f.head(12))
            poster_gallery(top_m, max_cards=12)

    with col_tr:
        sec("πŸ“Œ HIGHLIGHTS")
        has_rating = "vote_average" in movies_f.columns

        if not movies_f.empty and has_rating:
            # Require 200+ votes for "best rated" when vote counts are available.
            rated = movies_f
            if "vote_count" in movies_f.columns:
                rated = movies_f[movies_f["vote_count"] >= 200]
            best_rows = rated.nlargest(1, "vote_average")
            if not best_rows.empty:
                best = best_rows.iloc[0]
                insight(f"πŸ₯‡ Best Rated: <strong>{best.get('title','')}</strong><br>⭐ {best.get('vote_average',0):.1f}/10", "red")

        if "audience_engagement_score" in movies_f.columns and not movies_f.empty:
            top_eng = movies_f.nlargest(1, "audience_engagement_score")
            if not top_eng.empty and pd.notna(top_eng.iloc[0]["audience_engagement_score"]):
                te = top_eng.iloc[0]
                insight(f"πŸ”₯ Highest Engagement: <strong>{te.get('title','')}</strong><br>Score: {te.get('audience_engagement_score',0):.1f}", "teal")

        if "won_oscar" in movies_f.columns and has_rating:
            oscars = movies_f[movies_f["won_oscar"] == True]
            top_oscar = oscars.nlargest(1, "vote_average") if not oscars.empty else oscars
            if not top_oscar.empty:
                o = top_oscar.iloc[0]
                insight(f"πŸ† Oscar Winner: <strong>{o.get('title','')}</strong><br>⭐ {o.get('vote_average',0):.1f}", "gold")

        if "imdb_votes" in movies_f.columns and not movies_f.empty:
            most_voted = movies_f.nlargest(1, "imdb_votes")
            if not most_voted.empty:
                mv = most_voted.iloc[0]
                votes = mv.get("imdb_votes", 0)
                if pd.notna(votes):  # skip rather than print "nanM votes"
                    insight(f"πŸ‘₯ Most Watched (IMDb Votes):<br><strong>{mv.get('title','')}</strong><br>{votes/1e6:.1f}M votes", "green")
    st.markdown("---")

    # Trending TV gallery. The closing rule is drawn only with the gallery so
    # that two rules never stack when the gallery is hidden.
    if show_tv and "best_weekly_rank" in tv_f.columns:
        trending_tv = (tv_f[tv_f["best_weekly_rank"].notna()]
                       .sort_values("best_weekly_rank").head(12))
        if not trending_tv.empty:
            st.markdown("**πŸ“Ί Trending TV Shows**")
            poster_gallery(trending_tv, title_col="name", year_col="first_air_year",
                           rating_col="vote_average", poster_col="poster_url",
                           badge_col="best_weekly_rank", badge_label="Rank", max_cards=12)
            st.markdown("---")

    # Overview charts
    col_ov1, col_ov2 = st.columns(2, gap="large")
    with col_ov1:
        sec("πŸ“… CONTENT OVER TIME")
        # Use the pre-aggregated table only when every needed column exists.
        yoy_ready = (not yoy_trend.empty
                     and {"year", "media_type", "title_count"}.issubset(yoy_trend.columns))
        if yoy_ready:
            yoy_m = yoy_trend[yoy_trend["media_type"] == "movie"].sort_values("year")
            yoy_tv = yoy_trend[yoy_trend["media_type"] == "tv"].sort_values("year")
            fig = go.Figure()
            if not yoy_m.empty:
                fig.add_trace(go.Scatter(x=yoy_m["year"], y=yoy_m["title_count"],
                                         name="Movies", mode="lines+markers", fill="tozeroy",
                                         line=dict(color=NF_RED, width=2),
                                         fillcolor=hex_rgba(NF_RED, 0.12)))
            if not yoy_tv.empty:
                fig.add_trace(go.Scatter(x=yoy_tv["year"], y=yoy_tv["title_count"],
                                         name="TV Shows", mode="lines+markers", fill="tozeroy",
                                         line=dict(color=PURPLE, width=2),
                                         fillcolor=hex_rgba(PURPLE, 0.12)))
            fig.update_layout(xaxis_title="Year", yaxis_title="Titles Added")
            apply_theme(fig)
            st.plotly_chart(fig, use_container_width=True)
        elif "release_year" in movies_f.columns:
            by_year = movies_f.groupby("release_year").size().reset_index(name="count")
            fig = px.area(by_year, x="release_year", y="count",
                          color_discrete_sequence=[NF_RED])
            apply_theme(fig)
            st.plotly_chart(fig, use_container_width=True)

    with col_ov2:
        sec("πŸ“Š RATING DISTRIBUTION")
        fig2 = go.Figure()
        if show_movies and "vote_average" in movies_f.columns and not movies_f.empty:
            fig2.add_trace(go.Histogram(x=movies_f["vote_average"].dropna(),
                                        name="Movies", nbinsx=25, marker_color=NF_RED,
                                        opacity=0.7, histnorm="percent"))
        if show_tv and "vote_average" in tv_f.columns and not tv_f.empty:
            fig2.add_trace(go.Histogram(x=tv_f["vote_average"].dropna(),
                                        name="TV Shows", nbinsx=25, marker_color=PURPLE,
                                        opacity=0.7, histnorm="percent"))
        fig2.update_layout(barmode="overlay", xaxis_title="Rating", yaxis_title="% of Titles")
        apply_theme(fig2)
        st.plotly_chart(fig2, use_container_width=True)
# ══════════════════════════════════════════════════════════════════════════════
# TAB 2: CONTENT EXPLORER
# ══════════════════════════════════════════════════════════════════════════════
with tab_explorer:
    import html as _html  # stdlib; escapes dataset text placed into raw HTML

    def _ex_truthy(value):
        """True when *value* carries real, truthy data.

        None, NaN/NA, '' and the literal string 'None' count as absent. This
        matters because NaN is truthy in a plain ``if value:`` test.
        """
        if value is None:
            return False
        if isinstance(value, str):
            return value not in ("", "None")
        if isinstance(value, (list, tuple, set, np.ndarray)):
            return len(value) > 0
        try:
            if pd.isna(value):
                return False
        except (TypeError, ValueError):
            pass
        return bool(value)

    sec("πŸ” CONTENT EXPLORER")
    st.markdown("ค้นหาแΰΈ₯ΰΈ°ΰΈͺำรวจ Content ΰΈžΰΈ£ΰΉ‰ΰΈ­ΰΈ‘ΰΈ£ΰΈΉΰΈ›ΰΈ›ΰΈ แΰΈ₯ΰΈ°ΰΈ„ΰΈ°ΰΉΰΈ™ΰΈ™ΰΈˆΰΈ²ΰΈΰΈ—ΰΈΈΰΈΰΉΰΈ«ΰΈ₯ΰΉˆΰΈ‡")
    col_ex1, col_ex2, col_ex3, col_ex4 = st.columns([2,1,1,1])
    with col_ex1:
        search_q = st.text_input("πŸ”Ž ΰΈ„ΰΉ‰ΰΈ™ΰΈ«ΰΈ²ΰΈŠΰΈ·ΰΉˆΰΈ­", placeholder="ΰΈžΰΈ΄ΰΈ‘ΰΈžΰΉŒΰΈŠΰΈ·ΰΉˆΰΈ­ΰΈ«ΰΈ™ΰΈ±ΰΈ‡ / ΰΈ‹ΰΈ΅ΰΈ£ΰΈ΅ΰΈͺ์...")
    with col_ex2:
        ex_type = st.selectbox("Type", ["Movies","TV Shows"])
    with col_ex3:
        ex_sort = st.selectbox("Sort by", [
            "vote_average","audience_engagement_score","imdb_votes",
            "popularity","release_year","revenue_usd","rt_score"
        ])
    with col_ex4:
        ex_limit = st.selectbox("Show", [24, 48, 96], index=0)

    # Select working df
    ex_df = movies_f.copy() if ex_type == "Movies" else tv_f.copy()
    name_col = "title" if ex_type == "Movies" else "name"
    year_col = "release_year" if ex_type == "Movies" else "first_air_year"

    if search_q and name_col in ex_df.columns:
        # Literal substring match: user input such as "(" must not be parsed
        # as a regular expression.
        name_text = ex_df[name_col].fillna("").astype(str)
        ex_df = ex_df[name_text.str.contains(search_q, case=False, na=False, regex=False)]

    # Only sort by columns that exist
    if ex_sort in ex_df.columns:
        ex_df = ex_df.sort_values(ex_sort, ascending=False)
    st.caption(f"พบ {len(ex_df):,} ราฒการ")

    # View selector; the labels are compared below, so define each one once.
    view_grid = "πŸ–ΌοΈ Poster Grid"
    view_list = "πŸ“‹ List View"
    view_detail = "πŸ—‚οΈ Detail Card"
    view_mode = st.radio("πŸ“ View", [view_grid, view_list, view_detail], horizontal=True)

    if view_mode == view_grid:
        poster_gallery(ex_df.reset_index(drop=True), title_col=name_col, year_col=year_col,
                       rating_col="vote_average", poster_col="poster_url", max_cards=ex_limit)
    elif view_mode == view_list:
        show_cols = safe_cols(ex_df, [name_col, year_col,
                                      "vote_average","imdb_rating","rt_score","metacritic_score",
                                      "audience_engagement_score","imdb_votes","popularity",
                                      "original_language","rating_bucket","imdb_votes_tier"])
        st.dataframe(ex_df[show_cols].head(ex_limit).reset_index(drop=True),
                     use_container_width=True, height=500)
    else:  # Detail Card
        title_options = (ex_df[name_col].dropna().head(200).tolist()
                         if name_col in ex_df.columns else [])
        detail_sel = st.selectbox("ΰΉ€ΰΈ₯ือก Title", title_options)
        if detail_sel and name_col in ex_df.columns:
            row = ex_df[ex_df[name_col] == detail_sel].iloc[0]
            col_dc1, col_dc2 = st.columns([1,3], gap="large")

            with col_dc1:
                poster = row.get("poster_url", "")
                if _ex_truthy(poster):
                    st.image(poster, width=220)
                else:
                    st.markdown('<div style="width:220px;height:320px;background:#1a1a1a;border-radius:10px;display:flex;align-items:center;justify-content:center;color:#555;font-size:12px;">No Image</div>', unsafe_allow_html=True)
                backdrop = row.get("backdrop_url")
                if _ex_truthy(backdrop):
                    with st.expander("πŸ–ΌοΈ Backdrop"):
                        st.image(backdrop, use_container_width=True)

            with col_dc2:
                title_display = row.get(name_col, "")
                title_text = _html.escape(str(title_display)) if _ex_truthy(title_display) else ""

                # Numeric year columns are floats; show "2019", not "2019.0".
                yr = row.get(year_col, "")
                if yr is None or (not isinstance(yr, str) and pd.isna(yr)):
                    yr_text = ""
                elif isinstance(yr, float) and yr.is_integer():
                    yr_text = str(int(yr))
                else:
                    yr_text = _html.escape(str(yr))

                lang = row.get("original_language", "")
                lang_text = _html.escape(str(lang).upper()) if _ex_truthy(lang) else ""

                st.markdown(f'<div class="detail-title">{title_text}</div>', unsafe_allow_html=True)
                st.markdown(f'<div class="detail-meta">πŸ“… {yr_text} &nbsp;|&nbsp; 🌍 {lang_text}</div>', unsafe_allow_html=True)
                st.markdown("**Multi-Source Scores:**")
                score_pills(
                    tmdb=row.get("vote_average"),
                    imdb=row.get("imdb_rating"),
                    rt=row.get("rt_score"),
                    mc=row.get("metacritic_score")
                )

                # Tags. Genre cells may be lists or numpy arrays; anything
                # else (missing or scalar) yields no genre tags.
                tags = []
                genres = row.get("genres")
                if isinstance(genres, (list, tuple, np.ndarray)):
                    for g in list(genres)[:5]:
                        tags.append(f'<span class="tag tag-genre">{_html.escape(str(g))}</span>')
                for status_field in ["status","renewal_signal","critic_audience_verdict"]:
                    status_val = row.get(status_field)
                    if _ex_truthy(status_val):
                        tags.append(f'<span class="tag tag-status">{_html.escape(str(status_val))}</span>')
                if _ex_truthy(row.get("has_awards")):
                    tags.append('<span class="tag tag-award">πŸ† Award Winner</span>')
                if _ex_truthy(row.get("won_oscar")):
                    tags.append('<span class="tag tag-award">🎭 Oscar Winner</span>')
                if tags:
                    st.markdown("".join(tags), unsafe_allow_html=True)

                # Overview
                overview = row.get("overview", "")
                if _ex_truthy(overview):
                    st.markdown("---")
                    st.markdown("**πŸ“ Overview**")
                    st.markdown(f'<div style="color:{TEXT_MUT};font-size:13px;line-height:1.6;">{_html.escape(str(overview))}</div>',
                                unsafe_allow_html=True)

                # Stats
                st.markdown("---")
                col_s1, col_s2, col_s3, col_s4 = st.columns(4)
                with col_s1:
                    votes = row.get("vote_count", 0) or 0
                    st.metric("TMDB Votes", f"{int(votes):,}" if pd.notna(votes) else "N/A")
                with col_s2:
                    iv = row.get("imdb_votes", 0) or 0
                    st.metric("IMDb Votes", f"{int(iv):,}" if pd.notna(iv) else "N/A")
                with col_s3:
                    eng = row.get("audience_engagement_score")
                    st.metric("Engagement", f"{eng:.1f}" if pd.notna(eng) else "N/A")
                with col_s4:
                    pop = row.get("popularity")
                    st.metric("Popularity", f"{pop:.0f}" if pd.notna(pop) else "N/A")

                # Movie-specific / TV-specific stats
                col_m1, col_m2, col_m3 = st.columns(3)
                if ex_type == "Movies":
                    with col_m1:
                        rev = row.get("revenue_usd", 0) or 0
                        st.metric("Revenue", f"${rev/1e6:.0f}M" if rev > 0 else "N/A")
                    with col_m2:
                        roi = row.get("roi")
                        st.metric("ROI", f"{roi:.1f}x" if pd.notna(roi) else "N/A")
                    with col_m3:
                        rt = row.get("runtime_min")
                        st.metric("Runtime", f"{int(rt)} min" if pd.notna(rt) else "N/A")
                else:
                    with col_m1:
                        s = row.get("number_of_seasons")
                        st.metric("Seasons", f"{int(s)}" if pd.notna(s) else "N/A")
                    with col_m2:
                        e = row.get("number_of_episodes")
                        st.metric("Episodes", f"{int(e)}" if pd.notna(e) else "N/A")
                    with col_m3:
                        h = row.get("total_content_hours")
                        st.metric("Total Hours", f"{h:.0f}h" if pd.notna(h) else "N/A")

                # Trailer
                yk = row.get("trailer_youtube_key")
                if _ex_truthy(yk):
                    st.markdown("---")
                    st.markdown("**🎬 Trailer**")
                    st.video(f"https://www.youtube.com/watch?v={yk}")
# ══════════════════════════════════════════════════════════════════════════════
# TAB 3: ENGAGEMENT & VIEWERSHIP
# ══════════════════════════════════════════════════════════════════════════════
with tab_engage:
    # Tab 3: engagement-score distribution, IMDb-vote tiers, an
    # engagement-vs-votes scatter, and a critic-vs-audience comparison.
    sec("πŸ“Š ENGAGEMENT & VIEWERSHIP ANALYTICS")
    col_e1, col_e2 = st.columns(2, gap="large")

    with col_e1:
        st.markdown("**Audience Engagement Score Distribution**")
        # Each trace is gated on its own frame, so TV data still renders when
        # the movie frame is empty or lacks the column (and vice versa).
        fig_eng = go.Figure()
        if (show_movies
                and "audience_engagement_score" in movies_f.columns
                and not movies_f.empty):
            fig_eng.add_trace(go.Histogram(
                x=movies_f["audience_engagement_score"].dropna(),
                name="Movies", nbinsx=30, marker_color=NF_RED,
                opacity=0.75, histnorm="percent"))
        if (show_tv
                and "audience_engagement_score" in tv_f.columns
                and not tv_f.empty):
            fig_eng.add_trace(go.Histogram(
                x=tv_f["audience_engagement_score"].dropna(),
                name="TV Shows", nbinsx=30, marker_color=PURPLE,
                opacity=0.75, histnorm="percent"))
        # Only draw the chart when at least one histogram was added.
        if fig_eng.data:
            fig_eng.update_layout(barmode="overlay",
                                  xaxis_title="Engagement Score", yaxis_title="%")
            apply_theme(fig_eng, 340)
            st.plotly_chart(fig_eng, use_container_width=True)

    with col_e2:
        st.markdown("**IMDb Votes Tier (Viewership Proxy)**")
        if "imdb_votes_tier" in movies_f.columns and not movies_f.empty:
            tier_order = ["Mega (1M+)","Hit (500K+)","Popular (100K+)","Moderate (10K+)","Niche (<10K)"]
            # reindex keeps every tier on the axis, with 0 for tiers that have no rows.
            tier_counts = (movies_f["imdb_votes_tier"].value_counts()
                           .reindex(tier_order, fill_value=0).reset_index())
            tier_counts.columns = ["tier","count"]
            fig_tier = px.bar(tier_counts, x="count", y="tier", orientation="h",
                              color="count",
                              color_continuous_scale=["#1a0505", NF_RED],
                              text="count", labels={"count":"Movies","tier":""})
            fig_tier.update_traces(texttemplate="%{text:,}", textposition="outside")
            # Reversed category array puts the first tier at the top of the horizontal bar chart.
            fig_tier.update_layout(yaxis={"categoryorder":"array","categoryarray":tier_order[::-1]},
                                   coloraxis_showscale=False)
            apply_theme(fig_tier, 340)
            st.plotly_chart(fig_tier, use_container_width=True)

    st.markdown("---")

    # Scatter: engagement score vs IMDb votes, next to textual insights.
    col_e3, col_e4 = st.columns([2,1], gap="large")
    with col_e3:
        st.markdown("**Engagement Score vs IMDb Votes (Viewership)**")
        if all(c in movies_f.columns for c in ["audience_engagement_score","imdb_votes","title"]):
            sc_df = movies_f[
                movies_f["imdb_votes"].notna() & movies_f["audience_engagement_score"].notna()
            ].copy()
            if not sc_df.empty:
                # Bubble size: vote_count bounded to [100, 50000]; constant 100 when the column is absent.
                if "vote_count" in sc_df.columns:
                    sc_df["size_norm"] = sc_df["vote_count"].fillna(100).clip(100, 50000)
                else:
                    sc_df["size_norm"] = 100
                fig_sc = px.scatter(
                    sc_df, x="imdb_votes", y="audience_engagement_score",
                    color="vote_average" if "vote_average" in sc_df.columns else "imdb_votes",
                    hover_name="title",
                    size="size_norm", size_max=25,
                    log_x=True,
                    color_continuous_scale=["#6C1F1F", NF_RED, GOLD, GREEN],
                    labels={"imdb_votes":"IMDb Votes (log)","audience_engagement_score":"Engagement Score"},
                )
                apply_theme(fig_sc, 400)
                st.plotly_chart(fig_sc, use_container_width=True)

    with col_e4:
        st.markdown("**πŸ“Œ Engagement Insights**")
        if "audience_engagement_score" in movies_f.columns and not movies_f.empty:
            avg_eng = movies_f["audience_engagement_score"].mean()
            top10 = movies_f.nlargest(10,"audience_engagement_score")["audience_engagement_score"].mean()
            insight(f"Avg Engagement Score: <strong>{avg_eng:.1f}/10</strong><br>Top 10 avg: <strong>{top10:.1f}</strong>", "red")
        if "imdb_votes" in movies_f.columns and not movies_f.empty:
            # Number of movies with at least one million IMDb votes.
            mega = (movies_f["imdb_votes"] >= 1_000_000).sum()
            insight(f"Movies ΰΈ£ΰΈ°ΰΈ”ΰΈ±ΰΈš Mega (1M+ IMDb Votes): <strong>{mega} ΰΉ€ΰΈ£ΰΈ·ΰΉˆΰΈ­ΰΈ‡</strong><br>= ΰΈΰΈ²ΰΈ™ΰΈœΰΈΉΰΉ‰ΰΈŠΰΈ‘ΰΈ‚ΰΈ™ΰΈ²ΰΈ”ΰΉƒΰΈ«ΰΈΰΉˆΰΈ‘ΰΈ²ΰΈ", "teal")
        if "critic_audience_verdict" in movies_f.columns and not movies_f.empty:
            verdict_vc = movies_f["critic_audience_verdict"].value_counts()
            for v, cnt in verdict_vc.items():
                insight(f"<strong>{v}:</strong> {cnt:,} ΰΉ€ΰΈ£ΰΈ·ΰΉˆΰΈ­ΰΈ‡")

    # Critic vs audience divergence.
    st.markdown("---")
    sec("🎯 CRITIC VS AUDIENCE DIVERGENCE")
    col_div1, col_div2 = st.columns([3,1], gap="large")
    with col_div1:
        if all(c in movies_f.columns for c in ["rt_score","vote_average","title"]):
            div_df = movies_f[
                movies_f["rt_score"].notna() & movies_f["vote_average"].notna()
            ]
            if not div_df.empty:
                # Cap the plot at 500 points. The fixed seed keeps the sample
                # (and therefore the chart) stable across Streamlit reruns.
                div_sample = div_df.sample(n=min(500, len(div_df)), random_state=42)
                fig_div = px.scatter(
                    div_sample,
                    x="vote_average", y="rt_score",
                    color="critic_audience_verdict" if "critic_audience_verdict" in div_sample.columns else "vote_average",
                    hover_name="title",
                    color_discrete_map={"Critics Favorite":PURPLE,"Audience Favorite":NF_RED,"Consensus":TEAL},
                    labels={"vote_average":"TMDB Rating","rt_score":"Rotten Tomatoes (%)"},
                )
                # Dashed diagonal from (0, 0) to (10, 100): the line where the
                # RT percentage equals ten times the TMDB rating.
                fig_div.add_shape(type="line", x0=0, y0=0, x1=10, y1=100,
                                  line=dict(color=NF_BORDER, dash="dash", width=1))
                apply_theme(fig_div, 380)
                st.plotly_chart(fig_div, use_container_width=True)

    with col_div2:
        if "critic_audience_verdict" in movies_f.columns and not movies_f.empty:
            vc = movies_f["critic_audience_verdict"].value_counts()
            for v, cnt in vc.items():
                # Card style and icon are picked by substring match on the verdict label.
                label = str(v)
                if "Critics" in label:
                    style, icon = "teal", "🎬"
                elif "Audience" in label:
                    style, icon = "red", "🍿"
                else:
                    style, icon = "", "βœ…"
                insight(f"{icon} <strong>{v}</strong>: {cnt:,} movies", style)
# ══════════════════════════════════════════════════════════════════════════════
# TAB 4: GENRE INTELLIGENCE
# ══════════════════════════════════════════════════════════════════════════════
with tab_genre:
    # Tab 4: genre charts from the pre-aggregated `genre_perf` frame when it
    # has rows, otherwise computed from `movies_f`; then a genre x decade heatmap.
    sec("🎭 GENRE INTELLIGENCE")
    if not genre_perf.empty:
        col_gp1, col_gp2 = st.columns(2, gap="large")
        # Split by media type when the column exists; otherwise treat all rows as movies.
        if "media_type" in genre_perf.columns:
            gp_movies = genre_perf[genre_perf["media_type"]=="movie"].copy()
            gp_tv = genre_perf[genre_perf["media_type"]=="tv"].copy()
        else:
            gp_movies = genre_perf.copy()
            gp_tv = pd.DataFrame()

        with col_gp1:
            st.markdown("**🎬 Movie Genres β€” Viewership (Total IMDb Votes)**")
            if not gp_movies.empty and "total_imdb_votes" in gp_movies.columns:
                top_gm = gp_movies.nlargest(15,"total_imdb_votes")
                fig_gv = px.bar(top_gm, x="total_imdb_votes", y="genre", orientation="h",
                                color="avg_vote_average",
                                color_continuous_scale=["#3D0000",NF_RED,GOLD,GREEN],
                                text="total_imdb_votes",
                                labels={"total_imdb_votes":"Total IMDb Votes","genre":""})
                fig_gv.update_traces(texttemplate="%{text:,.0f}", textposition="outside")
                fig_gv.update_layout(yaxis={"categoryorder":"total ascending"}, coloraxis_showscale=True,
                                     coloraxis_colorbar=dict(title="Avg Rating", len=0.6))
                apply_theme(fig_gv, 450)
                st.plotly_chart(fig_gv, use_container_width=True)

        with col_gp2:
            st.markdown("**πŸ“Ί TV Genres β€” Engagement Score**")
            if not gp_tv.empty and "avg_engagement" in gp_tv.columns:
                top_gt = gp_tv.nlargest(15,"avg_engagement")
                fig_ge = px.bar(top_gt, x="avg_engagement", y="genre", orientation="h",
                                color="avg_engagement",
                                color_continuous_scale=["#1a0040",PURPLE,"#A29BFE"],
                                text="avg_engagement",
                                labels={"avg_engagement":"Avg Engagement Score","genre":""})
                fig_ge.update_traces(texttemplate="%{text:.2f}", textposition="outside")
                fig_ge.update_layout(yaxis={"categoryorder":"total ascending"}, coloraxis_showscale=False)
                apply_theme(fig_ge, 450)
                st.plotly_chart(fig_ge, use_container_width=True)

        st.markdown("---")
        # Bubble chart: x = title count, y = average rating, size = average IMDb votes.
        st.markdown("**Genre Bubble: Volume Γ— Rating Γ— Viewership**")
        bubble_cols = ["genre","title_count","avg_vote_average","avg_imdb_votes"]
        if not gp_movies.empty and all(c in gp_movies.columns for c in bubble_cols):
            # Rows without a size value are dropped; the first 20 remaining rows are plotted.
            gp_bubble = gp_movies.dropna(subset=["avg_imdb_votes"]).head(20)
            fig_bub = px.scatter(
                gp_bubble, x="title_count", y="avg_vote_average",
                size="avg_imdb_votes", color="genre",
                hover_name="genre",
                color_discrete_sequence=CHART_COLORS,
                size_max=60,
                labels={"title_count":"Number of Titles","avg_vote_average":"Avg Rating"},
                text="genre"
            )
            fig_bub.update_traces(textposition="top center", textfont_size=10)
            apply_theme(fig_bub, 420)
            st.plotly_chart(fig_bub, use_container_width=True)
    else:
        # Fallback: compute genre volume and quality directly from the filtered movies.
        col_g1, col_g2 = st.columns(2, gap="large")
        with col_g1:
            if "genres" in movies_f.columns and not movies_f.empty:
                # explode() turns each list-valued `genres` cell into one row per genre.
                gc = (movies_f.explode("genres").groupby("genres")["title"]
                      .count().reset_index().rename(columns={"title":"count","genres":"genre"})
                      .sort_values("count",ascending=False).head(15))
                fig_gc = px.bar(gc, x="count", y="genre", orientation="h",
                                color="count", color_continuous_scale=["#3D0000",NF_RED],
                                text="count", title="Volume by Genre")
                fig_gc.update_traces(texttemplate="%{text:,}", textposition="outside")
                fig_gc.update_layout(yaxis={"categoryorder":"total ascending"}, coloraxis_showscale=False)
                apply_theme(fig_gc, 420)
                st.plotly_chart(fig_gc, use_container_width=True)
        with col_g2:
            if "genres" in movies_f.columns and "vote_average" in movies_f.columns and not movies_f.empty:
                # Only genres with at least 10 rated titles are ranked.
                gr = (movies_f.explode("genres").groupby("genres")["vote_average"]
                      .agg(["mean","count"]).reset_index()
                      .rename(columns={"genres":"genre","mean":"avg_rating"})
                      .query("count >= 10").sort_values("avg_rating",ascending=False).head(15))
                fig_gr = px.bar(gr, x="avg_rating", y="genre", orientation="h",
                                color="avg_rating",
                                color_continuous_scale=[NF_RED,GOLD,GREEN],
                                text="avg_rating", title="Quality by Genre")
                fig_gr.update_traces(texttemplate="%{text:.2f}", textposition="outside")
                fig_gr.update_layout(yaxis={"categoryorder":"total ascending"}, coloraxis_showscale=False)
                apply_theme(fig_gr, 420)
                st.plotly_chart(fig_gr, use_container_width=True)

    # Genre x decade heatmap.
    st.markdown("---")
    sec("πŸ—“οΈ GENRE POPULARITY BY DECADE")
    if "genres" in movies_f.columns and "release_year" in movies_f.columns and not movies_f.empty:
        # Drop rows without a release year first: otherwise the Int64 -> str
        # conversion below turns them into a bogus "<NA>s" decade column.
        movies_f_dec = movies_f.dropna(subset=["release_year"]).copy()
        movies_f_dec["decade_str"] = (
            (movies_f_dec["release_year"] // 10 * 10).astype("Int64").astype(str) + "s"
        )
        hmap = (movies_f_dec.explode("genres")
                .groupby(["decade_str","genres"]).size().reset_index(name="count"))
        if not hmap.empty:
            # Restrict the heatmap to the 12 genres with the most titles overall.
            top_genres_hmap = hmap.groupby("genres")["count"].sum().nlargest(12).index.tolist()
            hmap_top = hmap[hmap["genres"].isin(top_genres_hmap)]
            pivot = hmap_top.pivot(index="genres", columns="decade_str", values="count").fillna(0)
            fig_hm = px.imshow(pivot, color_continuous_scale=["#0a0a0a",hex_rgba(NF_RED,0.4),NF_RED],
                               labels={"color":"Titles"}, aspect="auto")
            fig_hm.update_layout(xaxis_title="Decade", yaxis_title="")
            apply_theme(fig_hm, 380)
            st.plotly_chart(fig_hm, use_container_width=True)
# ══════════════════════════════════════════════════════════════════════════════
# TAB 5: MOVIES ANALYSIS
# ══════════════════════════════════════════════════════════════════════════════
with tab_movies:
    # Tab 5: movie deep-dive, split into four sub-tabs
    # (top rated, box office, multi-source scores, franchises).
    sec("🎬 MOVIES DEEP-DIVE")
    tab_m1, tab_m2, tab_m3, tab_m4 = st.tabs(["πŸ† Top Rated","πŸ’° Box Office","🎭 Multi-Score","🏒 Franchises"])

    with tab_m1:
        col_m1, col_m2 = st.columns([3,1], gap="large")
        with col_m1:
            if "vote_average" in movies_f.columns and not movies_f.empty:
                # Optional columns are filtered through safe_cols
                # (presumably keeps only the names present in the frame; confirm against its definition).
                _m1_want = ["title","vote_average","vote_count","release_year","imdb_rating","has_awards"]
                _m1_cols = safe_cols(movies_f, _m1_want)
                # Without a vote_count column, a constant 999 lets every row pass the >= 200 filter.
                if "vote_count" in movies_f.columns:
                    vc_series = movies_f["vote_count"]
                else:
                    vc_series = pd.Series([999]*len(movies_f), index=movies_f.index)
                top_r = (movies_f[vc_series >= 200]
                         .nlargest(12, "vote_average")
                         [_m1_cols]
                         .reset_index(drop=True))
                fig_tr = px.bar(top_r, x="vote_average", y="title", orientation="h",
                                color="vote_average",
                                color_continuous_scale=["#6C1F1F",NF_RED,"#FF8C8C"],
                                text="vote_average",
                                custom_data=safe_cols(top_r, ["vote_count","release_year","imdb_rating"]))
                fig_tr.update_traces(
                    texttemplate="%{text:.2f}", textposition="outside",
                    hovertemplate="<b>%{y}</b><br>Rating: %{x:.2f}<extra></extra>")
                fig_tr.update_layout(yaxis={"categoryorder":"total ascending"}, coloraxis_showscale=False)
                apply_theme(fig_tr, 450)
                st.plotly_chart(fig_tr, use_container_width=True)
        with col_m2:
            st.markdown("#### πŸ–ΌοΈ Top Picks")
            if "vote_count" in movies_f.columns:
                vc_series2 = movies_f["vote_count"]
            else:
                vc_series2 = pd.Series([999]*len(movies_f), index=movies_f.index)
            if "vote_average" in movies_f.columns:
                top_imgs = movies_f[vc_series2 >= 200].nlargest(4,"vote_average")
            else:
                top_imgs = movies_f.head(4)
            for _, row in top_imgs.iterrows():
                p = row.get("poster_url","")
                # isinstance rejects NaN/None from missing values, which would
                # otherwise pass the truthiness test and reach st.image.
                if isinstance(p, str) and p and p != "None":
                    st.image(p, width=150, caption=str(row.get("title",""))[:30])

    with tab_m2:
        if all(c in movies_f.columns for c in ["budget_usd","revenue_usd","title"]):
            # Only titles with both budget and revenue above 1M USD are plotted.
            sc_bo = movies_f[(movies_f["budget_usd"]>1e6)&(movies_f["revenue_usd"]>1e6)].copy()
            if not sc_bo.empty:
                # px.scatter rejects NaN in the size column; fill on the local copy.
                if "vote_count" in sc_bo.columns:
                    sc_bo["vote_count"] = sc_bo["vote_count"].fillna(0)
                col_b1, col_b2 = st.columns([3,1], gap="large")
                with col_b1:
                    fig_bo = px.scatter(
                        sc_bo, x="budget_usd", y="revenue_usd",
                        color="roi" if "roi" in sc_bo.columns else "vote_average",
                        size="vote_count" if "vote_count" in sc_bo.columns else None,
                        hover_name="title",
                        color_continuous_scale=["#6C1F1F",NF_RED,GOLD,GREEN],
                        log_x=True, log_y=True,
                        labels={"budget_usd":"Budget (USD)","revenue_usd":"Revenue (USD)"},
                    )
                    # Dashed diagonal where revenue equals budget.
                    mx = max(sc_bo["budget_usd"].max(), sc_bo["revenue_usd"].max())
                    fig_bo.add_shape(type="line",x0=1e6,y0=1e6,x1=mx,y1=mx,
                                     line=dict(color="#444",dash="dash",width=1))
                    apply_theme(fig_bo, 450)
                    st.plotly_chart(fig_bo, use_container_width=True)
                with col_b2:
                    st.markdown("#### πŸ’° Box Office")
                    top_rev = sc_bo.nlargest(5,"revenue_usd")
                    for _, r in top_rev.iterrows():
                        p = r.get("poster_url","")
                        col_pi, col_ti = st.columns([1,2])
                        with col_pi:
                            # Same NaN-safe poster check as the Top Picks column.
                            if isinstance(p, str) and p and p != "None":
                                st.image(p, width=60)
                        with col_ti:
                            st.markdown(f"**{str(r.get('title',''))[:20]}**")
                            st.caption(f"${r.get('revenue_usd',0)/1e9:.1f}B")

        # Budget tier breakdown.
        if "budget_tier" in movies_f.columns:
            st.markdown("---")
            bt = movies_f["budget_tier"].value_counts().reset_index()
            bt.columns = ["tier","count"]
            col_bt1, col_bt2 = st.columns(2)
            with col_bt1:
                fig_bt = px.pie(bt, names="tier", values="count", hole=0.5,
                                color_discrete_sequence=CHART_COLORS, title="Movies by Budget Tier")
                fig_bt.update_traces(textinfo="percent+label")
                apply_theme(fig_bt, 300)
                st.plotly_chart(fig_bt, use_container_width=True)
            with col_bt2:
                if "roi" in movies_f.columns:
                    roi_by_tier = (movies_f.groupby("budget_tier")["roi"]
                                   .mean().reset_index().rename(columns={"roi":"avg_roi"}))
                    fig_rt = px.bar(roi_by_tier, x="budget_tier", y="avg_roi",
                                    color="avg_roi",
                                    color_continuous_scale=["#3D0000",NF_RED,GREEN],
                                    text="avg_roi", title="Avg ROI by Budget Tier")
                    fig_rt.update_traces(texttemplate="%{text:.1f}x", textposition="outside")
                    fig_rt.update_layout(coloraxis_showscale=False)
                    apply_theme(fig_rt, 300)
                    st.plotly_chart(fig_rt, use_container_width=True)

    with tab_m3:
        st.markdown("**Multi-Score Comparison: TMDB vs IMDb vs RT vs Metacritic**")
        # Rows that have both a TMDB and an IMDb rating. The frame stays empty
        # when either column is missing, so the hint below covers both cases.
        multi_df = pd.DataFrame()
        if all(c in movies_f.columns for c in ["vote_average","imdb_rating"]):
            multi_df = movies_f.dropna(subset=["vote_average","imdb_rating"])
        if multi_df.empty:
            st.info("Multi-score comparison ต้องการข้อฑูΰΈ₯ OMDB β€” ΰΈ£ΰΈ±ΰΈ™ fetch_omdb_enrichment ΰΉƒΰΈ™ pipeline ΰΈΰΉˆΰΈ­ΰΈ™")
        else:
            # Compare the 50 most-voted titles (first 50 rows when vote_count is absent).
            if "vote_count" in multi_df.columns:
                multi_sample = multi_df.nlargest(50,"vote_count")
            else:
                multi_sample = multi_df.head(50)
            fig_ms = go.Figure()
            x_titles = multi_sample["title"].str[:25].tolist()
            # The two rating columns are multiplied by 10 so they share the 0-100 axis with RT.
            for col_name, color, label in [
                ("vote_average", TEAL, "TMDB (Γ—10)"),
                ("imdb_rating", GOLD, "IMDb (Γ—10)"),
            ]:
                if col_name in multi_sample.columns:
                    fig_ms.add_trace(go.Bar(
                        name=label, x=x_titles,
                        y=multi_sample[col_name]*10,
                        marker_color=color, opacity=0.8
                    ))
            if "rt_score" in multi_sample.columns:
                fig_ms.add_trace(go.Bar(
                    name="RT Score", x=x_titles,
                    y=multi_sample["rt_score"].fillna(0),
                    marker_color=NF_RED, opacity=0.8
                ))
            fig_ms.update_layout(barmode="group", xaxis_tickangle=-45,
                                 xaxis_title="", yaxis_title="Score (normalized to 100)")
            apply_theme(fig_ms, 450)
            st.plotly_chart(fig_ms, use_container_width=True)

    with tab_m4:
        if not franchises.empty:
            st.markdown("**🏒 Top Franchises by Total Revenue**")
            col_f1, col_f2 = st.columns([2,1], gap="large")
            with col_f1:
                # The chart needs both plotted columns; skip it rather than
                # raise KeyError when the frame lacks them.
                if all(c in franchises.columns for c in ["total_revenue_usd","collection_name"]):
                    top_fr = franchises.nlargest(15,"total_revenue_usd")
                    fig_fr = px.bar(top_fr, x="total_revenue_usd", y="collection_name",
                                    orientation="h",
                                    color="movie_count" if "movie_count" in top_fr.columns else "total_revenue_usd",
                                    color_continuous_scale=["#1a0010",PINK],
                                    text="total_revenue_usd",
                                    labels={"total_revenue_usd":"Total Revenue (USD)","collection_name":""})
                    fig_fr.update_traces(texttemplate="$%{text:,.0f}", textposition="outside")
                    fig_fr.update_layout(yaxis={"categoryorder":"total ascending"}, coloraxis_showscale=False)
                    apply_theme(fig_fr, 450)
                    st.plotly_chart(fig_fr, use_container_width=True)
            with col_f2:
                if all(c in franchises.columns for c in ["franchise_roi","collection_name"]):
                    top_roi_fr = franchises.dropna(subset=["franchise_roi"]).nlargest(8,"franchise_roi")
                    fig_froi = px.bar(top_roi_fr, x="franchise_roi", y="collection_name",
                                      orientation="h", color="franchise_roi",
                                      color_continuous_scale=[NF_RED,GOLD,GREEN],
                                      text="franchise_roi", title="Best ROI Franchises")
                    fig_froi.update_traces(texttemplate="%{text:.1f}x", textposition="outside")
                    fig_froi.update_layout(yaxis={"categoryorder":"total ascending"}, coloraxis_showscale=False)
                    apply_theme(fig_froi, 450)
                    st.plotly_chart(fig_froi, use_container_width=True)
        else:
            st.info("ΰΉ„ΰΈ‘ΰΉˆΰΈ‘ΰΈ΅ΰΈ‚ΰΉ‰ΰΈ­ΰΈ‘ΰΈΉΰΈ₯ Franchise β€” ΰΈ£ΰΈ±ΰΈ™ Pipeline ΰΉƒΰΈ«ΰΈ‘ΰΉˆΰΉ€ΰΈžΰΈ·ΰΉˆΰΈ­ΰΈͺΰΈ£ΰΉ‰ΰΈ²ΰΈ‡ franchises.parquet")
# ══════════════════════════════════════════════════════════════════════════════
# TAB 6: TV SHOWS
# ══════════════════════════════════════════════════════════════════════════════
with tab_tv:
    # Tab 6: TV deep-dive, split into three sub-tabs
    # (top rated, structure, renewal signal).
    sec("πŸ“Ί TV SHOWS DEEP-DIVE")
    tab_tv1, tab_tv2, tab_tv3 = st.tabs(["πŸ† Top Rated","πŸ“Š Structure","🌱 Renewal Signal"])

    with tab_tv1:
        col_tv1, col_tv2 = st.columns([3,1], gap="large")
        with col_tv1:
            if "vote_average" in tv_f.columns and "name" in tv_f.columns and not tv_f.empty:
                # Optional columns are filtered through safe_cols
                # (presumably keeps only the names present in the frame; confirm against its definition).
                _tv1_want = ["name","vote_average","vote_count","number_of_seasons","imdb_rating"]
                _tv1_cols = safe_cols(tv_f, _tv1_want)
                # Without a vote_count column, a constant 999 lets every row pass the >= 100 filter.
                if "vote_count" in tv_f.columns:
                    vc_tv = tv_f["vote_count"]
                else:
                    vc_tv = pd.Series([999]*len(tv_f), index=tv_f.index)
                top_tv_r = (tv_f[vc_tv >= 100]
                            .nlargest(12, "vote_average")
                            [_tv1_cols]
                            .reset_index(drop=True))
                fig_tvr = px.bar(top_tv_r, x="vote_average", y="name", orientation="h",
                                 color="vote_average",
                                 color_continuous_scale=["#1a0040",PURPLE,"#A29BFE"],
                                 text="vote_average")
                fig_tvr.update_traces(
                    texttemplate="%{text:.2f}", textposition="outside",
                    hovertemplate="<b>%{y}</b><br>Rating: %{x:.2f}<extra></extra>")
                fig_tvr.update_layout(yaxis={"categoryorder":"total ascending"}, coloraxis_showscale=False)
                apply_theme(fig_tvr, 450)
                st.plotly_chart(fig_tvr, use_container_width=True)
        with col_tv2:
            st.markdown("#### πŸ–ΌοΈ Top Picks")
            if "vote_count" in tv_f.columns:
                vc_tv2 = tv_f["vote_count"]
            else:
                vc_tv2 = pd.Series([999]*len(tv_f), index=tv_f.index)
            if "vote_average" in tv_f.columns:
                tv_top_imgs = tv_f[vc_tv2 >= 100].nlargest(4,"vote_average")
            else:
                tv_top_imgs = tv_f.head(4)
            for _, row in tv_top_imgs.iterrows():
                p = row.get("poster_url","")
                # isinstance rejects NaN/None from missing values, which would
                # otherwise pass the truthiness test and reach st.image.
                if isinstance(p, str) and p and p != "None":
                    st.image(p, width=150, caption=str(row.get("name",""))[:25])

    with tab_tv2:
        col_ts1, col_ts2, col_ts3 = st.columns(3, gap="large")
        with col_ts1:
            if "status" in tv_f.columns and not tv_f.empty:
                sc = tv_f["status"].value_counts().reset_index()
                sc.columns = ["status","count"]
                colors_map = {"Returning Series":GREEN,"Ended":NF_RED,"Canceled":"#E17055",
                              "In Production":TEAL,"Planned":PURPLE}
                fig_st = px.pie(sc, names="status", values="count", hole=0.55,
                                color="status", color_discrete_map=colors_map, title="TV Status")
                fig_st.update_traces(textinfo="percent+label", textfont_size=10)
                apply_theme(fig_st, 320)
                st.plotly_chart(fig_st, use_container_width=True)
        with col_ts2:
            if "number_of_seasons" in tv_f.columns and not tv_f.empty:
                sd = (tv_f["number_of_seasons"].dropna().astype(int)
                      .value_counts().sort_index().reset_index())
                sd.columns = ["seasons","count"]
                # Shows with more than 20 seasons are left off the distribution.
                sd = sd[sd["seasons"] <= 20]
                fig_sd = px.bar(sd, x="seasons", y="count",
                                color="count", color_continuous_scale=["#1a0040",PURPLE],
                                text="count", title="Seasons Distribution")
                fig_sd.update_traces(texttemplate="%{text}", textposition="outside")
                fig_sd.update_layout(coloraxis_showscale=False, bargap=0.3)
                apply_theme(fig_sd, 320)
                st.plotly_chart(fig_sd, use_container_width=True)
        with col_ts3:
            if "episode_format" in tv_f.columns and not tv_f.empty:
                ef = tv_f["episode_format"].value_counts().reset_index()
                ef.columns = ["format","count"]
                fig_ef = px.pie(ef, names="format", values="count", hole=0.5,
                                color_discrete_sequence=CHART_COLORS, title="Episode Format")
                fig_ef.update_traces(textinfo="percent+label", textfont_size=10)
                apply_theme(fig_ef, 320)
                st.plotly_chart(fig_ef, use_container_width=True)

        # Total content hours. The chart plots y="name", so that column is
        # required alongside the hours column.
        if ("total_content_hours" in tv_f.columns
                and "name" in tv_f.columns
                and not tv_f.empty):
            st.markdown("---")
            st.markdown("**Total Content Hours (Top 20 shows)**")
            hours_cols = safe_cols(tv_f, ["name","total_content_hours","number_of_seasons"])
            top_hours = tv_f.nlargest(20,"total_content_hours")[hours_cols].dropna()
            fig_hrs = px.bar(top_hours, x="total_content_hours", y="name", orientation="h",
                             color="number_of_seasons" if "number_of_seasons" in top_hours.columns else "total_content_hours",
                             color_continuous_scale=["#1a0040",PURPLE],
                             text="total_content_hours",
                             labels={"total_content_hours":"Total Watch Hours","name":""})
            fig_hrs.update_traces(texttemplate="%{text:.0f}h", textposition="outside")
            fig_hrs.update_layout(yaxis={"categoryorder":"total ascending"})
            apply_theme(fig_hrs, 400)
            st.plotly_chart(fig_hrs, use_container_width=True)

        # Season-level detail for one selected show.
        if not tv_seasons.empty and "season_number" in tv_seasons.columns:
            st.markdown("---")
            sec("πŸ—“οΈ SEASON-LEVEL ANALYSIS")
            if "name" in tv_seasons.columns:
                # The picker offers at most the first 100 distinct show names.
                sel_show = st.selectbox("ΰΉ€ΰΈ₯ือก TV Show", tv_seasons["name"].dropna().unique().tolist()[:100])
                show_seasons = tv_seasons[tv_seasons["name"]==sel_show].sort_values("season_number")
                if not show_seasons.empty:
                    col_ss1, col_ss2 = st.columns(2)
                    with col_ss1:
                        if "episode_count" in show_seasons.columns:
                            fig_ssn = px.bar(show_seasons, x="season_number", y="episode_count",
                                             color="vote_average" if "vote_average" in show_seasons.columns else "season_number",
                                             color_continuous_scale=[PURPLE,"#A29BFE"],
                                             text="episode_count",
                                             labels={"season_number":"Season","episode_count":"Episodes"})
                            fig_ssn.update_traces(texttemplate="%{text}", textposition="outside")
                            fig_ssn.update_layout(coloraxis_showscale=False)
                            apply_theme(fig_ssn, 280)
                            st.plotly_chart(fig_ssn, use_container_width=True)
                    with col_ss2:
                        disp_cols = safe_cols(show_seasons, ["season_number","season_name","air_date","episode_count","vote_average","season_position"])
                        st.dataframe(show_seasons[disp_cols].reset_index(drop=True),
                                     use_container_width=True, height=280)

    with tab_tv3:
        if "renewal_signal" in tv_f.columns and not tv_f.empty:
            rs = tv_f["renewal_signal"].value_counts().reset_index()
            rs.columns = ["signal","count"]
            colors_rs = {
                "Renewed/Ongoing":GREEN,"Strong Candidate":TEAL,
                "Possible":GOLD,"Unlikely / Ended":NF_RED
            }
            col_rs1, col_rs2 = st.columns([2,1], gap="large")
            with col_rs1:
                fig_rs = px.bar(rs, x="count", y="signal", orientation="h",
                                color="signal", color_discrete_map=colors_rs,
                                text="count", title="TV Shows Renewal Likelihood")
                fig_rs.update_traces(texttemplate="%{text:,}", textposition="outside")
                fig_rs.update_layout(yaxis={"categoryorder":"total ascending"}, showlegend=False)
                apply_theme(fig_rs, 320)
                st.plotly_chart(fig_rs, use_container_width=True)
                st.markdown("**πŸ“Ί Strong Candidates β€” Shows Likely to Return**")
                # Up to 12 "Strong Candidate" shows, ranked by rating when available.
                strong = tv_f[tv_f["renewal_signal"]=="Strong Candidate"]
                if "vote_average" in tv_f.columns:
                    strong = strong.nlargest(12,"vote_average")
                else:
                    strong = strong.head(12)
                if not strong.empty:
                    poster_gallery(strong, title_col="name", year_col="first_air_year",
                                   rating_col="vote_average", poster_col="poster_url", max_cards=10)
            with col_rs2:
                # One insight card per renewal category, each with its own style.
                for signal, style in [("Renewed/Ongoing","green"),("Strong Candidate","teal"),
                                      ("Possible","gold"),("Unlikely / Ended","red")]:
                    cnt = tv_f[tv_f["renewal_signal"]==signal].shape[0]
                    insight(f"<strong>{signal}</strong>: {cnt:,} shows", style)
        else:
            st.info("ΰΉ„ΰΈ‘ΰΉˆΰΈ‘ΰΈ΅ΰΈ‚ΰΉ‰ΰΈ­ΰΈ‘ΰΈΉΰΈ₯ renewal_signal β€” ΰΈ•ΰΉ‰ΰΈ­ΰΈ‡ΰΈ£ΰΈ±ΰΈ™ PySpark pipeline ΰΉƒΰΈ«ΰΈ‘ΰΉˆ")
# ══════════════════════════════════════════════════════════════════════════════
# TAB 7: TALENT
# ══════════════════════════════════════════════════════════════════════════════
with tab_talent:
    # Tab 7: cast/crew rankings from the pre-aggregated `top_talent` frame when
    # it has rows, otherwise appearance counts from `credits`; then a keyword treemap.
    sec("🌟 TALENT & CREDITS")
    if not top_talent.empty:
        col_tt1, col_tt2 = st.columns([3,1], gap="large")
        # Split by role when the column exists; otherwise treat all rows as cast.
        if "role" in top_talent.columns:
            cast_talent = top_talent[top_talent["role"]=="cast"].copy()
            crew_talent = top_talent[top_talent["role"]=="crew"].copy()
        else:
            cast_talent = top_talent.copy()
            crew_talent = pd.DataFrame()

        with col_tt1:
            st.markdown("**🎭 Most Impactful Cast β€” Avg Engagement Score**")
            if not cast_talent.empty and "avg_content_engagement" in cast_talent.columns:
                top_cast_t = cast_talent.nlargest(15,"avg_content_engagement")
                color_col = "total_imdb_votes_across_titles" if "total_imdb_votes_across_titles" in top_cast_t.columns else "avg_content_engagement"
                fig_ct = px.bar(top_cast_t, x="avg_content_engagement", y="name",
                                orientation="h", color=color_col,
                                color_continuous_scale=["#001433",TEAL],
                                text="avg_content_engagement",
                                labels={"avg_content_engagement":"Avg Engagement","name":""})
                fig_ct.update_traces(texttemplate="%{text:.2f}", textposition="outside")
                fig_ct.update_layout(yaxis={"categoryorder":"total ascending"}, coloraxis_showscale=False)
                apply_theme(fig_ct, 460)
                st.plotly_chart(fig_ct, use_container_width=True)

        with col_tt2:
            st.markdown("**πŸ–ΌοΈ Top Talent Profiles**")
            # Eight profile cards: ranked by engagement when possible, else the first eight rows.
            if not cast_talent.empty and "avg_content_engagement" in cast_talent.columns:
                top_cast_cards = cast_talent.nlargest(8,"avg_content_engagement")
            else:
                top_cast_cards = cast_talent.head(8)
            cards_html = '<div style="display:flex;flex-wrap:wrap;gap:8px;">'
            for _, row in top_cast_cards.iterrows():
                profile = row.get("profile_url","")
                if not profile or profile == "None":
                    profile = "https://via.placeholder.com/72x72/181818/555?text=πŸ‘€"
                name = str(row.get("name",""))[:20]
                eng = row.get("avg_content_engagement",0)
                tc = row.get("title_count",0)
                # The markup lines stay flush-left on purpose: indenting them
                # would change the string that is sent to st.markdown.
                cards_html += f"""
<div class="talent-card" style="width:calc(50% - 4px);">
<img src="{profile}" onerror="this.src='https://via.placeholder.com/72x72/181818/555?text=πŸ‘€'"/>
<div class="talent-name">{name}</div>
<div class="talent-meta">Score: {eng:.1f} Β· {tc} titles</div>
</div>"""
            cards_html += "</div>"
            st.markdown(cards_html, unsafe_allow_html=True)

        st.markdown("---")
        if not crew_talent.empty:
            st.markdown("**🎬 Top Directors & Producers**")
            col_cr1, col_cr2 = st.columns([2,2], gap="large")
            with col_cr1:
                # The chart plots avg_content_engagement, so skip it when the
                # column is missing instead of raising KeyError.
                if "avg_content_engagement" in crew_talent.columns:
                    top_crew = crew_talent.nlargest(12,"avg_content_engagement")
                    fig_crew = px.bar(top_crew, x="avg_content_engagement", y="name",
                                      orientation="h",
                                      color="title_count" if "title_count" in top_crew.columns else "avg_content_engagement",
                                      color_continuous_scale=["#1a0a20",PURPLE],
                                      text="avg_content_engagement",
                                      labels={"avg_content_engagement":"Avg Engagement","name":""})
                    fig_crew.update_traces(texttemplate="%{text:.2f}", textposition="outside")
                    fig_crew.update_layout(yaxis={"categoryorder":"total ascending"}, coloraxis_showscale=False)
                    apply_theme(fig_crew, 400)
                    st.plotly_chart(fig_crew, use_container_width=True)
            with col_cr2:
                if "gender" in cast_talent.columns and not cast_talent.empty:
                    gen_dist = cast_talent["gender"].value_counts().reset_index()
                    gen_dist.columns = ["gender","count"]
                    fig_gen = px.pie(gen_dist, names="gender", values="count",
                                     hole=0.55, title="Cast Gender Distribution",
                                     color="gender",
                                     color_discrete_map={"Female":TEAL,"Male":PURPLE,"Unknown":NF_BORDER})
                    fig_gen.update_traces(textinfo="percent+label", textfont_size=12)
                    apply_theme(fig_gen, 380)
                    st.plotly_chart(fig_gen, use_container_width=True)
    else:
        # Fallback: rank cast by number of appearances in the raw credits frame.
        col_c1, col_c2 = st.columns([3,1], gap="large")
        with col_c1:
            if not credits.empty and "role" in credits.columns:
                top_cast = (credits[credits["role"]=="cast"]
                            .groupby("name").size().reset_index(name="appearances")
                            .nlargest(15,"appearances"))
                fig_cast = px.bar(top_cast, x="appearances", y="name", orientation="h",
                                  color="appearances", color_continuous_scale=["#001433",TEAL],
                                  text="appearances",
                                  labels={"appearances":"Appearances","name":""})
                fig_cast.update_traces(texttemplate="%{text}", textposition="outside")
                fig_cast.update_layout(yaxis={"categoryorder":"total ascending"}, coloraxis_showscale=False)
                apply_theme(fig_cast, 450)
                st.plotly_chart(fig_cast, use_container_width=True)
        with col_c2:
            # Both columns are indexed below, so both must be present.
            if ("gender" in credits.columns
                    and "role" in credits.columns
                    and not credits.empty):
                gd = credits[credits["role"]=="cast"]["gender"].value_counts().reset_index()
                gd.columns = ["gender","count"]
                fig_g = px.pie(gd, names="gender", values="count", hole=0.55,
                               color="gender",
                               color_discrete_map={"Female":TEAL,"Male":PURPLE,"Unknown":"#333"},
                               title="Gender Distribution")
                fig_g.update_traces(textinfo="percent+label")
                apply_theme(fig_g, 300)
                st.plotly_chart(fig_g, use_container_width=True)

    # Keywords treemap.
    st.markdown("---")
    sec("πŸ”‘ TRENDING THEMES & KEYWORDS")
    if not keywords.empty and "keyword" in keywords.columns:
        col_kw1, col_kw2 = st.columns([3,1], gap="large")
        with col_kw1:
            # The 30 most frequent keywords, counted by row.
            top_kw = keywords.groupby("keyword").size().reset_index(name="count").nlargest(30,"count")
            fig_kw = px.treemap(top_kw, path=["keyword"], values="count",
                                color="count", color_continuous_scale=["#200000","#6C1F1F",NF_RED],
                                title="Top 30 Content Themes")
            fig_kw.update_traces(textfont=dict(size=12,family="DM Sans"))
            apply_theme(fig_kw, 400)
            st.plotly_chart(fig_kw, use_container_width=True)
        with col_kw2:
            top5_kw = top_kw.head(5)
            for _, r in top5_kw.iterrows():
                insight(f"πŸ”‘ <strong>'{r['keyword']}'</strong> β€” {r['count']:,} titles", "red")
# ══════════════════════════════════════════════════════════════════════════════
# TAB 8: YEAR-OVER-YEAR TRENDS
# ══════════════════════════════════════════════════════════════════════════════
with tab_trends:
    # ── Year-over-year trends ────────────────────────────────────────────────
    sec("📈 YEAR-OVER-YEAR TRENDS")
    if not yoy_trend.empty and "year" in yoy_trend.columns:
        # Split the trend table by media type. Without a media_type column all
        # rows are drawn as the "Movies" series and the TV frame stays empty.
        if "media_type" in yoy_trend.columns:
            yoy_m = yoy_trend[yoy_trend["media_type"] == "movie"].sort_values("year")
            yoy_tv = yoy_trend[yoy_trend["media_type"] == "tv"].sort_values("year")
        else:
            yoy_m = yoy_trend.sort_values("year")
            yoy_tv = pd.DataFrame()

        col_yr1, col_yr2 = st.columns(2, gap="large")
        with col_yr1:
            st.markdown("**Avg Rating Over Time**")
            fig_yr1 = go.Figure()
            # Each series is optional: it is added only when its frame has rows
            # and carries the metric column.
            if not yoy_m.empty and "avg_rating" in yoy_m.columns:
                fig_yr1.add_trace(go.Scatter(x=yoy_m["year"], y=yoy_m["avg_rating"],
                                             name="Movies", mode="lines+markers",
                                             line=dict(color=NF_RED, width=2.5), marker_size=5))
            if not yoy_tv.empty and "avg_rating" in yoy_tv.columns:
                fig_yr1.add_trace(go.Scatter(x=yoy_tv["year"], y=yoy_tv["avg_rating"],
                                             name="TV Shows", mode="lines+markers",
                                             line=dict(color=PURPLE, width=2.5), marker_size=5))
            fig_yr1.update_layout(xaxis_title="Year", yaxis_title="Avg Rating")
            apply_theme(fig_yr1, 320)
            st.plotly_chart(fig_yr1, use_container_width=True)
        with col_yr2:
            st.markdown("**Avg Engagement Over Time**")
            fig_yr2 = go.Figure()
            # Area-style lines: both series are filled down to zero with a faint tint.
            if not yoy_m.empty and "avg_engagement" in yoy_m.columns:
                fig_yr2.add_trace(go.Scatter(x=yoy_m["year"], y=yoy_m["avg_engagement"],
                                             name="Movies", mode="lines", fill="tozeroy",
                                             line=dict(color=NF_RED, width=2),
                                             fillcolor=hex_rgba(NF_RED, 0.12)))
            if not yoy_tv.empty and "avg_engagement" in yoy_tv.columns:
                fig_yr2.add_trace(go.Scatter(x=yoy_tv["year"], y=yoy_tv["avg_engagement"],
                                             name="TV Shows", mode="lines", fill="tozeroy",
                                             line=dict(color=PURPLE, width=2),
                                             fillcolor=hex_rgba(PURPLE, 0.12)))
            fig_yr2.update_layout(xaxis_title="Year", yaxis_title="Avg Engagement Score")
            apply_theme(fig_yr2, 320)
            st.plotly_chart(fig_yr2, use_container_width=True)

        st.markdown("---")
        col_yr3, col_yr4 = st.columns(2, gap="large")
        with col_yr3:
            # Bar charts below use the movie frame only.
            if "trending_titles" in yoy_m.columns and not yoy_m.empty:
                st.markdown("**Trending Titles by Year**")
                fig_tr = px.bar(yoy_m, x="year", y="trending_titles",
                                color="trending_titles",
                                color_continuous_scale=[hex_rgba(TEAL, 0.3), TEAL],
                                labels={"trending_titles": "Trending Titles", "year": "Year"})
                fig_tr.update_layout(coloraxis_showscale=False)
                apply_theme(fig_tr, 280)
                st.plotly_chart(fig_tr, use_container_width=True)
        with col_yr4:
            if "awarded_titles" in yoy_m.columns and not yoy_m.empty:
                st.markdown("**Award Winners by Year**")
                fig_aw = px.bar(yoy_m, x="year", y="awarded_titles",
                                color="awarded_titles",
                                color_continuous_scale=[hex_rgba(GOLD, 0.3), GOLD],
                                labels={"awarded_titles": "Award Winners", "year": "Year"})
                fig_aw.update_layout(coloraxis_showscale=False)
                apply_theme(fig_aw, 280)
                st.plotly_chart(fig_aw, use_container_width=True)

        # The votes chart is shown only when the movie series is available;
        # the TV series is an optional overlay on top of it.
        if "avg_imdb_votes" in yoy_m.columns and not yoy_m.empty:
            st.markdown("---")
            st.markdown("**Avg IMDb Votes (Viewership Proxy) Over Time**")
            fig_iv = go.Figure()
            fig_iv.add_trace(go.Scatter(x=yoy_m["year"], y=yoy_m["avg_imdb_votes"],
                                        name="Movies", mode="lines+markers",
                                        line=dict(color=GOLD, width=2.5), marker_size=5,
                                        fill="tozeroy", fillcolor=hex_rgba(GOLD, 0.1)))
            if not yoy_tv.empty and "avg_imdb_votes" in yoy_tv.columns:
                fig_iv.add_trace(go.Scatter(x=yoy_tv["year"], y=yoy_tv["avg_imdb_votes"],
                                            name="TV Shows", mode="lines+markers",
                                            line=dict(color=PURPLE, width=2.5), marker_size=5))
            fig_iv.update_layout(xaxis_title="Year", yaxis_title="Avg IMDb Votes")
            apply_theme(fig_iv, 320)
            st.plotly_chart(fig_iv, use_container_width=True)
    else:
        # Thai message: "No yoy_trend.parquet data — showing filtered data instead".
        st.info("ไม่มีข้อมูล yoy_trend.parquet — แสดงจากข้อมูล filtered แทน")
        # Fallback built from the filtered movie table. Every column used by the
        # aggregation is checked up front so a partial table skips the chart
        # instead of raising KeyError.
        fallback_cols = ["release_year", "title", "vote_average"]
        if not movies_f.empty and all(c in movies_f.columns for c in fallback_cols):
            yr_data = movies_f.groupby("release_year").agg(
                count=("title", "count"),
                avg_rating=("vote_average", "mean"),
            ).reset_index()
            # Title count on the primary axis, average rating on the secondary axis.
            fig_fb = make_subplots(specs=[[{"secondary_y": True}]])
            fig_fb.add_trace(go.Bar(x=yr_data["release_year"], y=yr_data["count"],
                                    name="Count", marker_color=hex_rgba(NF_RED, 0.5)),
                             secondary_y=False)
            fig_fb.add_trace(go.Scatter(x=yr_data["release_year"], y=yr_data["avg_rating"],
                                        name="Avg Rating", line=dict(color=GOLD, width=2.5)),
                             secondary_y=True)
            fig_fb.update_layout(**PLOTLY_BASE, height=380)
            st.plotly_chart(fig_fb, use_container_width=True)

    # ── Language summary ─────────────────────────────────────────────────────
    st.markdown("---")
    sec("🌍 LANGUAGE & REGION ANALYTICS")
    if not lang_summary.empty:
        col_ls1, col_ls2 = st.columns(2, gap="large")
        with col_ls1:
            if all(c in lang_summary.columns for c in ["original_language", "title_count"]):
                top_lang = lang_summary.nlargest(15, "title_count").copy()
                lang_map = {"en": "English", "ja": "Japanese", "ko": "Korean", "fr": "French", "es": "Spanish",
                            "de": "German", "it": "Italian", "pt": "Portuguese", "zh": "Chinese", "hi": "Hindi",
                            "ru": "Russian", "th": "Thai", "ar": "Arabic", "nl": "Dutch", "sv": "Swedish"}
                # Codes missing from lang_map keep their raw language code as the label.
                top_lang["lang_name"] = top_lang["original_language"].map(lang_map).fillna(top_lang["original_language"])
                fig_ls = px.bar(top_lang, x="title_count", y="lang_name", orientation="h",
                                color="avg_popularity" if "avg_popularity" in top_lang.columns else "title_count",
                                color_continuous_scale=["#001a33", TEAL],
                                text="title_count",
                                labels={"title_count": "Titles", "lang_name": ""})
                fig_ls.update_traces(texttemplate="%{text:,}", textposition="outside")
                fig_ls.update_layout(yaxis={"categoryorder": "total ascending"}, coloraxis_showscale=False)
                apply_theme(fig_ls, 380)
                st.plotly_chart(fig_ls, use_container_width=True)
        with col_ls2:
            # title_count is summed below, so it is required alongside the
            # grouping key (previously only language_group was checked).
            if all(c in lang_summary.columns for c in ["language_group", "title_count"]):
                lg_grp = lang_summary.groupby("language_group")["title_count"].sum().reset_index()
                fig_lg = px.pie(lg_grp, names="language_group", values="title_count",
                                hole=0.55, color_discrete_sequence=CHART_COLORS,
                                title="Content by Language Group")
                fig_lg.update_traces(textinfo="percent+label", textfont_size=11)
                apply_theme(fig_lg, 380)
                st.plotly_chart(fig_lg, use_container_width=True)
    else:
        # No language summary table: count languages directly from the filtered movies.
        if "original_language" in movies_f.columns and not movies_f.empty:
            lang_cnt = movies_f["original_language"].value_counts().head(12).reset_index()
            lang_cnt.columns = ["language", "count"]
            fig_lf = px.bar(lang_cnt, x="count", y="language", orientation="h",
                            color="count", color_continuous_scale=["#001a33", TEAL],
                            text="count")
            fig_lf.update_traces(texttemplate="%{text:,}", textposition="outside")
            fig_lf.update_layout(yaxis={"categoryorder": "total ascending"}, coloraxis_showscale=False)
            apply_theme(fig_lf, 380)
            st.plotly_chart(fig_lf, use_container_width=True)
# ─────────────────────────────────────────────────────────────────────────────
# RAW DATA EXPLORER
# ─────────────────────────────────────────────────────────────────────────────
st.markdown("---")
# Collapsed-by-default expander with one tab per table, each previewing its rows.
with st.expander("🗃️ Raw Data Explorer", expanded=False):
    # Tab labels and data_map are parallel lists: entry k of one belongs to
    # entry k of the other, so keep their order in sync when editing.
    tabs_raw = st.tabs(["🎬 Movies", "📺 TV", "🎭 Credits", "🔑 Keywords",
                        "📊 Content Perf", "🎭 Genre Perf", "🌟 Talent", "📈 YoY"])
    data_map = [
        movies_f, tv_f, credits, keywords,
        content_perf, genre_perf, top_talent, yoy_trend,
    ]
    for raw_tab, df in zip(tabs_raw, data_map):
        with raw_tab:
            # The caption reports the full row count; the grid shows at most 200 rows.
            st.caption(f"{len(df):,} records")
            if not df.empty:
                st.dataframe(df.head(200), use_container_width=True, height=320)
# ─────────────────────────────────────────────────────────────────────────────
# FOOTER
# ─────────────────────────────────────────────────────────────────────────────
# Centered footer rendered as raw HTML. The HTML lines stay at column 0 so the
# markdown renderer does not treat them as an indented code block.
st.markdown(f"""
<div style="text-align:center; padding:30px 0 10px 0; color:{TEXT_DIM}; font-size:10px; letter-spacing:1.5px;">
NETFLIX ANALYTICS 2.0 &nbsp;·&nbsp; TMDB API + OMDB API &nbsp;·&nbsp; AIRFLOW → PYSPARK → HUGGINGFACE
<br><span style="color:{NF_BORDER};">New Tables: content_performance · genre_performance · top_talent · yoy_trend · tv_seasons · language_summary</span>
</div>
""", unsafe_allow_html=True)