import numpy as np
import pandas as pd


def compute_bayesian_popularity_score(
    df: pd.DataFrame,
    rating_col: str = "overall_rating",
    reviews_col: str = "review_count",
    m_prior: float = 20,
    *,
    rating_weight: float = 0.7,
    popularity_weight: float = 0.3,
) -> pd.DataFrame:
    """Add Bayesian-shrunk rating and popularity score columns to *df*.

    The frame is modified in place and the same object is returned.
    ``rating_col`` and ``reviews_col`` are overwritten with their numeric
    coercions, and three columns are added:

    * ``bayes_rating`` -- ``(mu * m_prior + n * r) / (m_prior + n)``, where
      ``mu`` is the mean of the non-missing ratings, ``n`` the review count
      and ``r`` the row's rating (``mu`` when missing). Rows with zero
      reviews get ``mu``.
    * ``pop_log`` -- ``log1p(n)``.
    * ``pop_score`` -- ``rating_weight * bayes_rating
      + popularity_weight * pop_log``.

    Args:
        df: Input frame containing ``rating_col`` and ``reviews_col``.
        rating_col: Name of the rating column; unparseable values become NaN.
        reviews_col: Name of the review-count column; unparseable or missing
            values become 0, fractional values are truncated, and negative
            values are clamped to 0.
        m_prior: Weight given to the global mean in the shrinkage formula.
        rating_weight: Weight of ``bayes_rating`` in ``pop_score``.
        popularity_weight: Weight of ``pop_log`` in ``pop_score``.

    Returns:
        The same ``df`` object, with the columns described above.
    """
    # Coerce both inputs to numeric; bad rating cells become NaN.
    df[rating_col] = pd.to_numeric(df[rating_col], errors="coerce")

    # A negative review count is meaningless: it would make log1p() return
    # NaN/-inf and could drive the shrinkage denominator to zero or below,
    # so clamp counts to be non-negative.
    df[reviews_col] = (
        pd.to_numeric(df[reviews_col], errors="coerce")
        .fillna(0)
        .astype(int)
        .clip(lower=0)
    )

    # Global mean over the rows that actually have a rating.
    mu = df[rating_col].dropna().mean()

    n = df[reviews_col]
    r = df[rating_col].fillna(mu)

    # Mask zero-review rows so they fall back to the global mean exactly
    # (this also avoids 0/0 when m_prior is 0).
    denominator = (m_prior + n).where(n > 0)
    df["bayes_rating"] = ((mu * m_prior + n * r) / denominator).fillna(mu)

    # Log-damped popularity, blended with the shrunk rating.
    df["pop_log"] = np.log1p(n)
    df["pop_score"] = (
        rating_weight * df["bayes_rating"] + popularity_weight * df["pop_log"]
    )

    return df