knguyen471's picture
Upload 11 files
888aba6 verified
raw
history blame contribute delete
734 Bytes
import numpy as np
import pandas as pd
def compute_bayesian_popularity_score(
    df: pd.DataFrame,
    rating_col: str = "overall_rating",
    reviews_col: str = "review_count",
    m_prior: float = 20,
) -> pd.DataFrame:
    """Add Bayesian-smoothed rating and popularity columns to *df*.

    The frame is modified in place and also returned. ``rating_col`` and
    ``reviews_col`` are overwritten with their numeric-coerced versions, and
    three columns are added:

    * ``bayes_rating``: ``(mu * m_prior + n * r) / (m_prior + n)``, where
      ``mu`` is the mean of all valid ratings, ``n`` the review count and
      ``r`` the row's rating (``mu`` when the rating is missing). Rows with
      zero reviews get exactly ``mu``.
    * ``pop_log``: ``log1p(n)``.
    * ``pop_score``: ``0.7 * bayes_rating + 0.3 * pop_log``.

    Args:
        df: Input frame containing ``rating_col`` and ``reviews_col``.
        rating_col: Name of the rating column.
        reviews_col: Name of the review-count column.
        m_prior: Weight of the global mean, expressed as an equivalent
            number of reviews.

    Returns:
        The same ``df`` object with the columns described above.

    Note:
        If no row has a valid rating, ``mu`` is NaN and so are
        ``bayes_rating`` and ``pop_score`` for every row.
    """
    # Unparseable ratings become NaN and are later replaced by the global mean.
    df[rating_col] = pd.to_numeric(df[rating_col], errors="coerce")

    # Missing/unparseable counts mean "no reviews". Negative counts are invalid
    # data and are clamped to zero: left as-is they make log1p return -inf/NaN
    # and can drive the Bayesian denominator to zero.
    df[reviews_col] = (
        pd.to_numeric(df[reviews_col], errors="coerce")
        .fillna(0)
        .astype(int)
        .clip(lower=0)
    )

    # Global mean over rows with a valid rating (mean() skips NaN by default).
    mu = df[rating_col].mean()

    n = df[reviews_col]
    r = df[rating_col].fillna(mu)

    smoothed = (mu * m_prior + n * r) / (m_prior + n)
    # A row with no reviews carries no evidence, so it gets the prior mean
    # exactly; this also covers the 0/0 case when m_prior == 0.
    df["bayes_rating"] = smoothed.where(n > 0, mu)

    # log1p dampens the influence of very large review counts.
    df["pop_log"] = np.log1p(n)
    df["pop_score"] = 0.7 * df["bayes_rating"] + 0.3 * df["pop_log"]
    return df