Spaces:
Running
Running
import numpy as np
import pandas as pd
def compute_bayesian_popularity_score(
    df,
    rating_col="overall_rating",
    reviews_col="review_count",
    m_prior=20,
):
    """Add Bayesian-smoothed rating and popularity columns to ``df``.

    The frame is modified in place and also returned. ``rating_col`` and
    ``reviews_col`` are overwritten with their numeric coercions, and three
    columns are added:

    * ``bayes_rating`` -- ``(mu * m_prior + n * r) / (m_prior + n)``, where
      ``mu`` is the mean of all parseable ratings, ``n`` the review count and
      ``r`` the row's rating (``mu`` when the rating is missing). Rows with
      no reviews receive ``mu``.
    * ``pop_log`` -- ``log1p(n)``.
    * ``pop_score`` -- ``0.7 * bayes_rating + 0.3 * pop_log``.

    Args:
        df: DataFrame containing ``rating_col`` and ``reviews_col``.
        rating_col: Name of the rating column.
        reviews_col: Name of the review-count column.
        m_prior: Weight of the global mean, expressed as a number of
            pseudo-reviews. Expected to be non-negative.

    Returns:
        The same ``df`` object with the columns described above.

    Note:
        If no rating can be parsed (including an empty frame), ``mu`` is NaN
        and so are ``bayes_rating`` and ``pop_score``.
    """
    # Unparseable ratings become NaN and are later replaced by the global mean.
    df[rating_col] = pd.to_numeric(df[rating_col], errors="coerce")
    # Unparseable counts become 0. Negative counts are clipped to 0 as well:
    # they are meaningless and would otherwise yield -inf/NaN from log1p and
    # could zero out the denominator of the Bayesian average.
    df[reviews_col] = (
        pd.to_numeric(df[reviews_col], errors="coerce")
        .fillna(0)
        .clip(lower=0)
        .astype(int)
    )

    # Global mean rating (Series.mean skips NaN by default).
    mu = df[rating_col].mean()

    n = df[reviews_col]
    r = df[rating_col].fillna(mu)

    # Mask zero-review rows so they evaluate to NaN and fall back to exactly
    # ``mu``; this also avoids 0/0 when ``m_prior`` is 0.
    reviewed = n.where(n > 0)
    df["bayes_rating"] = ((mu * m_prior + n * r) / (m_prior + reviewed)).fillna(mu)

    # Popularity metrics
    df["pop_log"] = np.log1p(n)
    df["pop_score"] = 0.7 * df["bayes_rating"] + 0.3 * df["pop_log"]
    return df