File size: 734 Bytes
888aba6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import numpy as np
import pandas as pd

def compute_bayesian_popularity_score(
    df: pd.DataFrame,
    rating_col: str = "overall_rating",
    reviews_col: str = "review_count",
    m_prior: float = 20,
    *,
    rating_weight: float = 0.7,
    pop_weight: float = 0.3,
) -> pd.DataFrame:
    """Add a Bayesian-smoothed rating and a popularity score to ``df``.

    The frame is modified in place and also returned. ``rating_col`` and
    ``reviews_col`` are overwritten with their numeric coercions, and three
    columns are added:

    * ``bayes_rating`` -- ``(mu * m_prior + n * r) / (m_prior + n)``, where
      ``mu`` is the mean of the parseable ratings, ``n`` the review count and
      ``r`` the row's rating (``mu`` when the rating is missing). Rows with
      no reviews get exactly ``mu``.
    * ``pop_log`` -- ``log1p(n)``.
    * ``pop_score`` -- ``rating_weight * bayes_rating + pop_weight * pop_log``.

    Args:
        df: Frame containing ``rating_col`` and ``reviews_col``.
        rating_col: Name of the rating column; unparseable values become NaN.
        reviews_col: Name of the review-count column; unparseable or missing
            values become 0, fractional values are truncated by the integer
            cast, and negative values are clamped to 0.
        m_prior: Weight (in pseudo-reviews) given to the global mean.
        rating_weight: Weight of ``bayes_rating`` in ``pop_score``.
        pop_weight: Weight of ``pop_log`` in ``pop_score``.

    Returns:
        The same ``df`` object with the columns described above.

    Note:
        If no rating can be parsed, ``mu`` is NaN and both ``bayes_rating``
        and ``pop_score`` are NaN for every row.
    """
    # Coerce both inputs to numeric; junk ratings -> NaN, junk counts -> 0.
    df[rating_col] = pd.to_numeric(df[rating_col], errors="coerce")
    counts = pd.to_numeric(df[reviews_col], errors="coerce").fillna(0).astype(int)
    # A negative review count is meaningless: it would make log1p return
    # NaN/-inf and could drive the Bayesian denominator to zero.
    df[reviews_col] = counts.clip(lower=0)

    # Global mean rating over the rows that actually have one.
    mu = df[rating_col].mean()

    n = df[reviews_col]
    r = df[rating_col].fillna(mu)

    # Mask the denominator where there are no reviews so those rows fall back
    # to exactly ``mu`` (this also covers m_prior == 0, avoiding 0 / 0).
    denominator = (m_prior + n).where(n > 0)
    df["bayes_rating"] = ((mu * m_prior + n * r) / denominator).fillna(mu)

    # Popularity metrics: log-damped volume blended with the smoothed rating.
    df["pop_log"] = np.log1p(n)
    df["pop_score"] = rating_weight * df["bayes_rating"] + pop_weight * df["pop_log"]

    return df