# cear_model.py import os import json import numpy as np import pandas as pd # ---------------- Weight loading ---------------- # def _load_platform_weights() -> dict: """ Load platform weights from platform_weights.json. Supports multiple key schemes: - W_C / W_A - trend_weight / risk_weight - C_weight / A_weight """ script_dir = os.path.dirname(os.path.abspath(__file__)) json_path = os.path.join(script_dir, "platform_weights.json") if not os.path.exists(json_path): print("WARNING: platform_weights.json not found. Using default weights.") # Sensible defaults if file missing return { "tiktok": {"W_C": 1.00, "W_A": 1.00}, "instagram": {"W_C": 0.80, "W_A": 0.90}, "youtube": {"W_C": 0.60, "W_A": 0.60}, "twitter": {"W_C": 0.70, "W_A": 0.80}, "reddit": {"W_C": 0.50, "W_A": 0.50}, "facebook": {"W_C": 0.30, "W_A": 0.40}, "other": {"W_C": 0.20, "W_A": 0.30}, } with open(json_path, "r", encoding="utf-8") as f: raw = json.load(f) # Normalize key names into W_C and W_A norm = {} for platform, vals in raw.items(): if not isinstance(vals, dict): vals = {} w_c = ( vals.get("W_C") or vals.get("c_weight") or vals.get("C_weight") or vals.get("trend_weight") or 0.0 ) w_a = ( vals.get("W_A") or vals.get("a_weight") or vals.get("A_weight") or vals.get("risk_weight") or 0.0 ) norm[platform.lower()] = {"W_C": float(w_c), "W_A": float(w_a)} return norm PLATFORM_WEIGHTS = _load_platform_weights() class CEARModel: """ Core CEAR scoring model. Inputs: user_df: DataFrame with columns: - 'platform_name': str - 'minutes_per_week': numeric - optional 'variety_score': numeric (0–10) satisfaction: optional float (0–10) fomo: optional float (0–10) Returns dict: { "C_Score": float, "A_Risk": float, "D_Index": float, "Avg_Variety": float | None, "Satisfaction": float | None, "FOMO": float | None, "Per_Platform_Efficiency": [ {"platform_name": str, "Cultural_Efficiency": float}, ... ] } """ def __init__(self, weights: dict | None = None) -> None: self.weights = weights if weights is not None else PLATFORM_WEIGHTS # ---------- internals ---------- # @staticmethod def _diminishing_returns(minutes: float) -> float: """Log10-based diminishing returns on minutes.""" minutes = max(float(minutes), 0.0) return float(np.log10(minutes + 1.0)) def _weights_dataframe(self) -> pd.DataFrame: if not self.weights: return pd.DataFrame(columns=["platform_name", "W_C", "W_A"]) w_df = pd.DataFrame.from_dict(self.weights, orient="index") w_df.index = w_df.index.astype(str).str.lower() w_df.index.name = "platform_name" w_df = w_df.reset_index() # Ensure W_C / W_A exist even if missing if "W_C" not in w_df.columns: w_df["W_C"] = 0.0 if "W_A" not in w_df.columns: w_df["W_A"] = 0.0 return w_df[["platform_name", "W_C", "W_A"]] # ---------- public API ---------- # def calculate_scores( self, user_df: pd.DataFrame, satisfaction: float | None = None, fomo: float | None = None, ) -> dict: if user_df is None or user_df.empty: return { "C_Score": 0.0, "A_Risk": 0.0, "D_Index": 0.0, "Avg_Variety": None, "Satisfaction": satisfaction, "FOMO": fomo, "Per_Platform_Efficiency": [], } df = user_df.copy() # Normalize names and convert minutes df["platform_name"] = ( df["platform_name"].astype(str).str.strip().str.lower() ) df["minutes_per_week"] = pd.to_numeric( df["minutes_per_week"], errors="coerce" ).fillna(0.0) df["minutes_per_week"] = df["minutes_per_week"].clip(lower=0.0) # Attach weights w_df = self._weights_dataframe() df = df.merge(w_df, on="platform_name", how="left") df[["W_C", "W_A"]] = df[["W_C", "W_A"]].fillna(0.0) total_mins = float(df["minutes_per_week"].sum()) # 1. Core contributions df["C_Contrib"] = df.apply( lambda row: row["W_C"] * self._diminishing_returns(row["minutes_per_week"]), axis=1, ) df["A_Contrib"] = df["W_A"] * df["minutes_per_week"] C_Score = float(df["C_Contrib"].sum()) A_Risk = float(df["A_Contrib"].sum()) # 2. D-Index (effective number of platforms via inverse Herfindahl) if total_mins > 0: shares = df["minutes_per_week"] / total_mins H = float((shares**2).sum()) D_Index = float(1.0 / H) if H > 0 else 0.0 else: D_Index = 0.0 # 3. Per-platform cultural efficiency (scaled 0–100) df["Cultural_Efficiency"] = df["C_Contrib"] / df["minutes_per_week"].replace( 0.0, np.nan ) eff_df = df.loc[ df["minutes_per_week"] > 0, ["platform_name", "Cultural_Efficiency"] ].copy() eff_df = eff_df.dropna() if not eff_df.empty: max_ce = float(eff_df["Cultural_Efficiency"].max()) if max_ce > 0: eff_df["Cultural_Efficiency"] = ( eff_df["Cultural_Efficiency"] / max_ce * 100.0 ) else: eff_df["Cultural_Efficiency"] = 0.0 eff_df = eff_df.sort_values("Cultural_Efficiency", ascending=False) per_platform_eff = eff_df.to_dict("records") else: per_platform_eff = [] # 4. Weighted average variety, if provided avg_variety = None if "variety_score" in df.columns and total_mins > 0: if df["variety_score"].notna().any(): avg_variety = float( np.average( df["variety_score"].fillna(0.0), weights=df["minutes_per_week"], ) ) return { "C_Score": C_Score, "A_Risk": A_Risk, "D_Index": D_Index, "Avg_Variety": avg_variety, "Satisfaction": satisfaction, "FOMO": fomo, "Per_Platform_Efficiency": per_platform_eff, }