Spaces:

dotoking
/

CEAR

Sleeping

App Files Files Community

dotoking committed on Dec 9, 2025

Commit

fc19dc9

verified ·

1 Parent(s): ba3bc8e

Update cear_model.py

Browse files

Files changed (1) hide show

cear_model.py +78 -23

cear_model.py CHANGED Viewed

@@ -5,24 +5,60 @@ import json
 import numpy as np
 import pandas as pd
-# --- 1. Load platform weights from JSON ----
-# Expected JSON shape:
-# {
-#   "tiktok":   {"W_C": 1.0, "W_A": 1.0},
-#   "instagram":{"W_C": 0.8, "W_A": 0.9},
-#   ...
-# }
-PLATFORM_WEIGHTS = {}
-try:
     script_dir = os.path.dirname(os.path.abspath(__file__))
     json_path = os.path.join(script_dir, "platform_weights.json")
     with open(json_path, "r", encoding="utf-8") as f:
-        PLATFORM_WEIGHTS = json.load(f)
-except FileNotFoundError:
-    print("FATAL ERROR: platform_weights.json not found! Using empty weights.")
-    PLATFORM_WEIGHTS = {}
 class CEARModel:
@@ -30,14 +66,13 @@ class CEARModel:
     Core CEAR scoring model.
     Inputs:
-        user_df: DataFrame with at least:
             - 'platform_name': str
             - 'minutes_per_week': numeric
-            Optionally:
-            - 'variety_score': numeric (0–10)
         satisfaction: optional float (0–10)
-        fomo: optional float (0–10)
     Returns dict:
         {
@@ -56,6 +91,8 @@ class CEARModel:
     def __init__(self, weights: dict | None = None) -> None:
         self.weights = weights if weights is not None else PLATFORM_WEIGHTS
     @staticmethod
     def _diminishing_returns(minutes: float) -> float:
         """Log10-based diminishing returns on minutes."""
@@ -67,15 +104,20 @@ class CEARModel:
             return pd.DataFrame(columns=["platform_name", "W_C", "W_A"])
         w_df = pd.DataFrame.from_dict(self.weights, orient="index")
         w_df.index.name = "platform_name"
         w_df = w_df.reset_index()
-        # Ensure both columns exist
         if "W_C" not in w_df.columns:
             w_df["W_C"] = 0.0
         if "W_A" not in w_df.columns:
             w_df["W_A"] = 0.0
         return w_df[["platform_name", "W_C", "W_A"]]
     def calculate_scores(
         self,
         user_df: pd.DataFrame,
@@ -121,7 +163,7 @@ class CEARModel:
         C_Score = float(df["C_Contrib"].sum())
         A_Risk = float(df["A_Contrib"].sum())
-        # 2. D-Index (diversity via inverse Herfindahl)
         if total_mins > 0:
             shares = df["minutes_per_week"] / total_mins
             H = float((shares**2).sum())
@@ -129,15 +171,28 @@ class CEARModel:
         else:
             D_Index = 0.0
-        # 3. Per-platform cultural efficiency (C-contribution per minute)
         df["Cultural_Efficiency"] = df["C_Contrib"] / df["minutes_per_week"].replace(
             0.0, np.nan
         )
         eff_df = df.loc[
             df["minutes_per_week"] > 0, ["platform_name", "Cultural_Efficiency"]
         ].copy()
-        eff_df = eff_df.dropna().sort_values("Cultural_Efficiency", ascending=False)
-        per_platform_eff = eff_df.to_dict("records")
         # 4. Weighted average variety, if provided
         avg_variety = None

 import numpy as np
 import pandas as pd
+# ---------------- Weight loading ---------------- #
+def _load_platform_weights() -> dict:
+    """
+    Load platform weights from platform_weights.json.
+    Supports multiple key schemes (first truthy value wins, else 0.0):
+      - W_C / W_A
+      - c_weight / a_weight, C_weight / A_weight
+      - trend_weight / risk_weight
+    """
     script_dir = os.path.dirname(os.path.abspath(__file__))
     json_path = os.path.join(script_dir, "platform_weights.json")
+    if not os.path.exists(json_path):
+        print("WARNING: platform_weights.json not found. Using default weights.")
+        # Sensible defaults if file missing
+        return {
+            "tiktok":    {"W_C": 1.00, "W_A": 1.00},
+            "instagram": {"W_C": 0.80, "W_A": 0.90},
+            "youtube":   {"W_C": 0.60, "W_A": 0.60},
+            "twitter":   {"W_C": 0.70, "W_A": 0.80},
+            "reddit":    {"W_C": 0.50, "W_A": 0.50},
+            "facebook":  {"W_C": 0.30, "W_A": 0.40},
+            "other":     {"W_C": 0.20, "W_A": 0.30},
+        }
     with open(json_path, "r", encoding="utf-8") as f:
+        raw = json.load(f)
+    # Normalize key names into W_C and W_A
+    norm = {}
+    for platform, vals in raw.items():
+        if not isinstance(vals, dict):
+            vals = {}
+        w_c = (
+            vals.get("W_C")
+            or vals.get("c_weight")
+            or vals.get("C_weight")
+            or vals.get("trend_weight")
+            or 0.0
+        )
+        w_a = (
+            vals.get("W_A")
+            or vals.get("a_weight")
+            or vals.get("A_weight")
+            or vals.get("risk_weight")
+            or 0.0
+        )
+        norm[platform.lower()] = {"W_C": float(w_c), "W_A": float(w_a)}
+    return norm
+PLATFORM_WEIGHTS = _load_platform_weights()
 class CEARModel:
     Core CEAR scoring model.
     Inputs:
+        user_df: DataFrame with columns:
             - 'platform_name': str
             - 'minutes_per_week': numeric
+            - optional 'variety_score': numeric (0–10)
         satisfaction: optional float (0–10)
+        fomo:         optional float (0–10)
     Returns dict:
         {
     def __init__(self, weights: dict | None = None) -> None:
         self.weights = weights if weights is not None else PLATFORM_WEIGHTS
+    # ---------- internals ---------- #
     @staticmethod
     def _diminishing_returns(minutes: float) -> float:
         """Log10-based diminishing returns on minutes."""
             return pd.DataFrame(columns=["platform_name", "W_C", "W_A"])
         w_df = pd.DataFrame.from_dict(self.weights, orient="index")
+        w_df.index = w_df.index.astype(str).str.lower()
         w_df.index.name = "platform_name"
         w_df = w_df.reset_index()
+        # Ensure W_C / W_A exist even if missing
         if "W_C" not in w_df.columns:
             w_df["W_C"] = 0.0
         if "W_A" not in w_df.columns:
             w_df["W_A"] = 0.0
         return w_df[["platform_name", "W_C", "W_A"]]
+    # ---------- public API ---------- #
     def calculate_scores(
         self,
         user_df: pd.DataFrame,
         C_Score = float(df["C_Contrib"].sum())
         A_Risk = float(df["A_Contrib"].sum())
+        # 2. D-Index (effective number of platforms via inverse Herfindahl)
         if total_mins > 0:
             shares = df["minutes_per_week"] / total_mins
             H = float((shares**2).sum())
         else:
             D_Index = 0.0
+        # 3. Per-platform cultural efficiency (scaled 0–100)
         df["Cultural_Efficiency"] = df["C_Contrib"] / df["minutes_per_week"].replace(
             0.0, np.nan
         )
         eff_df = df.loc[
             df["minutes_per_week"] > 0, ["platform_name", "Cultural_Efficiency"]
         ].copy()
+        eff_df = eff_df.dropna()
+        if not eff_df.empty:
+            max_ce = float(eff_df["Cultural_Efficiency"].max())
+            if max_ce > 0:
+                eff_df["Cultural_Efficiency"] = (
+                    eff_df["Cultural_Efficiency"] / max_ce * 100.0
+                )
+            else:
+                eff_df["Cultural_Efficiency"] = 0.0
+            eff_df = eff_df.sort_values("Cultural_Efficiency", ascending=False)
+            per_platform_eff = eff_df.to_dict("records")
+        else:
+            per_platform_eff = []
         # 4. Weighted average variety, if provided
         avg_variety = None