Spaces:

dotoking
/

CEAR

Sleeping

App Files Files Community

dotoking committed 21 days ago

Commit

be89f48

verified ·

1 Parent(s): 363ce76

Update cear_model.py

Browse files

Files changed (1) hide show

cear_model.py +161 -69

cear_model.py CHANGED Viewed

@@ -1,69 +1,161 @@
-# cear_model.py
-import numpy as np
-import pandas as pd
-import json
-import os # Necessary for finding the JSON file
-# --- 1. Load PLATFORM_WEIGHTS variable from JSON ---
-PLATFORM_WEIGHTS = {} # Default value
-try:
-    # Get the directory of the current script (cear_model.py)
-    script_dir = os.path.dirname(os.path.abspath(__file__))
-    json_path = os.path.join(script_dir, 'platform_weights.json')
-    with open(json_path, 'r') as f:
-        # Load the configuration data into the global variable
-        PLATFORM_WEIGHTS = json.load(f)
-except FileNotFoundError:
-    # This warning is useful for debugging if the file is missing
-    print("FATAL ERROR: platform_weights.json not found! Using empty weights.")
-    # The default empty {} dict is used if the file is missing
-# --- 2. Define the Model Class ---
-# The class can now safely reference the global PLATFORM_WEIGHTS variable
-class CEARModel:
-    def __init__(self, weights=PLATFORM_WEIGHTS):
-        # The weights dictionary is passed as a default parameter
-        self.weights = weights
-    def _diminishing_returns(self, minutes):
-        # ... your method code ...
-        return np.log10(minutes + 1)
-    def calculate_scores(self, user_input_df: pd.DataFrame):
-        # 1. Merge weights with user input
-        df = user_input_df.merge(
-            pd.DataFrame.from_dict(self.weights, orient='index'),
-            left_on='platform_name',
-            right_index=True,
-            how='left'
-        ).fillna(0) # Fills missing weights with 0 for platforms not in list
-        total_mins = df['minutes_per_week'].sum()
-        # 2. Calculate Core Scores
-        df['C_Contrib'] = df.apply(lambda row: row['W_C'] * self._diminishing_returns(row['minutes_per_week']), axis=1)
-        df['A_Contrib'] = df.apply(lambda row: row['W_A'] * row['minutes_per_week'], axis=1)
-        C_Score = df['C_Contrib'].sum()
-        A_Risk = df['A_Contrib'].sum()
-        # 3. Calculate D-Index (Platform Diversity)
-        df['Min_Share'] = df['minutes_per_week'] / total_mins
-        D_Index = 1 / (df['Min_Share']**2).sum() if total_mins > 0 else 0
-        # 4. Calculate Cultural Efficiency
-        df['Cultural_Efficiency'] = df['C_Contrib'] / df['minutes_per_week'].replace(0, np.nan) # Avoid div by zero
-        return {
-            "C_Score": C_Score,
-            "A_Risk": A_Risk,
-            "D_Index": D_Index,
-            "Per_Platform_Efficiency": df[['platform_name', 'Cultural_Efficiency']].dropna().to_dict('records')
-        }
-# Example Usage:
-# user_data = pd.DataFrame([{'platform_name': 'TikTok', 'minutes_per_week': 300}, ...])
-# model = CEARModel()
-# model.calculate_scores(user_data)

+# cear_model.py
+import os
+import json
+import numpy as np
+import pandas as pd
# --- 1. Platform weight table, read from JSON at import time ---
# platform_weights.json is looked up in the same directory as this module.
# Expected JSON shape:
# {
#   "tiktok":   {"W_C": 1.0, "W_A": 1.0},
#   "instagram":{"W_C": 0.8, "W_A": 0.9},
#   ...
# }
# A missing file is reported on stdout and the table is left empty; any other
# failure (e.g. malformed JSON) propagates to the importer.
PLATFORM_WEIGHTS = {}
try:
    script_dir = os.path.dirname(os.path.abspath(__file__))
    json_path = os.path.join(script_dir, "platform_weights.json")
    f = open(json_path, "r", encoding="utf-8")
except FileNotFoundError:
    print("FATAL ERROR: platform_weights.json not found! Using empty weights.")
else:
    # Only reached when the file was opened; the handle is closed on exit.
    with f:
        PLATFORM_WEIGHTS = json.load(f)
+class CEARModel:
+    """
+    Core CEAR scoring model.
+    Inputs:
+        user_df: DataFrame with at least:
+            - 'platform_name': str
+            - 'minutes_per_week': numeric
+            Optionally:
+            - 'variety_score': numeric (0–10)
+        satisfaction: optional float (0–10)
+        fomo: optional float (0–10)
+    Returns dict:
+        {
+          "C_Score": float,
+          "A_Risk": float,
+          "D_Index": float,
+          "Avg_Variety": float | None,
+          "Satisfaction": float | None,
+          "FOMO": float | None,
+          "Per_Platform_Efficiency": [
+              {"platform_name": str, "Cultural_Efficiency": float}, ...
+          ]
+        }
+    """
+    def __init__(self, weights: dict | None = None) -> None:
+        self.weights = weights if weights is not None else PLATFORM_WEIGHTS
+    @staticmethod
+    def _diminishing_returns(minutes: float) -> float:
+        """Log10-based diminishing returns on minutes."""
+        minutes = max(float(minutes), 0.0)
+        return float(np.log10(minutes + 1.0))
+    def _weights_dataframe(self) -> pd.DataFrame:
+        if not self.weights:
+            return pd.DataFrame(columns=["platform_name", "W_C", "W_A"])
+        w_df = pd.DataFrame.from_dict(self.weights, orient="index")
+        w_df.index.name = "platform_name"
+        w_df = w_df.reset_index()
+        # Ensure both columns exist
+        if "W_C" not in w_df.columns:
+            w_df["W_C"] = 0.0
+        if "W_A" not in w_df.columns:
+            w_df["W_A"] = 0.0
+        return w_df[["platform_name", "W_C", "W_A"]]
+    def calculate_scores(
+        self,
+        user_df: pd.DataFrame,
+        satisfaction: float | None = None,
+        fomo: float | None = None,
+    ) -> dict:
+        if user_df is None or user_df.empty:
+            return {
+                "C_Score": 0.0,
+                "A_Risk": 0.0,
+                "D_Index": 0.0,
+                "Avg_Variety": None,
+                "Satisfaction": satisfaction,
+                "FOMO": fomo,
+                "Per_Platform_Efficiency": [],
+            }
+        df = user_df.copy()
+        # Normalize names and convert minutes
+        df["platform_name"] = (
+            df["platform_name"].astype(str).str.strip().str.lower()
+        )
+        df["minutes_per_week"] = pd.to_numeric(
+            df["minutes_per_week"], errors="coerce"
+        ).fillna(0.0)
+        df["minutes_per_week"] = df["minutes_per_week"].clip(lower=0.0)
+        # Attach weights
+        w_df = self._weights_dataframe()
+        df = df.merge(w_df, on="platform_name", how="left")
+        df[["W_C", "W_A"]] = df[["W_C", "W_A"]].fillna(0.0)
+        total_mins = float(df["minutes_per_week"].sum())
+        # 1. Core contributions
+        df["C_Contrib"] = df.apply(
+            lambda row: row["W_C"] * self._diminishing_returns(row["minutes_per_week"]),
+            axis=1,
+        )
+        df["A_Contrib"] = df["W_A"] * df["minutes_per_week"]
+        C_Score = float(df["C_Contrib"].sum())
+        A_Risk = float(df["A_Contrib"].sum())
+        # 2. D-Index (diversity via inverse Herfindahl)
+        if total_mins > 0:
+            shares = df["minutes_per_week"] / total_mins
+            H = float((shares**2).sum())
+            D_Index = float(1.0 / H) if H > 0 else 0.0
+        else:
+            D_Index = 0.0
+        # 3. Per-platform cultural efficiency (C-contribution per minute)
+        df["Cultural_Efficiency"] = df["C_Contrib"] / df["minutes_per_week"].replace(
+            0.0, np.nan
+        )
+        eff_df = df.loc[
+            df["minutes_per_week"] > 0, ["platform_name", "Cultural_Efficiency"]
+        ].copy()
+        eff_df = eff_df.dropna().sort_values("Cultural_Efficiency", ascending=False)
+        per_platform_eff = eff_df.to_dict("records")
+        # 4. Weighted average variety, if provided
+        avg_variety = None
+        if "variety_score" in df.columns and total_mins > 0:
+            if df["variety_score"].notna().any():
+                avg_variety = float(
+                    np.average(
+                        df["variety_score"].fillna(0.0),
+                        weights=df["minutes_per_week"],
+                    )
+                )
+        return {
+            "C_Score": C_Score,
+            "A_Risk": A_Risk,
+            "D_Index": D_Index,
+            "Avg_Variety": avg_variety,
+            "Satisfaction": satisfaction,
+            "FOMO": fomo,
+            "Per_Platform_Efficiency": per_platform_eff,
+        }