dotoking committed on
Commit
fc19dc9
·
verified ·
1 Parent(s): ba3bc8e

Update cear_model.py

Browse files
Files changed (1) hide show
  1. cear_model.py +78 -23
cear_model.py CHANGED
@@ -5,24 +5,60 @@ import json
5
  import numpy as np
6
  import pandas as pd
7
 
8
- # --- 1. Load platform weights from JSON ----
9
- # Expected JSON shape:
10
- # {
11
- # "tiktok": {"W_C": 1.0, "W_A": 1.0},
12
- # "instagram":{"W_C": 0.8, "W_A": 0.9},
13
- # ...
14
- # }
15
 
16
- PLATFORM_WEIGHTS = {}
17
-
18
- try:
 
 
 
 
 
19
  script_dir = os.path.dirname(os.path.abspath(__file__))
20
  json_path = os.path.join(script_dir, "platform_weights.json")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  with open(json_path, "r", encoding="utf-8") as f:
22
- PLATFORM_WEIGHTS = json.load(f)
23
- except FileNotFoundError:
24
- print("FATAL ERROR: platform_weights.json not found! Using empty weights.")
25
- PLATFORM_WEIGHTS = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
 
28
  class CEARModel:
@@ -30,14 +66,13 @@ class CEARModel:
30
  Core CEAR scoring model.
31
 
32
  Inputs:
33
- user_df: DataFrame with at least:
34
  - 'platform_name': str
35
  - 'minutes_per_week': numeric
36
- Optionally:
37
- - 'variety_score': numeric (0–10)
38
 
39
  satisfaction: optional float (0–10)
40
- fomo: optional float (0–10)
41
 
42
  Returns dict:
43
  {
@@ -56,6 +91,8 @@ class CEARModel:
56
  def __init__(self, weights: dict | None = None) -> None:
57
  self.weights = weights if weights is not None else PLATFORM_WEIGHTS
58
 
 
 
59
  @staticmethod
60
  def _diminishing_returns(minutes: float) -> float:
61
  """Log10-based diminishing returns on minutes."""
@@ -67,15 +104,20 @@ class CEARModel:
67
  return pd.DataFrame(columns=["platform_name", "W_C", "W_A"])
68
 
69
  w_df = pd.DataFrame.from_dict(self.weights, orient="index")
 
70
  w_df.index.name = "platform_name"
71
  w_df = w_df.reset_index()
72
- # Ensure both columns exist
 
73
  if "W_C" not in w_df.columns:
74
  w_df["W_C"] = 0.0
75
  if "W_A" not in w_df.columns:
76
  w_df["W_A"] = 0.0
 
77
  return w_df[["platform_name", "W_C", "W_A"]]
78
 
 
 
79
  def calculate_scores(
80
  self,
81
  user_df: pd.DataFrame,
@@ -121,7 +163,7 @@ class CEARModel:
121
  C_Score = float(df["C_Contrib"].sum())
122
  A_Risk = float(df["A_Contrib"].sum())
123
 
124
- # 2. D-Index (diversity via inverse Herfindahl)
125
  if total_mins > 0:
126
  shares = df["minutes_per_week"] / total_mins
127
  H = float((shares**2).sum())
@@ -129,15 +171,28 @@ class CEARModel:
129
  else:
130
  D_Index = 0.0
131
 
132
- # 3. Per-platform cultural efficiency (C-contribution per minute)
133
  df["Cultural_Efficiency"] = df["C_Contrib"] / df["minutes_per_week"].replace(
134
  0.0, np.nan
135
  )
136
  eff_df = df.loc[
137
  df["minutes_per_week"] > 0, ["platform_name", "Cultural_Efficiency"]
138
  ].copy()
139
- eff_df = eff_df.dropna().sort_values("Cultural_Efficiency", ascending=False)
140
- per_platform_eff = eff_df.to_dict("records")
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
  # 4. Weighted average variety, if provided
143
  avg_variety = None
 
5
  import numpy as np
6
  import pandas as pd
7
 
8
# ---------------- Weight loading ---------------- #


def _first_present(vals: dict, keys: tuple) -> float:
    """Return the value of the first key in *keys* present in *vals*, else 0.0.

    Uses an explicit presence check rather than ``a or b or c`` chaining,
    so a weight deliberately set to ``0`` / ``0.0`` is honored instead of
    falling through to a later alias key.
    """
    for key in keys:
        if vals.get(key) is not None:
            return vals[key]
    return 0.0


def _load_platform_weights() -> dict:
    """
    Load platform weights from platform_weights.json (next to this module).

    Supports multiple key schemes per platform entry:
      - W_C / W_A
      - trend_weight / risk_weight
      - C_weight / c_weight and A_weight / a_weight

    Returns:
        dict mapping lower-cased platform name -> {"W_C": float, "W_A": float}.
        Falls back to built-in defaults when the JSON file is missing.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    json_path = os.path.join(script_dir, "platform_weights.json")

    if not os.path.exists(json_path):
        print("WARNING: platform_weights.json not found. Using default weights.")
        # Sensible defaults if file missing
        return {
            "tiktok": {"W_C": 1.00, "W_A": 1.00},
            "instagram": {"W_C": 0.80, "W_A": 0.90},
            "youtube": {"W_C": 0.60, "W_A": 0.60},
            "twitter": {"W_C": 0.70, "W_A": 0.80},
            "reddit": {"W_C": 0.50, "W_A": 0.50},
            "facebook": {"W_C": 0.30, "W_A": 0.40},
            "other": {"W_C": 0.20, "W_A": 0.30},
        }

    with open(json_path, "r", encoding="utf-8") as f:
        raw = json.load(f)

    # Normalize key names into W_C and W_A.
    norm = {}
    for platform, vals in raw.items():
        if not isinstance(vals, dict):
            vals = {}
        # BUG FIX: the previous ``x or y or ... or 0.0`` chains treated an
        # explicit 0 / 0.0 weight as "missing" and fell through to a later
        # alias key; _first_present distinguishes absent from falsy.
        w_c = _first_present(vals, ("W_C", "c_weight", "C_weight", "trend_weight"))
        w_a = _first_present(vals, ("W_A", "a_weight", "A_weight", "risk_weight"))
        norm[platform.lower()] = {"W_C": float(w_c), "W_A": float(w_a)}

    return norm


PLATFORM_WEIGHTS = _load_platform_weights()
62
 
63
 
64
  class CEARModel:
 
66
  Core CEAR scoring model.
67
 
68
  Inputs:
69
+ user_df: DataFrame with columns:
70
  - 'platform_name': str
71
  - 'minutes_per_week': numeric
72
+ - optional 'variety_score': numeric (0–10)
 
73
 
74
  satisfaction: optional float (0–10)
75
+ fomo: optional float (0–10)
76
 
77
  Returns dict:
78
  {
 
91
  def __init__(self, weights: dict | None = None) -> None:
92
  self.weights = weights if weights is not None else PLATFORM_WEIGHTS
93
 
94
+ # ---------- internals ---------- #
95
+
96
  @staticmethod
97
  def _diminishing_returns(minutes: float) -> float:
98
  """Log10-based diminishing returns on minutes."""
 
104
  return pd.DataFrame(columns=["platform_name", "W_C", "W_A"])
105
 
106
  w_df = pd.DataFrame.from_dict(self.weights, orient="index")
107
+ w_df.index = w_df.index.astype(str).str.lower()
108
  w_df.index.name = "platform_name"
109
  w_df = w_df.reset_index()
110
+
111
+ # Ensure W_C / W_A exist even if missing
112
  if "W_C" not in w_df.columns:
113
  w_df["W_C"] = 0.0
114
  if "W_A" not in w_df.columns:
115
  w_df["W_A"] = 0.0
116
+
117
  return w_df[["platform_name", "W_C", "W_A"]]
118
 
119
+ # ---------- public API ---------- #
120
+
121
  def calculate_scores(
122
  self,
123
  user_df: pd.DataFrame,
 
163
  C_Score = float(df["C_Contrib"].sum())
164
  A_Risk = float(df["A_Contrib"].sum())
165
 
166
+ # 2. D-Index (effective number of platforms via inverse Herfindahl)
167
  if total_mins > 0:
168
  shares = df["minutes_per_week"] / total_mins
169
  H = float((shares**2).sum())
 
171
  else:
172
  D_Index = 0.0
173
 
174
+ # 3. Per-platform cultural efficiency (scaled 0–100)
175
  df["Cultural_Efficiency"] = df["C_Contrib"] / df["minutes_per_week"].replace(
176
  0.0, np.nan
177
  )
178
  eff_df = df.loc[
179
  df["minutes_per_week"] > 0, ["platform_name", "Cultural_Efficiency"]
180
  ].copy()
181
+ eff_df = eff_df.dropna()
182
+
183
+ if not eff_df.empty:
184
+ max_ce = float(eff_df["Cultural_Efficiency"].max())
185
+ if max_ce > 0:
186
+ eff_df["Cultural_Efficiency"] = (
187
+ eff_df["Cultural_Efficiency"] / max_ce * 100.0
188
+ )
189
+ else:
190
+ eff_df["Cultural_Efficiency"] = 0.0
191
+
192
+ eff_df = eff_df.sort_values("Cultural_Efficiency", ascending=False)
193
+ per_platform_eff = eff_df.to_dict("records")
194
+ else:
195
+ per_platform_eff = []
196
 
197
  # 4. Weighted average variety, if provided
198
  avg_variety = None