dotoking committed on
Commit
be89f48
·
verified ·
1 Parent(s): 363ce76

Update cear_model.py

Browse files
Files changed (1) hide show
  1. cear_model.py +161 -69
cear_model.py CHANGED
@@ -1,69 +1,161 @@
1
- # cear_model.py
2
- import numpy as np
3
- import pandas as pd
4
- import json
5
- import os # Necessary for finding the JSON file
6
-
7
- # --- 1. Load PLATFORM_WEIGHTS variable from JSON ---
8
- PLATFORM_WEIGHTS = {} # Default value
9
-
10
- try:
11
- # Get the directory of the current script (cear_model.py)
12
- script_dir = os.path.dirname(os.path.abspath(__file__))
13
- json_path = os.path.join(script_dir, 'platform_weights.json')
14
-
15
- with open(json_path, 'r') as f:
16
- # Load the configuration data into the global variable
17
- PLATFORM_WEIGHTS = json.load(f)
18
-
19
- except FileNotFoundError:
20
- # This warning is useful for debugging if the file is missing
21
- print("FATAL ERROR: platform_weights.json not found! Using empty weights.")
22
- # The default empty {} dict is used if the file is missing
23
-
24
- # --- 2. Define the Model Class ---
25
- # The class can now safely reference the global PLATFORM_WEIGHTS variable
26
- class CEARModel:
27
- def __init__(self, weights=PLATFORM_WEIGHTS):
28
- # The weights dictionary is passed as a default parameter
29
- self.weights = weights
30
-
31
- def _diminishing_returns(self, minutes):
32
- # ... your method code ...
33
- return np.log10(minutes + 1)
34
- def calculate_scores(self, user_input_df: pd.DataFrame):
35
- # 1. Merge weights with user input
36
- df = user_input_df.merge(
37
- pd.DataFrame.from_dict(self.weights, orient='index'),
38
- left_on='platform_name',
39
- right_index=True,
40
- how='left'
41
- ).fillna(0) # Fills missing weights with 0 for platforms not in list
42
-
43
- total_mins = df['minutes_per_week'].sum()
44
-
45
- # 2. Calculate Core Scores
46
- df['C_Contrib'] = df.apply(lambda row: row['W_C'] * self._diminishing_returns(row['minutes_per_week']), axis=1)
47
- df['A_Contrib'] = df.apply(lambda row: row['W_A'] * row['minutes_per_week'], axis=1)
48
-
49
- C_Score = df['C_Contrib'].sum()
50
- A_Risk = df['A_Contrib'].sum()
51
-
52
- # 3. Calculate D-Index (Platform Diversity)
53
- df['Min_Share'] = df['minutes_per_week'] / total_mins
54
- D_Index = 1 / (df['Min_Share']**2).sum() if total_mins > 0 else 0
55
-
56
- # 4. Calculate Cultural Efficiency
57
- df['Cultural_Efficiency'] = df['C_Contrib'] / df['minutes_per_week'].replace(0, np.nan) # Avoid div by zero
58
-
59
- return {
60
- "C_Score": C_Score,
61
- "A_Risk": A_Risk,
62
- "D_Index": D_Index,
63
- "Per_Platform_Efficiency": df[['platform_name', 'Cultural_Efficiency']].dropna().to_dict('records')
64
- }
65
-
66
- # Example Usage:
67
- # user_data = pd.DataFrame([{'platform_name': 'TikTok', 'minutes_per_week': 300}, ...])
68
- # model = CEARModel()
69
- # model.calculate_scores(user_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # cear_model.py
2
+
3
+ import os
4
+ import json
5
+ import numpy as np
6
+ import pandas as pd
7
+
8
+ # --- 1. Load platform weights from JSON ----
9
+ # Expected JSON shape:
10
+ # {
11
+ # "tiktok": {"W_C": 1.0, "W_A": 1.0},
12
+ # "instagram":{"W_C": 0.8, "W_A": 0.9},
13
+ # ...
14
+ # }
15
+
16
# Module-level default weights, keyed by platform name. Stays an empty dict
# whenever the JSON file next to this module is missing or unusable, so that
# importing the module never fails because of configuration problems.
PLATFORM_WEIGHTS = {}

try:
    script_dir = os.path.dirname(os.path.abspath(__file__))
    json_path = os.path.join(script_dir, "platform_weights.json")
    with open(json_path, "r", encoding="utf-8") as f:
        _loaded_weights = json.load(f)
except FileNotFoundError:
    print("FATAL ERROR: platform_weights.json not found! Using empty weights.")
except (json.JSONDecodeError, UnicodeDecodeError) as exc:
    # A malformed file is handled the same best-effort way as a missing one,
    # instead of crashing the import.
    print(
        "FATAL ERROR: platform_weights.json is not valid JSON "
        f"({exc})! Using empty weights."
    )
else:
    if isinstance(_loaded_weights, dict):
        PLATFORM_WEIGHTS = _loaded_weights
    else:
        # The model expects a mapping of platform name -> weight dict; any
        # other top-level JSON value (list, number, ...) cannot be used.
        print(
            "FATAL ERROR: platform_weights.json must contain a JSON object! "
            "Using empty weights."
        )
26
+
27
+
28
+ class CEARModel:
29
+ """
30
+ Core CEAR scoring model.
31
+
32
+ Inputs:
33
+ user_df: DataFrame with at least:
34
+ - 'platform_name': str
35
+ - 'minutes_per_week': numeric
36
+ Optionally:
37
+ - 'variety_score': numeric (0–10)
38
+
39
+ satisfaction: optional float (0–10)
40
+ fomo: optional float (0–10)
41
+
42
+ Returns dict:
43
+ {
44
+ "C_Score": float,
45
+ "A_Risk": float,
46
+ "D_Index": float,
47
+ "Avg_Variety": float | None,
48
+ "Satisfaction": float | None,
49
+ "FOMO": float | None,
50
+ "Per_Platform_Efficiency": [
51
+ {"platform_name": str, "Cultural_Efficiency": float}, ...
52
+ ]
53
+ }
54
+ """
55
+
56
+ def __init__(self, weights: dict | None = None) -> None:
57
+ self.weights = weights if weights is not None else PLATFORM_WEIGHTS
58
+
59
+ @staticmethod
60
+ def _diminishing_returns(minutes: float) -> float:
61
+ """Log10-based diminishing returns on minutes."""
62
+ minutes = max(float(minutes), 0.0)
63
+ return float(np.log10(minutes + 1.0))
64
+
65
+ def _weights_dataframe(self) -> pd.DataFrame:
66
+ if not self.weights:
67
+ return pd.DataFrame(columns=["platform_name", "W_C", "W_A"])
68
+
69
+ w_df = pd.DataFrame.from_dict(self.weights, orient="index")
70
+ w_df.index.name = "platform_name"
71
+ w_df = w_df.reset_index()
72
+ # Ensure both columns exist
73
+ if "W_C" not in w_df.columns:
74
+ w_df["W_C"] = 0.0
75
+ if "W_A" not in w_df.columns:
76
+ w_df["W_A"] = 0.0
77
+ return w_df[["platform_name", "W_C", "W_A"]]
78
+
79
+ def calculate_scores(
80
+ self,
81
+ user_df: pd.DataFrame,
82
+ satisfaction: float | None = None,
83
+ fomo: float | None = None,
84
+ ) -> dict:
85
+ if user_df is None or user_df.empty:
86
+ return {
87
+ "C_Score": 0.0,
88
+ "A_Risk": 0.0,
89
+ "D_Index": 0.0,
90
+ "Avg_Variety": None,
91
+ "Satisfaction": satisfaction,
92
+ "FOMO": fomo,
93
+ "Per_Platform_Efficiency": [],
94
+ }
95
+
96
+ df = user_df.copy()
97
+
98
+ # Normalize names and convert minutes
99
+ df["platform_name"] = (
100
+ df["platform_name"].astype(str).str.strip().str.lower()
101
+ )
102
+ df["minutes_per_week"] = pd.to_numeric(
103
+ df["minutes_per_week"], errors="coerce"
104
+ ).fillna(0.0)
105
+ df["minutes_per_week"] = df["minutes_per_week"].clip(lower=0.0)
106
+
107
+ # Attach weights
108
+ w_df = self._weights_dataframe()
109
+ df = df.merge(w_df, on="platform_name", how="left")
110
+ df[["W_C", "W_A"]] = df[["W_C", "W_A"]].fillna(0.0)
111
+
112
+ total_mins = float(df["minutes_per_week"].sum())
113
+
114
+ # 1. Core contributions
115
+ df["C_Contrib"] = df.apply(
116
+ lambda row: row["W_C"] * self._diminishing_returns(row["minutes_per_week"]),
117
+ axis=1,
118
+ )
119
+ df["A_Contrib"] = df["W_A"] * df["minutes_per_week"]
120
+
121
+ C_Score = float(df["C_Contrib"].sum())
122
+ A_Risk = float(df["A_Contrib"].sum())
123
+
124
+ # 2. D-Index (diversity via inverse Herfindahl)
125
+ if total_mins > 0:
126
+ shares = df["minutes_per_week"] / total_mins
127
+ H = float((shares**2).sum())
128
+ D_Index = float(1.0 / H) if H > 0 else 0.0
129
+ else:
130
+ D_Index = 0.0
131
+
132
+ # 3. Per-platform cultural efficiency (C-contribution per minute)
133
+ df["Cultural_Efficiency"] = df["C_Contrib"] / df["minutes_per_week"].replace(
134
+ 0.0, np.nan
135
+ )
136
+ eff_df = df.loc[
137
+ df["minutes_per_week"] > 0, ["platform_name", "Cultural_Efficiency"]
138
+ ].copy()
139
+ eff_df = eff_df.dropna().sort_values("Cultural_Efficiency", ascending=False)
140
+ per_platform_eff = eff_df.to_dict("records")
141
+
142
+ # 4. Weighted average variety, if provided
143
+ avg_variety = None
144
+ if "variety_score" in df.columns and total_mins > 0:
145
+ if df["variety_score"].notna().any():
146
+ avg_variety = float(
147
+ np.average(
148
+ df["variety_score"].fillna(0.0),
149
+ weights=df["minutes_per_week"],
150
+ )
151
+ )
152
+
153
+ return {
154
+ "C_Score": C_Score,
155
+ "A_Risk": A_Risk,
156
+ "D_Index": D_Index,
157
+ "Avg_Variety": avg_variety,
158
+ "Satisfaction": satisfaction,
159
+ "FOMO": fomo,
160
+ "Per_Platform_Efficiency": per_platform_eff,
161
+ }