dotoking committed on
Commit
363ce76
·
verified ·
1 Parent(s): af57a2e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -41
app.py CHANGED
@@ -1,5 +1,7 @@
1
  import gradio as gr
2
  import pandas as pd
 
 
3
  from cear_model import CEARModel
4
 
5
  # Instantiate the core model once
@@ -63,26 +65,34 @@ def analyze_user_data(input_table):
63
 
64
  input_table: list of lists from gr.Dataframe, e.g.
65
  [
66
- ["tiktok", 240],
67
- ["instagram", 180],
68
  ...
69
  ]
 
70
  Returns:
71
  summary_markdown (str), efficiency_dataframe (pd.DataFrame)
72
  """
73
-
74
- # 1. Basic validation: something must be entered
75
  if not input_table:
76
  return "Please enter at least one platform and its weekly minutes.", pd.DataFrame()
77
 
78
- # Convert raw table to DataFrame with fixed columns
79
- df = pd.DataFrame(input_table, columns=["platform_name", "minutes_per_week"])
 
 
 
 
 
 
 
 
80
 
81
- # Handle types and trim garbage rows
82
  df["platform_name"] = df["platform_name"].astype(str)
83
  df["minutes_per_week"] = pd.to_numeric(df["minutes_per_week"], errors="coerce")
 
84
 
85
- # Drop rows where both are missing/empty
86
  df = df.dropna(how="all")
87
  if df.empty:
88
  return "Please provide at least one platform with some minutes.", pd.DataFrame()
@@ -90,21 +100,35 @@ def analyze_user_data(input_table):
90
  # Normalize names and minutes
91
  df["platform_name"] = df["platform_name"].apply(normalize_platform_name)
92
  df["minutes_per_week"] = df["minutes_per_week"].fillna(0).clip(lower=0)
 
93
 
94
- # Drop rows with blank platform names
95
  df = df[df["platform_name"] != ""]
96
  if df.empty:
97
  return "Please provide at least one platform with some minutes.", pd.DataFrame()
98
 
99
- # 2. Call the core CEAR model
100
- raw_scores = cear_analyzer.calculate_scores(df)
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
  c = float(raw_scores.get("C_Score", 0.0))
103
  a = float(raw_scores.get("A_Risk", 0.0))
104
  d = float(raw_scores.get("D_Index", 0.0))
105
- per_eff = raw_scores.get("Per_Platform_Efficiency", {})
106
 
107
- # 3. Build a human-readable profile based on score bands
108
  if c >= 70 and a >= 70:
109
  profile = (
110
  "You are highly plugged into online culture, but that comes with high "
@@ -126,28 +150,65 @@ def analyze_user_data(input_table):
126
  "You are either deliberately detached or under-invested in highly trend-dense platforms."
127
  )
128
 
129
- summary = f"""
130
- ## 📊 CEAR Analysis Summary
131
-
132
- - **Cultural Connectedness Score (C-Score):** **{c:.2f}**
133
- - **Algorithmic Risk Score (A-Risk):** **{a:.2f}**
134
- - **Platform Diversity Index (D-Index):** **{d:.2f}**
135
-
136
- ### 📝 Interpretation
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
- {profile}
 
 
 
 
 
 
139
 
140
- The C-Score is based on a logarithmic transform of your weekly minutes, which bakes in diminishing returns as time increases.
141
- A-Risk reflects your raw time investment and how concentrated it is on a small set of high-weight platforms.
142
- D-Index captures how spread out your usage is across different platforms.
143
- """.strip()
144
 
145
- # 4. Turn per-platform efficiency into a tidy table
146
- if isinstance(per_eff, dict) and per_eff:
147
- eff_df = pd.DataFrame(
148
- sorted(per_eff.items(), key=lambda kv: kv[1], reverse=True),
149
- columns=["platform", "efficiency_score"],
150
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  else:
152
  eff_df = pd.DataFrame(columns=["platform", "efficiency_score"])
153
 
@@ -159,16 +220,16 @@ D-Index captures how spread out your usage is across different platforms.
159
  demo = gr.Interface(
160
  fn=analyze_user_data,
161
  inputs=gr.Dataframe(
162
- headers=["platform_name", "minutes_per_week"],
163
  row_count=5,
164
- col_count=(2, "fixed"),
165
  label="Weekly screen time (by platform)",
166
  value=[
167
- ["tiktok", 240],
168
- ["instagram", 180],
169
- ["youtube", 120],
170
- ["twitter", 60],
171
- ["reddit", 90],
172
  ],
173
  ),
174
  outputs=[
@@ -177,8 +238,8 @@ demo = gr.Interface(
177
  ],
178
  title="CEAR – Cultural Exposure & Algorithmic Risk Analyzer",
179
  description=(
180
- "Enter your weekly screen time per platform to estimate your cultural connectedness, "
181
- "algorithmic risk, and per-platform efficiency."
182
  ),
183
  )
184
 
 
1
  import gradio as gr
2
  import pandas as pd
3
+ import numpy as np
4
+
5
  from cear_model import CEARModel
6
 
7
  # Instantiate the core model once
 
65
 
66
  input_table: list of lists from gr.Dataframe, e.g.
67
  [
68
+ ["tiktok", 240, 5],
69
+ ["instagram", 180, 6],
70
  ...
71
  ]
72
+
73
  Returns:
74
  summary_markdown (str), efficiency_dataframe (pd.DataFrame)
75
  """
 
 
76
  if not input_table:
77
  return "Please enter at least one platform and its weekly minutes.", pd.DataFrame()
78
 
79
+ # Convert raw table to DataFrame. Support both 2- and 3-column input
80
+ df = pd.DataFrame(input_table)
81
+
82
+ if df.shape[1] == 2:
83
+ df.columns = ["platform_name", "minutes_per_week"]
84
+ df["variety_score"] = np.nan
85
+ else:
86
+ # Assume 3 columns: platform, minutes, variety
87
+ df = df.iloc[:, :3] # ignore any extra accidental columns
88
+ df.columns = ["platform_name", "minutes_per_week", "variety_score"]
89
 
90
+ # Basic cleaning
91
  df["platform_name"] = df["platform_name"].astype(str)
92
  df["minutes_per_week"] = pd.to_numeric(df["minutes_per_week"], errors="coerce")
93
+ df["variety_score"] = pd.to_numeric(df["variety_score"], errors="coerce")
94
 
95
+ # Drop fully empty rows
96
  df = df.dropna(how="all")
97
  if df.empty:
98
  return "Please provide at least one platform with some minutes.", pd.DataFrame()
 
100
  # Normalize names and minutes
101
  df["platform_name"] = df["platform_name"].apply(normalize_platform_name)
102
  df["minutes_per_week"] = df["minutes_per_week"].fillna(0).clip(lower=0)
103
+ df["variety_score"] = df["variety_score"].clip(lower=0, upper=10)
104
 
105
+ # Drop rows with blank names
106
  df = df[df["platform_name"] != ""]
107
  if df.empty:
108
  return "Please provide at least one platform with some minutes.", pd.DataFrame()
109
 
110
+ # Compute minutes-weighted average variety (if any variety data present)
111
+ total_minutes = df["minutes_per_week"].sum()
112
+ if total_minutes > 0 and df["variety_score"].notna().any():
113
+ avg_variety = float(
114
+ np.average(
115
+ df["variety_score"].fillna(0),
116
+ weights=df["minutes_per_week"]
117
+ )
118
+ )
119
+ else:
120
+ avg_variety = None
121
+
122
+ # Call the core CEAR model using only the columns it expects
123
+ df_for_model = df[["platform_name", "minutes_per_week"]].copy()
124
+ raw_scores = cear_analyzer.calculate_scores(df_for_model)
125
 
126
  c = float(raw_scores.get("C_Score", 0.0))
127
  a = float(raw_scores.get("A_Risk", 0.0))
128
  d = float(raw_scores.get("D_Index", 0.0))
129
+ per_eff = raw_scores.get("Per_Platform_Efficiency", [])
130
 
131
+ # Profile based on C and A
132
  if c >= 70 and a >= 70:
133
  profile = (
134
  "You are highly plugged into online culture, but that comes with high "
 
150
  "You are either deliberately detached or under-invested in highly trend-dense platforms."
151
  )
152
 
153
+ # Variety interpretation snippet
154
+ if avg_variety is None:
155
+ variety_text = (
156
+ "You did not provide variety ratings, so this analysis focuses only on time and platform mix."
157
+ )
158
+ elif avg_variety < 4:
159
+ variety_text = (
160
+ f"Your average variety rating is **{avg_variety:.1f} / 10**, which suggests that your feeds feel "
161
+ "quite repetitive. You may be seeing similar content types despite the time you invest."
162
+ )
163
+ elif avg_variety > 7:
164
+ variety_text = (
165
+ f"Your average variety rating is **{avg_variety:.1f} / 10**, which suggests that you see a wide range "
166
+ "of topics and styles. This can broaden your cultural exposure and reduce some perceived stagnation."
167
+ )
168
+ else:
169
+ variety_text = (
170
+ f"Your average variety rating is **{avg_variety:.1f} / 10**, indicating a moderate mix of content types "
171
+ "without being extremely narrow or extremely diverse."
172
+ )
173
 
174
+ summary_lines = [
175
+ "## 📊 CEAR Analysis Summary",
176
+ "",
177
+ f"- **Cultural Connectedness Score (C-Score):** **{c:.2f}**",
178
+ f"- **Algorithmic Risk Score (A-Risk):** **{a:.2f}**",
179
+ f"- **Platform Diversity Index (D-Index):** **{d:.2f}**",
180
+ ]
181
 
182
+ if avg_variety is not None:
183
+ summary_lines.append(f"- **Average Variety Rating (0–10):** **{avg_variety:.2f}**")
 
 
184
 
185
+ summary_lines.extend(
186
+ [
187
+ "",
188
+ "### 📝 Interpretation",
189
+ "",
190
+ profile,
191
+ "",
192
+ variety_text,
193
+ "",
194
+ "The C-Score is based on a logarithmic transform of your weekly minutes, encoding diminishing "
195
+ "returns as time increases. A-Risk reflects your raw time investment and how concentrated it is on "
196
+ "a small set of high-weight platforms. D-Index captures how many platforms you use in a meaningful way "
197
+ "(higher values mean your time is spread across more platforms).",
198
+ ]
199
+ )
200
+
201
+ summary = "\n".join(summary_lines).strip()
202
+
203
+ # Turn per-platform efficiency into a tidy table
204
+ if isinstance(per_eff, list) and per_eff:
205
+ eff_df = pd.DataFrame(per_eff)
206
+ # Expect columns ['platform_name', 'Cultural_Efficiency']
207
+ if "platform_name" in eff_df.columns:
208
+ eff_df = eff_df.rename(
209
+ columns={"platform_name": "platform", "Cultural_Efficiency": "efficiency_score"}
210
+ )
211
+ eff_df = eff_df.sort_values("efficiency_score", ascending=False)
212
  else:
213
  eff_df = pd.DataFrame(columns=["platform", "efficiency_score"])
214
 
 
220
  demo = gr.Interface(
221
  fn=analyze_user_data,
222
  inputs=gr.Dataframe(
223
+ headers=["platform_name", "minutes_per_week", "variety_score (0–10, optional)"],
224
  row_count=5,
225
+ col_count=(3, "fixed"),
226
  label="Weekly screen time (by platform)",
227
  value=[
228
+ ["tiktok", 240, 4],
229
+ ["instagram", 180, 5],
230
+ ["youtube", 120, 7],
231
+ ["twitter", 60, 6],
232
+ ["reddit", 90, 8],
233
  ],
234
  ),
235
  outputs=[
 
238
  ],
239
  title="CEAR – Cultural Exposure & Algorithmic Risk Analyzer",
240
  description=(
241
+ "Enter your weekly screen time per platform (and optional variety ratings) to estimate your "
242
+ "cultural connectedness, algorithmic risk, and per-platform efficiency."
243
  ),
244
  )
245