mbecchis commited on
Commit
8848efa
·
verified ·
1 Parent(s): 9ac6052

Upload 5 files

Browse files
Files changed (5) hide show
  1. app.py +406 -0
  2. catalog_scores.csv +52 -0
  3. countries.csv +72 -0
  4. gsheet_loader.py +41 -0
  5. languages.csv +61 -0
app.py ADDED
@@ -0,0 +1,406 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from gsheet_loader import get_data
3
+ import pandas as pd
4
+ import plotly.express as px
5
+ import plotly.figure_factory as ff
6
+ import plotly.graph_objects as go
7
+ import datetime as dt
8
+
9
# Browser-tab title, icon and wide layout for the app.
st.set_page_config(page_title="Catalog Data Dashboard", layout="wide", page_icon="📊")

# Page heading followed by a short description that links to the source spreadsheet.
st.title("📊 Catalog Data Dashboard")

intro_markdown = """
This dashboard combines live [Google Sheets data](https://docs.google.com/spreadsheets/d/10nGgqXxunGXo_GI1LxybvsAr1TYSDdNiqqZX6DSTbDA) for:
- catalog onboarding
- metadata completeness
- mapping/scraping status
"""
st.markdown(intro_markdown)

# get_data() yields three DataFrames; they are unpacked as onboarding, metadata, status.
cat_onboarding_df, cat_metadata_df, cat_status_df = get_data()

# One Streamlit tab per dashboard section, in display order.
tab_titles = [
    "Overview",
    "Static Data",
    "Onboarding Status",
    "Metadata Completeness",
    "Mapping Status",
]
tab0, tab1, tab2, tab3, tab4 = st.tabs(tab_titles)
28
+
29
+ # =========================================================================================================================
30
+ # Tab 0 - Overview
31
+ # =========================================================================================================================
32
+
33
with tab0:
    # Overview tab: a manual refresh control plus a 5-row preview of each sheet.
    st.header("Overview")  # fixed typo: was "Overiew"

    if st.button("🔄 Refresh Data"):
        # Drop every st.cache_data entry, then rerun the script from the top
        # so the data is requested again.
        st.cache_data.clear()
        st.toast("Refreshing data...", icon="🔄")
        st.rerun()

    st.markdown("---")
    st.subheader("Quick Data Preview")

    # Three side-by-side columns: onboarding, metadata, status (in that order).
    preview_frames = (cat_onboarding_df, cat_metadata_df, cat_status_df)
    for preview_col, frame in zip(st.columns(3), preview_frames):
        with preview_col:
            st.dataframe(frame.head(5))
50
+
51
+ # =========================================================================================================================
52
+ # Tab 1 - Static data
53
+ # =========================================================================================================================
54
+
55
with tab1:
    # Static Data tab: charts built from the CSV snapshots shipped next to the app.
    st.header("Static Data Preview")

    # Both geo CSVs are read up front, before any chart is drawn.
    full_countries_df = pd.read_csv('countries.csv')
    full_languages_df = pd.read_csv('languages.csv')

    # The two world maps differ only in their data source and title.
    world_maps = (
        (full_countries_df, "Programs' availabilities by Country (Log Scale)"),
        (full_languages_df, "Programs by Languages (Log Scale)"),
    )
    for geo_df, map_title in world_maps:
        world_fig = px.choropleth(
            geo_df,
            locations="country_name",
            locationmode="country names",
            color="log_count",  # colour by the log-scaled column ...
            color_continuous_scale="Purples",
            hover_name="country_name",
            hover_data={"count": True, "log_count": False},  # ... but show the raw count on hover
            projection="natural earth",
            title=map_title,
        )
        world_fig.update_geos(
            showcountries=True,
            showcoastlines=True,
            showland=True,
            landcolor="white",
            projection_type="natural earth",
        )
        world_fig.update_layout(
            width=1400,
            height=700,
            margin=dict(l=0, r=0, t=100, b=0),
            title_y=0.95,
        )
        st.plotly_chart(world_fig, use_container_width=True)

    # --- Overall completeness score per catalog (horizontal bars) ---
    catalog_scores = pd.read_csv("catalog_scores.csv")
    purple_scale = [
        [0.0, "#ffffff"],
        [0.1, "#dcd6f7"],
        [0.3, "#a29bfe"],
        [0.6, "#6c5ce7"],
        [1.0, "#341f97"],
    ]

    total_score_fig = px.bar(
        catalog_scores,
        x="Total",
        y="Catalog",
        orientation="h",
        color="Total",
        color_continuous_scale=purple_scale,
        title="Catalog Metadata Completeness Score",
    )
    total_score_fig.update_layout(
        yaxis={'categoryorder': 'total ascending'},
        template="plotly_dark",
        height=1000,
    )
    st.plotly_chart(total_score_fig, use_container_width=True)

    # --- Same score, split into per-kind sub-scores (stacked bars) ---
    subcols = ["movie", "show", "season", "episode", "sport"]

    # Row-wise sum of the raw sub-scores; used to rescale each segment so that
    # the stacked segments of a catalog add up to its "Total".
    catalog_scores["raw_sum"] = catalog_scores[subcols].sum(axis=1)

    stacked_segments = []
    for sub in subcols:
        # Share of this sub-score within the raw sum, scaled to the total score.
        segment_width = catalog_scores[sub].div(catalog_scores["raw_sum"]).mul(catalog_scores["Total"])
        segment_label = sub.capitalize()
        stacked_segments.append(
            go.Bar(
                y=catalog_scores["Catalog"],
                x=segment_width,
                name=segment_label,
                orientation="h",
                customdata=catalog_scores[sub],  # raw sub-score, shown on hover
                hovertemplate=(
                    "<b>%{y}</b><br>"
                    + segment_label + ": <b>%{customdata}</b><br>"
                    + "Normalized: %{x:.2f}<extra></extra>"
                ),
            )
        )

    breakdown_fig = go.Figure(data=stacked_segments)
    breakdown_fig.update_layout(
        barmode="stack",
        title="Subscore Contribution per Catalog (Scaled to Total Score)",
        xaxis_title="Total Score",
        template="plotly_dark",
        height=1200,
        yaxis={'categoryorder': 'total ascending'},
    )
    st.plotly_chart(breakdown_fig, use_container_width=True)

    # --- Score vs. catalog size (bubble area also encodes the program count) ---
    size_vs_score_fig = px.scatter(
        catalog_scores,
        x="Total",
        y="Number of programs",
        size="Number of programs",
        color="Total",
        hover_name="Catalog",
        color_continuous_scale="Viridis",
        size_max=50,
    )
    st.plotly_chart(size_vs_score_fig, use_container_width=True)
185
+
186
+
187
+
188
+ # =========================================================================================================================
189
+ # Tab 2 - Onboarding sheet
190
+ # =========================================================================================================================
191
+
192
with tab2:
    # Onboarding tab: a timeline from onboarding date to go-live, plus three
    # stacked bar charts counting catalogs by pairs of categorical columns.
    st.header("Catalog Onboarding Status")

    # "Onboarding date" is parsed as day/month (e.g. 21/11); unparseable cells
    # become NaT. The year is then pinned to 2025 for every parsed date.
    parsed_onboarding = pd.to_datetime(
        cat_onboarding_df["Onboarding date"], format="%d/%m", errors="coerce"
    )
    cat_onboarding_df["Onboarding date"] = parsed_onboarding.apply(
        lambda ts: ts if pd.isna(ts) else ts.replace(year=2025)
    )

    # Textual go-live months are mapped to the last day of that month;
    # "TBD" (and any label not listed) ends up missing.
    month_map = {
        "November 2025": dt.datetime(2025, 11, 30),
        "December 2025": dt.datetime(2025, 12, 31),
        "January 2026": dt.datetime(2026, 1, 31),
        "February 2026": dt.datetime(2026, 2, 28),
        "March 2026": dt.datetime(2026, 3, 31),
        "April 2026": dt.datetime(2026, 4, 30),
        "TBD": None,
    }
    cat_onboarding_df["Go live parsed"] = cat_onboarding_df["Go live (customer)"].map(month_map)

    # Only rows with both endpoints can be drawn on the timeline.
    timeline_df = cat_onboarding_df.dropna(subset=["Onboarding date", "Go live parsed"])

    fig_timeline = px.timeline(
        timeline_df,
        x_start="Onboarding date",
        x_end="Go live parsed",
        y="NAME",
        color="Onboarding Status",
        hover_data=["Client", "Priority"],
        title="Onboarding → Go-Live Timeline",
    )
    fig_timeline.update_yaxes(autorange="reversed")
    st.plotly_chart(fig_timeline, use_container_width=True)

    # (x-axis column, colour column, chart title) for each stacked bar chart.
    stacked_bar_specs = (
        ("Client", "Onboarding Status", "Catalogs per Client (by Onboarding Status)"),
        ("Client", "Priority", "Catalogs per Client (by Priority)"),
        ("Onboarding Status", "Priority", "Catalogs per Onboarding Status (by Priority)"),
    )
    for axis_col, colour_col, chart_title in stacked_bar_specs:
        # Row count for every (axis, colour) combination.
        pair_counts = (
            cat_onboarding_df.groupby([axis_col, colour_col])
            .size()
            .reset_index(name="Count")
        )
        bar_fig = px.bar(
            pair_counts,
            x=axis_col,
            y="Count",
            color=colour_col,
            text_auto=True,
            title=chart_title,
        )
        bar_fig.update_layout(barmode="stack", xaxis_title=axis_col, yaxis_title="Catalog Count")
        st.plotly_chart(bar_fig, use_container_width=True)
288
+
289
+ # =========================================================================================================================
290
+ # Tab 3 - Metadata completeness
291
+ # =========================================================================================================================
292
+
293
with tab3:
    # Metadata Completeness tab: converts the Yes/Some/No grid from the
    # metadata sheet into numeric scores and shows them as two bar charts
    # and two heatmaps.
    st.header("Catalog Metadata Completeness")

    cat_df = cat_metadata_df.copy()
    # Every column except the catalog name is treated as a metadata field.
    meta_cols = [col for col in cat_df.columns if col not in ["Catalog name"]]

    # Textual answers -> score in [0, 1]; blank / "None" count as missing (0).
    score_map = {"Yes": 1.0, "Some": 0.5, "No": 0.0, "None": 0.0, "": 0.0}

    cat_df_numeric = cat_df.copy()
    cat_df_numeric[meta_cols] = cat_df_numeric[meta_cols].replace(score_map)

    # Force conversion to numeric (anything not covered by score_map becomes NaN).
    cat_df_numeric[meta_cols] = cat_df_numeric[meta_cols].apply(pd.to_numeric, errors="coerce")

    # Per-catalog score = mean over all metadata fields.
    cat_df_numeric["Completeness Score"] = cat_df_numeric[meta_cols].mean(axis=1)
    cat_df_numeric_sorted = cat_df_numeric.sort_values("Completeness Score", ascending=False)

    # Graph 1: completeness score per catalog.
    fig_completeness = px.bar(
        cat_df_numeric_sorted,
        x="Completeness Score",
        y="Catalog name",
        orientation="h",
        color="Completeness Score",
        color_continuous_scale="Greens",
        title="Catalog Metadata Completeness Score",
    )
    fig_completeness.update_layout(yaxis={'categoryorder': 'total ascending'})

    st.plotly_chart(fig_completeness, use_container_width=True)

    # Graph 2: average score per metadata field across all catalogs.
    coverage = cat_df_numeric[meta_cols].mean().sort_values(ascending=False).reset_index()
    coverage.columns = ["Metadata Field", "Average Score"]

    fig_field_coverage = px.bar(
        coverage,
        x="Average Score",
        y="Metadata Field",
        orientation="h",
        color="Average Score",
        color_continuous_scale="Blues",
        title="Metadata Field Coverage Across All Catalogs",
    )
    fig_field_coverage.update_layout(yaxis={'categoryorder': 'total ascending'})

    st.plotly_chart(fig_field_coverage, use_container_width=True)

    # Heatmap 1 (figure_factory): rows = catalogs, columns = metadata fields.
    z = cat_df_numeric[meta_cols].astype(float).to_numpy()
    x = list(meta_cols)
    y = list(cat_df_numeric["Catalog name"].astype(str))

    # create_annotated_heatmap writes the z value into every cell when
    # annotation_text is None, so passing None does NOT hide the numbers.
    # A same-shaped grid of empty strings is what actually blanks the cells.
    blank_annotations = [["" for _ in row] for row in z]

    fig_heatmap = ff.create_annotated_heatmap(
        z=z,
        x=x,
        y=y,
        showscale=True,
        colorscale=[
            [0.0, "rgb(255,77,77)"],   # red for 0 (No)
            [0.5, "rgb(255,204,0)"],   # yellow for 0.5 (Some)
            [1.0, "rgb(0,204,102)"],   # green for 1 (Yes)
        ],
        annotation_text=blank_annotations,
    )

    # Layout adjustments
    fig_heatmap.update_layout(
        title="Metadata Completeness Heatmap (Catalog vs Field)",
        xaxis_title="Metadata Field",
        yaxis_title="Catalog Name",
        width=1600,   # make it wide
        height=1000,  # make it tall so names fit
        margin=dict(l=200, r=50, t=80, b=150),  # spacing for labels
    )

    # Tweak label angles for readability
    fig_heatmap.update_xaxes(tickangle=-45)
    fig_heatmap.update_yaxes(automargin=True)

    st.plotly_chart(fig_heatmap, use_container_width=True)

    # Heatmap 2 (plotly express): same data and colours rendered with px.imshow.
    fig_heatmap1 = px.imshow(
        cat_df_numeric[meta_cols],
        labels=dict(x="Metadata Field", y="Catalog Name", color="Completeness"),
        x=meta_cols,
        y=cat_df_numeric["Catalog name"],
        color_continuous_scale=[
            [0.0, "rgb(255,77,77)"],
            [0.5, "rgb(255,204,0)"],
            [1.0, "rgb(0,204,102)"],
        ],
    )

    fig_heatmap1.update_layout(
        title="Metadata Completeness Heatmap (Catalog vs Field)",
        width=1600,
        height=1000,
        margin=dict(l=200, r=50, t=80, b=150),
    )
    fig_heatmap1.update_xaxes(tickangle=-45)

    st.plotly_chart(fig_heatmap1, use_container_width=True)
401
+
402
+
403
+
404
+ # Tab 4 - Mapping status: only the tab header is rendered in this block.
+ with tab4:
405
+ st.header("Catalog Mapping status")
406
+
catalog_scores.csv ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,Catalog,Total,Number of programs,movie,show,season,episode,sport
2
+ 0,Netflix UK,78.37321487958448,53523,87.58,85.41,69.85,77.91,0.0
3
+ 1,Channel 5 UK,79.85897394440644,16081,77.0,76.99,72.99,81.0,0.0
4
+ 2,All Channel 4,81.06291694583658,37313,80.18,82.84,70.0,82.0,0.0
5
+ 3,ITVX UK,79.82202669166713,36266,83.99,81.78,68.0,80.81,0.0
6
+ 4,BBC iPlayer UK,79.5271696482445,61321,85.07,84.98,70.83,79.64,84.0
7
+ 5,Netflix Norway,76.15902045350585,45909,85.41,82.38,67.23,75.29,0.0
8
+ 6,SF Anytime Denmark,81.49234045053869,25525,84.84,84.38,72.86,75.91,0.0
9
+ 7,Netflix US,76.42054976420948,56406,85.38,82.93,67.97,76.01,0.0
10
+ 8,VG Plus Norway,78.30014328168117,9422,0.0,73.67,62.88,76.78,81.85
11
+ 9,Apple TV UK,83.0935448804564,213144,88.53,81.15,67.94,81.0,81.62
12
+ 10,SkyShowtime Norway,76.42113719636315,14738,81.84,77.55,73.0,76.0,0.0
13
+ 11,Filmoteket,86.29287764350454,2648,86.98,73.0,63.0,75.36,0.0
14
+ 12,MTV Katsomo,81.55067180179465,25186,89.06,85.32,74.42,81.59,81.28
15
+ 13,Pathè thuis Netherlands,85.45,6578,85.45,0.0,0.0,0.0,0.0
16
+ 14,Chili,93.06,12973,93.06,0.0,0.0,0.0,0.0
17
+ 15,Maxdome,79.75177596664139,42208,86.58,70.0,83.46,75.98,0.0
18
+ 16,Mejane Netherlands,84.48,4185,84.48,0.0,0.0,0.0,0.0
19
+ 17,Play Suisse,81.25779218148672,4937,84.28,84.73,69.99,79.0,0.0
20
+ 18,Britbox API,81.26944713870029,5155,90.0,89.0,79.0,81.0,0.0
21
+ 19,TV4 Play,85.69972552009584,18362,90.33,83.57,75.06,86.21,81.0
22
+ 20,Amazon Prime Sweden,91.68148708616673,31323,94.84,88.58,83.74,91.84,0.0
23
+ 21,Disney Global,90.0467131120896,67678,97.3,95.12,84.32,89.75,0.0
24
+ 22,TV 2 Play Norway,80.55460599334073,901,0.0,81.59,72.78,81.58,77.62
25
+ 23,SVT Sweden,82.10870164677989,38803,88.19,82.69,76.2,82.26,0.0
26
+ 24,Go Net TV,79.85426,500,80.69,79.32,0.0,0.0,75.09
27
+ 25,HBO Max Global,92.21603897591963,109093,95.8,92.63,80.89,92.88,0.0
28
+ 26,Canal plus,85.27148061104583,6808,92.64,87.93,83.07,84.7,83.49
29
+ 27,Rakuten API V2,88.98855841914173,33602,91.0,83.13,81.13,84.41,0.0
30
+ 28,Plex US,87.45695624312563,104555,92.84,91.22,83.13,85.25,0.0
31
+ 29,Viaplay Combined,88.41753728304577,39294,93.96,91.03,70.0,87.98,83.97
32
+ 30,Joyn DE,84.60275700046458,94708,87.06,84.79,79.17,84.82,0.0
33
+ 31,HBO Max Finland,91.38097137014316,35208,95.87,93.08,78.28,92.26,0.0
34
+ 32,Paramount Plus US,76.82886449111844,34566,87.52,81.93,68.0,76.96,76.12
35
+ 33,SF Anytime Norway,81.9365097173145,22640,82.0,81.0,81.85,81.84,0.0
36
+ 34,Apple TV AU,86.7086645785877,3512,94.84,91.62,76.95,87.0,95.33
37
+ 35,Amazon Prime South Africa,95.56407676425043,54244,97.08,95.51,90.63,95.65,0.0
38
+ 36,Paramount Plus UK,77.00172401001576,10783,88.86,81.05,69.99,76.99,0.0
39
+ 37,Apple TV+ US & CA,86.7542266695671,4597,94.81,91.55,76.78,87.0,95.33
40
+ 38,CBS,83.2792908047975,5753,0.0,77.71,70.0,84.0,0.0
41
+ 39,MGM Plus,81.89496840896038,1741,83.0,77.0,69.97,81.0,0.0
42
+ 40,Food network,78.14298572996707,5466,0.0,85.0,68.02,79.0,0.0
43
+ 41,Shudder,80.55810945273632,603,82.99,71.0,70.0,76.66,0.0
44
+ 42,Fox One,83.0974113225276,4257,88.39,86.86,81.59,82.74,84.2
45
+ 43,Hulu Plus,85.67620808839351,144083,90.36,89.39,69.99,86.54,89.77
46
+ 44,Apple TV+ US TVOD,85.97134842657486,364269,92.41,85.74,74.29,85.7,0.0
47
+ 45,NBC,76.50201542020046,19455,81.76,76.95,68.0,76.65,76.01
48
+ 46,Criterion Channel,84.56354499151104,2945,84.84,73.0,66.0,80.35,0.0
49
+ 47,Comedy Central,78.38826711749788,1183,89.0,84.88,70.0,77.88,0.0
50
+ 48,History,83.23552386412499,6977,80.35,81.83,69.98,84.32,0.0
51
+ 49,CW TV,84.78429994700583,9435,91.0,82.91,70.0,84.99,0.0
52
+ 50,Pluto TV - Scraping,78.29156520889642,153545,82.28,80.89,75.0,78.0,79.25
countries.csv ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,zone,count,country_name,log_count
2
+ 0,US,727975,United States,5.862117061676528
3
+ 1,GB,510425,United Kingdom,5.707932788301151
4
+ 2,CA,300189,Canada,5.477396220828122
5
+ 3,SE,222351,Sweden,5.34704104017154
6
+ 4,FI,202977,Finland,5.307448968965579
7
+ 5,NO,191194,Norway,5.28147653071831
8
+ 6,DK,169423,Denmark,5.228974930960943
9
+ 7,AT,146243,Austria,5.165078057179213
10
+ 8,DE,123501,Germany,5.091673990647756
11
+ 9,ES,113480,Spain,5.054923154164424
12
+ 10,FR,111854,France,5.048655402146333
13
+ 11,PL,108995,Poland,5.037410560236007
14
+ 12,RO,104160,Romania,5.017705140697976
15
+ 13,BG,104132,Bulgaria,5.017588380296173
16
+ 14,HU,103267,Hungary,5.01396576608478
17
+ 15,CZ,103076,Czechia,5.013161770158209
18
+ 16,PT,103032,Portugal,5.012976345312311
19
+ 17,IS,101007,Iceland,5.0043557719832785
20
+ 18,AR,99835,Argentina,4.999287172371123
21
+ 19,SV,99565,El Salvador,4.998111059977296
22
+ 20,PY,99545,Paraguay,4.998023813707183
23
+ 21,UY,99545,Uruguay,4.998023813707183
24
+ 22,CL,99542,Chile,4.998010725254825
25
+ 23,CR,99541,Costa Rica,4.998006362349715
26
+ 24,HN,99541,Honduras,4.998006362349715
27
+ 25,NI,99541,Nicaragua,4.998006362349715
28
+ 26,GT,99541,Guatemala,4.998006362349715
29
+ 27,EC,99540,Ecuador,4.998001999400775
30
+ 28,RS,98972,Serbia,4.995516734493555
31
+ 29,AL,98939,Albania,4.995371906028162
32
+ 30,ME,98900,Montenegro,4.9952006828235325
33
+ 31,HR,98628,Croatia,4.994004629831678
34
+ 32,SI,98574,Slovenia,4.993766785745261
35
+ 33,BA,98438,Bosnia and Herzegovina,4.99316719323995
36
+ 34,BR,97227,Brazil,4.987791352316747
37
+ 35,SK,95325,Slovakia,4.979211369856581
38
+ 36,IE,87810,Ireland,4.943548922968384
39
+ 37,CH,87222,Switzerland,4.940631019978309
40
+ 38,NL,86813,Netherlands,4.938589767025189
41
+ 39,MT,85033,Malta,4.929592608772758
42
+ 40,IT,83615,Italy,4.922289388047232
43
+ 41,EE,80972,Estonia,4.908340229918514
44
+ 42,LV,80965,Latvia,4.908302684158997
45
+ 43,LT,80940,Lithuania,4.908168565657217
46
+ 44,BE,73205,Belgium,4.864546677507894
47
+ 45,LU,67817,Luxembourg,4.8313449779845135
48
+ 46,MK,66323,North Macedonia,4.8216707106294
49
+ 47,GR,63778,Greece,4.804677705595455
50
+ 48,ZA,54234,South Africa,4.73427964449282
51
+ 49,TR,52842,Türkiye,4.72298746538574
52
+ 50,UA,39999,Ukraine,4.6020599913279625
53
+ 51,CO,37424,Colombia,4.57316180901509
54
+ 52,VE,37423,"Venezuela, Bolivarian Republic of",4.573150204465078
55
+ 53,BO,37423,"Bolivia, Plurinational State of",4.573150204465078
56
+ 54,PE,37423,Peru,4.573150204465078
57
+ 55,DO,37419,Dominican Republic,4.573103783163991
58
+ 56,PA,37419,Panama,4.573103783163991
59
+ 57,MX,37322,Mexico,4.571976544803343
60
+ 58,MC,26853,Monaco,4.42900898458089
61
+ 59,AD,26716,Andorra,4.426787690457996
62
+ 60,MD,26650,"Moldova, Republic of",4.4257135092850675
63
+ 61,BY,26617,Belarus,4.425175420725635
64
+ 62,LI,26310,Liechtenstein,4.420137354593826
65
+ 63,VA,26176,Holy See (Vatican City State),4.417919872997741
66
+ 64,SM,26175,San Marino,4.417903281991229
67
+ 65,GE,17087,Georgia,4.2326912353484625
68
+ 66,AM,17087,Armenia,4.2326912353484625
69
+ 67,KZ,17087,Kazakhstan,4.2326912353484625
70
+ 68,AU,7992,Australia,3.902709812969877
71
+ 69,RU,506,Russian Federation,2.705007959333336
72
+ 70,DA,37,,1.5797835966168101
gsheet_loader.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import gspread
3
+ from google.oauth2.service_account import Credentials
4
+ import pandas as pd
5
+ import streamlit as st
6
+
7
+ SCOPES = ["https://www.googleapis.com/auth/spreadsheets.readonly"]
8
+ sheet_id = "10nGgqXxunGXo_GI1LxybvsAr1TYSDdNiqqZX6DSTbDA"
9
+ key_path = "service_account_credentials.json"
10
+
11
+ headers = ['Catalog', 'Mapping status', 'Priority', 'Program kinds', 'Customers', 'Size', 'Size Aprox', 'Needed by', 'Recommendations', 'Scraping?','Custom provider deeplinks', "Scraping link"]
12
+
13
+
14
def load_gsheet(tab_name: str) -> pd.DataFrame:
    """Load one worksheet of the configured Google Sheet into a DataFrame.

    Authenticates with the service-account file at ``key_path`` using the
    read-only ``SCOPES``, opens the spreadsheet ``sheet_id`` and reads every
    record of the worksheet called *tab_name*. The whole fetch is attempted
    up to three times; between attempts a Streamlit warning is shown and the
    function sleeps for two seconds.

    Args:
        tab_name: Title of the worksheet to read. For "Catalog Status" the
            module-level ``headers`` list is passed as ``expected_headers``
            (presumably because that sheet's header row is not unique --
            TODO confirm).

    Returns:
        A DataFrame built from the worksheet's records.

    Raises:
        gspread.exceptions.APIError: If the third attempt also fails; a
            Streamlit error message is shown before re-raising.
    """
    max_attempts = 3
    creds = Credentials.from_service_account_file(key_path, scopes=SCOPES)
    client = gspread.authorize(creds)

    for attempt in range(1, max_attempts + 1):
        try:
            # Opening the spreadsheet is done inside the try so that an
            # APIError raised at this step is retried like the worksheet reads.
            ws = client.open_by_key(sheet_id).worksheet(tab_name)
            if tab_name == "Catalog Status":
                records = ws.get_all_records(expected_headers=headers)
            else:
                records = ws.get_all_records()
            return pd.DataFrame(records)
        except gspread.exceptions.APIError as e:
            if attempt == max_attempts:
                st.error(f"Failed to load '{tab_name}': {e}")
                raise  # bare raise keeps the original traceback
            st.warning(f"Retrying Google API for {tab_name}... ({attempt}/3)")
            time.sleep(2)  # avoid hammering API
34
+
35
@st.cache_data(ttl=600)
def get_data():
    """Fetch the three dashboard worksheets as DataFrames.

    The result is cached with ``st.cache_data`` so that ordinary Streamlit
    reruns (any widget interaction) do not hit the Google Sheets API again;
    calling ``st.cache_data.clear()`` or waiting for the 10-minute TTL forces
    a fresh download. Callers receive a copy of the cached frames on each
    call, so they may modify them in place.

    Returns:
        A tuple ``(onboarding, metadata, mapping)`` read from the worksheets
        "Catalog Onboarding", "NEW Catalog Data levels" and "Catalog Status".
    """
    onboarding = load_gsheet("Catalog Onboarding")
    time.sleep(1)  # short pause between consecutive worksheet requests
    metadata = load_gsheet("NEW Catalog Data levels")
    time.sleep(1)
    mapping = load_gsheet("Catalog Status")
    return onboarding, metadata, mapping
languages.csv ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,zone,count,country_name,log_count
2
+ 0,AL,559,Albania,2.7481880270062002
3
+ 1,AO,161399,Angola,5.2079035303860515
4
+ 2,AR,216955,Argentina,5.336371665208993
5
+ 3,AT,240338,Austria,5.380824249832158
6
+ 4,AU,1698308,Australia,6.230016711105746
7
+ 5,BA,558,Bosnia and Herzegovina,2.747411807886423
8
+ 6,BE,266294,Belgium,5.425363012128782
9
+ 7,BG,15717,Bulgaria,4.196397284437343
10
+ 8,BO,216955,"Bolivia, Plurinational State of",5.336371665208993
11
+ 9,BR,161399,Brazil,5.2079035303860515
12
+ 10,CA,1862685,Canada,6.270139650408456
13
+ 11,CH,506346,Switzerland,5.704448241219132
14
+ 12,CL,216955,Chile,5.336371665208993
15
+ 13,CO,216955,Colombia,5.336371665208993
16
+ 14,CY,15036,Cyprus,4.177161199726047
17
+ 15,CZ,122498,Czechia,5.0881325434250035
18
+ 16,DE,235459,Germany,5.371917139682522
19
+ 17,DK,205275,Denmark,5.312338176468242
20
+ 18,EE,15019,Estonia,4.176669932668149
21
+ 19,ES,216955,Spain,5.336371665208993
22
+ 20,FI,230232,Finland,5.362167572511623
23
+ 21,FR,164377,France,5.215843692048625
24
+ 22,GB,1709281,United Kingdom,6.232813719210268
25
+ 23,GR,15595,Greece,4.193013226515948
26
+ 24,HR,14617,Croatia,4.164887957547954
27
+ 25,HU,124328,Hungary,5.094572440556444
28
+ 26,IE,1711800,Ireland,6.233453275746968
29
+ 27,IN,51,India,1.7160033436347992
30
+ 28,IS,27722,Iceland,4.442840224963883
31
+ 29,IT,96014,Italy,4.982339086251471
32
+ 30,LT,14923,Lithuania,4.173885240368792
33
+ 31,LU,399836,Luxembourg,5.601882980258131
34
+ 32,LV,14644,Latvia,4.1656893760176175
35
+ 33,MD,123653,"Moldova, Republic of",5.092208169624367
36
+ 34,ME,559,Montenegro,2.7481880270062002
37
+ 35,MK,558,North Macedonia,2.747411807886423
38
+ 36,MT,559,Malta,2.7481880270062002
39
+ 37,MX,216955,Mexico,5.336371665208993
40
+ 38,MZ,161399,Mozambique,5.2079035303860515
41
+ 39,NA,14,Namibia,1.1760912590556813
42
+ 40,NL,97189,Netherlands,4.987621582125484
43
+ 41,NO,288672,Norway,5.460406165594033
44
+ 42,NZ,1698308,New Zealand,6.230016711105746
45
+ 43,PE,216955,Peru,5.336371665208993
46
+ 44,PL,124408,Poland,5.094851799237066
47
+ 45,PT,161399,Portugal,5.2079035303860515
48
+ 46,PY,216955,Paraguay,5.336371665208993
49
+ 47,RO,123653,Romania,5.092208169624367
50
+ 48,RS,558,Serbia,2.747411807886423
51
+ 49,SE,245320,Sweden,5.389734726330133
52
+ 50,SI,1451,Slovenia,3.161966616364075
53
+ 51,SK,111180,Slovakia,5.046030575913449
54
+ 52,SM,96014,San Marino,4.982339086251471
55
+ 53,TR,74927,Türkiye,4.874644140438004
56
+ 54,UA,2197,Ukraine,3.3420276880874717
57
+ 55,US,1698308,United States,6.230016711105746
58
+ 56,UY,216955,Uruguay,5.336371665208993
59
+ 57,VA,96014,Holy See (Vatican City State),4.982339086251471
60
+ 58,VE,216955,"Venezuela, Bolivarian Republic of",5.336371665208993
61
+ 59,ZA,1698322,South Africa,6.230020291194933