mbecchis committed on
Commit
93fc514
·
verified ·
1 Parent(s): 63db663

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +406 -406
app.py CHANGED
@@ -1,406 +1,406 @@
1
- import streamlit as st
2
- from gsheet_loader import get_data
3
- import pandas as pd
4
- import plotly.express as px
5
- import plotly.figure_factory as ff
6
- import plotly.graph_objects as go
7
- import datetime as dt
8
-
9
- st.set_page_config(
10
- page_title="Catalog Data Dashboard",
11
- layout="wide",
12
- page_icon="📊",
13
- )
14
-
15
- st.title("📊 Catalog Data Dashboard")
16
- st.markdown(
17
- """
18
- This dashboard combines live [Google Sheets data](https://docs.google.com/spreadsheets/d/10nGgqXxunGXo_GI1LxybvsAr1TYSDdNiqqZX6DSTbDA) for:
19
- - catalog onboarding
20
- - metadata completeness
21
- - mapping/scraping status
22
- """
23
- )
24
-
25
- cat_onboarding_df, cat_metadata_df, cat_status_df = get_data()
26
-
27
- tab0, tab1, tab2, tab3, tab4 = st.tabs(["Overview", "Static Data", "Onboarding Status", "Metadata Completeness", "Mapping Status"])
28
-
29
- # =========================================================================================================================
30
- # Tab 0 - Overview
31
- # =========================================================================================================================
32
-
33
- with tab0:
34
- st.header("Overiew")
35
- if st.button("🔄 Refresh Data"):
36
- st.cache_data.clear()
37
- st.toast("Refreshing data...", icon="🔄")
38
- st.rerun()
39
-
40
- st.markdown("---")
41
- st.subheader("Quick Data Preview")
42
-
43
- col1, col2, col3 = st.columns(3)
44
- with col1:
45
- st.dataframe(cat_onboarding_df.head(5))
46
- with col2:
47
- st.dataframe(cat_metadata_df.head(5))
48
- with col3:
49
- st.dataframe(cat_status_df.head(5))
50
-
51
- # =========================================================================================================================
52
- # Tab 0 - Static stuff
53
- # =========================================================================================================================
54
-
55
- with tab1:
56
- st.header("Static Data Preview")
57
-
58
- full_countries_df = pd.read_csv('countries.csv')
59
- full_languages_df = pd.read_csv('languages.csv')
60
-
61
- # countries map
62
- fig = px.choropleth(
63
- full_countries_df,
64
- locations="country_name",
65
- locationmode="country names",
66
- color="log_count",
67
- color_continuous_scale="Purples",
68
- hover_name="country_name",
69
- hover_data={"count": True, "log_count": False},
70
- projection="natural earth",
71
- title="Programs' availabilities by Country (Log Scale)"
72
- )
73
-
74
- fig.update_geos(showcountries=True, showcoastlines=True, showland=True, landcolor="white", projection_type="natural earth")
75
- fig.update_layout(
76
- width=1400,
77
- height=700,
78
- margin=dict(l=0, r=0, t=100, b=0),
79
- title_y=0.95
80
- )
81
-
82
- st.plotly_chart(fig, use_container_width=True)
83
-
84
- # languages map
85
- fig1 = px.choropleth(
86
- full_languages_df,
87
- locations="country_name",
88
- locationmode="country names",
89
- color="log_count",
90
- color_continuous_scale="Purples",
91
- hover_name="country_name",
92
- hover_data={"count": True, "log_count": False},
93
- projection="natural earth",
94
- title="Programs by Languages (Log Scale)"
95
- )
96
-
97
- fig1.update_geos(showcountries=True, showcoastlines=True, showland=True, landcolor="white", projection_type="natural earth")
98
- fig1.update_layout(
99
- width=1400,
100
- height=700,
101
- margin=dict(l=0, r=0, t=100, b=0),
102
- title_y=0.95
103
- )
104
-
105
- st.plotly_chart(fig1, use_container_width=True)
106
-
107
-
108
- # Completeness evaluation
109
- catalog_scores = pd.read_csv("catalog_scores.csv")
110
- colorscale = [
111
- [0.0, "#ffffff"],
112
- [0.1, "#dcd6f7"],
113
- [0.3, "#a29bfe"],
114
- [0.6, "#6c5ce7"],
115
- [1.0, "#341f97"]
116
- ]
117
-
118
- fig_completeness = px.bar(
119
- catalog_scores,
120
- x="Total",
121
- y="Catalog",
122
- orientation="h",
123
- color="Total",
124
- color_continuous_scale=colorscale,
125
- title="Catalog Metadata Completeness Score",
126
- )
127
- fig_completeness.update_layout(yaxis={'categoryorder':'total ascending'}, template="plotly_dark", height=1000)
128
- st.plotly_chart(fig_completeness, use_container_width=True)
129
-
130
-
131
- # ### completeness score broken down
132
- subcols = ["movie", "show", "season", "episode", "sport"]
133
-
134
- # Compute sum of raw subscores
135
- catalog_scores["raw_sum"] = catalog_scores[subcols].sum(axis=1)
136
-
137
- # Build the figure
138
- fig_completeness2 = go.Figure()
139
-
140
- for col in subcols:
141
-
142
- # normalized height of this bar segment
143
- norm_vals = (catalog_scores[col] / catalog_scores["raw_sum"]) * catalog_scores["Total"]
144
-
145
- fig_completeness2.add_trace(
146
- go.Bar(
147
- y=catalog_scores["Catalog"],
148
- x=norm_vals, # BAR SIZE = normalized values
149
- name=col.capitalize(),
150
- orientation="h",
151
- customdata=catalog_scores[col], # RAW values for hover
152
- hovertemplate=(
153
- "<b>%{y}</b><br>" +
154
- f"{col.capitalize()}: <b>%{{customdata}}</b><br>" + # RAW value
155
- "Normalized: %{x:.2f}<extra></extra>"
156
- )
157
- )
158
- )
159
-
160
- fig_completeness2.update_layout(
161
- barmode="stack",
162
- title="Subscore Contribution per Catalog (Scaled to Total Score)",
163
- xaxis_title="Total Score",
164
- template="plotly_dark",
165
- height=1200,
166
- yaxis={'categoryorder':'total ascending'}
167
- )
168
-
169
- st.plotly_chart(fig_completeness2, use_container_width=True)
170
-
171
-
172
- #scatter plot
173
- fig_scatter = px.scatter(
174
- catalog_scores,
175
- x="Total",
176
- y="Number of programs",
177
- size="Number of programs",
178
- color="Total",
179
- hover_name="Catalog",
180
- color_continuous_scale="Viridis",
181
- size_max=50
182
- )
183
-
184
- st.plotly_chart(fig_scatter, use_container_width=True)
185
-
186
-
187
-
188
- # =========================================================================================================================
189
- # Tab 2 - Onboarding sheet
190
- # =========================================================================================================================
191
-
192
- with tab2:
193
- st.header("Catalog Onboarding Status")
194
-
195
- # Convert onboarding date to datetime (e.g., 21/11 → 2025-11-21)
196
- cat_onboarding_df["Onboarding date"] = pd.to_datetime(
197
- cat_onboarding_df["Onboarding date"], format="%d/%m", errors="coerce"
198
- )
199
- cat_onboarding_df["Onboarding date"] = cat_onboarding_df["Onboarding date"].apply(
200
- lambda d: d.replace(year=2025) if pd.notna(d) else d
201
- )
202
-
203
- # Map textual months to end-of-month dates
204
- month_map = {
205
- "November 2025": dt.datetime(2025, 11, 30),
206
- "December 2025": dt.datetime(2025, 12, 31),
207
- "January 2026": dt.datetime(2026, 1, 31),
208
- "February 2026": dt.datetime(2026, 2, 28),
209
- "March 2026": dt.datetime(2026, 3, 31),
210
- "April 2026": dt.datetime(2026, 4, 30),
211
- "TBD": None,
212
- }
213
- cat_onboarding_df["Go live parsed"] = cat_onboarding_df["Go live (customer)"].map(month_map)
214
-
215
- # Drop missing
216
- timeline_df = cat_onboarding_df.dropna(subset=["Onboarding date", "Go live parsed"])
217
-
218
- fig_timeline = px.timeline(
219
- timeline_df,
220
- x_start="Onboarding date",
221
- x_end="Go live parsed",
222
- y="NAME",
223
- color="Onboarding Status",
224
- hover_data=["Client", "Priority"],
225
- title="Onboarding → Go-Live Timeline",
226
- )
227
- fig_timeline.update_yaxes(autorange="reversed")
228
-
229
- st.plotly_chart(fig_timeline, use_container_width=True)
230
-
231
- # bar chart 1
232
- summary = (
233
- cat_onboarding_df.groupby(["Client", "Onboarding Status"])
234
- .size()
235
- .reset_index(name="Count")
236
- )
237
-
238
- fig_client = px.bar(
239
- summary,
240
- x="Client",
241
- y="Count",
242
- color="Onboarding Status",
243
- text_auto=True,
244
- title="Catalogs per Client (by Onboarding Status)",
245
- )
246
- fig_client.update_layout(barmode="stack", xaxis_title="Client", yaxis_title="Catalog Count")
247
-
248
- st.plotly_chart(fig_client, use_container_width=True)
249
-
250
- # bar chart 2
251
- summary = (
252
- cat_onboarding_df.groupby(["Client", "Priority"])
253
- .size()
254
- .reset_index(name="Count")
255
- )
256
-
257
- fig_client1 = px.bar(
258
- summary,
259
- x="Client",
260
- y="Count",
261
- color="Priority",
262
- text_auto=True,
263
- title="Catalogs per Client (by Priority)",
264
- )
265
- fig_client1.update_layout(barmode="stack", xaxis_title="Client", yaxis_title="Catalog Count")
266
-
267
- st.plotly_chart(fig_client1, use_container_width=True)
268
-
269
- # bar chart 3
270
-
271
- summary = (
272
- cat_onboarding_df.groupby(["Onboarding Status", "Priority"])
273
- .size()
274
- .reset_index(name="Count")
275
- )
276
-
277
- fig_client2 = px.bar(
278
- summary,
279
- x="Onboarding Status",
280
- y="Count",
281
- color="Priority",
282
- text_auto=True,
283
- title="Catalogs per Onboarding Status (by Priority)",
284
- )
285
- fig_client2.update_layout(barmode="stack", xaxis_title="Onboarding Status", yaxis_title="Catalog Count")
286
-
287
- st.plotly_chart(fig_client2, use_container_width=True)
288
-
289
- # =========================================================================================================================
290
- # Tab 3 - Metadata completeness
291
- # =========================================================================================================================
292
-
293
- with tab3:
294
- st.header("Catalog Metadata Completeness")
295
-
296
- cat_df = cat_metadata_df.copy()
297
- meta_cols = [col for col in cat_df.columns if col not in ["Catalog name"]]
298
-
299
- score_map = {"Yes": 1.0, "Some": 0.5, "No": 0.0, "None": 0.0, "": 0.0}
300
-
301
- cat_df_numeric = cat_df.copy()
302
- cat_df_numeric[meta_cols] = cat_df_numeric[meta_cols].replace(score_map)
303
-
304
- # force conversion to numeric (anything else becomes NaN)
305
- cat_df_numeric[meta_cols] = cat_df_numeric[meta_cols].apply(pd.to_numeric, errors="coerce")
306
-
307
- cat_df_numeric["Completeness Score"] = cat_df_numeric[meta_cols].mean(axis=1)
308
- cat_df_numeric_sorted = cat_df_numeric.sort_values("Completeness Score", ascending=False)
309
-
310
- #graph 1
311
- fig_completeness = px.bar(
312
- cat_df_numeric_sorted,
313
- x="Completeness Score",
314
- y="Catalog name",
315
- orientation="h",
316
- color="Completeness Score",
317
- color_continuous_scale="Greens",
318
- title="Catalog Metadata Completeness Score",
319
- )
320
- fig_completeness.update_layout(yaxis={'categoryorder':'total ascending'})
321
-
322
- st.plotly_chart(fig_completeness, use_container_width=True)
323
-
324
- # graph 2
325
- coverage = cat_df_numeric[meta_cols].mean().sort_values(ascending=False).reset_index()
326
- coverage.columns = ["Metadata Field", "Average Score"]
327
-
328
- fig_field_coverage = px.bar(
329
- coverage,
330
- x="Average Score",
331
- y="Metadata Field",
332
- orientation="h",
333
- color="Average Score",
334
- color_continuous_scale="Blues",
335
- title="Metadata Field Coverage Across All Catalogs",
336
- )
337
- fig_field_coverage.update_layout(yaxis={'categoryorder':'total ascending'})
338
-
339
- st.plotly_chart(fig_field_coverage, use_container_width=True)
340
-
341
- # heatmap 1
342
- # Prepare data
343
- z = cat_df_numeric[meta_cols].astype(float).to_numpy()
344
- x = list(meta_cols)
345
- y = list(cat_df_numeric["Catalog name"].astype(str))
346
-
347
- # Build the heatmap (no annotation_text)
348
- fig_heatmap = ff.create_annotated_heatmap(
349
- z=z,
350
- x=x,
351
- y=y,
352
- showscale=True,
353
- colorscale=[
354
- [0.0, "rgb(255,77,77)"], # red for 0 (No)
355
- [0.5, "rgb(255,204,0)"], # yellow for 0.5 (Some)
356
- [1.0, "rgb(0,204,102)"] # green for 1 (Yes)
357
- ],
358
- annotation_text=None # removes numbers
359
- )
360
-
361
- # Layout adjustments
362
- fig_heatmap.update_layout(
363
- title="Metadata Completeness Heatmap (Catalog vs Field)",
364
- xaxis_title="Metadata Field",
365
- yaxis_title="Catalog Name",
366
- width=1600, # make it wide
367
- height=1000, # make it tall so names fit
368
- margin=dict(l=200, r=50, t=80, b=150), # spacing for labels
369
- )
370
-
371
- # Tweak label angles for readability
372
- fig_heatmap.update_xaxes(tickangle=-45)
373
- fig_heatmap.update_yaxes(automargin=True)
374
-
375
- st.plotly_chart(fig_heatmap, use_container_width=True)
376
-
377
-
378
- # heatmap 2
379
-
380
- fig_heatmap1 = px.imshow(
381
- cat_df_numeric[meta_cols],
382
- labels=dict(x="Metadata Field", y="Catalog Name", color="Completeness"),
383
- x=meta_cols,
384
- y=cat_df_numeric["Catalog name"],
385
- color_continuous_scale=[
386
- [0.0, "rgb(255,77,77)"],
387
- [0.5, "rgb(255,204,0)"],
388
- [1.0, "rgb(0,204,102)"]
389
- ],
390
- )
391
-
392
- fig_heatmap1.update_layout(
393
- title="Metadata Completeness Heatmap (Catalog vs Field)",
394
- width=1600,
395
- height=1000,
396
- margin=dict(l=200, r=50, t=80, b=150),
397
- )
398
- fig_heatmap1.update_xaxes(tickangle=-45)
399
-
400
- st.plotly_chart(fig_heatmap1, use_container_width=True)
401
-
402
-
403
-
404
- with tab4:
405
- st.header("Catalog Mapping status")
406
-
 
1
+ import streamlit as st
2
+ from gsheet_loader import get_data
3
+ import pandas as pd
4
+ import plotly.express as px
5
+ import plotly.figure_factory as ff
6
+ import plotly.graph_objects as go
7
+ import datetime as dt
8
+
9
+ st.set_page_config(
10
+ page_title="Catalog Data Dashboard",
11
+ layout="wide",
12
+ page_icon="📊",
13
+ )
14
+
15
+ st.title("📊 Catalog Data Dashboard")
16
+ st.markdown(
17
+ """
18
+ This dashboard combines live Google Sheets data for:
19
+ - catalog onboarding
20
+ - metadata completeness
21
+ - mapping/scraping status
22
+ """
23
+ )
24
+
25
+ cat_onboarding_df, cat_metadata_df, cat_status_df = get_data()
26
+
27
+ tab0, tab1, tab2, tab3, tab4 = st.tabs(["Overview", "Static Data", "Onboarding Status", "Metadata Completeness", "Mapping Status"])
28
+
29
+ # =========================================================================================================================
30
+ # Tab 0 - Overview
31
+ # =========================================================================================================================
32
+
33
+ with tab0:
34
+ st.header("Overiew")
35
+ if st.button("🔄 Refresh Data"):
36
+ st.cache_data.clear()
37
+ st.toast("Refreshing data...", icon="🔄")
38
+ st.rerun()
39
+
40
+ st.markdown("---")
41
+ st.subheader("Quick Data Preview")
42
+
43
+ col1, col2, col3 = st.columns(3)
44
+ with col1:
45
+ st.dataframe(cat_onboarding_df.head(5))
46
+ with col2:
47
+ st.dataframe(cat_metadata_df.head(5))
48
+ with col3:
49
+ st.dataframe(cat_status_df.head(5))
50
+
51
+ # =========================================================================================================================
52
+ # Tab 1 - Static data
53
+ # =========================================================================================================================
54
+
55
+ with tab1:
56
+ st.header("Static Data Preview")
57
+
58
+ full_countries_df = pd.read_csv('countries.csv')
59
+ full_languages_df = pd.read_csv('languages.csv')
60
+
61
+ # countries map
62
+ fig = px.choropleth(
63
+ full_countries_df,
64
+ locations="country_name",
65
+ locationmode="country names",
66
+ color="log_count",
67
+ color_continuous_scale="Purples",
68
+ hover_name="country_name",
69
+ hover_data={"count": True, "log_count": False},
70
+ projection="natural earth",
71
+ title="Programs' availabilities by Country (Log Scale)"
72
+ )
73
+
74
+ fig.update_geos(showcountries=True, showcoastlines=True, showland=True, landcolor="white", projection_type="natural earth")
75
+ fig.update_layout(
76
+ width=1400,
77
+ height=700,
78
+ margin=dict(l=0, r=0, t=100, b=0),
79
+ title_y=0.95
80
+ )
81
+
82
+ st.plotly_chart(fig, use_container_width=True)
83
+
84
+ # languages map
85
+ fig1 = px.choropleth(
86
+ full_languages_df,
87
+ locations="country_name",
88
+ locationmode="country names",
89
+ color="log_count",
90
+ color_continuous_scale="Purples",
91
+ hover_name="country_name",
92
+ hover_data={"count": True, "log_count": False},
93
+ projection="natural earth",
94
+ title="Programs by Languages (Log Scale)"
95
+ )
96
+
97
+ fig1.update_geos(showcountries=True, showcoastlines=True, showland=True, landcolor="white", projection_type="natural earth")
98
+ fig1.update_layout(
99
+ width=1400,
100
+ height=700,
101
+ margin=dict(l=0, r=0, t=100, b=0),
102
+ title_y=0.95
103
+ )
104
+
105
+ st.plotly_chart(fig1, use_container_width=True)
106
+
107
+
108
+ # Completeness evaluation
109
+ catalog_scores = pd.read_csv("catalog_scores.csv")
110
+ colorscale = [
111
+ [0.0, "#ffffff"],
112
+ [0.1, "#dcd6f7"],
113
+ [0.3, "#a29bfe"],
114
+ [0.6, "#6c5ce7"],
115
+ [1.0, "#341f97"]
116
+ ]
117
+
118
+ fig_completeness = px.bar(
119
+ catalog_scores,
120
+ x="Total",
121
+ y="Catalog",
122
+ orientation="h",
123
+ color="Total",
124
+ color_continuous_scale=colorscale,
125
+ title="Catalog Metadata Completeness Score",
126
+ )
127
+ fig_completeness.update_layout(yaxis={'categoryorder':'total ascending'}, template="plotly_dark", height=1000)
128
+ st.plotly_chart(fig_completeness, use_container_width=True)
129
+
130
+
131
+ # ### completeness score broken down
132
+ subcols = ["movie", "show", "season", "episode", "sport"]
133
+
134
+ # Compute sum of raw subscores
135
+ catalog_scores["raw_sum"] = catalog_scores[subcols].sum(axis=1)
136
+
137
+ # Build the figure
138
+ fig_completeness2 = go.Figure()
139
+
140
+ for col in subcols:
141
+
142
+ # normalized height of this bar segment
143
+ norm_vals = (catalog_scores[col] / catalog_scores["raw_sum"]) * catalog_scores["Total"]
144
+
145
+ fig_completeness2.add_trace(
146
+ go.Bar(
147
+ y=catalog_scores["Catalog"],
148
+ x=norm_vals, # BAR SIZE = normalized values
149
+ name=col.capitalize(),
150
+ orientation="h",
151
+ customdata=catalog_scores[col], # RAW values for hover
152
+ hovertemplate=(
153
+ "<b>%{y}</b><br>" +
154
+ f"{col.capitalize()}: <b>%{{customdata}}</b><br>" + # RAW value
155
+ "Normalized: %{x:.2f}<extra></extra>"
156
+ )
157
+ )
158
+ )
159
+
160
+ fig_completeness2.update_layout(
161
+ barmode="stack",
162
+ title="Subscore Contribution per Catalog (Scaled to Total Score)",
163
+ xaxis_title="Total Score",
164
+ template="plotly_dark",
165
+ height=1200,
166
+ yaxis={'categoryorder':'total ascending'}
167
+ )
168
+
169
+ st.plotly_chart(fig_completeness2, use_container_width=True)
170
+
171
+
172
+ #scatter plot
173
+ fig_scatter = px.scatter(
174
+ catalog_scores,
175
+ x="Total",
176
+ y="Number of programs",
177
+ size="Number of programs",
178
+ color="Total",
179
+ hover_name="Catalog",
180
+ color_continuous_scale="Viridis",
181
+ size_max=50
182
+ )
183
+
184
+ st.plotly_chart(fig_scatter, use_container_width=True)
185
+
186
+
187
+
188
+ # =========================================================================================================================
189
+ # Tab 2 - Onboarding sheet
190
+ # =========================================================================================================================
191
+
192
+ with tab2:
193
+ st.header("Catalog Onboarding Status")
194
+
195
+ # Convert onboarding date to datetime (e.g., 21/11 → 2025-11-21)
196
+ cat_onboarding_df["Onboarding date"] = pd.to_datetime(
197
+ cat_onboarding_df["Onboarding date"], format="%d/%m", errors="coerce"
198
+ )
199
+ cat_onboarding_df["Onboarding date"] = cat_onboarding_df["Onboarding date"].apply(
200
+ lambda d: d.replace(year=2025) if pd.notna(d) else d
201
+ )
202
+
203
+ # Map textual months to end-of-month dates
204
+ month_map = {
205
+ "November 2025": dt.datetime(2025, 11, 30),
206
+ "December 2025": dt.datetime(2025, 12, 31),
207
+ "January 2026": dt.datetime(2026, 1, 31),
208
+ "February 2026": dt.datetime(2026, 2, 28),
209
+ "March 2026": dt.datetime(2026, 3, 31),
210
+ "April 2026": dt.datetime(2026, 4, 30),
211
+ "TBD": None,
212
+ }
213
+ cat_onboarding_df["Go live parsed"] = cat_onboarding_df["Go live (customer)"].map(month_map)
214
+
215
+ # Drop missing
216
+ timeline_df = cat_onboarding_df.dropna(subset=["Onboarding date", "Go live parsed"])
217
+
218
+ fig_timeline = px.timeline(
219
+ timeline_df,
220
+ x_start="Onboarding date",
221
+ x_end="Go live parsed",
222
+ y="NAME",
223
+ color="Onboarding Status",
224
+ hover_data=["Client", "Priority"],
225
+ title="Onboarding → Go-Live Timeline",
226
+ )
227
+ fig_timeline.update_yaxes(autorange="reversed")
228
+
229
+ st.plotly_chart(fig_timeline, use_container_width=True)
230
+
231
+ # bar chart 1
232
+ summary = (
233
+ cat_onboarding_df.groupby(["Client", "Onboarding Status"])
234
+ .size()
235
+ .reset_index(name="Count")
236
+ )
237
+
238
+ fig_client = px.bar(
239
+ summary,
240
+ x="Client",
241
+ y="Count",
242
+ color="Onboarding Status",
243
+ text_auto=True,
244
+ title="Catalogs per Client (by Onboarding Status)",
245
+ )
246
+ fig_client.update_layout(barmode="stack", xaxis_title="Client", yaxis_title="Catalog Count")
247
+
248
+ st.plotly_chart(fig_client, use_container_width=True)
249
+
250
+ # bar chart 2
251
+ summary = (
252
+ cat_onboarding_df.groupby(["Client", "Priority"])
253
+ .size()
254
+ .reset_index(name="Count")
255
+ )
256
+
257
+ fig_client1 = px.bar(
258
+ summary,
259
+ x="Client",
260
+ y="Count",
261
+ color="Priority",
262
+ text_auto=True,
263
+ title="Catalogs per Client (by Priority)",
264
+ )
265
+ fig_client1.update_layout(barmode="stack", xaxis_title="Client", yaxis_title="Catalog Count")
266
+
267
+ st.plotly_chart(fig_client1, use_container_width=True)
268
+
269
+ # bar chart 3
270
+
271
+ summary = (
272
+ cat_onboarding_df.groupby(["Onboarding Status", "Priority"])
273
+ .size()
274
+ .reset_index(name="Count")
275
+ )
276
+
277
+ fig_client2 = px.bar(
278
+ summary,
279
+ x="Onboarding Status",
280
+ y="Count",
281
+ color="Priority",
282
+ text_auto=True,
283
+ title="Catalogs per Onboarding Status (by Priority)",
284
+ )
285
+ fig_client2.update_layout(barmode="stack", xaxis_title="Onboarding Status", yaxis_title="Catalog Count")
286
+
287
+ st.plotly_chart(fig_client2, use_container_width=True)
288
+
289
+ # =========================================================================================================================
290
+ # Tab 3 - Metadata completeness
291
+ # =========================================================================================================================
292
+
293
+ with tab3:
294
+ st.header("Catalog Metadata Completeness")
295
+
296
+ cat_df = cat_metadata_df.copy()
297
+ meta_cols = [col for col in cat_df.columns if col not in ["Catalog name"]]
298
+
299
+ score_map = {"Yes": 1.0, "Some": 0.5, "No": 0.0, "None": 0.0, "": 0.0}
300
+
301
+ cat_df_numeric = cat_df.copy()
302
+ cat_df_numeric[meta_cols] = cat_df_numeric[meta_cols].replace(score_map)
303
+
304
+ # force conversion to numeric (anything else becomes NaN)
305
+ cat_df_numeric[meta_cols] = cat_df_numeric[meta_cols].apply(pd.to_numeric, errors="coerce")
306
+
307
+ cat_df_numeric["Completeness Score"] = cat_df_numeric[meta_cols].mean(axis=1)
308
+ cat_df_numeric_sorted = cat_df_numeric.sort_values("Completeness Score", ascending=False)
309
+
310
+ #graph 1
311
+ fig_completeness = px.bar(
312
+ cat_df_numeric_sorted,
313
+ x="Completeness Score",
314
+ y="Catalog name",
315
+ orientation="h",
316
+ color="Completeness Score",
317
+ color_continuous_scale="Greens",
318
+ title="Catalog Metadata Completeness Score",
319
+ )
320
+ fig_completeness.update_layout(yaxis={'categoryorder':'total ascending'})
321
+
322
+ st.plotly_chart(fig_completeness, use_container_width=True)
323
+
324
+ # graph 2
325
+ coverage = cat_df_numeric[meta_cols].mean().sort_values(ascending=False).reset_index()
326
+ coverage.columns = ["Metadata Field", "Average Score"]
327
+
328
+ fig_field_coverage = px.bar(
329
+ coverage,
330
+ x="Average Score",
331
+ y="Metadata Field",
332
+ orientation="h",
333
+ color="Average Score",
334
+ color_continuous_scale="Blues",
335
+ title="Metadata Field Coverage Across All Catalogs",
336
+ )
337
+ fig_field_coverage.update_layout(yaxis={'categoryorder':'total ascending'})
338
+
339
+ st.plotly_chart(fig_field_coverage, use_container_width=True)
340
+
341
+ # heatmap 1
342
+ # Prepare data
343
+ z = cat_df_numeric[meta_cols].astype(float).to_numpy()
344
+ x = list(meta_cols)
345
+ y = list(cat_df_numeric["Catalog name"].astype(str))
346
+
347
+ # Build the heatmap (no annotation_text)
348
+ fig_heatmap = ff.create_annotated_heatmap(
349
+ z=z,
350
+ x=x,
351
+ y=y,
352
+ showscale=True,
353
+ colorscale=[
354
+ [0.0, "rgb(255,77,77)"], # red for 0 (No)
355
+ [0.5, "rgb(255,204,0)"], # yellow for 0.5 (Some)
356
+ [1.0, "rgb(0,204,102)"] # green for 1 (Yes)
357
+ ],
358
+ annotation_text=None # removes numbers
359
+ )
360
+
361
+ # Layout adjustments
362
+ fig_heatmap.update_layout(
363
+ title="Metadata Completeness Heatmap (Catalog vs Field)",
364
+ xaxis_title="Metadata Field",
365
+ yaxis_title="Catalog Name",
366
+ width=1600, # make it wide
367
+ height=1000, # make it tall so names fit
368
+ margin=dict(l=200, r=50, t=80, b=150), # spacing for labels
369
+ )
370
+
371
+ # Tweak label angles for readability
372
+ fig_heatmap.update_xaxes(tickangle=-45)
373
+ fig_heatmap.update_yaxes(automargin=True)
374
+
375
+ st.plotly_chart(fig_heatmap, use_container_width=True)
376
+
377
+
378
+ # heatmap 2
379
+
380
+ fig_heatmap1 = px.imshow(
381
+ cat_df_numeric[meta_cols],
382
+ labels=dict(x="Metadata Field", y="Catalog Name", color="Completeness"),
383
+ x=meta_cols,
384
+ y=cat_df_numeric["Catalog name"],
385
+ color_continuous_scale=[
386
+ [0.0, "rgb(255,77,77)"],
387
+ [0.5, "rgb(255,204,0)"],
388
+ [1.0, "rgb(0,204,102)"]
389
+ ],
390
+ )
391
+
392
+ fig_heatmap1.update_layout(
393
+ title="Metadata Completeness Heatmap (Catalog vs Field)",
394
+ width=1600,
395
+ height=1000,
396
+ margin=dict(l=200, r=50, t=80, b=150),
397
+ )
398
+ fig_heatmap1.update_xaxes(tickangle=-45)
399
+
400
+ st.plotly_chart(fig_heatmap1, use_container_width=True)
401
+
402
+
403
+
404
+ with tab4:
405
+ st.header("Catalog Mapping status")
406
+