Ezhil committed on
Commit
b595166
·
1 Parent(s): 657719c

Changes DV - all charts are altered

Browse files
app.py CHANGED
@@ -50,7 +50,7 @@ st.sidebar.markdown("[View Raw Data Source](https://www.kaggle.com/datasets/joeb
50
 
51
  # Main Content
52
  st.title("Music Data Analysis Dashboard")
53
- st.markdown("Explore trends and insights from a diverse music dataset.")
54
 
55
  # Call Analysis Functions Based on Selection with updated explanations
56
  if analysis_option == "Popularity Trends Over Time":
 
50
 
51
  # Main Content
52
  st.title("Music Data Analysis Dashboard")
53
+ # st.markdown("Explore trends and insights from a diverse music dataset.")
54
 
55
  # Call Analysis Functions Based on Selection with updated explanations
56
  if analysis_option == "Popularity Trends Over Time":
functions/__pycache__/visualizations.cpython-310.pyc CHANGED
Binary files a/functions/__pycache__/visualizations.cpython-310.pyc and b/functions/__pycache__/visualizations.cpython-310.pyc differ
 
functions/visualizations.py CHANGED
@@ -10,442 +10,459 @@ import numpy as np
10
  from collections import Counter
11
 
12
 
13
- # def generate_popularity_trends(df):
14
- # st.header("Popularity Trends Over Time")
15
- # tab1, tab2, tab3 = st.tabs(["Average Popularity", "Individual Songs", "Top 10 Songs"])
16
 
17
- # with tab1:
18
- # st.markdown("**Average Popularity by Decade:** This chart shows how the average popularity of songs has changed over different decades.")
19
- # if 'Decade' in df.columns:
20
- # top_decades = df.groupby('Decade')['Popularity'].mean().reset_index().nlargest(10, 'Popularity')
21
 
22
- # fig1 = go.Figure()
23
- # fig1.add_trace(go.Scatter(
24
- # x=top_decades['Decade'],
25
- # y=top_decades['Popularity'],
26
- # mode='lines+markers',
27
- # fill='tonexty',
28
- # line=dict(color='royalblue', width=3),
29
- # marker=dict(size=8, color='darkblue', line=dict(width=2, color='white')),
30
- # name='Popularity',
31
- # hovertext=top_decades['Decade']
32
- # ))
33
- # fig1.update_layout(
34
- # title='Top 10 Decades by Average Popularity',
35
- # xaxis_title='Decade',
36
- # yaxis_title='Average Popularity Score',
37
- # template='plotly_white',
38
- # width=900,
39
- # height=450
40
- # )
41
- # st.plotly_chart(fig1)
42
- # else:
43
- # st.error("Cannot plot: 'Decade' column missing.")
44
 
45
- # with tab2:
46
- # st.markdown("**Top 10 Individual Songs:** This scatter plot highlights the popularity of the top 10 most popular songs over time.")
47
- # if 'Year' in df.columns:
48
- # top_songs = df.nlargest(10, 'Popularity')
49
- # fig2 = px.scatter(
50
- # top_songs, x='Year', y='Popularity',
51
- # color='Popularity',
52
- # size='Popularity',
53
- # color_continuous_scale='viridis',
54
- # title='Top 10 Individual Songs by Popularity',
55
- # hover_data=['Track Name', 'Artist Name(s)', 'Year']
56
- # )
57
- # fig2.update_layout(
58
- # xaxis_title='Release Year',
59
- # yaxis_title='Popularity Score',
60
- # template='plotly_white',
61
- # width=900,
62
- # height=500
63
- # )
64
- # st.plotly_chart(fig2)
65
- # else:
66
- # st.error("Cannot plot: 'Year' column missing.")
67
 
68
- # with tab3:
69
- # st.markdown("**Top 10 Most Popular Songs:** This bar chart displays the top 10 songs based on their popularity scores.")
70
- # if 'Track Name' in df.columns and 'Popularity' in df.columns:
71
- # top_songs = df.nlargest(10, 'Popularity')[['Track Name', 'Artist Name(s)', 'Popularity']]
72
- # fig3 = px.bar(
73
- # top_songs, y='Track Name', x='Popularity',
74
- # orientation='h', color='Popularity',
75
- # color_continuous_scale='deep',
76
- # title='Top 10 Most Popular Songs',
77
- # labels={'Track Name': 'Song Title', 'Popularity': 'Popularity Score'},
78
- # hover_data=['Track Name', 'Artist Name(s)']
79
- # )
80
- # fig3.update_layout(
81
- # xaxis_title='Popularity Score',
82
- # yaxis_title='Song Title',
83
- # template='plotly_white',
84
- # width=900,
85
- # height=500
86
- # )
87
- # st.plotly_chart(fig3)
88
- # else:
89
- # st.error("Cannot plot: 'Track Name' or 'Popularity' column missing.")
 
90
 
 
 
91
 
92
- # def generate_audio_features(df):
93
- # st.header("Audio Features Analysis")
94
- # feature = st.selectbox(
95
- # "Select Feature", ['Danceability', 'Energy', 'Tempo', 'Loudness']
96
- # )
97
- # tab1, tab2 = st.tabs(["Distribution", "By Decade"])
98
 
99
- # with tab1:
100
- # st.markdown(f"**Top 20 {feature} Values:** This histogram displays the distribution of the top 20 songs based on {feature}.")
101
- # top_features = df.nlargest(20, feature)
102
- # fig = px.histogram(
103
- # top_features, x=feature, nbins=20,
104
- # color='Decade' if 'Decade' in df.columns else None,
105
- # barmode='overlay',
106
- # opacity=0.7,
107
- # title=f'Top 20 Songs by {feature}',
108
- # color_discrete_sequence=px.colors.qualitative.Set2,
109
- # hover_data=['Track Name', 'Artist Name(s)']
110
- # )
111
- # st.plotly_chart(fig)
112
 
113
- # with tab2:
114
- # st.markdown(f"**{feature} by Decade:** This box plot compares the top 20 {feature} values across different decades.")
115
- # if 'Decade' in df.columns:
116
- # top_features = df.nlargest(20, feature)
117
- # fig2 = px.box(top_features, x='Decade', y=feature,
118
- # color='Decade',
119
- # title=f'Top 20 {feature} Values by Decade',
120
- # color_discrete_sequence=px.colors.qualitative.Pastel,
121
- # hover_data=['Track Name', 'Artist Name(s)']
122
- # )
123
- # st.plotly_chart(fig2)
124
- # else:
125
- # st.error("Cannot plot: 'Decade' column missing.")
126
- # def generate_genre_analysis(df):
127
- # st.header("Genre & Artist Analysis")
128
- # tab1, tab2, tab3 = st.tabs(["Top Genres", "Genre Distribution", "Artist Popularity"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
- # with tab1:
131
- # st.markdown("**Top Genres in Top 10 Songs:** Displays the most common genres among the top 10 most popular songs.")
132
- # top_songs = df.nlargest(10, 'Popularity')
133
- # top_genres = top_songs.explode('Genres')['Genres'].value_counts().reset_index()
134
- # fig1 = px.bar(
135
- # top_genres, x='count', y='Genres',
136
- # orientation='h', color='count',
137
- # color_continuous_scale='viridis',
138
- # title='Top Genres in Top 10 Songs',
139
- # labels={'count': 'Number of Songs', 'Genres': 'Genre Name'},
140
- # hover_data=['Genres', 'count']
141
- # )
142
- # fig1.update_layout(template='plotly_white', width=900, height=500)
143
- # st.plotly_chart(fig1)
144
 
145
- # with tab2:
146
- # st.markdown("**Genre Distribution in Top 10 Songs:** Shows how different genres contribute to the top 10 songs.")
147
- # genre_song_data = top_songs.explode('Genres')
148
- # fig2 = px.bar(
149
- # genre_song_data, x='Track Name', y='Popularity', color='Genres',
150
- # title='Genre Distribution in Top 10 Songs',
151
- # labels={'Track Name': 'Song Title', 'Popularity': 'Popularity Score', 'Genres': 'Genre'},
152
- # barmode='stack',
153
- # hover_data=['Track Name', 'Genres']
154
- # )
155
- # fig2.update_layout(template='plotly_white', width=900, height=500)
156
- # st.plotly_chart(fig2)
157
 
158
- # with tab3:
159
- # st.markdown("**Artist Popularity in Top 10 Songs:** Visualizes the most popular artists in the top 10 songs with their song count and names.")
160
- # artist_popularity = top_songs.groupby('Artist Name(s)').agg({'Popularity': 'sum', 'Track Name': lambda x: list(x)}).reset_index().sort_values(by='Popularity', ascending=False)
161
- # artist_popularity['Song Count'] = artist_popularity['Track Name'].apply(len)
162
- # fig3 = px.bar(
163
- # artist_popularity, x='Popularity', y='Artist Name(s)',
164
- # orientation='h', color='Popularity',
165
- # color_continuous_scale='blues',
166
- # title='Artist Popularity in Top 10 Songs',
167
- # labels={'Artist Name(s)': 'Artist Name', 'Popularity': 'Total Popularity Score', 'Song Count': 'Number of Songs'},
168
- # hover_data={'Artist Name(s)': True, 'Popularity': True, 'Song Count': True, 'Track Name': True}
169
- # )
170
- # fig3.update_layout(template='plotly_white', width=900, height=500)
171
- # st.plotly_chart(fig3)
172
 
173
- # def generate_explicit_trends(df):
174
- # st.header("Explicit Content Trends")
175
- # st.markdown("**Explicit vs Non-Explicit Songs Over Time:** This line chart shows how the number of explicit and non-explicit songs has changed over different decades.")
176
- # if 'Decade' in df.columns and 'Explicit' in df.columns:
177
- # explicit_trends = df.groupby(['Decade', 'Explicit']).size().reset_index(name='Count')
178
- # fig = px.line(
179
- # explicit_trends, x='Decade', y='Count', color='Explicit',
180
- # markers=True, line_shape='linear',
181
- # title='Explicit vs Non-Explicit Songs Over Time',
182
- # labels={'Decade': 'Decade', 'Count': 'Number of Songs', 'Explicit': 'Song Type'},
183
- # color_discrete_map={True: 'purple', False: 'green'}
184
- # )
185
- # fig.update_layout(template='plotly_white', width=900, height=500)
186
- # st.plotly_chart(fig)
187
- # else:
188
- # st.error("Cannot plot: 'Decade' or 'Explicit' column missing.")
189
 
190
- # def generate_album_insights(df):
191
- # st.header("Album & Label Insights")
192
- # tab1, tab2 = st.tabs(["Top Labels", "Album Popularity"])
193
 
194
- # with tab1:
195
- # st.markdown("**Top Record Labels:** Displays the most dominant record labels based on the number of songs they have released.")
196
- # if 'Label' in df.columns:
197
- # top_labels = df['Label'].value_counts().nlargest(10).reset_index()
198
- # fig9 = px.sunburst(
199
- # top_labels, path=['Label'], values='count',
200
- # title='Top Record Labels by Song Count',
201
- # color='count', color_continuous_scale='blues',
202
- # labels={'Label': 'Record Label', 'count': 'Number of Songs'}
203
- # )
204
- # fig9.update_layout(template='plotly_white', width=900, height=500)
205
- # st.plotly_chart(fig9)
206
- # else:
207
- # st.error("Cannot plot: 'Label' column missing.")
208
 
209
- # with tab2:
210
- # st.markdown("**Album Popularity:** Compares the popularity of albums based on the number of songs and their average popularity score.")
211
- # if 'Album Name' in df.columns and 'Popularity' in df.columns:
212
- # album_pop = df.groupby('Album Name')['Popularity'].agg(['mean', 'count']).reset_index()
213
- # album_pop = album_pop.sort_values(by=['mean', 'count'], ascending=[False, False]).nlargest(10, 'mean')
214
- # fig10 = px.strip(
215
- # album_pop, x='mean', y='Album Name',
216
- # color='count',
217
- # title='Top 10 Albums by Popularity',
218
- # labels={'Album Name': 'Album', 'mean': 'Average Popularity Score', 'count': 'Number of Songs'},
219
- # hover_data={'Album Name': True, 'count': True, 'mean': True},
220
- # color_discrete_sequence=px.colors.qualitative.Pastel
221
- # )
222
- # fig10.update_layout(template='plotly_white', width=900, height=500)
223
- # st.plotly_chart(fig10)
224
- # else:
225
- # st.error("Cannot plot: 'Album Name' or 'Popularity' column missing.")
226
 
227
 
228
- # def generate_tempo_mood(df):
229
- # st.header("Tempo & Mood Analysis")
230
- # tab1, tab2 = st.tabs(["Tempo Trends", "Mood Scatter"])
231
- # with tab1:
232
- # st.markdown("**Tempo Trends:** Tracks tempo changes.")
233
- # if 'Year' in df.columns and 'Tempo' in df.columns:
234
- # tempo_by_year = df.groupby('Year')['Tempo'].mean().reset_index()
235
- # fig11 = px.line(tempo_by_year, x='Year', y='Tempo', title='Average Tempo Over Time', color_discrete_sequence=['orange'])
236
- # fig11.update_layout(template='plotly_white', width=800, height=400)
237
- # st.plotly_chart(fig11)
238
- # else:
239
- # st.error("Cannot plot: 'Year' or 'Tempo' column missing.")
240
- # with tab2:
241
- # st.markdown("**Mood Analysis (Valence & Energy):** Categorizes songs based on mood and energy.")
242
- # if 'Valence' in df.columns and 'Energy' in df.columns:
243
- # top_songs = df.nlargest(10, 'Popularity')
244
- # mood_by_valence = top_songs.groupby('Valence')['Energy'].mean().reset_index()
245
- # fig12 = px.bar(
246
- # mood_by_valence, x='Valence', y='Energy',
247
- # title='Average Energy Levels by Valence (Mood Analysis)',
248
- # color='Energy', color_continuous_scale='plasma'
249
- # )
250
- # fig12.update_layout(template='plotly_white', width=900, height=500)
251
- # st.plotly_chart(fig12)
252
- # else:
253
- # st.error("Cannot plot: 'Valence' or 'Energy' column missing.")
254
- # def generate_top_artists_songs(df):
255
- # st.header("Top Artists and Songs")
256
- # tab1, tab2 = st.tabs(["Top Artists", "Top Songs"])
257
 
258
- # with tab1:
259
- # st.markdown("**Most Featured Artists:** Shows top artists.")
260
- # if 'Artist Name(s)' in df.columns:
261
- # top_artists = df['Artist Name(s)'].value_counts().nlargest(10).reset_index()
262
- # fig13 = px.bar(
263
- # top_artists, x='count', y='Artist Name(s)',
264
- # orientation='h',
265
- # title='Most Featured Artists',
266
- # color='count', color_continuous_scale='greens'
267
- # )
268
- # fig13.update_layout(template='plotly_white', width=900, height=500)
269
- # st.plotly_chart(fig13)
270
- # else:
271
- # st.error("Cannot plot: 'Artist Name(s)' column missing.")
272
 
273
- # with tab2:
274
- # st.markdown("**Top 10 Songs:** Lists top songs.")
275
- # if 'Track Name' in df.columns and 'Popularity' in df.columns:
276
- # top_songs = df.nlargest(10, 'Popularity')[['Track Name', 'Popularity']]
277
- # fig14 = px.pie(
278
- # top_songs, values='Popularity', names='Track Name',
279
- # title='Top 10 Songs by Popularity', color_discrete_sequence=px.colors.qualitative.Set3
280
- # )
281
- # fig14.update_layout(template='plotly_white', width=900, height=500)
282
- # st.plotly_chart(fig14)
283
- # else:
284
- # st.error("Cannot plot: 'Track Name' or 'Popularity' column missing.")
285
 
286
 
287
- # def generate_album_release_trends(df):
288
- # st.header("Album Release Trends")
289
- # tab1, tab2 = st.tabs(["Albums per Year", "Artist-Year Heatmap"])
290
- # with tab1:
291
- # st.markdown("**Albums per Year:** Tracks release patterns.")
292
- # if 'Year' in df.columns:
293
- # albums_per_year = df['Year'].value_counts().sort_index().reset_index()
294
- # fig15 = px.line(albums_per_year, x='Year', y='count', title='Number of Albums Released per Year', color_discrete_sequence=['purple'])
295
- # fig15.update_layout(template='plotly_white', width=800, height=400)
296
- # st.plotly_chart(fig15)
297
- # else:
298
- # st.error("Cannot plot: 'Year' column missing.")
299
- # with tab2:
300
- # st.markdown("**Songs by Artists and Years:** Visualizes trends.")
301
- # if 'Artist Name(s)' in df.columns and 'Year' in df.columns:
302
- # # Filter to only show the top 10 most featured artists
303
- # top_artists = df['Artist Name(s)'].value_counts().nlargest(10).index
304
- # filtered_df = df[df['Artist Name(s)'].isin(top_artists)]
305
 
306
- # # Grouping data
307
- # artist_year = filtered_df.groupby(['Year', 'Artist Name(s)']).size().reset_index(name='Count')
308
 
309
- # # Create a grouped bar chart
310
- # fig16 = px.bar(
311
- # artist_year, x='Year', y='Count', color='Artist Name(s)',
312
- # title='Songs Released by Top Artists Over the Years',
313
- # labels={'Count': 'Number of Songs', 'Year': 'Year'},
314
- # barmode='group', # Grouped bars for each artist per year
315
- # color_discrete_sequence=px.colors.qualitative.Set2
316
- # )
317
- # fig16.update_layout(width=900, height=500)
318
- # st.plotly_chart(fig16)
319
- # else:
320
- # st.error("Cannot plot: 'Artist Name(s)' or 'Year' column missing.")
321
- # def generate_duration_analysis(df):
322
- # st.header("Track Duration Analysis")
323
- # tab1, tab2 = st.tabs(["Distribution", "By Decade"])
324
 
325
- # # Filter out tracks longer than 900,000ms (15 minutes)
326
- # df = df[df['Track Duration (ms)'] <= 900000]
327
 
328
- # with tab1:
329
- # st.markdown("**Track Duration Distribution:** Illustrates how track durations vary, helping identify common song lengths.")
330
- # if 'Track Duration (ms)' in df.columns:
331
- # fig17 = px.histogram(
332
- # df, x='Track Duration (ms)',
333
- # title='Track Duration Distribution (Filtered)',
334
- # nbins=50,
335
- # color_discrete_sequence=['orange']
336
- # )
337
- # fig17.update_layout(template='plotly_white', width=800, height=400)
338
- # st.plotly_chart(fig17)
339
- # else:
340
- # st.error("Cannot plot: 'Track Duration (ms)' column missing.")
341
 
342
- # with tab2:
343
- # st.markdown("**Duration by Decade:** Compares the evolution of average track durations across decades, showing historical trends.")
344
- # if 'Decade' in df.columns and 'Track Duration (ms)' in df.columns:
345
- # fig18 = px.pie(
346
- # df.groupby('Decade')['Track Duration (ms)'].mean().reset_index(),
347
- # names='Decade', values='Track Duration (ms)',
348
- # title='Average Track Duration by Decade',
349
- # color_discrete_sequence=px.colors.qualitative.Set2
350
- # )
351
- # fig18.update_layout(template='plotly_white', width=800, height=400)
352
- # st.plotly_chart(fig18)
353
- # else:
354
- # st.error("Cannot plot: 'Decade' or 'Track Duration (ms)' column missing.")
355
 
356
 
357
- # def generate_streaming_insights(df):
358
- # st.header("Streaming and Engagement Insights")
359
- # tab1, tab2 = st.tabs(["Popularity vs Duration", "Time Signature"])
360
-
361
- # with tab1:
362
- # st.markdown("**Popularity vs Duration:** Examines how track length influences popularity trends.")
363
- # if 'Track Duration (ms)' in df.columns and 'Popularity' in df.columns:
364
- # df['Duration (minutes)'] = df['Track Duration (ms)'] / 60000
365
- # fig19 = px.box(
366
- # df, x=pd.cut(df['Duration (minutes)'], bins=[0, 2, 4, 6, 8, 10, 15], labels=['0-2', '2-4', '4-6', '6-8', '8-10', '10+']),
367
- # y='Popularity',
368
- # title='Popularity Distribution Across Track Durations',
369
- # color_discrete_sequence=['blue']
370
- # )
371
- # fig19.update_layout(template='plotly_white', width=800, height=400, xaxis_title='Track Duration (Minutes)')
372
- # st.plotly_chart(fig19)
373
- # else:
374
- # st.error("Cannot plot: 'Track Duration (ms)' or 'Popularity' column missing.")
375
 
376
- # with tab2:
377
- # st.markdown("**Popularity by Time Signature:** Analyzes the average popularity of songs across different time signatures.")
378
- # if 'Time Signature' in df.columns and 'Popularity' in df.columns:
379
- # pop_by_time = df.groupby('Time Signature')['Popularity'].mean().reset_index()
380
- # fig20 = px.bar(
381
- # pop_by_time, x='Time Signature', y='Popularity',
382
- # title='Average Popularity by Time Signature',
383
- # color='Popularity',
384
- # color_continuous_scale='purples'
385
- # )
386
- # fig20.update_layout(template='plotly_white', width=800, height=400)
387
- # st.plotly_chart(fig20)
388
- # else:
389
- # st.error("Cannot plot: 'Time Signature' or 'Popularity' column missing.")
390
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391
 
392
- # def generate_feature_comparisons(df):
393
- # st.header("Feature Comparisons Across Decades")
394
- # tab1, tab2 = st.tabs(["Feature Comparison", "Loudness Trends"])
395
- # with tab1:
396
- # st.markdown("**Feature Comparison:** Compares features across decades.")
397
- # if 'Decade' in df.columns:
398
- # features_by_decade = df.groupby('Decade')[['Danceability', 'Energy', 'Valence']].mean().reset_index()
399
- # fig21 = px.bar(features_by_decade.melt(id_vars='Decade'), x='Decade', y='value', color='variable',
400
- # barmode='group', title='Feature Comparison by Decade', color_discrete_sequence=px.colors.qualitative.Pastel)
401
- # fig21.update_layout(template='plotly_white', width=800, height=400)
402
- # st.plotly_chart(fig21)
403
- # else:
404
- # st.error("Cannot plot: 'Decade' column missing.")
405
- # with tab2:
406
- # st.markdown("**Loudness Over Time:** Tracks loudness trends.")
407
- # if 'Year' in df.columns and 'Loudness' in df.columns:
408
- # loudness_by_year = df.groupby('Year')['Loudness'].mean().reset_index()
409
- # fig22 = px.line(loudness_by_year, x='Year', y='Loudness', title='Average Loudness Over Time', color_discrete_sequence=['green'])
410
- # fig22.update_layout(template='plotly_white', width=800, height=400)
411
- # st.plotly_chart(fig22)
412
- # else:
413
- # st.error("Cannot plot: 'Year' or 'Loudness' column missing.")
414
 
415
- # def generate_top_artists_songs(df):
416
- # st.header("Top Artists and Songs")
417
- # tab1, tab2 = st.tabs(["Top Artists", "Top Songs"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
418
 
419
- # with tab1:
420
- # st.markdown("**Most Featured Artists:** Displays the top 10 artists with the highest song counts, highlighting their dominance in the dataset.")
421
- # if 'Artist Name(s)' in df.columns:
422
- # top_artists = df['Artist Name(s)'].value_counts().nlargest(10).reset_index()
423
- # top_artists.columns = ['Artist Name(s)', 'Count']
424
- # fig13 = px.sunburst(
425
- # top_artists, path=['Artist Name(s)'], values='Count',
426
- # title='Most Featured Artists',
427
- # color='Count',
428
- # color_continuous_scale='greens'
429
- # )
430
- # fig13.update_layout(template='plotly_white', width=900, height=500)
431
- # st.plotly_chart(fig13)
432
- # else:
433
- # st.error("Cannot plot: 'Artist Name(s)' column missing.")
434
 
435
- # with tab2:
436
- # st.markdown("**Songs by Artists and Years:** Analyzes song release trends across different years, focusing on the top artists.")
437
- # if 'Artist Name(s)' in df.columns and 'Year' in df.columns:
438
- # artist_year = df.groupby(['Artist Name(s)', 'Year']).size().reset_index(name='Count')
439
- # fig16 = px.sunburst(
440
- # artist_year, path=['Year', 'Artist Name(s)'], values='Count',
441
- # title='Songs Released by Artists Over the Years',
442
- # color='Count',
443
- # color_continuous_scale=px.colors.qualitative.Set2
444
- # )
445
- # fig16.update_layout(width=900, height=500)
446
- # st.plotly_chart(fig16)
447
- # else:
448
- # st.error("Cannot plot: 'Artist Name(s)' or 'Year' column missing.")
449
 
450
  def generate_network_analysis(df):
451
  st.header("Network Analysis")
@@ -455,11 +472,11 @@ def generate_network_analysis(df):
455
  df.columns = df.columns.str.strip()
456
 
457
  with tab1:
458
- st.markdown("**Top Collaborating Artists:** Shows which artists collaborate the most.")
459
- if 'Artists' in df.columns:
460
- df['Artists'] = df['Artists'].astype(str).str.split(', ')
461
  collaborations = []
462
- for artists in df['Artists']:
463
  collaborations.extend(combinations(sorted(artists), 2))
464
 
465
  collab_counts = Counter(collaborations)
@@ -477,10 +494,10 @@ def generate_network_analysis(df):
477
  plt.title("Top 20 Artist Collaborations")
478
  st.pyplot(plt)
479
  else:
480
- st.error("Cannot plot: 'Artists' column missing. Available columns: " + ", ".join(df.columns))
481
 
482
  with tab2:
483
- st.markdown("**Genre Crossover:** Displays genre relationships using a Chord Diagram.")
484
  if 'Genres' in df.columns:
485
  df['Genres'] = df['Genres'].astype(str).str.split(', ')
486
  genre_pairs = []
 
10
  from collections import Counter
11
 
12
 
13
+ def generate_popularity_trends(df):
14
+ st.header("Popularity Trends Over Time")
15
+ tab1, tab2, tab3 = st.tabs(["Average Popularity", "Individual Songs", "Top 10 Songs"])
16
 
17
+ with tab1:
18
+ st.markdown("**Average Popularity by Decade:** This chart shows how the average popularity of songs has changed over different decades.")
19
+ if 'Decade' in df.columns:
20
+ top_decades = df.groupby('Decade')['Popularity'].mean().reset_index().nlargest(10, 'Popularity')
21
 
22
+ fig1 = go.Figure()
23
+ fig1.add_trace(go.Scatter(
24
+ x=top_decades['Decade'],
25
+ y=top_decades['Popularity'],
26
+ mode='lines+markers',
27
+ fill='tonexty',
28
+ line=dict(color='royalblue', width=3),
29
+ marker=dict(size=8, color='darkblue', line=dict(width=2, color='white')),
30
+ name='Popularity',
31
+ hovertext=top_decades['Decade']
32
+ ))
33
+ fig1.update_layout(
34
+ title='Top 10 Decades by Average Popularity',
35
+ xaxis_title='Decade',
36
+ yaxis_title='Average Popularity Score',
37
+ template='plotly_white',
38
+ width=900,
39
+ height=450
40
+ )
41
+ st.plotly_chart(fig1)
42
+ else:
43
+ st.error("Cannot plot: 'Decade' column missing.")
44
 
45
+ with tab2:
46
+ st.markdown("**Top 10 Individual Songs:** This scatter plot highlights the popularity of the top 10 most popular songs over time.")
47
+ if 'Year' in df.columns:
48
+ top_songs = df.nlargest(10, 'Popularity')
49
+ fig2 = px.scatter(
50
+ top_songs, x='Year', y='Popularity',
51
+ color='Popularity',
52
+ size='Popularity',
53
+ color_continuous_scale='viridis',
54
+ title='Top 10 Individual Songs by Popularity',
55
+ hover_data=['Track Name', 'Artist Name(s)', 'Year']
56
+ )
57
+ fig2.update_layout(
58
+ xaxis_title='Release Year',
59
+ yaxis_title='Popularity Score',
60
+ template='plotly_white',
61
+ width=900,
62
+ height=500
63
+ )
64
+ st.plotly_chart(fig2)
65
+ else:
66
+ st.error("Cannot plot: 'Year' column missing.")
67
 
68
+ with tab3:
69
+ st.markdown("**Top 10 Most Popular Songs:** This bar chart displays the top 10 songs based on their popularity scores.")
70
+ if 'Track Name' in df.columns and 'Popularity' in df.columns:
71
+ top_songs = df.nlargest(10, 'Popularity')[['Track Name', 'Artist Name(s)', 'Popularity']]
72
+ fig3 = px.bar(
73
+ top_songs, y='Track Name', x='Popularity',
74
+ orientation='h', color='Popularity',
75
+ color_continuous_scale='deep',
76
+ title='Top 10 Most Popular Songs',
77
+ labels={'Track Name': 'Song Title', 'Popularity': 'Popularity Score'},
78
+ hover_data=['Track Name', 'Artist Name(s)']
79
+ )
80
+ fig3.update_layout(
81
+ xaxis_title='Popularity Score',
82
+ yaxis_title='Song Title',
83
+ template='plotly_white',
84
+ width=900,
85
+ height=500
86
+ )
87
+ st.plotly_chart(fig3)
88
+ else:
89
+ st.error("Cannot plot: 'Track Name' or 'Popularity' column missing.")
90
+
91
 
92
+ def generate_audio_features(df):
93
+ st.header("Audio Features Analysis")
94
 
95
+ feature = st.selectbox(
96
+ "Select Feature", ['Danceability', 'Energy', 'Tempo', 'Loudness']
97
+ )
 
 
 
98
 
99
+ tab1, tab2 = st.tabs(["Distribution", "By Decade"])
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
+ with tab1:
102
+ st.markdown(f"**Top 20 {feature} Values:** This bar chart displays the distribution of the top 20 songs based on {feature}.")
103
+ top_features = df.nlargest(20, feature)
104
+
105
+ fig = px.bar(
106
+ top_features, x='Track Name', y=feature,
107
+ color='Decade' if 'Decade' in df.columns else None,
108
+ title=f'Top 20 Songs by {feature}',
109
+ color_discrete_sequence=px.colors.qualitative.Set2,
110
+ hover_data=['Track Name', 'Artist Name(s)']
111
+ )
112
+ fig.update_layout(xaxis_tickangle=-45, template='plotly_white')
113
+ st.plotly_chart(fig)
114
+
115
+ with tab2:
116
+ st.markdown(f"**{feature} by Decade:** This line chart compares the top {feature} trends over different decades.")
117
+
118
+ if 'Decade' in df.columns:
119
+ avg_feature_by_decade = df.groupby('Decade')[feature].mean().reset_index()
120
+
121
+ fig2 = px.line(
122
+ avg_feature_by_decade, x='Decade', y=feature,
123
+ title=f'Average {feature} by Decade',
124
+ markers=True,
125
+ color_discrete_sequence=['red'],
126
+ hover_data=['Decade']
127
+ )
128
+ fig2.update_layout(template='plotly_white', width=800, height=400)
129
+ st.plotly_chart(fig2)
130
+ else:
131
+ st.error("Cannot plot: 'Decade' column missing.")
132
+
133
+ def generate_genre_analysis(df):
134
+ st.header("Genre & Artist Analysis")
135
+ tab1, tab2, tab3 = st.tabs(["Top Genres", "Genre Distribution", "Artist Popularity"])
136
 
137
+ with tab1:
138
+ st.markdown("**Top Genres in Top 10 Songs:** Displays the most common genres among the top 10 most popular songs.")
139
+ top_songs = df.nlargest(10, 'Popularity')
140
+ top_genres = top_songs.explode('Genres')['Genres'].value_counts().reset_index()
141
+ fig1 = px.bar(
142
+ top_genres, x='count', y='Genres',
143
+ orientation='h', color='count',
144
+ color_continuous_scale='viridis',
145
+ title='Top Genres in Top 10 Songs',
146
+ labels={'count': 'Number of Songs', 'Genres': 'Genre Name'},
147
+ hover_data=['Genres', 'count']
148
+ )
149
+ fig1.update_layout(template='plotly_white', width=900, height=500)
150
+ st.plotly_chart(fig1)
151
 
152
+ with tab2:
153
+ st.markdown("**Genre Distribution in Top 10 Songs:** Shows how different genres contribute to the top 10 songs.")
154
+ genre_song_data = top_songs.explode('Genres')
155
+ fig2 = px.bar(
156
+ genre_song_data, x='Track Name', y='Popularity', color='Genres',
157
+ title='Genre Distribution in Top 10 Songs',
158
+ labels={'Track Name': 'Song Title', 'Popularity': 'Popularity Score', 'Genres': 'Genre'},
159
+ barmode='stack',
160
+ hover_data=['Track Name', 'Genres']
161
+ )
162
+ fig2.update_layout(template='plotly_white', width=900, height=500)
163
+ st.plotly_chart(fig2)
164
 
165
+ with tab3:
166
+ st.markdown("**Artist Popularity in Top 10 Songs:** Visualizes the most popular artists in the top 10 songs with their song count and names.")
167
+ artist_popularity = top_songs.groupby('Artist Name(s)').agg({'Popularity': 'sum', 'Track Name': lambda x: list(x)}).reset_index().sort_values(by='Popularity', ascending=False)
168
+ artist_popularity['Song Count'] = artist_popularity['Track Name'].apply(len)
169
+ fig3 = px.bar(
170
+ artist_popularity, x='Popularity', y='Artist Name(s)',
171
+ orientation='h', color='Popularity',
172
+ color_continuous_scale='blues',
173
+ title='Artist Popularity in Top 10 Songs',
174
+ labels={'Artist Name(s)': 'Artist Name', 'Popularity': 'Total Popularity Score', 'Song Count': 'Number of Songs'},
175
+ hover_data={'Artist Name(s)': True, 'Popularity': True, 'Song Count': True, 'Track Name': True}
176
+ )
177
+ fig3.update_layout(template='plotly_white', width=900, height=500)
178
+ st.plotly_chart(fig3)
179
 
180
+ def generate_explicit_trends(df):
181
+ st.header("Explicit Content Trends")
182
+ st.markdown("**Explicit vs Non-Explicit Songs Over Time:** This line chart shows how the number of explicit and non-explicit songs has changed over different decades.")
183
+ if 'Decade' in df.columns and 'Explicit' in df.columns:
184
+ explicit_trends = df.groupby(['Decade', 'Explicit']).size().reset_index(name='Count')
185
+ fig = px.line(
186
+ explicit_trends, x='Decade', y='Count', color='Explicit',
187
+ markers=True, line_shape='linear',
188
+ title='Explicit vs Non-Explicit Songs Over Time',
189
+ labels={'Decade': 'Decade', 'Count': 'Number of Songs', 'Explicit': 'Song Type'},
190
+ color_discrete_map={True: 'purple', False: 'green'}
191
+ )
192
+ fig.update_layout(template='plotly_white', width=900, height=500)
193
+ st.plotly_chart(fig)
194
+ else:
195
+ st.error("Cannot plot: 'Decade' or 'Explicit' column missing.")
196
 
197
def generate_album_insights(df):
    """Render the "Album & Label Insights" section as two Streamlit tabs.

    * "Top Labels": sunburst of the ten most frequent values in ``Label``.
    * "Album Popularity": strip plot of the ten albums with the highest mean
      ``Popularity`` (each point is coloured by the album's row count).

    A tab whose required column(s) are absent shows an error message instead
    of a chart.

    Args:
        df: pandas DataFrame. Columns read: ``Label``, ``Album Name``,
            ``Popularity``.
    """
    st.header("Album & Label Insights")
    tab1, tab2 = st.tabs(["Top Labels", "Album Popularity"])

    with tab1:
        st.markdown("**Top Record Labels:** Displays the most dominant record labels based on the number of songs they have released.")
        if 'Label' in df.columns:
            # Name both output columns explicitly. A bare
            # value_counts().reset_index() yields 'Label'/'count' only on
            # pandas >= 2.0; older versions produce 'index'/'Label', which
            # would break the values='count' reference below.
            top_labels = (
                df['Label']
                .value_counts()
                .nlargest(10)
                .rename_axis('Label')
                .reset_index(name='count')
            )
            fig9 = px.sunburst(
                top_labels, path=['Label'], values='count',
                title='Top Record Labels by Song Count',
                color='count', color_continuous_scale='blues',
                labels={'Label': 'Record Label', 'count': 'Number of Songs'}
            )
            fig9.update_layout(template='plotly_white', width=900, height=500)
            st.plotly_chart(fig9)
        else:
            st.error("Cannot plot: 'Label' column missing.")

    with tab2:
        st.markdown("**Album Popularity:** Compares the popularity of albums based on the number of songs and their average popularity score.")
        if 'Album Name' in df.columns and 'Popularity' in df.columns:
            album_pop = df.groupby('Album Name')['Popularity'].agg(['mean', 'count']).reset_index()
            # The pre-sort decides which albums survive when several share the
            # same mean: nlargest keeps the first occurrences, i.e. the ones
            # with the larger row count.
            album_pop = album_pop.sort_values(
                by=['mean', 'count'], ascending=[False, False]
            ).nlargest(10, 'mean')
            fig10 = px.strip(
                album_pop, x='mean', y='Album Name',
                color='count',
                title='Top 10 Albums by Popularity',
                labels={'Album Name': 'Album', 'mean': 'Average Popularity Score', 'count': 'Number of Songs'},
                hover_data={'Album Name': True, 'count': True, 'mean': True},
                color_discrete_sequence=px.colors.qualitative.Pastel
            )
            fig10.update_layout(template='plotly_white', width=900, height=500)
            st.plotly_chart(fig10)
        else:
            st.error("Cannot plot: 'Album Name' or 'Popularity' column missing.")
233
 
234
 
235
def generate_tempo_mood(df):
    """Render the "Tempo & Mood Analysis" section as two Streamlit tabs.

    * "Tempo Trends": line chart of mean ``Tempo`` per ``Year``.
    * "Mood Scatter": bar chart of mean ``Energy`` per ``Valence`` value,
      computed over the ten rows with the highest ``Popularity``.

    A tab whose required column(s) are absent shows an error message instead
    of a chart.

    Args:
        df: pandas DataFrame. Columns read: ``Year``, ``Tempo``, ``Valence``,
            ``Energy``, ``Popularity``.
    """
    st.header("Tempo & Mood Analysis")
    tab1, tab2 = st.tabs(["Tempo Trends", "Mood Scatter"])

    with tab1:
        st.markdown("**Tempo Trends:** Tracks tempo changes.")
        if 'Year' in df.columns and 'Tempo' in df.columns:
            tempo_by_year = df.groupby('Year')['Tempo'].mean().reset_index()
            fig11 = px.line(tempo_by_year, x='Year', y='Tempo', title='Average Tempo Over Time', color_discrete_sequence=['orange'])
            fig11.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig11)
        else:
            st.error("Cannot plot: 'Year' or 'Tempo' column missing.")

    with tab2:
        st.markdown("**Mood Analysis (Valence & Energy):** Categorizes songs based on mood and energy.")
        if 'Valence' not in df.columns or 'Energy' not in df.columns:
            st.error("Cannot plot: 'Valence' or 'Energy' column missing.")
        elif 'Popularity' not in df.columns:
            # nlargest below ranks by 'Popularity'; without this guard a
            # missing column surfaced as an unhandled KeyError.
            st.error("Cannot plot: 'Popularity' column missing.")
        else:
            top_songs = df.nlargest(10, 'Popularity')
            mood_by_valence = top_songs.groupby('Valence')['Energy'].mean().reset_index()
            fig12 = px.bar(
                mood_by_valence, x='Valence', y='Energy',
                title='Average Energy Levels by Valence (Mood Analysis)',
                color='Energy', color_continuous_scale='plasma'
            )
            fig12.update_layout(template='plotly_white', width=900, height=500)
            st.plotly_chart(fig12)
261
def generate_top_artists_songs(df):
    """Render the "Top Artists and Songs" section as two Streamlit tabs.

    * "Top Artists": horizontal bar chart of the ten most frequent values in
      ``Artist Name(s)``.
    * "Top Songs": pie chart of the ten rows with the highest ``Popularity``,
      labelled by ``Track Name``.

    A tab whose required column(s) are absent shows an error message instead
    of a chart.

    NOTE(review): a second function with this same name is defined further
    down in this module. If both stay, the later definition replaces this one
    when the module is imported, so this version would never run. Confirm
    which one is intended and remove or rename the other.

    Args:
        df: pandas DataFrame. Columns read: ``Artist Name(s)``,
            ``Track Name``, ``Popularity``.
    """
    st.header("Top Artists and Songs")
    tab1, tab2 = st.tabs(["Top Artists", "Top Songs"])

    with tab1:
        st.markdown("**Most Featured Artists:** Shows top artists.")
        if 'Artist Name(s)' in df.columns:
            # Name both output columns explicitly. A bare
            # value_counts().reset_index() yields a 'count' column only on
            # pandas >= 2.0, and the chart below refers to 'count'.
            top_artists = (
                df['Artist Name(s)']
                .value_counts()
                .nlargest(10)
                .rename_axis('Artist Name(s)')
                .reset_index(name='count')
            )
            fig13 = px.bar(
                top_artists, x='count', y='Artist Name(s)',
                orientation='h',
                title='Most Featured Artists',
                color='count', color_continuous_scale='greens'
            )
            fig13.update_layout(template='plotly_white', width=900, height=500)
            st.plotly_chart(fig13)
        else:
            st.error("Cannot plot: 'Artist Name(s)' column missing.")

    with tab2:
        st.markdown("**Top 10 Songs:** Lists top songs.")
        if 'Track Name' in df.columns and 'Popularity' in df.columns:
            top_songs = df.nlargest(10, 'Popularity')[['Track Name', 'Popularity']]
            fig14 = px.pie(
                top_songs, values='Popularity', names='Track Name',
                title='Top 10 Songs by Popularity', color_discrete_sequence=px.colors.qualitative.Set3
            )
            fig14.update_layout(template='plotly_white', width=900, height=500)
            st.plotly_chart(fig14)
        else:
            st.error("Cannot plot: 'Track Name' or 'Popularity' column missing.")
292
 
293
 
294
def generate_album_release_trends(df):
    """Render the "Album Release Trends" section as two Streamlit tabs.

    * "Albums per Year": line chart of the number of rows per ``Year``.
      NOTE(review): this counts DataFrame rows, not distinct albums; if each
      row is a track, the chart title overstates what is plotted. Confirm.
    * "Artist-Year Heatmap": grouped bar chart of row counts per ``Year`` for
      the ten most frequent ``Artist Name(s)`` values.

    A tab whose required column(s) are absent shows an error message instead
    of a chart.

    Args:
        df: pandas DataFrame. Columns read: ``Year``, ``Artist Name(s)``.
    """
    st.header("Album Release Trends")
    tab1, tab2 = st.tabs(["Albums per Year", "Artist-Year Heatmap"])

    with tab1:
        st.markdown("**Albums per Year:** Tracks release patterns.")
        if 'Year' in df.columns:
            # Name both output columns explicitly. A bare
            # value_counts().reset_index() yields 'Year'/'count' only on
            # pandas >= 2.0, and the chart below refers to both names.
            albums_per_year = (
                df['Year']
                .value_counts()
                .sort_index()
                .rename_axis('Year')
                .reset_index(name='count')
            )
            fig15 = px.line(albums_per_year, x='Year', y='count', title='Number of Albums Released per Year', color_discrete_sequence=['purple'])
            fig15.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig15)
        else:
            st.error("Cannot plot: 'Year' column missing.")

    with tab2:
        st.markdown("**Songs by Artists and Years:** Visualizes trends.")
        if 'Artist Name(s)' in df.columns and 'Year' in df.columns:
            # Keep only the ten most featured artists so the chart stays legible.
            top_artists = df['Artist Name(s)'].value_counts().nlargest(10).index
            filtered_df = df[df['Artist Name(s)'].isin(top_artists)]

            # One row per (year, artist) pair with its number of entries.
            artist_year = filtered_df.groupby(['Year', 'Artist Name(s)']).size().reset_index(name='Count')

            fig16 = px.bar(
                artist_year, x='Year', y='Count', color='Artist Name(s)',
                title='Songs Released by Top Artists Over the Years',
                labels={'Count': 'Number of Songs', 'Year': 'Year'},
                barmode='group',  # side-by-side bars for each artist per year
                color_discrete_sequence=px.colors.qualitative.Set2
            )
            fig16.update_layout(width=900, height=500)
            st.plotly_chart(fig16)
        else:
            st.error("Cannot plot: 'Artist Name(s)' or 'Year' column missing.")
328
def generate_duration_analysis(df):
    """Render the "Track Duration Analysis" section as two Streamlit tabs.

    * "Distribution": 50-bin histogram of ``Track Duration (ms)``.
    * "By Decade": pie chart of the mean ``Track Duration (ms)`` per
      ``Decade``.

    Rows whose duration exceeds 900,000 ms (15 minutes) are excluded from
    both charts. A tab whose required column(s) are absent shows an error
    message instead of a chart.

    Args:
        df: pandas DataFrame. Columns read: ``Track Duration (ms)``,
            ``Decade``. The caller's frame is not modified; filtering
            rebinds a local name only.
    """
    st.header("Track Duration Analysis")
    tab1, tab2 = st.tabs(["Distribution", "By Decade"])

    # Filter out tracks longer than 900,000ms (15 minutes). Guarded so that a
    # frame without the column reaches the st.error branches below instead of
    # raising KeyError here.
    if 'Track Duration (ms)' in df.columns:
        df = df[df['Track Duration (ms)'] <= 900000]

    with tab1:
        st.markdown("**Track Duration Distribution:** Illustrates how track durations vary, helping identify common song lengths.")
        if 'Track Duration (ms)' in df.columns:
            fig17 = px.histogram(
                df, x='Track Duration (ms)',
                title='Track Duration Distribution (Filtered)',
                nbins=50,
                color_discrete_sequence=['orange']
            )
            fig17.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig17)
        else:
            st.error("Cannot plot: 'Track Duration (ms)' column missing.")

    with tab2:
        st.markdown("**Duration by Decade:** Compares the evolution of average track durations across decades, showing historical trends.")
        if 'Decade' in df.columns and 'Track Duration (ms)' in df.columns:
            duration_by_decade = df.groupby('Decade')['Track Duration (ms)'].mean().reset_index()
            fig18 = px.pie(
                duration_by_decade,
                names='Decade', values='Track Duration (ms)',
                title='Average Track Duration by Decade',
                color_discrete_sequence=px.colors.qualitative.Set2
            )
            fig18.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig18)
        else:
            st.error("Cannot plot: 'Decade' or 'Track Duration (ms)' column missing.")
362
 
363
 
364
+
365
def generate_streaming_insights(df):
    """Render "Streaming and Engagement Insights" as two Streamlit tabs.

    * "Popularity vs Duration": line chart of mean ``Popularity`` per
      track-length bucket (0-2, 2-4, 4-6, 6-8, 8-10 and 10+ minutes).
    * "Time Signature": bar chart of mean ``Popularity`` per
      ``Time Signature`` value.

    A tab whose required column(s) are absent shows an error message instead
    of a chart.

    Args:
        df: pandas DataFrame. Columns read: ``Track Duration (ms)``,
            ``Popularity``, ``Time Signature``. The frame is not modified.
    """
    st.header("Streaming and Engagement Insights")
    tab1, tab2 = st.tabs(["Popularity vs Duration", "Time Signature"])

    with tab1:
        st.markdown("**Popularity vs Track Duration:** This line chart shows the trend of song popularity based on their duration.")

        if 'Track Duration (ms)' in df.columns and 'Popularity' in df.columns:
            # Work on a derived Series instead of adding a column to `df`, so
            # the caller's DataFrame is left untouched.
            minutes = df['Track Duration (ms)'] / 60000
            # The last bucket is open-ended so that the '10+' label is true;
            # with an upper edge of 15, longer tracks were silently dropped.
            duration_bins = pd.cut(
                minutes,
                bins=[0, 2, 4, 6, 8, 10, float('inf')],
                labels=['0-2', '2-4', '4-6', '6-8', '8-10', '10+'],
            ).rename('Duration (minutes)')
            # observed=False keeps empty buckets on the axis and makes the
            # categorical-groupby behaviour explicit across pandas versions.
            avg_popularity = (
                df.groupby(duration_bins, observed=False)['Popularity']
                .mean()
                .reset_index()
            )

            fig1 = px.line(
                avg_popularity,
                x='Duration (minutes)',
                y='Popularity',
                title='Popularity vs. Track Duration',
                markers=True,          # adds points to the line
                line_shape='spline',   # smooths the line
                color_discrete_sequence=['blue']
            )
            fig1.update_layout(template='plotly_white', xaxis_title='Track Duration (Minutes)', yaxis_title='Average Popularity')
            st.plotly_chart(fig1)
        else:
            st.error("Cannot plot: 'Track Duration (ms)' or 'Popularity' column missing.")

    with tab2:
        st.markdown("**Popularity by Time Signature:** This bar chart compares the average popularity of songs based on their time signatures.")

        if 'Time Signature' in df.columns and 'Popularity' in df.columns:
            pop_by_time = df.groupby('Time Signature')['Popularity'].mean().reset_index()
            fig2 = px.bar(
                pop_by_time,
                x='Time Signature',
                y='Popularity',
                title='Average Popularity by Time Signature',
                color='Popularity',
                color_continuous_scale='purples'
            )
            fig2.update_layout(template='plotly_white', xaxis_title='Time Signature', yaxis_title='Average Popularity')
            st.plotly_chart(fig2)
        else:
            st.error("Cannot plot: 'Time Signature' or 'Popularity' column missing.")
 
 
 
 
 
 
 
 
408
 
409
def generate_feature_comparisons(df):
    """Render "Feature Comparisons Across Decades" as two Streamlit tabs.

    * "Feature Comparison": grouped bar chart of the mean ``Danceability``,
      ``Energy`` and ``Valence`` per ``Decade``.
    * "Loudness Trends": line chart of mean ``Loudness`` per ``Year``.

    A tab whose required column(s) are absent shows an error message instead
    of a chart.

    Args:
        df: pandas DataFrame. Columns read: ``Decade``, ``Danceability``,
            ``Energy``, ``Valence``, ``Year``, ``Loudness``.
    """
    st.header("Feature Comparisons Across Decades")
    tab1, tab2 = st.tabs(["Feature Comparison", "Loudness Trends"])

    with tab1:
        st.markdown("**Feature Comparison:** Compares features across decades.")
        feature_cols = ['Danceability', 'Energy', 'Valence']
        missing_features = [col for col in feature_cols if col not in df.columns]
        if 'Decade' not in df.columns:
            st.error("Cannot plot: 'Decade' column missing.")
        elif missing_features:
            # Selecting an absent feature column after groupby raises
            # KeyError; report it the same way as a missing 'Decade'.
            st.error("Cannot plot: missing feature column(s): " + ", ".join(missing_features))
        else:
            features_by_decade = df.groupby('Decade')[feature_cols].mean().reset_index()
            # melt() reshapes to long form: one row per (decade, feature).
            fig21 = px.bar(features_by_decade.melt(id_vars='Decade'), x='Decade', y='value', color='variable',
                           barmode='group', title='Feature Comparison by Decade', color_discrete_sequence=px.colors.qualitative.Pastel)
            fig21.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig21)

    with tab2:
        st.markdown("**Loudness Over Time:** Tracks loudness trends.")
        if 'Year' in df.columns and 'Loudness' in df.columns:
            loudness_by_year = df.groupby('Year')['Loudness'].mean().reset_index()
            fig22 = px.line(loudness_by_year, x='Year', y='Loudness', title='Average Loudness Over Time', color_discrete_sequence=['green'])
            fig22.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig22)
        else:
            st.error("Cannot plot: 'Year' or 'Loudness' column missing.")
431
+
432
def generate_top_artists_songs(df):
    """Render the "Top Artists and Songs" section as two Streamlit tabs.

    * "Top Artists": sunburst of the ten most frequent values in
      ``Artist Name(s)``.
    * "Top Songs": sunburst of row counts per ``Year`` and artist, limited to
      the ten most frequent artists.
      NOTE(review): despite the tab label, no per-song ranking is drawn here.

    A tab whose required column(s) are absent shows an error message instead
    of a chart.

    NOTE(review): an earlier function with this same name exists in this
    module. Being defined later, this one is what callers get after import.
    Confirm that is intended and remove or rename the other.

    Args:
        df: pandas DataFrame. Columns read: ``Artist Name(s)``, ``Year``.
    """
    st.header("Top Artists and Songs")
    tab1, tab2 = st.tabs(["Top Artists", "Top Songs"])

    with tab1:
        st.markdown("**Most Featured Artists:** Displays the top 10 artists with the highest song counts, highlighting their dominance in the dataset.")
        if 'Artist Name(s)' in df.columns:
            # Explicit names keep the frame identical across pandas versions.
            top_artists = (
                df['Artist Name(s)']
                .value_counts()
                .nlargest(10)
                .rename_axis('Artist Name(s)')
                .reset_index(name='Count')
            )
            fig13 = px.sunburst(
                top_artists, path=['Artist Name(s)'], values='Count',
                title='Most Featured Artists',
                color='Count',
                color_continuous_scale='greens'
            )
            fig13.update_layout(template='plotly_white', width=900, height=500)
            st.plotly_chart(fig13)
        else:
            st.error("Cannot plot: 'Artist Name(s)' column missing.")

    with tab2:
        st.markdown("**Songs by Artists and Years:** Analyzes song release trends across different years, focusing on the top artists.")
        if 'Artist Name(s)' in df.columns and 'Year' in df.columns:
            # Restrict to the ten most featured artists, as the description
            # above promises; previously every artist in the frame was drawn.
            top_names = df['Artist Name(s)'].value_counts().nlargest(10).index
            top_rows = df[df['Artist Name(s)'].isin(top_names)]
            artist_year = top_rows.groupby(['Artist Name(s)', 'Year']).size().reset_index(name='Count')
            fig16 = px.sunburst(
                artist_year, path=['Year', 'Artist Name(s)'], values='Count',
                title='Songs Released by Artists Over the Years',
                color='Count',
                # NOTE(review): Set2 is a qualitative palette used here as a
                # continuous scale; a sequential scale may read better.
                color_continuous_scale=px.colors.qualitative.Set2
            )
            fig16.update_layout(width=900, height=500)
            st.plotly_chart(fig16)
        else:
            st.error("Cannot plot: 'Artist Name(s)' or 'Year' column missing.")
466
 
467
  def generate_network_analysis(df):
468
  st.header("Network Analysis")
 
472
  df.columns = df.columns.str.strip()
473
 
474
  with tab1:
475
+ st.markdown("**Top Collaborating Artists:** This chart highlights artists who frequently collaborate with each other.")
476
+ if 'Artist Name(s)' in df.columns:
477
+ df['Artist Name(s)'] = df['Artist Name(s)'].astype(str).str.split(', ')
478
  collaborations = []
479
+ for artists in df['Artist Name(s)']:
480
  collaborations.extend(combinations(sorted(artists), 2))
481
 
482
  collab_counts = Counter(collaborations)
 
494
  plt.title("Top 20 Artist Collaborations")
495
  st.pyplot(plt)
496
  else:
497
+ st.error("Cannot plot: 'Artist Name(s)' column missing. Available columns: " + ", ".join(df.columns))
498
 
499
  with tab2:
500
+ st.markdown("**Genre Crossover:** This chart shows how different music genres are connected and often blend together.")
501
  if 'Genres' in df.columns:
502
  df['Genres'] = df['Genres'].astype(str).str.split(', ')
503
  genre_pairs = []