Ezhil committed on
Commit
873154b
·
1 Parent(s): 6ce9997

Changed the Spotify logo source and the raw data link; removed the raw data sample preview

Browse files
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import os
2
- import streamlit as st
3
  import pandas as pd
 
4
  from models.data_processor import load_data
5
  from functions.visualizations import (
6
  generate_popularity_trends, generate_audio_features, generate_genre_analysis,
@@ -12,16 +12,13 @@ from functions.visualizations import (
12
  # Load Data
13
  df = load_data()
14
 
15
- # Sidebar - Add Spotify Logo from Local File
16
- logo_path = "assests/spotify-logo.png" # Adjust this if needed
17
- if os.path.exists(logo_path):
18
- st.sidebar.image(logo_path, use_column_width=True)
19
 
20
  # Sidebar - Title & Filters
21
  st.sidebar.title("Music Data Analysis")
22
- st.sidebar.markdown("[View Raw Data](data/music_data.csv)",
23
- unsafe_allow_html=True)
24
-
25
  analysis_option = st.sidebar.selectbox(
26
  "Choose Analysis",
27
  [
@@ -43,38 +40,48 @@ else:
43
  "No data loaded or 'Decade' column missing. Check the 'data' folder.")
44
  filtered_df = pd.DataFrame()
45
 
 
 
 
 
46
  # Main Content
47
  st.title("Music Data Analysis Dashboard")
48
  st.markdown("Explore trends and insights from a diverse music dataset.")
49
 
50
- if not df.empty:
51
- st.write("### Raw Data Sample")
52
- st.dataframe(df.head())
53
- else:
54
- st.error("Failed to load raw data. Check the 'data/music_data.csv' file.")
55
-
56
- # Call Analysis Functions Based on Selection
57
  if analysis_option == "Popularity Trends Over Time":
 
58
  generate_popularity_trends(filtered_df)
59
  elif analysis_option == "Audio Features Analysis":
 
60
  generate_audio_features(filtered_df)
61
  elif analysis_option == "Genre & Artist Analysis":
 
62
  generate_genre_analysis(filtered_df)
63
  elif analysis_option == "Explicit Content Trends":
 
64
  generate_explicit_trends(filtered_df)
65
  elif analysis_option == "Album & Label Insights":
 
66
  generate_album_insights(filtered_df)
67
  elif analysis_option == "Tempo & Mood Analysis":
 
68
  generate_tempo_mood(filtered_df)
69
  elif analysis_option == "Top Artists and Songs":
 
70
  generate_top_artists_songs(filtered_df)
71
  elif analysis_option == "Album Release Trends":
 
72
  generate_album_release_trends(filtered_df)
73
  elif analysis_option == "Track Duration Analysis":
 
74
  generate_duration_analysis(filtered_df)
75
  elif analysis_option == "Streaming and Engagement Insights":
 
76
  generate_streaming_insights(filtered_df)
77
  elif analysis_option == "Feature Comparisons Across Decades":
 
78
  generate_feature_comparisons(filtered_df)
79
  elif analysis_option == "Network Analysis":
 
80
  generate_network_analysis(filtered_df)
 
1
  import os
 
2
  import pandas as pd
3
+ import streamlit as st
4
  from models.data_processor import load_data
5
  from functions.visualizations import (
6
  generate_popularity_trends, generate_audio_features, generate_genre_analysis,
 
12
  # Load Data
13
  df = load_data()
14
 
15
+ # Sidebar - Add Spotify Logo from URL at left top middle
16
+ # Using a reliable Spotify logo URL (fallback to green logo)
17
+ st.sidebar.image("https://upload.wikimedia.org/wikipedia/commons/1/19/Spotify_logo_without_text.svg",
18
+ width=150, caption="Spotify", use_column_width=False)
19
 
20
  # Sidebar - Title & Filters
21
  st.sidebar.title("Music Data Analysis")
 
 
 
22
  analysis_option = st.sidebar.selectbox(
23
  "Choose Analysis",
24
  [
 
40
  "No data loaded or 'Decade' column missing. Check the 'data' folder.")
41
  filtered_df = pd.DataFrame()
42
 
43
+ # Add View Raw Data link at the bottom of the sidebar
44
+ st.sidebar.markdown(
45
+ "[View Raw Data Source](https://www.kaggle.com/datasets/joebeachcapital/top-10000-spotify-songs-1960-now)", unsafe_allow_html=True)
46
+
47
  # Main Content
48
  st.title("Music Data Analysis Dashboard")
49
  st.markdown("Explore trends and insights from a diverse music dataset.")
50
 
51
+ # Call Analysis Functions Based on Selection with updated explanations
 
 
 
 
 
 
52
  if analysis_option == "Popularity Trends Over Time":
53
+ st.markdown("**Popularity Trends:** Tracks popularity changes over time.")
54
  generate_popularity_trends(filtered_df)
55
  elif analysis_option == "Audio Features Analysis":
56
+ st.markdown("**Audio Features:** Shows feature distributions.")
57
  generate_audio_features(filtered_df)
58
  elif analysis_option == "Genre & Artist Analysis":
59
+ st.markdown("**Genre & Artist:** Highlights top genres.")
60
  generate_genre_analysis(filtered_df)
61
  elif analysis_option == "Explicit Content Trends":
62
+ st.markdown("**Explicit Trends:** Compares explicit songs.")
63
  generate_explicit_trends(filtered_df)
64
  elif analysis_option == "Album & Label Insights":
65
+ st.markdown("**Album & Label:** Displays top labels.")
66
  generate_album_insights(filtered_df)
67
  elif analysis_option == "Tempo & Mood Analysis":
68
+ st.markdown("**Tempo & Mood:** Tracks tempo trends.")
69
  generate_tempo_mood(filtered_df)
70
  elif analysis_option == "Top Artists and Songs":
71
+ st.markdown("**Top Artists/Songs:** Lists top artists and songs.")
72
  generate_top_artists_songs(filtered_df)
73
  elif analysis_option == "Album Release Trends":
74
+ st.markdown("**Album Trends:** Shows release patterns.")
75
  generate_album_release_trends(filtered_df)
76
  elif analysis_option == "Track Duration Analysis":
77
+ st.markdown("**Duration Analysis:** Displays track durations.")
78
  generate_duration_analysis(filtered_df)
79
  elif analysis_option == "Streaming and Engagement Insights":
80
+ st.markdown("**Streaming Insights:** Explores engagement trends.")
81
  generate_streaming_insights(filtered_df)
82
  elif analysis_option == "Feature Comparisons Across Decades":
83
+ st.markdown("**Feature Comparisons:** Compares features across decades.")
84
  generate_feature_comparisons(filtered_df)
85
  elif analysis_option == "Network Analysis":
86
+ st.markdown("**Network Analysis:** Visualizes artist connections.")
87
  generate_network_analysis(filtered_df)
functions/__pycache__/visualizations.cpython-310.pyc CHANGED
Binary files a/functions/__pycache__/visualizations.cpython-310.pyc and b/functions/__pycache__/visualizations.cpython-310.pyc differ
 
functions/visualizations.py CHANGED
@@ -7,277 +7,225 @@ import networkx as nx
7
  import plotly.graph_objects as go
8
  from itertools import combinations
9
 
10
-
11
  def generate_popularity_trends(df):
12
  st.header("Popularity Trends Over Time")
13
  tab1, tab2 = st.tabs(["Average Popularity", "Individual Songs"])
14
  with tab1:
15
- st.markdown("<span style='color:blue'>**Average Popularity by Decade**</span>: Tracks how song popularity has <span style='color:red'>changed over time</span>. This <span style='color:green'>blue</span> line chart highlights peaks.", unsafe_allow_html=True)
16
  if 'Decade' in df.columns:
17
- avg_pop_by_decade = df.groupby(
18
- 'Decade')['Popularity'].mean().reset_index()
19
- fig1 = px.line(avg_pop_by_decade, x='Decade', y='Popularity',
20
- title='Average Popularity by Decade', color_discrete_sequence=['blue'])
21
  fig1.update_layout(template='plotly_white', width=800, height=400)
22
  st.plotly_chart(fig1)
23
  else:
24
  st.error("Cannot plot: 'Decade' column missing.")
25
  with tab2:
26
- st.markdown("<span style='color:blue'>**Song Popularity Over Time**</span>: Highlights individual trends with <span style='color:red'>red</span> points, showing <span style='color:green'>green</span> details on hover.", unsafe_allow_html=True)
27
  if 'Year' in df.columns:
28
- fig2 = px.scatter(df, x='Year', y='Popularity', title='Song Popularity Over Time', hover_data=[
29
- 'Track Name', 'Artist Name(s)'], color_discrete_sequence=['red'])
30
  fig2.update_layout(template='plotly_white', width=800, height=400)
31
  st.plotly_chart(fig2)
32
  else:
33
  st.error("Cannot plot: 'Year' column missing.")
34
 
35
-
36
  def generate_audio_features(df):
37
  st.header("Audio Features Analysis")
38
- feature = st.selectbox(
39
- "Select Feature", ['Danceability', 'Energy', 'Tempo', 'Loudness'])
40
  tab1, tab2, tab3 = st.tabs(["Distribution", "By Decade", "Correlations"])
41
  with tab1:
42
- st.markdown(
43
- f"<span style='color:blue'>**Distribution of {feature}**</span>: Shows variation in <span style='color:red'>{feature.lower()}</span> with <span style='color:green'>green</span> bars.", unsafe_allow_html=True)
44
- fig3 = px.histogram(
45
- df, x=feature, title=f'Distribution of {feature}', color_discrete_sequence=['green'])
46
  fig3.update_layout(template='plotly_white', width=800, height=400)
47
  st.plotly_chart(fig3)
48
  with tab2:
49
- st.markdown(
50
- f"<span style='color:blue'>**{feature} by Decade**</span>: Compares <span style='color:red'>{feature.lower()}</span> across decades with <span style='color:green'>green</span> boxes.", unsafe_allow_html=True)
51
  if 'Decade' in df.columns:
52
- fig4 = px.box(df, x='Decade', y=feature,
53
- title=f'{feature} Distribution by Decade', color_discrete_sequence=['green'])
54
  fig4.update_layout(template='plotly_white', width=800, height=400)
55
  st.plotly_chart(fig4)
56
  else:
57
  st.error("Cannot plot: 'Decade' column missing.")
58
  with tab3:
59
- st.markdown("<span style='color:blue'>**Feature Correlations**</span>: Explores relationships with <span style='color:red'>multi-colored</span> scatter points.", unsafe_allow_html=True)
60
  fig, ax = plt.subplots()
61
  sns.pairplot(df[['Energy', 'Danceability', 'Valence', 'Tempo']])
62
  st.pyplot(fig)
63
 
64
-
65
  def generate_genre_analysis(df):
66
  st.header("Genre & Artist Analysis")
67
- tab1, tab2, tab3 = st.tabs(
68
- ["Top Genres", "Genre Distribution", "Artist Popularity"])
69
  with tab1:
70
- st.markdown("<span style='color:blue'>**Top Genres by Decade**</span>: Shows frequent genres with <span style='color:red'>red</span> bars, <span style='color:green'>green</span> highlights.", unsafe_allow_html=True)
71
  if 'Decade' in df.columns:
72
- genre_decade = df.explode('Genres').groupby(
73
- ['Decade', 'Genres']).size().reset_index(name='Count')
74
- top_genres = genre_decade.groupby('Decade').apply(
75
- lambda x: x.nlargest(5, 'Count')).reset_index(drop=True)
76
- fig5 = px.bar(top_genres, x='Decade', y='Count', color='Genres',
77
- title='Top Genres by Decade', color_discrete_sequence=px.colors.qualitative.Set1)
78
  fig5.update_layout(template='plotly_white', width=800, height=400)
79
  st.plotly_chart(fig5)
80
  else:
81
  st.error("Cannot plot: 'Decade' column missing.")
82
  with tab2:
83
- st.markdown("<span style='color:blue'>**Genre Distribution**</span>: Breaks down genres with <span style='color:red'>multi-colored</span> pie slices.", unsafe_allow_html=True)
84
- genre_counts = df.explode(
85
- 'Genres')['Genres'].value_counts().reset_index()
86
- fig6 = px.pie(genre_counts, values='count', names='Genres',
87
- title='Genre Distribution', color_discrete_sequence=px.colors.qualitative.Set2)
88
  fig6.update_layout(width=800, height=400)
89
  st.plotly_chart(fig6)
90
  with tab3:
91
- st.markdown("<span style='color:blue'>**Artist Popularity Heatmap**</span>: Visualizes popularity with <span style='color:red'>red</span> intensity.", unsafe_allow_html=True)
92
  if 'Artist Name(s)' in df.columns:
93
- artist_pop = df.groupby('Artist Name(s)')[
94
- 'Popularity'].mean().reset_index()
95
- fig7 = px.imshow(pd.pivot_table(df, values='Popularity', index='Artist Name(s)', aggfunc='mean').fillna(
96
- 0), title='Artist Popularity Heatmap', color_continuous_scale='Reds')
97
  fig7.update_layout(width=800, height=400)
98
  st.plotly_chart(fig7)
99
  else:
100
  st.error("Cannot plot: 'Artist Name(s)' column missing.")
101
 
102
-
103
  def generate_explicit_trends(df):
104
  st.header("Explicit Content Trends")
105
- st.markdown("<span style='color:blue'>**Explicit vs Non-Explicit Songs**</span>: Compares content with <span style='color:red'>stacked bars</span> in <span style='color:green'>green</span> and <span style='color:purple'>purple</span>.", unsafe_allow_html=True)
106
  if 'Decade' in df.columns and 'Explicit' in df.columns:
107
- explicit_by_decade = df.groupby(
108
- ['Decade', 'Explicit']).size().unstack().fillna(0)
109
- fig8 = px.bar(explicit_by_decade, barmode='stack',
110
- title='Explicit vs Non-Explicit Songs by Decade', color_discrete_sequence=['green', 'purple'])
111
  fig8.update_layout(template='plotly_white', width=800, height=400)
112
  st.plotly_chart(fig8)
113
  else:
114
  st.error("Cannot plot: 'Decade' or 'Explicit' column missing.")
115
 
116
-
117
  def generate_album_insights(df):
118
  st.header("Album & Label Insights")
119
  tab1, tab2 = st.tabs(["Top Labels", "Album Popularity"])
120
  with tab1:
121
- st.markdown("<span style='color:blue'>**Top Record Labels**</span>: Identifies labels with <span style='color:red'>blue</span> bars.", unsafe_allow_html=True)
122
  if 'Label' in df.columns:
123
  top_labels = df['Label'].value_counts().nlargest(10).reset_index()
124
- fig9 = px.bar(top_labels, x='Label', y='count',
125
- title='Top Record Labels by Song Count', color_discrete_sequence=['blue'])
126
  fig9.update_layout(template='plotly_white', width=800, height=400)
127
  st.plotly_chart(fig9)
128
  else:
129
  st.error("Cannot plot: 'Label' column missing.")
130
  with tab2:
131
- st.markdown("<span style='color:blue'>**Album Popularity**</span>: Shows albums with <span style='color:red'>red</span> bubbles.", unsafe_allow_html=True)
132
  if 'Album Name' in df.columns and 'Popularity' in df.columns:
133
- album_pop = df.groupby('Album Name')['Popularity'].agg(
134
- ['mean', 'count']).reset_index()
135
- fig10 = px.scatter(album_pop, x='count', y='mean', size='mean', hover_data=[
136
- 'Album Name'], title='Albums: Song Count vs Average Popularity', color_discrete_sequence=['red'])
137
  fig10.update_layout(template='plotly_white', width=800, height=400)
138
  st.plotly_chart(fig10)
139
  else:
140
  st.error("Cannot plot: 'Album Name' or 'Popularity' column missing.")
141
 
142
-
143
  def generate_tempo_mood(df):
144
  st.header("Tempo & Mood Analysis")
145
  tab1, tab2 = st.tabs(["Tempo Trends", "Mood Scatter"])
146
  with tab1:
147
- st.markdown("<span style='color:blue'>**Tempo Trends**</span>: Tracks changes with <span style='color:red'>orange</span> line.", unsafe_allow_html=True)
148
  if 'Year' in df.columns and 'Tempo' in df.columns:
149
  tempo_by_year = df.groupby('Year')['Tempo'].mean().reset_index()
150
- fig11 = px.line(tempo_by_year, x='Year', y='Tempo',
151
- title='Average Tempo Over Time', color_discrete_sequence=['orange'])
152
  fig11.update_layout(template='plotly_white', width=800, height=400)
153
  st.plotly_chart(fig11)
154
  else:
155
  st.error("Cannot plot: 'Year' or 'Tempo' column missing.")
156
  with tab2:
157
- st.markdown("<span style='color:blue'>**Valence vs Energy**</span>: Groups mood with <span style='color:red'>purple</span> points.", unsafe_allow_html=True)
158
  if 'Valence' in df.columns and 'Energy' in df.columns:
159
- fig12 = px.scatter(df, x='Valence', y='Energy', title='Valence vs Energy', hover_data=[
160
- 'Track Name'], color_discrete_sequence=['purple'])
161
  fig12.update_layout(template='plotly_white', width=800, height=400)
162
  st.plotly_chart(fig12)
163
  else:
164
  st.error("Cannot plot: 'Valence' or 'Energy' column missing.")
165
 
166
-
167
  def generate_top_artists_songs(df):
168
  st.header("Top Artists and Songs")
169
  tab1, tab2 = st.tabs(["Top Artists", "Top Songs"])
170
  with tab1:
171
- st.markdown("<span style='color:blue'>**Most Featured Artists**</span>: Shows artists with <span style='color:red'>green</span> bars.", unsafe_allow_html=True)
172
  if 'Artist Name(s)' in df.columns:
173
- top_artists = df['Artist Name(s)'].value_counts().nlargest(
174
- 10).reset_index()
175
- fig13 = px.bar(top_artists, x='Artist Name(s)', y='count',
176
- title='Most Featured Artists', color_discrete_sequence=['green'])
177
  fig13.update_layout(template='plotly_white', width=800, height=400)
178
  st.plotly_chart(fig13)
179
  else:
180
  st.error("Cannot plot: 'Artist Name(s)' column missing.")
181
  with tab2:
182
- st.markdown(
183
- "<span style='color:blue'>**Top 10 Songs**</span>: Lists songs with <span style='color:red'>blue</span> bars.", unsafe_allow_html=True)
184
  if 'Track Name' in df.columns and 'Popularity' in df.columns:
185
- top_songs = df.nlargest(10, 'Popularity')[
186
- ['Track Name', 'Popularity']]
187
- fig14 = px.bar(top_songs, y='Track Name', x='Popularity', orientation='h',
188
- title='Top 10 Songs by Popularity', color_discrete_sequence=['blue'])
189
  fig14.update_layout(template='plotly_white', width=800, height=400)
190
  st.plotly_chart(fig14)
191
  else:
192
  st.error("Cannot plot: 'Track Name' or 'Popularity' column missing.")
193
 
194
-
195
  def generate_album_release_trends(df):
196
  st.header("Album Release Trends")
197
  tab1, tab2 = st.tabs(["Albums per Year", "Artist-Year Heatmap"])
198
  with tab1:
199
- st.markdown("<span style='color:blue'>**Albums per Year**</span>: Tracks releases with <span style='color:red'>purple</span> line.", unsafe_allow_html=True)
200
  if 'Year' in df.columns:
201
- albums_per_year = df['Year'].value_counts(
202
- ).sort_index().reset_index()
203
- fig15 = px.line(albums_per_year, x='Year', y='count',
204
- title='Number of Albums Released per Year', color_discrete_sequence=['purple'])
205
  fig15.update_layout(template='plotly_white', width=800, height=400)
206
  st.plotly_chart(fig15)
207
  else:
208
  st.error("Cannot plot: 'Year' column missing.")
209
  with tab2:
210
- st.markdown("<span style='color:blue'>**Songs by Artists and Years**</span>: Visualizes with <span style='color:red'>heatmap colors</span>.", unsafe_allow_html=True)
211
  if 'Artist Name(s)' in df.columns and 'Year' in df.columns:
212
- artist_year = df.groupby(
213
- ['Artist Name(s)', 'Year']).size().unstack().fillna(0)
214
- fig16 = px.imshow(
215
- artist_year, title='Songs Released by Artists Across Years', color_continuous_scale='Viridis')
216
  fig16.update_layout(width=800, height=400)
217
  st.plotly_chart(fig16)
218
  else:
219
  st.error("Cannot plot: 'Artist Name(s)' or 'Year' column missing.")
220
 
221
-
222
  def generate_duration_analysis(df):
223
  st.header("Track Duration Analysis")
224
  tab1, tab2 = st.tabs(["Distribution", "By Decade"])
225
  with tab1:
226
- st.markdown("<span style='color:blue'>**Track Duration Distribution**</span>: Shows lengths with <span style='color:red'>orange</span> bars.", unsafe_allow_html=True)
227
  if 'Track Duration (ms)' in df.columns:
228
- fig17 = px.histogram(df, x='Track Duration (ms)',
229
- title='Distribution of Track Durations', color_discrete_sequence=['orange'])
230
  fig17.update_layout(template='plotly_white', width=800, height=400)
231
  st.plotly_chart(fig17)
232
  else:
233
  st.error("Cannot plot: 'Track Duration (ms)' column missing.")
234
  with tab2:
235
- st.markdown("<span style='color:blue'>**Duration by Decade**</span>: Compares with <span style='color:red'>green</span> boxes.", unsafe_allow_html=True)
236
  if 'Decade' in df.columns and 'Track Duration (ms)' in df.columns:
237
- fig18 = px.box(df, x='Decade', y='Track Duration (ms)',
238
- title='Track Duration by Decade', color_discrete_sequence=['green'])
239
  fig18.update_layout(template='plotly_white', width=800, height=400)
240
  st.plotly_chart(fig18)
241
  else:
242
- st.error(
243
- "Cannot plot: 'Decade' or 'Track Duration (ms)' column missing.")
244
-
245
 
246
  def generate_streaming_insights(df):
247
  st.header("Streaming and Engagement Insights")
248
  tab1, tab2 = st.tabs(["Popularity vs Duration", "Time Signature"])
249
  with tab1:
250
- st.markdown("<span style='color:blue'>**Popularity vs Duration**</span>: Explores trends with <span style='color:red'>blue</span> scatter.", unsafe_allow_html=True)
251
  if 'Track Duration (ms)' in df.columns and 'Popularity' in df.columns:
252
- fig19 = px.scatter(df, x='Track Duration (ms)', y='Popularity',
253
- title='Popularity vs Track Duration', color_discrete_sequence=['blue'])
254
  fig19.update_layout(template='plotly_white', width=800, height=400)
255
  st.plotly_chart(fig19)
256
  else:
257
- st.error(
258
- "Cannot plot: 'Track Duration (ms)' or 'Popularity' column missing.")
259
  with tab2:
260
- st.markdown("<span style='color:blue'>**Popularity by Time Signature**</span>: Compares with <span style='color:red'>purple</span> bars.", unsafe_allow_html=True)
261
  if 'Time Signature' in df.columns and 'Popularity' in df.columns:
262
- pop_by_time = df.groupby('Time Signature')[
263
- 'Popularity'].mean().reset_index()
264
- fig20 = px.bar(pop_by_time, x='Time Signature', y='Popularity',
265
- title='Average Popularity by Time Signature', color_discrete_sequence=['purple'])
266
  fig20.update_layout(template='plotly_white', width=800, height=400)
267
  st.plotly_chart(fig20)
268
  else:
269
- st.error(
270
- "Cannot plot: 'Time Signature' or 'Popularity' column missing.")
271
-
272
 
273
  def generate_feature_comparisons(df):
274
  st.header("Feature Comparisons Across Decades")
275
  tab1, tab2 = st.tabs(["Feature Comparison", "Loudness Trends"])
276
  with tab1:
277
- st.markdown("<span style='color:blue'>**Feature Comparison**</span>: Compares features with <span style='color:red'>multi-colored</span> bars.", unsafe_allow_html=True)
278
  if 'Decade' in df.columns:
279
- features_by_decade = df.groupby(
280
- 'Decade')[['Danceability', 'Energy', 'Valence']].mean().reset_index()
281
  fig21 = px.bar(features_by_decade.melt(id_vars='Decade'), x='Decade', y='value', color='variable',
282
  barmode='group', title='Feature Comparison by Decade', color_discrete_sequence=px.colors.qualitative.Pastel)
283
  fig21.update_layout(template='plotly_white', width=800, height=400)
@@ -285,36 +233,29 @@ def generate_feature_comparisons(df):
285
  else:
286
  st.error("Cannot plot: 'Decade' column missing.")
287
  with tab2:
288
- st.markdown("<span style='color:blue'>**Loudness Over Time**</span>: Tracks with <span style='color:red'>green</span> line.", unsafe_allow_html=True)
289
  if 'Year' in df.columns and 'Loudness' in df.columns:
290
- loudness_by_year = df.groupby(
291
- 'Year')['Loudness'].mean().reset_index()
292
- fig22 = px.line(loudness_by_year, x='Year', y='Loudness',
293
- title='Average Loudness Over Time', color_discrete_sequence=['green'])
294
  fig22.update_layout(template='plotly_white', width=800, height=400)
295
  st.plotly_chart(fig22)
296
  else:
297
  st.error("Cannot plot: 'Year' or 'Loudness' column missing.")
298
 
299
-
300
  def generate_network_analysis(df):
301
  st.header("Network Analysis")
302
  tab1, tab2 = st.tabs(["Artist Collaborations", "Genre Crossover"])
303
  with tab1:
304
- st.markdown("<span style='color:blue'>**Artist Collaborations**</span>: Visualizes connections with <span style='color:red'>interactive red nodes</span>. Hover for details.", unsafe_allow_html=True)
305
  if 'Artist Name(s)' in df.columns:
306
- # Filter out non-string values and handle missing data
307
  valid_artists = df['Artist Name(s)'].dropna().astype(str)
308
  G = nx.Graph()
309
  for artists in valid_artists:
310
- artists_list = [a.strip() for a in artists.split(
311
- ',') if a.strip()] # Split and clean
312
- if len(artists_list) > 1: # Check length of list
313
  for a1, a2 in combinations(artists_list, 2):
314
  G.add_edge(a1, a2)
315
  if G.number_of_nodes() > 0:
316
- # Convert to Plotly format
317
- # Use spring layout for better spacing
318
  pos = nx.spring_layout(G)
319
  edge_x = []
320
  edge_y = []
@@ -353,7 +294,7 @@ def generate_network_analysis(df):
353
  else:
354
  st.error("Cannot plot: 'Artist Name(s)' column missing.")
355
  with tab2:
356
- st.markdown("<span style='color:blue'>**Genre Crossover**</span>: Placeholder with <span style='color:red'>future multi-color</span> potential.", unsafe_allow_html=True)
357
  st.write("To implement, install `holoviews` and use the following code:")
358
  st.code("""
359
  import holoviews as hv
@@ -362,4 +303,4 @@ def generate_network_analysis(df):
362
  chord_data = genre_pairs.groupby(['Genres_x', 'Genres_y']).size().reset_index(name='value')
363
  chord = hv.Chord(chord_data).opts(title="Genre Crossover")
364
  st.write(hv.render(chord, backend='bokeh'))
365
- """)
 
7
  import plotly.graph_objects as go
8
  from itertools import combinations
9
 
 
10
  def generate_popularity_trends(df):
11
  st.header("Popularity Trends Over Time")
12
  tab1, tab2 = st.tabs(["Average Popularity", "Individual Songs"])
13
  with tab1:
14
+ st.markdown("**Average Popularity by Decade:** Tracks popularity changes over time.")
15
  if 'Decade' in df.columns:
16
+ avg_pop_by_decade = df.groupby('Decade')['Popularity'].mean().reset_index()
17
+ fig1 = px.line(avg_pop_by_decade, x='Decade', y='Popularity', title='Average Popularity by Decade', color_discrete_sequence=['blue'])
 
 
18
  fig1.update_layout(template='plotly_white', width=800, height=400)
19
  st.plotly_chart(fig1)
20
  else:
21
  st.error("Cannot plot: 'Decade' column missing.")
22
  with tab2:
23
+ st.markdown("**Song Popularity Over Time:** Highlights individual trends.")
24
  if 'Year' in df.columns:
25
+ fig2 = px.scatter(df, x='Year', y='Popularity', title='Song Popularity Over Time', hover_data=['Track Name', 'Artist Name(s)'], color_discrete_sequence=['red'])
 
26
  fig2.update_layout(template='plotly_white', width=800, height=400)
27
  st.plotly_chart(fig2)
28
  else:
29
  st.error("Cannot plot: 'Year' column missing.")
30
 
 
31
  def generate_audio_features(df):
32
  st.header("Audio Features Analysis")
33
+ feature = st.selectbox("Select Feature", ['Danceability', 'Energy', 'Tempo', 'Loudness'])
 
34
  tab1, tab2, tab3 = st.tabs(["Distribution", "By Decade", "Correlations"])
35
  with tab1:
36
+ st.markdown(f"**Distribution of {feature}:** Shows feature variations.")
37
+ fig3 = px.histogram(df, x=feature, title=f'Distribution of {feature}', color_discrete_sequence=['green'])
 
 
38
  fig3.update_layout(template='plotly_white', width=800, height=400)
39
  st.plotly_chart(fig3)
40
  with tab2:
41
+ st.markdown(f"**{feature} by Decade:** Compares across decades.")
 
42
  if 'Decade' in df.columns:
43
+ fig4 = px.box(df, x='Decade', y=feature, title=f'{feature} Distribution by Decade', color_discrete_sequence=['green'])
 
44
  fig4.update_layout(template='plotly_white', width=800, height=400)
45
  st.plotly_chart(fig4)
46
  else:
47
  st.error("Cannot plot: 'Decade' column missing.")
48
  with tab3:
49
+ st.markdown("**Feature Correlations:** Explores relationships.")
50
  fig, ax = plt.subplots()
51
  sns.pairplot(df[['Energy', 'Danceability', 'Valence', 'Tempo']])
52
  st.pyplot(fig)
53
 
 
54
  def generate_genre_analysis(df):
55
  st.header("Genre & Artist Analysis")
56
+ tab1, tab2, tab3 = st.tabs(["Top Genres", "Genre Distribution", "Artist Popularity"])
 
57
  with tab1:
58
+ st.markdown("**Top Genres by Decade:** Highlights frequent genres.")
59
  if 'Decade' in df.columns:
60
+ genre_decade = df.explode('Genres').groupby(['Decade', 'Genres']).size().reset_index(name='Count')
61
+ top_genres = genre_decade.groupby('Decade').apply(lambda x: x.nlargest(5, 'Count')).reset_index(drop=True)
62
+ fig5 = px.bar(top_genres, x='Decade', y='Count', color='Genres', title='Top Genres by Decade', color_discrete_sequence=px.colors.qualitative.Set1)
 
 
 
63
  fig5.update_layout(template='plotly_white', width=800, height=400)
64
  st.plotly_chart(fig5)
65
  else:
66
  st.error("Cannot plot: 'Decade' column missing.")
67
  with tab2:
68
+ st.markdown("**Genre Distribution:** Breaks down genres.")
69
+ genre_counts = df.explode('Genres')['Genres'].value_counts().reset_index()
70
+ fig6 = px.pie(genre_counts, values='count', names='Genres', title='Genre Distribution', color_discrete_sequence=px.colors.qualitative.Set2)
 
 
71
  fig6.update_layout(width=800, height=400)
72
  st.plotly_chart(fig6)
73
  with tab3:
74
+ st.markdown("**Artist Popularity Heatmap:** Visualizes popularity.")
75
  if 'Artist Name(s)' in df.columns:
76
+ artist_pop = df.groupby('Artist Name(s)')['Popularity'].mean().reset_index()
77
+ fig7 = px.imshow(pd.pivot_table(df, values='Popularity', index='Artist Name(s)', aggfunc='mean').fillna(0), title='Artist Popularity Heatmap', color_continuous_scale='Reds')
 
 
78
  fig7.update_layout(width=800, height=400)
79
  st.plotly_chart(fig7)
80
  else:
81
  st.error("Cannot plot: 'Artist Name(s)' column missing.")
82
 
 
83
  def generate_explicit_trends(df):
84
  st.header("Explicit Content Trends")
85
+ st.markdown("**Explicit vs Non-Explicit Songs:** Compares content.")
86
  if 'Decade' in df.columns and 'Explicit' in df.columns:
87
+ explicit_by_decade = df.groupby(['Decade', 'Explicit']).size().unstack().fillna(0)
88
+ fig8 = px.bar(explicit_by_decade, barmode='stack', title='Explicit vs Non-Explicit Songs by Decade', color_discrete_sequence=['green', 'purple'])
 
 
89
  fig8.update_layout(template='plotly_white', width=800, height=400)
90
  st.plotly_chart(fig8)
91
  else:
92
  st.error("Cannot plot: 'Decade' or 'Explicit' column missing.")
93
 
 
94
  def generate_album_insights(df):
95
  st.header("Album & Label Insights")
96
  tab1, tab2 = st.tabs(["Top Labels", "Album Popularity"])
97
  with tab1:
98
+ st.markdown("**Top Record Labels:** Identifies top labels.")
99
  if 'Label' in df.columns:
100
  top_labels = df['Label'].value_counts().nlargest(10).reset_index()
101
+ fig9 = px.bar(top_labels, x='Label', y='count', title='Top Record Labels by Song Count', color_discrete_sequence=['blue'])
 
102
  fig9.update_layout(template='plotly_white', width=800, height=400)
103
  st.plotly_chart(fig9)
104
  else:
105
  st.error("Cannot plot: 'Label' column missing.")
106
  with tab2:
107
+ st.markdown("**Album Popularity:** Shows album trends.")
108
  if 'Album Name' in df.columns and 'Popularity' in df.columns:
109
+ album_pop = df.groupby('Album Name')['Popularity'].agg(['mean', 'count']).reset_index()
110
+ fig10 = px.scatter(album_pop, x='count', y='mean', size='mean', hover_data=['Album Name'], title='Albums: Song Count vs Average Popularity', color_discrete_sequence=['red'])
 
 
111
  fig10.update_layout(template='plotly_white', width=800, height=400)
112
  st.plotly_chart(fig10)
113
  else:
114
  st.error("Cannot plot: 'Album Name' or 'Popularity' column missing.")
115
 
 
116
  def generate_tempo_mood(df):
117
  st.header("Tempo & Mood Analysis")
118
  tab1, tab2 = st.tabs(["Tempo Trends", "Mood Scatter"])
119
  with tab1:
120
+ st.markdown("**Tempo Trends:** Tracks tempo changes.")
121
  if 'Year' in df.columns and 'Tempo' in df.columns:
122
  tempo_by_year = df.groupby('Year')['Tempo'].mean().reset_index()
123
+ fig11 = px.line(tempo_by_year, x='Year', y='Tempo', title='Average Tempo Over Time', color_discrete_sequence=['orange'])
 
124
  fig11.update_layout(template='plotly_white', width=800, height=400)
125
  st.plotly_chart(fig11)
126
  else:
127
  st.error("Cannot plot: 'Year' or 'Tempo' column missing.")
128
  with tab2:
129
+ st.markdown("**Valence vs Energy:** Groups mood patterns.")
130
  if 'Valence' in df.columns and 'Energy' in df.columns:
131
+ fig12 = px.scatter(df, x='Valence', y='Energy', title='Valence vs Energy', hover_data=['Track Name'], color_discrete_sequence=['purple'])
 
132
  fig12.update_layout(template='plotly_white', width=800, height=400)
133
  st.plotly_chart(fig12)
134
  else:
135
  st.error("Cannot plot: 'Valence' or 'Energy' column missing.")
136
 
 
137
  def generate_top_artists_songs(df):
138
  st.header("Top Artists and Songs")
139
  tab1, tab2 = st.tabs(["Top Artists", "Top Songs"])
140
  with tab1:
141
+ st.markdown("**Most Featured Artists:** Shows top artists.")
142
  if 'Artist Name(s)' in df.columns:
143
+ top_artists = df['Artist Name(s)'].value_counts().nlargest(10).reset_index()
144
+ fig13 = px.bar(top_artists, x='Artist Name(s)', y='count', title='Most Featured Artists', color_discrete_sequence=['green'])
 
 
145
  fig13.update_layout(template='plotly_white', width=800, height=400)
146
  st.plotly_chart(fig13)
147
  else:
148
  st.error("Cannot plot: 'Artist Name(s)' column missing.")
149
  with tab2:
150
+ st.markdown("**Top 10 Songs:** Lists top songs.")
 
151
  if 'Track Name' in df.columns and 'Popularity' in df.columns:
152
+ top_songs = df.nlargest(10, 'Popularity')[['Track Name', 'Popularity']]
153
+ fig14 = px.bar(top_songs, y='Track Name', x='Popularity', orientation='h', title='Top 10 Songs by Popularity', color_discrete_sequence=['blue'])
 
 
154
  fig14.update_layout(template='plotly_white', width=800, height=400)
155
  st.plotly_chart(fig14)
156
  else:
157
  st.error("Cannot plot: 'Track Name' or 'Popularity' column missing.")
158
 
 
159
  def generate_album_release_trends(df):
160
  st.header("Album Release Trends")
161
  tab1, tab2 = st.tabs(["Albums per Year", "Artist-Year Heatmap"])
162
  with tab1:
163
+ st.markdown("**Albums per Year:** Tracks release patterns.")
164
  if 'Year' in df.columns:
165
+ albums_per_year = df['Year'].value_counts().sort_index().reset_index()
166
+ fig15 = px.line(albums_per_year, x='Year', y='count', title='Number of Albums Released per Year', color_discrete_sequence=['purple'])
 
 
167
  fig15.update_layout(template='plotly_white', width=800, height=400)
168
  st.plotly_chart(fig15)
169
  else:
170
  st.error("Cannot plot: 'Year' column missing.")
171
  with tab2:
172
+ st.markdown("**Songs by Artists and Years:** Visualizes trends.")
173
  if 'Artist Name(s)' in df.columns and 'Year' in df.columns:
174
+ artist_year = df.groupby(['Artist Name(s)', 'Year']).size().unstack().fillna(0)
175
+ fig16 = px.imshow(artist_year, title='Songs Released by Artists Across Years', color_continuous_scale='Viridis')
 
 
176
  fig16.update_layout(width=800, height=400)
177
  st.plotly_chart(fig16)
178
  else:
179
  st.error("Cannot plot: 'Artist Name(s)' or 'Year' column missing.")
180
 
 
181
  def generate_duration_analysis(df):
182
  st.header("Track Duration Analysis")
183
  tab1, tab2 = st.tabs(["Distribution", "By Decade"])
184
  with tab1:
185
+ st.markdown("**Track Duration Distribution:** Shows duration lengths.")
186
  if 'Track Duration (ms)' in df.columns:
187
+ fig17 = px.histogram(df, x='Track Duration (ms)', title='Distribution of Track Durations', color_discrete_sequence=['orange'])
 
188
  fig17.update_layout(template='plotly_white', width=800, height=400)
189
  st.plotly_chart(fig17)
190
  else:
191
  st.error("Cannot plot: 'Track Duration (ms)' column missing.")
192
  with tab2:
193
+ st.markdown("**Duration by Decade:** Compares durations.")
194
  if 'Decade' in df.columns and 'Track Duration (ms)' in df.columns:
195
+ fig18 = px.box(df, x='Decade', y='Track Duration (ms)', title='Track Duration by Decade', color_discrete_sequence=['green'])
 
196
  fig18.update_layout(template='plotly_white', width=800, height=400)
197
  st.plotly_chart(fig18)
198
  else:
199
+ st.error("Cannot plot: 'Decade' or 'Track Duration (ms)' column missing.")
 
 
200
 
201
  def generate_streaming_insights(df):
202
  st.header("Streaming and Engagement Insights")
203
  tab1, tab2 = st.tabs(["Popularity vs Duration", "Time Signature"])
204
  with tab1:
205
+ st.markdown("**Popularity vs Duration:** Explores engagement trends.")
206
  if 'Track Duration (ms)' in df.columns and 'Popularity' in df.columns:
207
+ fig19 = px.scatter(df, x='Track Duration (ms)', y='Popularity', title='Popularity vs Track Duration', color_discrete_sequence=['blue'])
 
208
  fig19.update_layout(template='plotly_white', width=800, height=400)
209
  st.plotly_chart(fig19)
210
  else:
211
+ st.error("Cannot plot: 'Track Duration (ms)' or 'Popularity' column missing.")
 
212
  with tab2:
213
+ st.markdown("**Popularity by Time Signature:** Compares popularity.")
214
  if 'Time Signature' in df.columns and 'Popularity' in df.columns:
215
+ pop_by_time = df.groupby('Time Signature')['Popularity'].mean().reset_index()
216
+ fig20 = px.bar(pop_by_time, x='Time Signature', y='Popularity', title='Average Popularity by Time Signature', color_discrete_sequence=['purple'])
 
 
217
  fig20.update_layout(template='plotly_white', width=800, height=400)
218
  st.plotly_chart(fig20)
219
  else:
220
+ st.error("Cannot plot: 'Time Signature' or 'Popularity' column missing.")
 
 
221
 
222
  def generate_feature_comparisons(df):
223
  st.header("Feature Comparisons Across Decades")
224
  tab1, tab2 = st.tabs(["Feature Comparison", "Loudness Trends"])
225
  with tab1:
226
+ st.markdown("**Feature Comparison:** Compares features across decades.")
227
  if 'Decade' in df.columns:
228
+ features_by_decade = df.groupby('Decade')[['Danceability', 'Energy', 'Valence']].mean().reset_index()
 
229
  fig21 = px.bar(features_by_decade.melt(id_vars='Decade'), x='Decade', y='value', color='variable',
230
  barmode='group', title='Feature Comparison by Decade', color_discrete_sequence=px.colors.qualitative.Pastel)
231
  fig21.update_layout(template='plotly_white', width=800, height=400)
 
233
  else:
234
  st.error("Cannot plot: 'Decade' column missing.")
235
  with tab2:
236
+ st.markdown("**Loudness Over Time:** Tracks loudness trends.")
237
  if 'Year' in df.columns and 'Loudness' in df.columns:
238
+ loudness_by_year = df.groupby('Year')['Loudness'].mean().reset_index()
239
+ fig22 = px.line(loudness_by_year, x='Year', y='Loudness', title='Average Loudness Over Time', color_discrete_sequence=['green'])
 
 
240
  fig22.update_layout(template='plotly_white', width=800, height=400)
241
  st.plotly_chart(fig22)
242
  else:
243
  st.error("Cannot plot: 'Year' or 'Loudness' column missing.")
244
 
 
245
  def generate_network_analysis(df):
246
  st.header("Network Analysis")
247
  tab1, tab2 = st.tabs(["Artist Collaborations", "Genre Crossover"])
248
  with tab1:
249
+ st.markdown("**Artist Collaborations:** Visualizes artist connections.")
250
  if 'Artist Name(s)' in df.columns:
 
251
  valid_artists = df['Artist Name(s)'].dropna().astype(str)
252
  G = nx.Graph()
253
  for artists in valid_artists:
254
+ artists_list = [a.strip() for a in artists.split(',') if a.strip()]
255
+ if len(artists_list) > 1:
 
256
  for a1, a2 in combinations(artists_list, 2):
257
  G.add_edge(a1, a2)
258
  if G.number_of_nodes() > 0:
 
 
259
  pos = nx.spring_layout(G)
260
  edge_x = []
261
  edge_y = []
 
294
  else:
295
  st.error("Cannot plot: 'Artist Name(s)' column missing.")
296
  with tab2:
297
+ st.markdown("**Genre Crossover:** Placeholder for future visualization.")
298
  st.write("To implement, install `holoviews` and use the following code:")
299
  st.code("""
300
  import holoviews as hv
 
303
  chord_data = genre_pairs.groupby(['Genres_x', 'Genres_y']).size().reset_index(name='value')
304
  chord = hv.Chord(chord_data).opts(title="Genre Crossover")
305
  st.write(hv.render(chord, backend='bokeh'))
306
+ """)
models/__pycache__/data_processor.cpython-310.pyc CHANGED
Binary files a/models/__pycache__/data_processor.cpython-310.pyc and b/models/__pycache__/data_processor.cpython-310.pyc differ
 
models/data_processor.py CHANGED
@@ -4,7 +4,7 @@ import streamlit as st
4
  def load_data():
5
  try:
6
  df = pd.read_csv('data/music_data.csv', on_bad_lines='skip')
7
- st.write("**Raw Data Sample:**", df.head())
8
  except FileNotFoundError:
9
  st.error("Error: 'data/music_data.csv' not found. Please ensure the file exists.")
10
  return pd.DataFrame()
@@ -23,6 +23,8 @@ def load_data():
23
  df['Year'] = pd.to_datetime(df['Album Release Date'], errors='coerce').dt.year
24
  df['Year'] = df['Year'].fillna(0).astype(int)
25
  df['Decade'] = (df['Year'] // 10 * 10).astype(int)
 
 
26
 
27
  df['Genres'] = df['Artist Genres'].fillna('Unknown').str.split(',').apply(lambda x: [g.strip() for g in x])
28
  df['Popularity'] = pd.to_numeric(df['Popularity'], errors='coerce').fillna(0)
@@ -30,6 +32,5 @@ def load_data():
30
  if 'Decade' not in df.columns:
31
  st.error("Failed to create 'Decade' column")
32
  return df
33
- st.write("**Processed Data Sample:**", df[['Track Name', 'Year', 'Decade', 'Popularity']].head())
34
-
35
  return df
 
4
  def load_data():
5
  try:
6
  df = pd.read_csv('data/music_data.csv', on_bad_lines='skip')
7
+ st.write("**Raw Data Sample:**", df.head()) # Temporary for debugging, will be removed
8
  except FileNotFoundError:
9
  st.error("Error: 'data/music_data.csv' not found. Please ensure the file exists.")
10
  return pd.DataFrame()
 
23
  df['Year'] = pd.to_datetime(df['Album Release Date'], errors='coerce').dt.year
24
  df['Year'] = df['Year'].fillna(0).astype(int)
25
  df['Decade'] = (df['Year'] // 10 * 10).astype(int)
26
+ # Remove rows where Decade is 0
27
+ df = df[df['Decade'] != 0]
28
 
29
  df['Genres'] = df['Artist Genres'].fillna('Unknown').str.split(',').apply(lambda x: [g.strip() for g in x])
30
  df['Popularity'] = pd.to_numeric(df['Popularity'], errors='coerce').fillna(0)
 
32
  if 'Decade' not in df.columns:
33
  st.error("Failed to create 'Decade' column")
34
  return df
35
+ # Removed Processed Data Sample output as per requirement
 
36
  return df