Spaces:

Ezhil24
/

DataVisualizatioin_spotify

Sleeping

App Files Community

Ezhil committed on Mar 5, 2025

Commit

873154b

1 Parent(s): 6ce9997

Changes in spotify logo, access of raw data, removed raw data sample preview

Browse files

Files changed (5) hide show

app.py +22 -15
functions/__pycache__/visualizations.cpython-310.pyc +0 -0
functions/visualizations.py +68 -127
models/__pycache__/data_processor.cpython-310.pyc +0 -0
models/data_processor.py +4 -3

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import os
-import streamlit as st
 import pandas as pd
 from models.data_processor import load_data
 from functions.visualizations import (
     generate_popularity_trends, generate_audio_features, generate_genre_analysis,
@@ -12,16 +12,13 @@ from functions.visualizations import (
 # Load Data
 df = load_data()
-# Sidebar - Add Spotify Logo from Local File
-logo_path = "assests/spotify-logo.png"  # Adjust this if needed
-if os.path.exists(logo_path):
-    st.sidebar.image(logo_path, use_column_width=True)
 # Sidebar - Title & Filters
 st.sidebar.title("Music Data Analysis")
-st.sidebar.markdown("[View Raw Data](data/music_data.csv)",
-                    unsafe_allow_html=True)
 analysis_option = st.sidebar.selectbox(
     "Choose Analysis",
     [
@@ -43,38 +40,48 @@ else:
         "No data loaded or 'Decade' column missing. Check the 'data' folder.")
     filtered_df = pd.DataFrame()
 # Main Content
 st.title("Music Data Analysis Dashboard")
 st.markdown("Explore trends and insights from a diverse music dataset.")
-if not df.empty:
-    st.write("### Raw Data Sample")
-    st.dataframe(df.head())
-else:
-    st.error("Failed to load raw data. Check the 'data/music_data.csv' file.")
-# Call Analysis Functions Based on Selection
 if analysis_option == "Popularity Trends Over Time":
     generate_popularity_trends(filtered_df)
 elif analysis_option == "Audio Features Analysis":
     generate_audio_features(filtered_df)
 elif analysis_option == "Genre & Artist Analysis":
     generate_genre_analysis(filtered_df)
 elif analysis_option == "Explicit Content Trends":
     generate_explicit_trends(filtered_df)
 elif analysis_option == "Album & Label Insights":
     generate_album_insights(filtered_df)
 elif analysis_option == "Tempo & Mood Analysis":
     generate_tempo_mood(filtered_df)
 elif analysis_option == "Top Artists and Songs":
     generate_top_artists_songs(filtered_df)
 elif analysis_option == "Album Release Trends":
     generate_album_release_trends(filtered_df)
 elif analysis_option == "Track Duration Analysis":
     generate_duration_analysis(filtered_df)
 elif analysis_option == "Streaming and Engagement Insights":
     generate_streaming_insights(filtered_df)
 elif analysis_option == "Feature Comparisons Across Decades":
     generate_feature_comparisons(filtered_df)
 elif analysis_option == "Network Analysis":
     generate_network_analysis(filtered_df)

 import os
 import pandas as pd
+import streamlit as st
 from models.data_processor import load_data
 from functions.visualizations import (
     generate_popularity_trends, generate_audio_features, generate_genre_analysis,
 # Load Data
 df = load_data()
+# Sidebar - Add Spotify Logo from URL at left top middle
+# Using a reliable Spotify logo URL (fallback to green logo)
+st.sidebar.image("https://upload.wikimedia.org/wikipedia/commons/1/19/Spotify_logo_without_text.svg",
+                 width=150, caption="Spotify", use_column_width=False)
 # Sidebar - Title & Filters
 st.sidebar.title("Music Data Analysis")
 analysis_option = st.sidebar.selectbox(
     "Choose Analysis",
     [
         "No data loaded or 'Decade' column missing. Check the 'data' folder.")
     filtered_df = pd.DataFrame()
+# Add View Raw Data link at the bottom of the sidebar
+st.sidebar.markdown(
+    "[View Raw Data Source](https://www.kaggle.com/datasets/joebeachcapital/top-10000-spotify-songs-1960-now)", unsafe_allow_html=True)
 # Main Content
 st.title("Music Data Analysis Dashboard")
 st.markdown("Explore trends and insights from a diverse music dataset.")
+# Call Analysis Functions Based on Selection with updated explanations
 if analysis_option == "Popularity Trends Over Time":
+    st.markdown("**Popularity Trends:** Tracks popularity changes over time.")
     generate_popularity_trends(filtered_df)
 elif analysis_option == "Audio Features Analysis":
+    st.markdown("**Audio Features:** Shows feature distributions.")
     generate_audio_features(filtered_df)
 elif analysis_option == "Genre & Artist Analysis":
+    st.markdown("**Genre & Artist:** Highlights top genres.")
     generate_genre_analysis(filtered_df)
 elif analysis_option == "Explicit Content Trends":
+    st.markdown("**Explicit Trends:** Compares explicit songs.")
     generate_explicit_trends(filtered_df)
 elif analysis_option == "Album & Label Insights":
+    st.markdown("**Album & Label:** Displays top labels.")
     generate_album_insights(filtered_df)
 elif analysis_option == "Tempo & Mood Analysis":
+    st.markdown("**Tempo & Mood:** Tracks tempo trends.")
     generate_tempo_mood(filtered_df)
 elif analysis_option == "Top Artists and Songs":
+    st.markdown("**Top Artists/Songs:** Lists top artists and songs.")
     generate_top_artists_songs(filtered_df)
 elif analysis_option == "Album Release Trends":
+    st.markdown("**Album Trends:** Shows release patterns.")
     generate_album_release_trends(filtered_df)
 elif analysis_option == "Track Duration Analysis":
+    st.markdown("**Duration Analysis:** Displays track durations.")
     generate_duration_analysis(filtered_df)
 elif analysis_option == "Streaming and Engagement Insights":
+    st.markdown("**Streaming Insights:** Explores engagement trends.")
     generate_streaming_insights(filtered_df)
 elif analysis_option == "Feature Comparisons Across Decades":
+    st.markdown("**Feature Comparisons:** Compares features across decades.")
     generate_feature_comparisons(filtered_df)
 elif analysis_option == "Network Analysis":
+    st.markdown("**Network Analysis:** Visualizes artist connections.")
     generate_network_analysis(filtered_df)

functions/__pycache__/visualizations.cpython-310.pyc CHANGED Viewed

Binary files a/functions/__pycache__/visualizations.cpython-310.pyc and b/functions/__pycache__/visualizations.cpython-310.pyc differ

functions/visualizations.py CHANGED Viewed

@@ -7,277 +7,225 @@ import networkx as nx
 import plotly.graph_objects as go
 from itertools import combinations
 def generate_popularity_trends(df):
     st.header("Popularity Trends Over Time")
     tab1, tab2 = st.tabs(["Average Popularity", "Individual Songs"])
     with tab1:
-        st.markdown("<span style='color:blue'>**Average Popularity by Decade**</span>: Tracks how song popularity has <span style='color:red'>changed over time</span>. This <span style='color:green'>blue</span> line chart highlights peaks.", unsafe_allow_html=True)
         if 'Decade' in df.columns:
-            avg_pop_by_decade = df.groupby(
-                'Decade')['Popularity'].mean().reset_index()
-            fig1 = px.line(avg_pop_by_decade, x='Decade', y='Popularity',
-                           title='Average Popularity by Decade', color_discrete_sequence=['blue'])
             fig1.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig1)
         else:
             st.error("Cannot plot: 'Decade' column missing.")
     with tab2:
-        st.markdown("<span style='color:blue'>**Song Popularity Over Time**</span>: Highlights individual trends with <span style='color:red'>red</span> points, showing <span style='color:green'>green</span> details on hover.", unsafe_allow_html=True)
         if 'Year' in df.columns:
-            fig2 = px.scatter(df, x='Year', y='Popularity', title='Song Popularity Over Time', hover_data=[
-                              'Track Name', 'Artist Name(s)'], color_discrete_sequence=['red'])
             fig2.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig2)
         else:
             st.error("Cannot plot: 'Year' column missing.")
 def generate_audio_features(df):
     st.header("Audio Features Analysis")
-    feature = st.selectbox(
-        "Select Feature", ['Danceability', 'Energy', 'Tempo', 'Loudness'])
     tab1, tab2, tab3 = st.tabs(["Distribution", "By Decade", "Correlations"])
     with tab1:
-        st.markdown(
-            f"<span style='color:blue'>**Distribution of {feature}**</span>: Shows variation in <span style='color:red'>{feature.lower()}</span> with <span style='color:green'>green</span> bars.", unsafe_allow_html=True)
-        fig3 = px.histogram(
-            df, x=feature, title=f'Distribution of {feature}', color_discrete_sequence=['green'])
         fig3.update_layout(template='plotly_white', width=800, height=400)
         st.plotly_chart(fig3)
     with tab2:
-        st.markdown(
-            f"<span style='color:blue'>**{feature} by Decade**</span>: Compares <span style='color:red'>{feature.lower()}</span> across decades with <span style='color:green'>green</span> boxes.", unsafe_allow_html=True)
         if 'Decade' in df.columns:
-            fig4 = px.box(df, x='Decade', y=feature,
-                          title=f'{feature} Distribution by Decade', color_discrete_sequence=['green'])
             fig4.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig4)
         else:
             st.error("Cannot plot: 'Decade' column missing.")
     with tab3:
-        st.markdown("<span style='color:blue'>**Feature Correlations**</span>: Explores relationships with <span style='color:red'>multi-colored</span> scatter points.", unsafe_allow_html=True)
         fig, ax = plt.subplots()
         sns.pairplot(df[['Energy', 'Danceability', 'Valence', 'Tempo']])
         st.pyplot(fig)
 def generate_genre_analysis(df):
     st.header("Genre & Artist Analysis")
-    tab1, tab2, tab3 = st.tabs(
-        ["Top Genres", "Genre Distribution", "Artist Popularity"])
     with tab1:
-        st.markdown("<span style='color:blue'>**Top Genres by Decade**</span>: Shows frequent genres with <span style='color:red'>red</span> bars, <span style='color:green'>green</span> highlights.", unsafe_allow_html=True)
         if 'Decade' in df.columns:
-            genre_decade = df.explode('Genres').groupby(
-                ['Decade', 'Genres']).size().reset_index(name='Count')
-            top_genres = genre_decade.groupby('Decade').apply(
-                lambda x: x.nlargest(5, 'Count')).reset_index(drop=True)
-            fig5 = px.bar(top_genres, x='Decade', y='Count', color='Genres',
-                          title='Top Genres by Decade', color_discrete_sequence=px.colors.qualitative.Set1)
             fig5.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig5)
         else:
             st.error("Cannot plot: 'Decade' column missing.")
     with tab2:
-        st.markdown("<span style='color:blue'>**Genre Distribution**</span>: Breaks down genres with <span style='color:red'>multi-colored</span> pie slices.", unsafe_allow_html=True)
-        genre_counts = df.explode(
-            'Genres')['Genres'].value_counts().reset_index()
-        fig6 = px.pie(genre_counts, values='count', names='Genres',
-                      title='Genre Distribution', color_discrete_sequence=px.colors.qualitative.Set2)
         fig6.update_layout(width=800, height=400)
         st.plotly_chart(fig6)
     with tab3:
-        st.markdown("<span style='color:blue'>**Artist Popularity Heatmap**</span>: Visualizes popularity with <span style='color:red'>red</span> intensity.", unsafe_allow_html=True)
         if 'Artist Name(s)' in df.columns:
-            artist_pop = df.groupby('Artist Name(s)')[
-                'Popularity'].mean().reset_index()
-            fig7 = px.imshow(pd.pivot_table(df, values='Popularity', index='Artist Name(s)', aggfunc='mean').fillna(
-                0), title='Artist Popularity Heatmap', color_continuous_scale='Reds')
             fig7.update_layout(width=800, height=400)
             st.plotly_chart(fig7)
         else:
             st.error("Cannot plot: 'Artist Name(s)' column missing.")
 def generate_explicit_trends(df):
     st.header("Explicit Content Trends")
-    st.markdown("<span style='color:blue'>**Explicit vs Non-Explicit Songs**</span>: Compares content with <span style='color:red'>stacked bars</span> in <span style='color:green'>green</span> and <span style='color:purple'>purple</span>.", unsafe_allow_html=True)
     if 'Decade' in df.columns and 'Explicit' in df.columns:
-        explicit_by_decade = df.groupby(
-            ['Decade', 'Explicit']).size().unstack().fillna(0)
-        fig8 = px.bar(explicit_by_decade, barmode='stack',
-                      title='Explicit vs Non-Explicit Songs by Decade', color_discrete_sequence=['green', 'purple'])
         fig8.update_layout(template='plotly_white', width=800, height=400)
         st.plotly_chart(fig8)
     else:
         st.error("Cannot plot: 'Decade' or 'Explicit' column missing.")
 def generate_album_insights(df):
     st.header("Album & Label Insights")
     tab1, tab2 = st.tabs(["Top Labels", "Album Popularity"])
     with tab1:
-        st.markdown("<span style='color:blue'>**Top Record Labels**</span>: Identifies labels with <span style='color:red'>blue</span> bars.", unsafe_allow_html=True)
         if 'Label' in df.columns:
             top_labels = df['Label'].value_counts().nlargest(10).reset_index()
-            fig9 = px.bar(top_labels, x='Label', y='count',
-                          title='Top Record Labels by Song Count', color_discrete_sequence=['blue'])
             fig9.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig9)
         else:
             st.error("Cannot plot: 'Label' column missing.")
     with tab2:
-        st.markdown("<span style='color:blue'>**Album Popularity**</span>: Shows albums with <span style='color:red'>red</span> bubbles.", unsafe_allow_html=True)
         if 'Album Name' in df.columns and 'Popularity' in df.columns:
-            album_pop = df.groupby('Album Name')['Popularity'].agg(
-                ['mean', 'count']).reset_index()
-            fig10 = px.scatter(album_pop, x='count', y='mean', size='mean', hover_data=[
-                               'Album Name'], title='Albums: Song Count vs Average Popularity', color_discrete_sequence=['red'])
             fig10.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig10)
         else:
             st.error("Cannot plot: 'Album Name' or 'Popularity' column missing.")
 def generate_tempo_mood(df):
     st.header("Tempo & Mood Analysis")
     tab1, tab2 = st.tabs(["Tempo Trends", "Mood Scatter"])
     with tab1:
-        st.markdown("<span style='color:blue'>**Tempo Trends**</span>: Tracks changes with <span style='color:red'>orange</span> line.", unsafe_allow_html=True)
         if 'Year' in df.columns and 'Tempo' in df.columns:
             tempo_by_year = df.groupby('Year')['Tempo'].mean().reset_index()
-            fig11 = px.line(tempo_by_year, x='Year', y='Tempo',
-                            title='Average Tempo Over Time', color_discrete_sequence=['orange'])
             fig11.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig11)
         else:
             st.error("Cannot plot: 'Year' or 'Tempo' column missing.")
     with tab2:
-        st.markdown("<span style='color:blue'>**Valence vs Energy**</span>: Groups mood with <span style='color:red'>purple</span> points.", unsafe_allow_html=True)
         if 'Valence' in df.columns and 'Energy' in df.columns:
-            fig12 = px.scatter(df, x='Valence', y='Energy', title='Valence vs Energy', hover_data=[
-                               'Track Name'], color_discrete_sequence=['purple'])
             fig12.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig12)
         else:
             st.error("Cannot plot: 'Valence' or 'Energy' column missing.")
 def generate_top_artists_songs(df):
     st.header("Top Artists and Songs")
     tab1, tab2 = st.tabs(["Top Artists", "Top Songs"])
     with tab1:
-        st.markdown("<span style='color:blue'>**Most Featured Artists**</span>: Shows artists with <span style='color:red'>green</span> bars.", unsafe_allow_html=True)
         if 'Artist Name(s)' in df.columns:
-            top_artists = df['Artist Name(s)'].value_counts().nlargest(
-                10).reset_index()
-            fig13 = px.bar(top_artists, x='Artist Name(s)', y='count',
-                           title='Most Featured Artists', color_discrete_sequence=['green'])
             fig13.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig13)
         else:
             st.error("Cannot plot: 'Artist Name(s)' column missing.")
     with tab2:
-        st.markdown(
-            "<span style='color:blue'>**Top 10 Songs**</span>: Lists songs with <span style='color:red'>blue</span> bars.", unsafe_allow_html=True)
         if 'Track Name' in df.columns and 'Popularity' in df.columns:
-            top_songs = df.nlargest(10, 'Popularity')[
-                ['Track Name', 'Popularity']]
-            fig14 = px.bar(top_songs, y='Track Name', x='Popularity', orientation='h',
-                           title='Top 10 Songs by Popularity', color_discrete_sequence=['blue'])
             fig14.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig14)
         else:
             st.error("Cannot plot: 'Track Name' or 'Popularity' column missing.")
 def generate_album_release_trends(df):
     st.header("Album Release Trends")
     tab1, tab2 = st.tabs(["Albums per Year", "Artist-Year Heatmap"])
     with tab1:
-        st.markdown("<span style='color:blue'>**Albums per Year**</span>: Tracks releases with <span style='color:red'>purple</span> line.", unsafe_allow_html=True)
         if 'Year' in df.columns:
-            albums_per_year = df['Year'].value_counts(
-            ).sort_index().reset_index()
-            fig15 = px.line(albums_per_year, x='Year', y='count',
-                            title='Number of Albums Released per Year', color_discrete_sequence=['purple'])
             fig15.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig15)
         else:
             st.error("Cannot plot: 'Year' column missing.")
     with tab2:
-        st.markdown("<span style='color:blue'>**Songs by Artists and Years**</span>: Visualizes with <span style='color:red'>heatmap colors</span>.", unsafe_allow_html=True)
         if 'Artist Name(s)' in df.columns and 'Year' in df.columns:
-            artist_year = df.groupby(
-                ['Artist Name(s)', 'Year']).size().unstack().fillna(0)
-            fig16 = px.imshow(
-                artist_year, title='Songs Released by Artists Across Years', color_continuous_scale='Viridis')
             fig16.update_layout(width=800, height=400)
             st.plotly_chart(fig16)
         else:
             st.error("Cannot plot: 'Artist Name(s)' or 'Year' column missing.")
 def generate_duration_analysis(df):
     st.header("Track Duration Analysis")
     tab1, tab2 = st.tabs(["Distribution", "By Decade"])
     with tab1:
-        st.markdown("<span style='color:blue'>**Track Duration Distribution**</span>: Shows lengths with <span style='color:red'>orange</span> bars.", unsafe_allow_html=True)
         if 'Track Duration (ms)' in df.columns:
-            fig17 = px.histogram(df, x='Track Duration (ms)',
-                                 title='Distribution of Track Durations', color_discrete_sequence=['orange'])
             fig17.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig17)
         else:
             st.error("Cannot plot: 'Track Duration (ms)' column missing.")
     with tab2:
-        st.markdown("<span style='color:blue'>**Duration by Decade**</span>: Compares with <span style='color:red'>green</span> boxes.", unsafe_allow_html=True)
         if 'Decade' in df.columns and 'Track Duration (ms)' in df.columns:
-            fig18 = px.box(df, x='Decade', y='Track Duration (ms)',
-                           title='Track Duration by Decade', color_discrete_sequence=['green'])
             fig18.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig18)
         else:
-            st.error(
-                "Cannot plot: 'Decade' or 'Track Duration (ms)' column missing.")
 def generate_streaming_insights(df):
     st.header("Streaming and Engagement Insights")
     tab1, tab2 = st.tabs(["Popularity vs Duration", "Time Signature"])
     with tab1:
-        st.markdown("<span style='color:blue'>**Popularity vs Duration**</span>: Explores trends with <span style='color:red'>blue</span> scatter.", unsafe_allow_html=True)
         if 'Track Duration (ms)' in df.columns and 'Popularity' in df.columns:
-            fig19 = px.scatter(df, x='Track Duration (ms)', y='Popularity',
-                               title='Popularity vs Track Duration', color_discrete_sequence=['blue'])
             fig19.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig19)
         else:
-            st.error(
-                "Cannot plot: 'Track Duration (ms)' or 'Popularity' column missing.")
     with tab2:
-        st.markdown("<span style='color:blue'>**Popularity by Time Signature**</span>: Compares with <span style='color:red'>purple</span> bars.", unsafe_allow_html=True)
         if 'Time Signature' in df.columns and 'Popularity' in df.columns:
-            pop_by_time = df.groupby('Time Signature')[
-                'Popularity'].mean().reset_index()
-            fig20 = px.bar(pop_by_time, x='Time Signature', y='Popularity',
-                           title='Average Popularity by Time Signature', color_discrete_sequence=['purple'])
             fig20.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig20)
         else:
-            st.error(
-                "Cannot plot: 'Time Signature' or 'Popularity' column missing.")
 def generate_feature_comparisons(df):
     st.header("Feature Comparisons Across Decades")
     tab1, tab2 = st.tabs(["Feature Comparison", "Loudness Trends"])
     with tab1:
-        st.markdown("<span style='color:blue'>**Feature Comparison**</span>: Compares features with <span style='color:red'>multi-colored</span> bars.", unsafe_allow_html=True)
         if 'Decade' in df.columns:
-            features_by_decade = df.groupby(
-                'Decade')[['Danceability', 'Energy', 'Valence']].mean().reset_index()
             fig21 = px.bar(features_by_decade.melt(id_vars='Decade'), x='Decade', y='value', color='variable',
                            barmode='group', title='Feature Comparison by Decade', color_discrete_sequence=px.colors.qualitative.Pastel)
             fig21.update_layout(template='plotly_white', width=800, height=400)
@@ -285,36 +233,29 @@ def generate_feature_comparisons(df):
         else:
             st.error("Cannot plot: 'Decade' column missing.")
     with tab2:
-        st.markdown("<span style='color:blue'>**Loudness Over Time**</span>: Tracks with <span style='color:red'>green</span> line.", unsafe_allow_html=True)
         if 'Year' in df.columns and 'Loudness' in df.columns:
-            loudness_by_year = df.groupby(
-                'Year')['Loudness'].mean().reset_index()
-            fig22 = px.line(loudness_by_year, x='Year', y='Loudness',
-                            title='Average Loudness Over Time', color_discrete_sequence=['green'])
             fig22.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig22)
         else:
             st.error("Cannot plot: 'Year' or 'Loudness' column missing.")
 def generate_network_analysis(df):
     st.header("Network Analysis")
     tab1, tab2 = st.tabs(["Artist Collaborations", "Genre Crossover"])
     with tab1:
-        st.markdown("<span style='color:blue'>**Artist Collaborations**</span>: Visualizes connections with <span style='color:red'>interactive red nodes</span>. Hover for details.", unsafe_allow_html=True)
         if 'Artist Name(s)' in df.columns:
-            # Filter out non-string values and handle missing data
             valid_artists = df['Artist Name(s)'].dropna().astype(str)
             G = nx.Graph()
             for artists in valid_artists:
-                artists_list = [a.strip() for a in artists.split(
-                    ',') if a.strip()]  # Split and clean
-                if len(artists_list) > 1:  # Check length of list
                     for a1, a2 in combinations(artists_list, 2):
                         G.add_edge(a1, a2)
             if G.number_of_nodes() > 0:
-                # Convert to Plotly format
-                # Use spring layout for better spacing
                 pos = nx.spring_layout(G)
                 edge_x = []
                 edge_y = []
@@ -353,7 +294,7 @@ def generate_network_analysis(df):
         else:
             st.error("Cannot plot: 'Artist Name(s)' column missing.")
     with tab2:
-        st.markdown("<span style='color:blue'>**Genre Crossover**</span>: Placeholder with <span style='color:red'>future multi-color</span> potential.", unsafe_allow_html=True)
         st.write("To implement, install `holoviews` and use the following code:")
         st.code("""
         import holoviews as hv
@@ -362,4 +303,4 @@ def generate_network_analysis(df):
         chord_data = genre_pairs.groupby(['Genres_x', 'Genres_y']).size().reset_index(name='value')
         chord = hv.Chord(chord_data).opts(title="Genre Crossover")
         st.write(hv.render(chord, backend='bokeh'))
-        """)

 import plotly.graph_objects as go
 from itertools import combinations
 def generate_popularity_trends(df):
     """Render the "Popularity Trends Over Time" section.

     Two tabs are drawn: a line chart of the mean ``Popularity`` per
     ``Decade`` and a scatter plot of every row's ``Popularity`` against
     ``Year``. A tab whose required columns are absent shows a Streamlit
     error instead of letting pandas/plotly raise.

     Args:
         df: pandas DataFrame of tracks. ``Track Name`` and
             ``Artist Name(s)`` are added to the scatter hover text only
             when they are present.
     """
     st.header("Popularity Trends Over Time")
     tab1, tab2 = st.tabs(["Average Popularity", "Individual Songs"])
     with tab1:
         st.markdown("**Average Popularity by Decade:** Tracks popularity changes over time.")
         if 'Decade' not in df.columns:
             st.error("Cannot plot: 'Decade' column missing.")
         elif 'Popularity' not in df.columns:
             st.error("Cannot plot: 'Popularity' column missing.")
         else:
             avg_pop_by_decade = df.groupby('Decade')['Popularity'].mean().reset_index()
             fig1 = px.line(
                 avg_pop_by_decade,
                 x='Decade',
                 y='Popularity',
                 title='Average Popularity by Decade',
                 color_discrete_sequence=['blue'],
             )
             fig1.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig1)
     with tab2:
         st.markdown("**Song Popularity Over Time:** Highlights individual trends.")
         if 'Year' not in df.columns:
             st.error("Cannot plot: 'Year' column missing.")
         elif 'Popularity' not in df.columns:
             st.error("Cannot plot: 'Popularity' column missing.")
         else:
             # plotly raises on unknown hover columns, so pass only those that exist.
             hover_columns = [c for c in ('Track Name', 'Artist Name(s)') if c in df.columns]
             fig2 = px.scatter(
                 df,
                 x='Year',
                 y='Popularity',
                 title='Song Popularity Over Time',
                 hover_data=hover_columns or None,
                 color_discrete_sequence=['red'],
             )
             fig2.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig2)
 def generate_audio_features(df):
     """Render the "Audio Features Analysis" section.

     The user picks one feature from a select box; the tabs then show its
     histogram, its per-decade box plot, and a seaborn pair plot of
     Energy / Danceability / Valence / Tempo. Each tab reports missing
     columns through ``st.error`` instead of raising.

     Args:
         df: pandas DataFrame of tracks holding the audio-feature columns
             and, for the second tab, ``Decade``.
     """
     st.header("Audio Features Analysis")
     feature = st.selectbox("Select Feature", ['Danceability', 'Energy', 'Tempo', 'Loudness'])
     tab1, tab2, tab3 = st.tabs(["Distribution", "By Decade", "Correlations"])
     with tab1:
         st.markdown(f"**Distribution of {feature}:** Shows feature variations.")
         if feature in df.columns:
             fig3 = px.histogram(
                 df,
                 x=feature,
                 title=f'Distribution of {feature}',
                 color_discrete_sequence=['green'],
             )
             fig3.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig3)
         else:
             st.error(f"Cannot plot: '{feature}' column missing.")
     with tab2:
         st.markdown(f"**{feature} by Decade:** Compares across decades.")
         if 'Decade' not in df.columns:
             st.error("Cannot plot: 'Decade' column missing.")
         elif feature not in df.columns:
             st.error(f"Cannot plot: '{feature}' column missing.")
         else:
             fig4 = px.box(
                 df,
                 x='Decade',
                 y=feature,
                 title=f'{feature} Distribution by Decade',
                 color_discrete_sequence=['green'],
             )
             fig4.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig4)
     with tab3:
         st.markdown("**Feature Correlations:** Explores relationships.")
         pair_columns = ['Energy', 'Danceability', 'Valence', 'Tempo']
         missing = [c for c in pair_columns if c not in df.columns]
         if missing:
             st.error(f"Cannot plot: missing column(s): {', '.join(missing)}.")
         else:
             # pairplot builds its own figure; rendering a separately created
             # plt.subplots() figure would display an empty canvas.
             grid = sns.pairplot(df[pair_columns])
             st.pyplot(grid.figure)
             # Release the figure so repeated Streamlit reruns do not pile them up.
             plt.close(grid.figure)
 def generate_genre_analysis(df):
     """Render the "Genre & Artist Analysis" section.

     Tabs: the five most frequent genres per decade (bar chart), the overall
     genre share (pie chart), and the mean popularity per artist (heatmap).
     ``Genres`` is exploded, so each cell is treated as a list of genre
     names. Missing columns are reported with ``st.error``.

     Args:
         df: pandas DataFrame of tracks with ``Decade``, ``Genres``,
             ``Artist Name(s)`` and ``Popularity`` columns.
     """
     st.header("Genre & Artist Analysis")
     tab1, tab2, tab3 = st.tabs(["Top Genres", "Genre Distribution", "Artist Popularity"])
     with tab1:
         st.markdown("**Top Genres by Decade:** Highlights frequent genres.")
         if 'Decade' not in df.columns:
             st.error("Cannot plot: 'Decade' column missing.")
         elif 'Genres' not in df.columns:
             st.error("Cannot plot: 'Genres' column missing.")
         else:
             genre_decade = df.explode('Genres').groupby(['Decade', 'Genres']).size().reset_index(name='Count')
             top_genres = genre_decade.groupby('Decade').apply(lambda x: x.nlargest(5, 'Count')).reset_index(drop=True)
             fig5 = px.bar(
                 top_genres,
                 x='Decade',
                 y='Count',
                 color='Genres',
                 title='Top Genres by Decade',
                 color_discrete_sequence=px.colors.qualitative.Set1,
             )
             fig5.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig5)
     with tab2:
         st.markdown("**Genre Distribution:** Breaks down genres.")
         if 'Genres' in df.columns:
             # Name both columns explicitly: a bare value_counts().reset_index()
             # only produces a 'count' column on pandas >= 2.0.
             genre_counts = (
                 df.explode('Genres')['Genres'].value_counts()
                 .rename_axis('Genres').reset_index(name='count')
             )
             fig6 = px.pie(
                 genre_counts,
                 values='count',
                 names='Genres',
                 title='Genre Distribution',
                 color_discrete_sequence=px.colors.qualitative.Set2,
             )
             fig6.update_layout(width=800, height=400)
             st.plotly_chart(fig6)
         else:
             st.error("Cannot plot: 'Genres' column missing.")
     with tab3:
         st.markdown("**Artist Popularity Heatmap:** Visualizes popularity.")
         if 'Artist Name(s)' not in df.columns:
             st.error("Cannot plot: 'Artist Name(s)' column missing.")
         elif 'Popularity' not in df.columns:
             st.error("Cannot plot: 'Popularity' column missing.")
         else:
             # One row per artist, single 'Popularity' column holding the mean.
             artist_popularity = pd.pivot_table(
                 df, values='Popularity', index='Artist Name(s)', aggfunc='mean'
             ).fillna(0)
             fig7 = px.imshow(
                 artist_popularity,
                 title='Artist Popularity Heatmap',
                 color_continuous_scale='Reds',
             )
             fig7.update_layout(width=800, height=400)
             st.plotly_chart(fig7)
 def generate_explicit_trends(df):
     """Show a stacked bar chart of song counts per decade, split by ``Explicit``.

     Both the ``Decade`` and ``Explicit`` columns are required; when either
     is absent a Streamlit error is shown and nothing is plotted.

     Args:
         df: pandas DataFrame of tracks.
     """
     st.header("Explicit Content Trends")
     st.markdown("**Explicit vs Non-Explicit Songs:** Compares content.")
     has_columns = 'Decade' in df.columns and 'Explicit' in df.columns
     if not has_columns:
         st.error("Cannot plot: 'Decade' or 'Explicit' column missing.")
         return
     # Rows are decades, columns are the Explicit values, cells are song
     # counts (0 where a combination never occurs).
     song_counts = df.groupby(['Decade', 'Explicit']).size()
     counts_table = song_counts.unstack().fillna(0)
     chart = px.bar(
         counts_table,
         barmode='stack',
         title='Explicit vs Non-Explicit Songs by Decade',
         color_discrete_sequence=['green', 'purple'],
     )
     chart.update_layout(template='plotly_white', width=800, height=400)
     st.plotly_chart(chart)
 def generate_album_insights(df):
     """Render the "Album & Label Insights" section.

     Tabs: the ten labels with the most rows (bar chart) and, per album,
     the number of songs against the mean popularity (scatter plot sized by
     mean popularity). Missing columns are reported with ``st.error``.

     Args:
         df: pandas DataFrame of tracks with ``Label``, ``Album Name`` and
             ``Popularity`` columns.
     """
     st.header("Album & Label Insights")
     tab1, tab2 = st.tabs(["Top Labels", "Album Popularity"])
     with tab1:
         st.markdown("**Top Record Labels:** Identifies top labels.")
         if 'Label' in df.columns:
             # Name both columns explicitly: a bare value_counts().reset_index()
             # only produces a 'count' column on pandas >= 2.0.
             top_labels = (
                 df['Label'].value_counts().nlargest(10)
                 .rename_axis('Label').reset_index(name='count')
             )
             fig9 = px.bar(
                 top_labels,
                 x='Label',
                 y='count',
                 title='Top Record Labels by Song Count',
                 color_discrete_sequence=['blue'],
             )
             fig9.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig9)
         else:
             st.error("Cannot plot: 'Label' column missing.")
     with tab2:
         st.markdown("**Album Popularity:** Shows album trends.")
         if 'Album Name' in df.columns and 'Popularity' in df.columns:
             album_pop = df.groupby('Album Name')['Popularity'].agg(['mean', 'count']).reset_index()
             fig10 = px.scatter(
                 album_pop,
                 x='count',
                 y='mean',
                 size='mean',
                 hover_data=['Album Name'],
                 title='Albums: Song Count vs Average Popularity',
                 color_discrete_sequence=['red'],
             )
             fig10.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig10)
         else:
             st.error("Cannot plot: 'Album Name' or 'Popularity' column missing.")
 def generate_tempo_mood(df):
     """Render the "Tempo & Mood Analysis" section.

     The first tab plots the mean ``Tempo`` per ``Year`` as a line chart;
     the second plots ``Valence`` against ``Energy`` for every row, with
     ``Track Name`` in the hover text. A tab whose checked columns are
     missing shows a Streamlit error instead.

     Args:
         df: pandas DataFrame of tracks.
     """
     layout = dict(template='plotly_white', width=800, height=400)
     st.header("Tempo & Mood Analysis")
     tempo_tab, mood_tab = st.tabs(["Tempo Trends", "Mood Scatter"])
     with tempo_tab:
         st.markdown("**Tempo Trends:** Tracks tempo changes.")
         if not ('Year' in df.columns and 'Tempo' in df.columns):
             st.error("Cannot plot: 'Year' or 'Tempo' column missing.")
         else:
             yearly_tempo = df.groupby('Year')['Tempo'].mean().reset_index()
             tempo_chart = px.line(
                 yearly_tempo,
                 x='Year',
                 y='Tempo',
                 title='Average Tempo Over Time',
                 color_discrete_sequence=['orange'],
             )
             tempo_chart.update_layout(**layout)
             st.plotly_chart(tempo_chart)
     with mood_tab:
         st.markdown("**Valence vs Energy:** Groups mood patterns.")
         if not ('Valence' in df.columns and 'Energy' in df.columns):
             st.error("Cannot plot: 'Valence' or 'Energy' column missing.")
         else:
             mood_chart = px.scatter(
                 df,
                 x='Valence',
                 y='Energy',
                 title='Valence vs Energy',
                 hover_data=['Track Name'],
                 color_discrete_sequence=['purple'],
             )
             mood_chart.update_layout(**layout)
             st.plotly_chart(mood_chart)
 def generate_top_artists_songs(df):
     """Render the "Top Artists and Songs" section.

     Tabs: the ten most frequent ``Artist Name(s)`` values (bar chart) and
     the ten rows with the highest ``Popularity`` (horizontal bar chart).
     Missing columns are reported with ``st.error``.

     Args:
         df: pandas DataFrame of tracks with ``Artist Name(s)``,
             ``Track Name`` and ``Popularity`` columns.
     """
     st.header("Top Artists and Songs")
     tab1, tab2 = st.tabs(["Top Artists", "Top Songs"])
     with tab1:
         st.markdown("**Most Featured Artists:** Shows top artists.")
         if 'Artist Name(s)' in df.columns:
             # Name both columns explicitly: a bare value_counts().reset_index()
             # only produces a 'count' column on pandas >= 2.0.
             top_artists = (
                 df['Artist Name(s)'].value_counts().nlargest(10)
                 .rename_axis('Artist Name(s)').reset_index(name='count')
             )
             fig13 = px.bar(
                 top_artists,
                 x='Artist Name(s)',
                 y='count',
                 title='Most Featured Artists',
                 color_discrete_sequence=['green'],
             )
             fig13.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig13)
         else:
             st.error("Cannot plot: 'Artist Name(s)' column missing.")
     with tab2:
         st.markdown("**Top 10 Songs:** Lists top songs.")
         if 'Track Name' in df.columns and 'Popularity' in df.columns:
             top_songs = df.nlargest(10, 'Popularity')[['Track Name', 'Popularity']]
             fig14 = px.bar(
                 top_songs,
                 y='Track Name',
                 x='Popularity',
                 orientation='h',
                 title='Top 10 Songs by Popularity',
                 color_discrete_sequence=['blue'],
             )
             fig14.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig14)
         else:
             st.error("Cannot plot: 'Track Name' or 'Popularity' column missing.")
 def generate_album_release_trends(df):
     """Render the "Album Release Trends" section.

     Tabs: the number of albums per ``Year`` (line chart) and a heatmap of
     row counts for every ``Artist Name(s)`` / ``Year`` combination.
     Missing columns are reported with ``st.error``.

     Args:
         df: pandas DataFrame of tracks with ``Year`` and ``Artist Name(s)``
             columns; ``Album Name`` is used for the album count when
             present.
     """
     st.header("Album Release Trends")
     tab1, tab2 = st.tabs(["Albums per Year", "Artist-Year Heatmap"])
     with tab1:
         st.markdown("**Albums per Year:** Tracks release patterns.")
         if 'Year' in df.columns:
             if 'Album Name' in df.columns:
                 # Count distinct albums; counting rows would count an album
                 # once per track and inflate the "albums released" figure.
                 per_year = df.groupby('Year')['Album Name'].nunique()
             else:
                 # No album identifier available: fall back to rows per year.
                 per_year = df['Year'].value_counts().sort_index()
             # Explicit column names keep this independent of the pandas
             # version's value_counts()/reset_index() naming.
             albums_per_year = per_year.rename_axis('Year').reset_index(name='count')
             fig15 = px.line(
                 albums_per_year,
                 x='Year',
                 y='count',
                 title='Number of Albums Released per Year',
                 color_discrete_sequence=['purple'],
             )
             fig15.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig15)
         else:
             st.error("Cannot plot: 'Year' column missing.")
     with tab2:
         st.markdown("**Songs by Artists and Years:** Visualizes trends.")
         if 'Artist Name(s)' in df.columns and 'Year' in df.columns:
             # Rows: artists, columns: years, cells: number of rows (0 if none).
             artist_year = df.groupby(['Artist Name(s)', 'Year']).size().unstack().fillna(0)
             fig16 = px.imshow(
                 artist_year,
                 title='Songs Released by Artists Across Years',
                 color_continuous_scale='Viridis',
             )
             fig16.update_layout(width=800, height=400)
             st.plotly_chart(fig16)
         else:
             st.error("Cannot plot: 'Artist Name(s)' or 'Year' column missing.")
 def generate_duration_analysis(df):
     """Render the "Track Duration Analysis" section.

     The first tab shows a histogram of ``Track Duration (ms)``; the second
     shows box plots of the same column grouped by ``Decade``. A tab whose
     checked columns are missing shows a Streamlit error instead.

     Args:
         df: pandas DataFrame of tracks.
     """
     duration_col = 'Track Duration (ms)'
     layout = dict(template='plotly_white', width=800, height=400)
     st.header("Track Duration Analysis")
     dist_tab, decade_tab = st.tabs(["Distribution", "By Decade"])
     with dist_tab:
         st.markdown("**Track Duration Distribution:** Shows duration lengths.")
         if duration_col not in df.columns:
             st.error("Cannot plot: 'Track Duration (ms)' column missing.")
         else:
             histogram = px.histogram(
                 df,
                 x=duration_col,
                 title='Distribution of Track Durations',
                 color_discrete_sequence=['orange'],
             )
             histogram.update_layout(**layout)
             st.plotly_chart(histogram)
     with decade_tab:
         st.markdown("**Duration by Decade:** Compares durations.")
         if not ('Decade' in df.columns and duration_col in df.columns):
             st.error("Cannot plot: 'Decade' or 'Track Duration (ms)' column missing.")
         else:
             box_chart = px.box(
                 df,
                 x='Decade',
                 y=duration_col,
                 title='Track Duration by Decade',
                 color_discrete_sequence=['green'],
             )
             box_chart.update_layout(**layout)
             st.plotly_chart(box_chart)
 def generate_streaming_insights(df):
     """Render the "Streaming and Engagement Insights" section.

     The first tab scatters ``Popularity`` against ``Track Duration (ms)``;
     the second shows the mean ``Popularity`` per ``Time Signature`` as a
     bar chart. A tab whose checked columns are missing shows a Streamlit
     error instead.

     Args:
         df: pandas DataFrame of tracks.
     """
     layout = dict(template='plotly_white', width=800, height=400)
     st.header("Streaming and Engagement Insights")
     duration_tab, signature_tab = st.tabs(["Popularity vs Duration", "Time Signature"])
     with duration_tab:
         st.markdown("**Popularity vs Duration:** Explores engagement trends.")
         if not ('Track Duration (ms)' in df.columns and 'Popularity' in df.columns):
             st.error("Cannot plot: 'Track Duration (ms)' or 'Popularity' column missing.")
         else:
             scatter = px.scatter(
                 df,
                 x='Track Duration (ms)',
                 y='Popularity',
                 title='Popularity vs Track Duration',
                 color_discrete_sequence=['blue'],
             )
             scatter.update_layout(**layout)
             st.plotly_chart(scatter)
     with signature_tab:
         st.markdown("**Popularity by Time Signature:** Compares popularity.")
         if not ('Time Signature' in df.columns and 'Popularity' in df.columns):
             st.error("Cannot plot: 'Time Signature' or 'Popularity' column missing.")
         else:
             mean_popularity = df.groupby('Time Signature')['Popularity'].mean()
             signature_table = mean_popularity.reset_index()
             bars = px.bar(
                 signature_table,
                 x='Time Signature',
                 y='Popularity',
                 title='Average Popularity by Time Signature',
                 color_discrete_sequence=['purple'],
             )
             bars.update_layout(**layout)
             st.plotly_chart(bars)
 def generate_feature_comparisons(df):
     """Render the "Feature Comparisons Across Decades" section.

     Tabs: grouped bars of the mean Danceability / Energy / Valence per
     ``Decade`` and a line chart of the mean ``Loudness`` per ``Year``.
     Missing columns are reported with ``st.error``.

     Args:
         df: pandas DataFrame of tracks with ``Decade``, ``Year`` and the
             audio-feature columns named above.
     """
     st.header("Feature Comparisons Across Decades")
     tab1, tab2 = st.tabs(["Feature Comparison", "Loudness Trends"])
     with tab1:
         st.markdown("**Feature Comparison:** Compares features across decades.")
         feature_columns = ['Danceability', 'Energy', 'Valence']
         missing = [c for c in feature_columns if c not in df.columns]
         if 'Decade' not in df.columns:
             st.error("Cannot plot: 'Decade' column missing.")
         elif missing:
             st.error(f"Cannot plot: missing column(s): {', '.join(missing)}.")
         else:
             features_by_decade = df.groupby('Decade')[feature_columns].mean().reset_index()
             # melt() turns the feature columns into 'variable'/'value' pairs
             # so each feature becomes its own bar group colour.
             fig21 = px.bar(features_by_decade.melt(id_vars='Decade'), x='Decade', y='value', color='variable',
                            barmode='group', title='Feature Comparison by Decade', color_discrete_sequence=px.colors.qualitative.Pastel)
             fig21.update_layout(template='plotly_white', width=800, height=400)
             # The figure must be handed to Streamlit or the tab stays empty.
             st.plotly_chart(fig21)
     with tab2:
         st.markdown("**Loudness Over Time:** Tracks loudness trends.")
         if 'Year' in df.columns and 'Loudness' in df.columns:
             loudness_by_year = df.groupby('Year')['Loudness'].mean().reset_index()
             fig22 = px.line(
                 loudness_by_year,
                 x='Year',
                 y='Loudness',
                 title='Average Loudness Over Time',
                 color_discrete_sequence=['green'],
             )
             fig22.update_layout(template='plotly_white', width=800, height=400)
             st.plotly_chart(fig22)
         else:
             st.error("Cannot plot: 'Year' or 'Loudness' column missing.")
 def generate_network_analysis(df):
     st.header("Network Analysis")
     tab1, tab2 = st.tabs(["Artist Collaborations", "Genre Crossover"])
     with tab1:
+        st.markdown("**Artist Collaborations:** Visualizes artist connections.")
         if 'Artist Name(s)' in df.columns:
             valid_artists = df['Artist Name(s)'].dropna().astype(str)
             G = nx.Graph()
             for artists in valid_artists:
+                artists_list = [a.strip() for a in artists.split(',') if a.strip()]
+                if len(artists_list) > 1:
                     for a1, a2 in combinations(artists_list, 2):
                         G.add_edge(a1, a2)
             if G.number_of_nodes() > 0:
                 pos = nx.spring_layout(G)
                 edge_x = []
                 edge_y = []
         else:
             st.error("Cannot plot: 'Artist Name(s)' column missing.")
     with tab2:
+        st.markdown("**Genre Crossover:** Placeholder for future visualization.")
         st.write("To implement, install `holoviews` and use the following code:")
         st.code("""
         import holoviews as hv
         chord_data = genre_pairs.groupby(['Genres_x', 'Genres_y']).size().reset_index(name='value')
         chord = hv.Chord(chord_data).opts(title="Genre Crossover")
         st.write(hv.render(chord, backend='bokeh'))
+        """)

models/__pycache__/data_processor.cpython-310.pyc CHANGED Viewed

Binary files a/models/__pycache__/data_processor.cpython-310.pyc and b/models/__pycache__/data_processor.cpython-310.pyc differ

models/data_processor.py CHANGED Viewed

@@ -4,7 +4,7 @@ import streamlit as st
 def load_data():
     try:
         df = pd.read_csv('data/music_data.csv', on_bad_lines='skip')
-        st.write("**Raw Data Sample:**", df.head())
     except FileNotFoundError:
         st.error("Error: 'data/music_data.csv' not found. Please ensure the file exists.")
         return pd.DataFrame()
@@ -23,6 +23,8 @@ def load_data():
     df['Year'] = pd.to_datetime(df['Album Release Date'], errors='coerce').dt.year
     df['Year'] = df['Year'].fillna(0).astype(int)
     df['Decade'] = (df['Year'] // 10 * 10).astype(int)
     df['Genres'] = df['Artist Genres'].fillna('Unknown').str.split(',').apply(lambda x: [g.strip() for g in x])
     df['Popularity'] = pd.to_numeric(df['Popularity'], errors='coerce').fillna(0)
@@ -30,6 +32,5 @@ def load_data():
     if 'Decade' not in df.columns:
         st.error("Failed to create 'Decade' column")
         return df
-    st.write("**Processed Data Sample:**", df[['Track Name', 'Year', 'Decade', 'Popularity']].head())
     return df

 def load_data():
     try:
         df = pd.read_csv('data/music_data.csv', on_bad_lines='skip')
+        st.write("**Raw Data Sample:**", df.head())  # Temporary for debugging, will be removed
     except FileNotFoundError:
         st.error("Error: 'data/music_data.csv' not found. Please ensure the file exists.")
         return pd.DataFrame()
     df['Year'] = pd.to_datetime(df['Album Release Date'], errors='coerce').dt.year
     df['Year'] = df['Year'].fillna(0).astype(int)
     df['Decade'] = (df['Year'] // 10 * 10).astype(int)
+    # Remove rows where Decade is 0
+    df = df[df['Decade'] != 0]
     df['Genres'] = df['Artist Genres'].fillna('Unknown').str.split(',').apply(lambda x: [g.strip() for g in x])
     df['Popularity'] = pd.to_numeric(df['Popularity'], errors='coerce').fillna(0)
     if 'Decade' not in df.columns:
         st.error("Failed to create 'Decade' column")
         return df
+    # Removed Processed Data Sample output as per requirement
     return df