Spaces:

aikanava
/

song_recommendation_system

Sleeping

App Files Community

aikanava committed on Mar 12, 2025

Commit

1c6ddfa

verified ·

1 Parent(s): f53e67b

upload files

Browse files

Files changed (4) hide show

.gitattributes +1 -0
app.py +102 -0
requirements.txt +6 -0
spotify_millsongdata.csv +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+spotify_millsongdata.csv filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,102 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.neighbors import NearestNeighbors
+import matplotlib.pyplot as plt
+import seaborn as sns
+from collections import Counter
+st.set_page_config(page_title="🎵 Lyrics-Based Song Recommendations")
+# Load dataset
+@st.cache_data
+def load_data():
+    df = pd.read_csv("spotify_millsongdata.csv")  # Update with actual file path
+    df = df.dropna(subset=["text"])  # Remove missing lyrics
+    return df
+df = load_data()
+# Convert lyrics into numerical features using TF-IDF
+vectorizer = TfidfVectorizer(stop_words="english", max_features=5000)
+lyrics_matrix = vectorizer.fit_transform(df["text"])
+# Train KNN Model
+knn = NearestNeighbors(n_neighbors=5, metric="cosine")
+knn.fit(lyrics_matrix)
+# Streamlit UI
+st.title("🎶 Lyrics-Based Song Recommendation System")
+st.markdown(
+    "Discover songs that match your favorite lyrics! This app uses **TF-IDF** and **KNN** to find songs with similar lyrical content."
+)
+# Tabs for better UI
+tab1, tab2 = st.tabs(["📊 Dataset Overview", "🎤 Lyrics-Based Recommendation"])
+with tab1:
+    # Dataset Sample
+    sample_df = df.sample(20)
+    st.dataframe(sample_df[["song", "artist", "text"]])
+    # Expander for Dataset Statistics
+    with st.expander("📊 Dataset Statistics"):
+        # Dataset Statistics
+        total_songs = df.shape[0]
+        unique_artists = df["artist"].nunique()
+        avg_lyrics_length = df["text"].apply(lambda x: len(x.split())).mean()
+        st.write(f"📊 **Total Songs**: {total_songs}")
+        st.write(f"🎤 **Unique Artists**: {unique_artists}")
+        st.write(f"📖 **Average Lyrics Length**: {avg_lyrics_length:.2f} words")
+    # Expander for Lyrics Length Distribution
+    with st.expander("📖 Lyrics Length Distribution (Word Count per Song)"):
+        # Lyrics Length Distribution
+        lyrics_length = df["text"].apply(lambda x: len(x.split()))
+        fig, ax = plt.subplots(figsize=(8, 4))
+        sns.histplot(lyrics_length, kde=True, ax=ax, color="skyblue")
+        ax.set_xlabel("Word Count")
+        ax.set_ylabel("Number of Songs")
+        st.pyplot(fig)
+    # Most Frequent Artists
+    with st.expander("🎤 Most Frequent Artists in the Dataset"):
+        artist_counts = df["artist"].value_counts().head(10)
+        fig, ax = plt.subplots(figsize=(8, 4))
+        sns.barplot(y=artist_counts.index, x=artist_counts.values, ax=ax, palette="mako")
+        ax.set_xlabel("Number of Songs")
+        ax.set_ylabel("Artist")
+        st.pyplot(fig)
+with tab2:
+    st.subheader("Enter Lyrics Snippet")
+    user_lyrics = st.text_area("Type lyrics snippet:", "")
+    if st.button("🎶 Get Recommendations") and user_lyrics.strip():
+        # Convert user input into the same TF-IDF space
+        user_vector = vectorizer.transform([user_lyrics])
+        # Find similar songs
+        distances, indices = knn.kneighbors(user_vector)
+        st.subheader("🎧 Recommended Songs:")
+        recommendations = []
+        for i, idx in enumerate(indices[0]):
+            recommended_song = df.iloc[idx]["song"]
+            recommended_artist = df.iloc[idx]["artist"]
+            similarity_score = 1 - distances[0][i]  # Convert cosine distance to similarity
+            recommendations.append((recommended_song, recommended_artist, similarity_score))
+            st.write(f"🎶 **{recommended_song}** - {recommended_artist}  (Similarity: `{similarity_score:.2f}`)")
+        # Plot similarity scores
+        with st.expander("📊 Similarity Scores"):
+            fig, ax = plt.subplots(figsize=(8, 4))
+            song_names = [rec[0] for rec in recommendations]
+            similarity_scores = [rec[2] for rec in recommendations]
+            sns.barplot(x=similarity_scores, y=song_names, ax=ax, palette="coolwarm")
+            ax.set_xlabel("Similarity Score")
+            ax.set_ylabel("Recommended Songs")
+            st.pyplot(fig)

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+streamlit
+pandas
+numpy
+scikit-learn
+matplotlib
+seaborn

spotify_millsongdata.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7cd19a8adf74791bfd99e1ccb8b1fc3bd2ed33399faeb86fa3677638a5623afd
+size 74864162