"""Gradio app that recommends songs whose lyrics embeddings are most similar."""

import heapq
import html
import pickle

import gradio as gr
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Load data and model
df = pd.read_csv('./DATA/spotify_millsongdata.csv')

# Load saved embeddings.
# NOTE(security): pickle.load executes arbitrary code embedded in the file;
# only ever point this at an embeddings file you produced yourself.
with open("./DATA/lyrics_embeddings.pkl", "rb") as f:
    lyrics_embeddings = pickle.load(f)

# List of artists and songs
artists = df['artist'].unique()
song_titles = df['song']

# Responsive tweaks applied to the Gradio layout on narrow screens.
CUSTOM_CSS = """
@media (max-width: 768px) {
    .gr-container {
        width: 100%;
        padding: 10px;
        box-sizing: border-box;
    }
    .gr-dropdown select {
        width: 100%;
        height: 40px; /* Limit height */
        font-size: 16px;
        padding: 5px;
        box-sizing: border-box;
    }
    .gr-button {
        width: 100%;
        font-size: 16px;
        margin-top: 10px;
    }
    .gr-html, .gr-row {
        width: 100%;
        font-size: 16px;
        margin: 10px 0;
    }
    h1 {
        font-size: 24px;
    }
    p {
        font-size: 14px;
    }
    .gr-dropdown::after {
        content: '';
        width: 12px;
        height: 12px;
        border: solid black;
        border-width: 0 2px 2px 0;
        display: inline-block;
        transform: rotate(45deg);
        margin-left: 10px;
    }
}
"""

# Page header. Kept at column 0 because leading indentation is significant
# in Markdown (four spaces would turn the text into a code block).
HEADER_MARKDOWN = """

Music Recommendation System

Get the most relevant song recommendations based on lyrics similarity

"""


# Recommendation logic
def recommend_songs(song_index, top_n=5, batch_size=100):
    """Return the ``top_n`` songs whose lyrics are most similar to one song.

    Similarities are computed in batches of ``batch_size`` rows so only a
    small similarity matrix is materialised at a time.

    Args:
        song_index: Position of the query song in ``lyrics_embeddings``
            (and, equivalently, its row position in ``df``).
        top_n: Maximum number of recommendations to return.
        batch_size: Number of embeddings compared per batch.

    Returns:
        A list of ``(song title, link, similarity rounded to 2 decimals)``
        tuples ordered from most to least similar. The query song itself is
        never included. Empty when ``top_n`` is not positive.
    """
    if top_n <= 0:
        return []

    query = [lyrics_embeddings[song_index]]
    total = len(df)
    best = []  # min-heap of (similarity, row position); weakest match on top

    # Stepping by batch_size never yields an empty trailing batch. The
    # previous ``len(df) // batch_size + 1`` count did whenever the dataset
    # size was an exact multiple of batch_size, and cosine_similarity
    # rejects zero-sample input.
    for start in range(0, total, batch_size):
        stop = min(start + batch_size, total)
        batch_scores = cosine_similarity(lyrics_embeddings[start:stop], query)

        for offset, row in enumerate(batch_scores):
            position = start + offset
            # Exclude the query song by identity rather than by rank: a song
            # with identical lyrics ties at the top score, so "drop whatever
            # ranked first" could drop the wrong entry.
            if position == song_index:
                continue
            candidate = (float(row[0]), position)
            if len(best) < top_n:
                heapq.heappush(best, candidate)
            elif candidate > best[0]:
                heapq.heapreplace(best, candidate)

    ranked = sorted(best, key=lambda entry: entry[0], reverse=True)
    # Positional access keeps rows aligned with the embedding order.
    return [
        (song_titles.iloc[position], df['link'].iloc[position], round(score, 2))
        for score, position in ranked
    ]


# Interface logic function
def get_songs_by_artist(artist_name):
    """Return a dropdown update listing every song by ``artist_name``.

    The first song is preselected; when the artist has no songs the
    dropdown value is cleared.
    """
    filtered_songs = df[df['artist'] == artist_name]['song'].tolist()
    return gr.update(
        choices=filtered_songs,
        value=filtered_songs[0] if filtered_songs else None,
    )


def gradio_recommend(song_title, artist_name=None):
    """Build the HTML shown for the recommendations of the selected song.

    Args:
        song_title: Title chosen in the song dropdown.
        artist_name: Optional artist used to disambiguate titles shared by
            several artists. When omitted, the first song with a matching
            title is used.

    Returns:
        An HTML fragment with one entry per recommended song, or
        ``"Song not found."`` when no row matches the selection.
    """
    # Find the position of the selected song
    matches = song_titles == song_title
    if artist_name is not None:
        matches = matches & (df['artist'] == artist_name)
    positions = matches.to_numpy().nonzero()[0]
    if len(positions) == 0:
        return "Song not found."

    # Get recommended songs
    recommendations = recommend_songs(positions[0])

    # The result is rendered by gr.HTML: line breaks must be explicit <br>
    # tags (HTML collapses plain newlines), and text coming from the dataset
    # is escaped so it cannot be interpreted as markup.
    entries = [
        f"Song Name: {html.escape(str(song))}<br>"
        f"Search Link: {html.escape(str(link))}<br>"
        f"Lyrics Similarity: {sim_score:.2f}<br><br>"
        for song, link, sim_score in recommendations
    ]
    return "".join(entries)


# Create Gradio multi-page interface
with gr.Blocks(css=CUSTOM_CSS) as demo:
    gr.Markdown(HEADER_MARKDOWN)

    # Page 1: Select artist
    with gr.Row():
        with gr.Column():
            artist_dropdown = gr.Dropdown(choices=list(artists), label="Select Artist")
            next_button = gr.Button("Next")

    # Page 2: Select song and get recommendations (hidden until "Next")
    with gr.Row(visible=False) as song_selection_row:
        song_dropdown = gr.Dropdown(label="Select Song")
        recommend_button = gr.Button("Get Recommendations")
        output = gr.HTML(label="Recommended Similar Songs")

    # Event bindings
    artist_dropdown.change(get_songs_by_artist, inputs=artist_dropdown, outputs=song_dropdown)
    next_button.click(lambda: gr.update(visible=True), None, song_selection_row)
    # The artist is passed along so that a title shared by several artists
    # resolves to the song the user actually picked.
    recommend_button.click(
        gradio_recommend,
        inputs=[song_dropdown, artist_dropdown],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()