# Revibe/Backend/app/services/recommendation.py
# garvitcpp - "Upload 19 files" (398778b, verified)
import pandas as pd
from app.models.ml_models import MLModels
class RecommendationService:
    """Recommend tracks by sampling from the cluster of a seed song or profile.

    The service reads four objects from ``self.ml_models``: ``tracks`` (a
    DataFrame with at least ``track_name``, ``artists``, ``track_genre``, the
    two ``*_encoded`` columns and the audio-feature columns), ``scaled_df``
    (a DataFrame with a ``cluster`` column), ``scaler`` (must expose
    ``transform``) and ``kmeans`` (must expose ``predict``).

    NOTE(review): rows of ``tracks`` and ``scaled_df`` are looked up with the
    same index labels, so both frames are assumed to share one index --
    confirm against ``MLModels.load_models``.
    """

    # Audio features averaged over the matching tracks to build a profile.
    _AUDIO_FEATURES = (
        'tempo', 'loudness', 'danceability', 'energy', 'acousticness',
        'instrumentalness', 'speechiness', 'liveness', 'valence',
    )

    # Column order of the single-row frame handed to ``scaler.transform``.
    # NOTE(review): presumably the order the scaler was fitted with -- confirm.
    _SCALER_COLUMNS = (
        'tempo', 'loudness', 'track_genre_encoded', 'artists_encoded',
        'danceability', 'energy', 'acousticness', 'instrumentalness',
        'speechiness', 'liveness', 'valence',
    )

    # Upper bound on the number of recommendations returned per request.
    _MAX_RECOMMENDATIONS = 5

    def __init__(self):
        """Create the model container and make sure its data is loaded."""
        self.ml_models = MLModels()
        # Ensure data is loaded
        self.ml_models.load_models()

    def get_recommendations_by_song(self, song_input):
        """Recommend songs from the same cluster as the given song.

        Args:
            song_input: Text of the form ``'Song Name - Artist Name'``. The
                split happens on the LAST ``' - '`` so titles that contain
                the separator themselves stay intact.

        Returns:
            A list of ``{'song_name': ..., 'artist': ...}`` dicts (at most
            five, never containing the seed song itself), or an
            ``{'error': message}`` dict when the input is malformed or no
            track matches.
        """
        format_error = {
            'error': "Invalid song input format. Please use 'Song Name - Artist Name'."
        }
        if not isinstance(song_input, str):
            return format_error

        parts = song_input.rsplit(' - ', 1)
        if len(parts) != 2:
            return format_error

        song_name, artist_from_input = (part.strip() for part in parts)
        # An empty term is a substring of every value and would match the
        # whole catalogue, so treat it as malformed input instead.
        if not song_name or not artist_from_input:
            return format_error

        tracks = self.ml_models.tracks
        scaled_df = self.ml_models.scaled_df

        # regex=False: user text is a literal substring, not a pattern, so
        # titles such as "C++" or "Hello (Live)" neither crash nor mismatch.
        name_hit = tracks['track_name'].str.contains(
            song_name, case=False, na=False, regex=False)
        artist_hit = tracks['artists'].str.contains(
            artist_from_input, case=False, na=False, regex=False)
        matching_songs = tracks[name_hit & artist_hit]
        if matching_songs.empty:
            return {'error': "No matching songs found."}

        # The first match (in catalogue order) acts as the seed.
        seed_idx = matching_songs.index[0]
        seed_cluster = scaled_df.loc[seed_idx, 'cluster']
        return self._recommend_from_cluster(seed_cluster, exclude_index=seed_idx)

    def get_recommendations_by_artist_and_genre(self, artist_name, genre):
        """Recommend songs close to the average profile of an artist/genre.

        Tracks whose genre contains ``genre`` OR whose artists contain
        ``artist_name`` (case-insensitive literal substring) are averaged
        into one feature row; that row is scaled, assigned to a cluster by
        the k-means model, and recommendations are sampled from that cluster.

        Args:
            artist_name: Artist search text; ``None``/blank means "ignore".
            genre: Genre search text; ``None``/blank means "ignore".

        Returns:
            A list of ``{'song_name': ..., 'artist': ...}`` dicts (at most
            five), or an ``{'error': message}`` dict when nothing matches or
            neither search term is given.
        """
        no_match_error = {
            'error': "No matching songs found for the given artist or genre."
        }
        tracks = self.ml_models.tracks
        scaler = self.ml_models.scaler
        kmeans = self.ml_models.kmeans

        genre_term = (genre or '').strip().lower()
        artist_term = (artist_name or '').strip().lower()

        # Only non-empty terms take part in the OR: an empty term would match
        # every row and drown out the other criterion.
        masks = []
        if genre_term:
            masks.append(tracks['track_genre'].str.lower().str.contains(
                genre_term, na=False, regex=False))
        if artist_term:
            masks.append(tracks['artists'].str.lower().str.contains(
                artist_term, na=False, regex=False))
        if not masks:
            return no_match_error

        combined = masks[0]
        for extra in masks[1:]:
            combined = combined | extra
        matching_songs = tracks[combined]
        if matching_songs.empty:
            return no_match_error

        # Audio features come from the matching tracks; the two encoded
        # categorical columns use the mean over the whole catalogue.
        profile = matching_songs[list(self._AUDIO_FEATURES)].mean().to_dict()
        profile['track_genre_encoded'] = tracks['track_genre_encoded'].mean()
        profile['artists_encoded'] = tracks['artists_encoded'].mean()
        input_df = pd.DataFrame([profile], columns=list(self._SCALER_COLUMNS))

        input_features = scaler.transform(input_df)
        closest_cluster = kmeans.predict(input_features)[0]
        return self._recommend_from_cluster(closest_cluster)

    def _recommend_from_cluster(self, cluster_id, exclude_index=None):
        """Sample up to ``_MAX_RECOMMENDATIONS`` formatted songs from a cluster.

        Args:
            cluster_id: Value compared against ``scaled_df['cluster']``.
            exclude_index: Optional index label removed from the candidates
                (used to keep the seed song out of its own recommendations).

        Returns:
            A list of ``{'song_name': ..., 'artist': ...}`` dicts; empty when
            the cluster has no candidates left.
        """
        scaled_df = self.ml_models.scaled_df
        candidates = scaled_df[scaled_df['cluster'] == cluster_id]
        if exclude_index is not None:
            candidates = candidates.drop(index=exclude_index, errors='ignore')
        if candidates.empty:
            return []

        # One uniform draw without replacement; sub-sampling a larger random
        # draw would yield exactly the same distribution.
        picks = candidates.sample(min(self._MAX_RECOMMENDATIONS, len(candidates)))
        return self._format_recommendations(picks)

    def _format_recommendations(self, recommendations):
        """Turn sampled rows into ``{'song_name', 'artist'}`` dicts.

        Args:
            recommendations: DataFrame whose index labels identify rows of
                ``self.ml_models.tracks``.

        Returns:
            One dict per row of ``recommendations``, in the same order.
        """
        tracks = self.ml_models.tracks
        picked = tracks.loc[recommendations.index, ['track_name', 'artists']]
        return [{'song_name': song_name, 'artist': artist}
                for song_name, artist in zip(picked['track_name'], picked['artists'])]