Spaces:
Runtime error
Runtime error
| !pip install python-dotenv | |
| # imports | |
| import pandas as pd | |
| import h5py | |
| import os | |
| from sqlalchemy import create_engine | |
| import requests | |
| import time | |
| from dotenv import load_dotenv | |
| import pandas as pd | |
# Load the raw music dataset and drop every row with a missing value
# (downstream encoding/scaling assumes a fully populated frame).
df = pd.read_csv(
    '/content/drive/MyDrive/CMPE-258: Team Neurobytes/Neurobytes/db/data/music_data.csv'
).dropna()
| import pandas as pd | |
| import torch | |
| from torch.utils.data import DataLoader | |
| import torch.nn as nn | |
| import torch.nn.functional as F | |
| from sklearn.preprocessing import LabelEncoder, MinMaxScaler | |
| from sklearn.model_selection import train_test_split | |
| import torch.optim as optim | |
# Encode categorical columns to integer ids, reserving an extra 'unknown'
# class so unseen values at inference time can still be encoded.
label_encoders = {}
unknown_label = 'unknown'  # reserved label for out-of-vocabulary values
for column in ['artist_name', 'tags', 'title']:
    le = LabelEncoder()
    # Bug fix: fit on the string-cast values. The original fit on the raw
    # unique() values but transformed astype(str); any non-string cell made
    # the fit/transform vocabularies disagree and transform() raised.
    unique_categories = df[column].astype(str).unique().tolist()
    unique_categories.append(unknown_label)
    le.fit(unique_categories)
    df[column] = le.transform(df[column].astype(str))
    # Keep the fitted encoder so recommend_songs can reuse the same mapping.
    label_encoders[column] = le

# Normalize the numeric features to [0, 1]; the fitted scaler is reused
# on inference inputs in recommend_songs.
scaler = MinMaxScaler()
numeric_columns = ['duration', 'listeners', 'playcount']
df[numeric_columns] = scaler.fit_transform(df[numeric_columns])

# Features (encoded categoricals + scaled numerics) and target (encoded title).
X = df[['artist_name', 'tags', 'duration', 'listeners', 'playcount']]
y = df['title']

# Hold out 20% for validation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)
class SongRecommender(nn.Module):
    """MLP that maps 5 input features to logits over song titles.

    Input features (in order): encoded artist_name, encoded tags, and
    min-max-scaled duration, listeners, playcount.
    """

    def __init__(self, num_classes=None):
        """Build the network.

        Args:
            num_classes: Size of the output layer. Defaults to the original
                behavior — number of unique titles in the global ``y`` plus
                one slot for the 'unknown' label — so existing
                ``SongRecommender()`` callers are unaffected.
        """
        super().__init__()
        if num_classes is None:
            # Backward-compatible fallback: read the module-level target.
            num_classes = len(y.unique()) + 1
        self.fc1 = nn.Linear(5, 128)
        self.fc2 = nn.Linear(128, 256)
        self.fc3 = nn.Linear(256, 128)
        self.output = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw class logits for a (batch, 5) float tensor."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        return self.output(x)
# Instantiate the recommender and its training companions; these module-level
# names are consumed by train_model and recommend_songs below.
model = SongRecommender()
# Adam with the default-ish lr=1e-3; cross-entropy expects raw logits,
# which is what SongRecommender.forward returns (no softmax).
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
def train_model(model, X_train, y_train, X_test, y_test,
                epochs=50, optimizer=None, criterion=None, batch_size=50):
    """Train ``model`` on the encoded feature frame and print per-epoch losses.

    Args:
        model: nn.Module mapping (batch, n_features) floats to class logits.
        X_train, X_test: pandas DataFrames of numeric features.
        y_train, y_test: pandas Series of integer class labels.
        epochs: Number of passes over the training data (default 50, as before).
        optimizer: Optional optimizer; defaults to Adam(lr=0.001) on ``model``
            (same construction the original relied on via a global).
        criterion: Optional loss; defaults to CrossEntropyLoss.
        batch_size: Mini-batch size (default 50, as before).

    Returns:
        None. The model is trained in place and left in eval mode.
    """
    if optimizer is None:
        optimizer = optim.Adam(model.parameters(), lr=0.001)
    if criterion is None:
        criterion = nn.CrossEntropyLoss()

    train_loader = DataLoader(
        list(zip(X_train.values.astype(float), y_train)),
        batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(
        list(zip(X_test.values.astype(float), y_test)),
        batch_size=batch_size, shuffle=False)

    for epoch in range(epochs):
        # Bug fix: train() must be re-asserted every epoch — the original set
        # it once before the loop, so every epoch after the first ran in eval
        # mode (validation flipped the flag and it was never flipped back).
        model.train()
        train_loss = 0.0
        for features, labels in train_loader:
            optimizer.zero_grad()
            # The default collate already yields tensors; cast instead of
            # re-wrapping with torch.tensor(...) (which copies and warns).
            outputs = model(features.float())
            loss = criterion(outputs, labels.long())
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Validation phase — no gradients needed.
        model.eval()
        validation_loss = 0.0
        with torch.no_grad():
            for features, labels in test_loader:
                outputs = model(features.float())
                loss = criterion(outputs, labels.long())
                validation_loss += loss.item()

        print(f'Epoch {epoch+1}, Training Loss: {train_loss / len(train_loader)}, Validation Loss: {validation_loss / len(test_loader)}')
# Train the global model on the held-out split.
train_model(model, X_train, y_train, X_test, y_test)

# save the trained weights
torch.save(model.state_dict(), 'model.pth')

# load the model — bug fix: the original rebuilt a fresh SongRecommender()
# and never loaded the checkpoint, so every later recommendation ran on
# randomly initialized weights.
model = SongRecommender()
model.load_state_dict(torch.load('model.pth'))
def recommend_songs(model, input_features):
    """Return the titles of the model's top-5 scoring songs.

    ``input_features`` is a dict with keys 'artist_name', 'tags',
    'duration', 'listeners', 'playcount'. Categorical values unseen during
    training fall back to the reserved 'unknown' class; numeric values are
    scaled with the globally fitted MinMaxScaler.
    """
    model.eval()
    print(input_features)
    with torch.no_grad():
        def _encode(column, value):
            # Unseen categories raise ValueError — map them to 'unknown'.
            encoder = label_encoders[column]
            try:
                return encoder.transform([value])[0]
            except ValueError:
                return encoder.transform(['unknown'])[0]

        artist_id = _encode('artist_name', input_features['artist_name'])
        tags_id = _encode('tags', input_features['tags'])

        # Frame with column names so the scaler sees the fit-time schema.
        numeric = pd.DataFrame(
            [[input_features['duration'], input_features['listeners'],
              input_features['playcount']]],
            columns=['duration', 'listeners', 'playcount'],
        )
        numeric_scaled = scaler.transform(numeric)[0]

        model_input = torch.tensor(
            [artist_id, tags_id, *numeric_scaled]).float().unsqueeze(0)
        scores = model(model_input)
        _, top_indices = scores.topk(5)
        song_ids = top_indices.squeeze().tolist()
        return label_encoders['title'].inverse_transform(song_ids)
| import requests | |
def fetch_song_data(api_key, artist_name, track_name):
    """Fetch track metadata from the Last.fm ``track.getInfo`` endpoint.

    Args:
        api_key: Last.fm API key.
        artist_name: Artist to look up.
        track_name: Track title to look up.

    Returns:
        The decoded JSON payload as a dict, or {} on a non-200 response.
    """
    url = "http://ws.audioscrobbler.com/2.0/"
    params = {
        'method': 'track.getInfo',
        'api_key': api_key,
        'artist': artist_name,
        'track': track_name,
        'format': 'json'
    }
    # Bug fix: requests.get with no timeout can block forever on a stalled
    # connection; bound the wait instead of hanging the caller.
    response = requests.get(url, params=params, timeout=10)
    print(response.content)
    return response.json() if response.status_code == 200 else {}
def parse_song_data(song_data):
    """Flatten a Last.fm ``track.getInfo`` payload into a feature dict.

    Returns a dict with keys 'artist_name', 'tags' (comma-joined), 'duration'
    (float), 'listeners' (int), 'playcount' (int), 'album' — or {} when the
    payload is empty or has no 'track' entry.
    """
    if song_data and 'track' in song_data:
        track = song_data['track']
        tags = track.get('toptags', {}).get('tag', [])
        # Bug fix: Last.fm serializes a single tag as a dict, not a
        # one-element list; iterating the dict would yield its string keys
        # and tag['name'] below would raise TypeError.
        if isinstance(tags, dict):
            tags = [tags]
        return {
            # .get chain instead of track['artist']['name'] so a partial
            # payload degrades to 'unknown' rather than raising KeyError.
            'artist_name': track.get('artist', {}).get('name', 'unknown'),
            'tags': ', '.join(tag['name'] for tag in tags),
            'duration': float(track.get('duration', 0)),
            'listeners': int(track.get('listeners', 0)),
            'playcount': int(track.get('playcount', 0)),
            'album': track.get('album', {}).get('title', 'Unknown')
        }
    return {}
# Pull the Last.fm API key from the environment (.env file).
from dotenv import load_dotenv
import os

load_dotenv()
api_key = os.getenv('LASTFM_API_KEY')

# NOTE(review): 'Lagy Gaga' looks like a typo for 'Lady Gaga' — confirm
# whether the misspelling is intentional (e.g. to exercise the
# not-found path below). Preserved as-is.
artist_name = 'Lagy Gaga'
track_name = 'Poker Face'

# Fetch the track from Last.fm and flatten it into model features.
song_data = fetch_song_data(api_key, artist_name, track_name)
parsed_data = parse_song_data(song_data)
print(song_data)

# Only recommend when the lookup succeeded and tags are present;
# otherwise report the failure.
if not parsed_data or not parsed_data['tags']:
    print("Song not found or tags not available.")
else:
    recommend_songs(model, parsed_data)