Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| from sklearn.preprocessing import StandardScaler, OneHotEncoder | |
| from sklearn.compose import make_column_transformer | |
| from sklearn.model_selection import GroupShuffleSplit | |
| from tensorflow import keras | |
| from tensorflow.keras import layers | |
| from tensorflow.keras import callbacks | |
# Load the Spotify tracks table and discard every row with a missing value.
# NOTE(review): the relative path follows a Kaggle-style input layout --
# confirm the CSV actually exists there in the environment running this.
spotify = pd.read_csv('../input/dl-course-data/spotify.csv')
X = spotify.copy()
X = X.dropna()

# Pull the regression target out of the feature table; `pop` also removes
# the column from X.
y = X.pop('track_popularity')

# Artist of each track, kept aside for use as a grouping key.
artists = X['track_artist']

# Columns that get standardized.
features_num = [
    'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
    'duration_ms',
]
# Columns that get one-hot encoded.
features_cat = ['playlist_genre']

# Scale the numeric columns and one-hot encode the categorical ones.
numeric_step = (StandardScaler(), features_num)
categorical_step = (OneHotEncoder(), features_cat)
preprocessor = make_column_transformer(numeric_step, categorical_step)
# We'll do a "grouped" split to keep all of an artist's songs in one
# split or the other. This is to help prevent signal leakage.
def group_split(X, y, group, train_size=0.75, random_state=None):
    """Split X and y into train/test parts without splitting any group.

    Args:
        X: Feature table; must support positional indexing via ``.iloc``.
        y: Target values aligned row-for-row with X; must support ``.iloc``.
        group: Group label for each row. Rows sharing a label always land
            on the same side of the split.
        train_size: Forwarded to ``GroupShuffleSplit`` as the size of the
            training side.
        random_state: Seed forwarded to ``GroupShuffleSplit`` so the split
            can be reproduced. The default ``None`` keeps the original
            unseeded behavior.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)``.
    """
    # Only the first split is ever consumed, so ask for exactly one.
    splitter = GroupShuffleSplit(
        n_splits=1,
        train_size=train_size,
        random_state=random_state,
    )
    train, test = next(splitter.split(X, y, groups=group))
    return (X.iloc[train], X.iloc[test], y.iloc[train], y.iloc[test])
# Split by artist so that no artist appears in both the training and the
# validation data.
X_train, X_valid, y_train, y_valid = group_split(X, y, artists)

# Fit the preprocessor on the training rows only, then apply the fitted
# transformation to the validation rows.
X_train = preprocessor.fit_transform(X_train)
X_valid = preprocessor.transform(X_valid)

# popularity is on a scale 0-100, so this rescales to 0-1.
y_train, y_valid = y_train / 100, y_valid / 100

# Number of columns produced by the preprocessor, wrapped in a list.
input_shape = [X_train.shape[1]]
print("Input shape: {}".format(input_shape))
# Baseline: a single Dense unit with no activation, i.e. a linear model.
model = keras.Sequential([
    layers.Dense(1, input_shape=input_shape),
])
model.compile(optimizer='adam', loss='mae')

history = model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    batch_size=512,
    epochs=50,
    verbose=0,  # suppress output since we'll plot the curves
)

# Per-epoch training and validation loss as a table, then as learning curves.
history_df = pd.DataFrame(history.history)
history_df.loc[0:, ['loss', 'val_loss']].plot()

min_val_loss = history_df['val_loss'].min()
print("Minimum Validation Loss: {:0.4f}".format(min_val_loss))
# Higher-capacity network: two ReLU hidden layers (128 and 64 units)
# followed by a single linear output unit.
model = keras.Sequential([
    layers.Dense(128, activation='relu', input_shape=input_shape),
    layers.Dense(64, activation='relu'),
    layers.Dense(1),
])
model.compile(optimizer='adam', loss='mae')

history = model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    batch_size=512,
    epochs=50,
)

# Per-epoch training and validation loss as a table, then as learning curves.
history_df = pd.DataFrame(history.history)
history_df.loc[:, ['loss', 'val_loss']].plot()

min_val_loss = history_df['val_loss'].min()
print("Minimum Validation Loss: {:0.4f}".format(min_val_loss))
# Stop training once the monitored loss has failed to improve by at least
# `min_delta` for `patience` consecutive epochs, and roll the model back to
# the best weights seen.
early_stopping = callbacks.EarlyStopping(
    patience=5,
    min_delta=0.001,
    restore_best_weights=True,
)

# Same architecture as the deeper network: 128 -> 64 -> 1.
model = keras.Sequential([
    layers.Dense(128, activation='relu', input_shape=input_shape),
    layers.Dense(64, activation='relu'),
    layers.Dense(1),
])
model.compile(optimizer='adam', loss='mae')

# `epochs` is now only an upper bound; the callback may end training sooner.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    batch_size=512,
    epochs=50,
    callbacks=[early_stopping],
)

# Per-epoch training and validation loss as a table, then as learning curves.
history_df = pd.DataFrame(history.history)
history_df.loc[:, ['loss', 'val_loss']].plot()

min_val_loss = history_df['val_loss'].min()
print("Minimum Validation Loss: {:0.4f}".format(min_val_loss))