Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.preprocessing import StandardScaler, OneHotEncoder | |
| from sklearn.impute import SimpleImputer | |
| from sklearn.pipeline import make_pipeline | |
| from sklearn.compose import make_column_transformer | |
| hotel = pd.read_csv('../input/dl-course-data/hotel.csv') | |
| X = hotel.copy() | |
| y = X.pop('is_canceled') | |
| X['arrival_date_month'] = \ | |
| X['arrival_date_month'].map( | |
| {'January':1, 'February': 2, 'March':3, | |
| 'April':4, 'May':5, 'June':6, 'July':7, | |
| 'August':8, 'September':9, 'October':10, | |
| 'November':11, 'December':12} | |
| ) | |
| features_num = [ | |
| "lead_time", "arrival_date_week_number", | |
| "arrival_date_day_of_month", "stays_in_weekend_nights", | |
| "stays_in_week_nights", "adults", "children", "babies", | |
| "is_repeated_guest", "previous_cancellations", | |
| "previous_bookings_not_canceled", "required_car_parking_spaces", | |
| "total_of_special_requests", "adr", | |
| ] | |
| features_cat = [ | |
| "hotel", "arrival_date_month", "meal", | |
| "market_segment", "distribution_channel", | |
| "reserved_room_type", "deposit_type", "customer_type", | |
| ] | |
| transformer_num = make_pipeline( | |
| SimpleImputer(strategy="constant"), # there are a few missing values | |
| StandardScaler(), | |
| ) | |
| transformer_cat = make_pipeline( | |
| SimpleImputer(strategy="constant", fill_value="NA"), | |
| OneHotEncoder(handle_unknown='ignore'), | |
| ) | |
| preprocessor = make_column_transformer( | |
| (transformer_num, features_num), | |
| (transformer_cat, features_cat), | |
| ) | |
| # stratify - make sure classes are evenlly represented across splits | |
| X_train, X_valid, y_train, y_valid = \ | |
| train_test_split(X, y, stratify=y, train_size=0.75) | |
| X_train = preprocessor.fit_transform(X_train) | |
| X_valid = preprocessor.transform(X_valid) | |
| input_shape = [X_train.shape[1]] | |
| from tensorflow import keras | |
| from tensorflow.keras import layers | |
| model = keras.Sequential([ | |
| layers.BatchNormalization(input_shape=input_shape), | |
| layers.Dense(256, activation='relu'), | |
| layers.BatchNormalization(), | |
| layers.Dropout(0.3), | |
| layers.Dense(256, activation='relu'), | |
| layers.BatchNormalization(), | |
| layers.Dropout(0.3), | |
| layers.Dense(1, activation='sigmoid') | |
| ]) | |
| model.compile( | |
| optimizer='adam', | |
| loss='binary_crossentropy', | |
| metrics=['binary_accuracy'], | |
| ) | |
| early_stopping = keras.callbacks.EarlyStopping( | |
| patience=5, | |
| min_delta=0.001, | |
| restore_best_weights=True, | |
| ) | |
| history = model.fit( | |
| X_train, y_train, | |
| validation_data=(X_valid, y_valid), | |
| batch_size=512, | |
| epochs=200, | |
| callbacks=[early_stopping], | |
| ) | |
| history_df = pd.DataFrame(history.history) | |
| history_df.loc[:, ['loss', 'val_loss']].plot(title="Cross-entropy") | |
| history_df.loc[:, ['binary_accuracy', 'val_binary_accuracy']].plot(title="Accuracy") | |