Spaces:
Runtime error
Runtime error
| import keras | |
| import pandas as pd | |
| import gradio as gr | |
| from sklearn.preprocessing import MinMaxScaler | |
| import numpy as np | |
| from hampel import hampel | |
| import pickle | |
| import matplotlib.pyplot as plt | |
# Load the merged dataset. The CSV's first column is read as the index, then
# moved back into a column named 'index' and discarded, leaving a 0..n-1 index.
df = pd.read_csv('merge_data_final.csv', index_col=0)
df = df.reset_index().drop(columns='index')

# Convert sea surface height to mean sea level by adding 2.75 feet (0.8382 m).
df['sea surface height'] = df['sea surface height'] + 0.8382

# 'Date' is stored as 'YYYY/MM' text; parse it into timestamps.
df['Date'] = pd.to_datetime(df['Date'], format='%Y/%m')
# Function to create a dataset
def create_dataset(df1, city):
    """Build the outlier-filtered, min-max scaled feature table for one city.

    Args:
        df1: DataFrame holding all cities; must contain the 'City' and 'Date'
            columns plus every column named in the lists below.
        city: Value of the 'City' column to select.

    Returns:
        A tuple ``(df, sc_x, sc_y)``: the processed DataFrame for ``city``
        (with 'Date' restored as a regular column), the MinMaxScaler fitted on
        the feature columns, and the MinMaxScaler fitted on
        'sea surface height'.
    """
    # Take an explicit copy: the column assignments below must modify this
    # frame, not a slice of the caller's data (avoids SettingWithCopy issues).
    city_df = df1[df1['City'] == city].copy()
    city_df = city_df.set_index('Date')

    # Outlier detection: replace Hampel-flagged points with imputed values.
    # NOTE(review): this call signature (n=..., imputation=True) appears to
    # match older releases of the `hampel` package; newer releases may expose
    # a different API -- confirm the pinned version.
    filtered_columns = [
        'sea surface temp', 'sea bottom temp', 'sea salinity',
        'sea surface height', 'CO2', 'seasonally adjust CO2',
        'greenland_avg_mass', 'antarctica_avg_mass', 'tavg_C', 'tmin_C',
        'tmax_C', 'prcp_m',
    ]
    for column in filtered_columns:
        city_df[column] = hampel(
            city_df[column], window_size=12, n=3, imputation=True
        )

    # Rolling mean and std of sea surface height with a sliding window of 4.
    # The leading rows that the 4-wide window cannot cover fall back to a
    # 2-wide window, and whatever is still missing becomes 0.
    # Assigning the fillna() result (instead of chained `inplace=True` on a
    # column selection) guarantees the frame is actually updated, including
    # under pandas Copy-on-Write.
    height = city_df['sea surface height']
    city_df['mean_elevation4'] = (
        height.rolling(4).mean().fillna(height.rolling(2).mean()).fillna(0)
    )
    city_df['std_elevation4'] = (
        height.rolling(4).std().fillna(height.rolling(2).std()).fillna(0)
    )

    attribute = [
        'month', 'sea surface temp', 'sea bottom temp', 'sea salinity', 'CO2',
        'tavg_C', 'tmin_C', 'tmax_C', 'prcp_m',
        'mean_elevation4', 'std_elevation4',
    ]
    target = ['sea surface height']

    # Features and target get separate scalers so the target scaler can later
    # be used on its own to invert predictions.
    sc_x = MinMaxScaler(feature_range=(0, 1))
    sc_y = MinMaxScaler(feature_range=(0, 1))
    city_df[attribute] = sc_x.fit_transform(city_df[attribute])
    city_df[target] = sc_y.fit_transform(city_df[target])

    # Bring 'Date' back as an ordinary column.
    city_df = city_df.reset_index()
    return city_df, sc_x, sc_y
def process(df):
    """Split a prepared frame into feature and target arrays.

    Args:
        df: DataFrame containing the twelve feature columns and the three
            'elevation_next*_month' target columns listed below.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: the feature columns and the target
        columns, each in the order listed here.
    """
    feature_columns = [
        'month', 'sea surface temp', 'sea bottom temp', 'sea salinity', 'CO2',
        'sea surface height', 'tavg_C', 'tmin_C', 'tmax_C', 'prcp_m',
        'mean_elevation4', 'std_elevation4',
    ]
    target_columns = [
        'elevation_next_month',
        'elevation_next2_month',
        'elevation_next3_month',
    ]
    return tuple(
        np.array(df[columns]) for columns in (feature_columns, target_columns)
    )
# Functions to transform data into 3D shape
def build_train_attribute(train, n_in):
    """Stack every run of ``n_in`` consecutive rows of ``train`` into one array.

    Args:
        train: Array whose first axis is sliced into overlapping windows.
        n_in: Number of consecutive rows per window.

    Returns:
        A NumPy array holding ``train.shape[0] - n_in + 1`` windows, each equal
        to ``train[i:i + n_in]``; an empty array when ``train`` has fewer than
        ``n_in`` rows.
    """
    window_count = train.shape[0] - n_in + 1
    windows = [train[start:start + n_in] for start in range(window_count)]
    return np.array(windows)
def get_prediction(city, lookback=12, df=df):
    """Predict sea surface height for a city by averaging the LSTM and XGBoost models.

    Args:
        city: City name; spaces are replaced by underscores to form the model
            file names ``<name>_lstm.h5`` and ``<name>_xgboost.sav``.
        lookback: Number of consecutive rows fed to the LSTM per sample.
        df: Full multi-city DataFrame; defaults to the module-level frame.

    Returns:
        A tuple ``(prediction_average, subdf, y_unscaled)``: the element-wise
        mean of the two models' inverse-scaled predictions, the processed
        per-city DataFrame returned by ``create_dataset``, and the observed
        'sea surface height' inverse-scaled as an (n, 1) array.
    """
    name = city.replace(' ', '_')
    lstm_model = keras.models.load_model(f'{name}_lstm.h5')
    # SECURITY: unpickling executes arbitrary code; only load model files that
    # ship with the app. The context manager closes the file handle promptly.
    with open(f'{name}_xgboost.sav', 'rb') as model_file:
        xgb_model = pickle.load(model_file)

    subdf, sc_x, sc_y = create_dataset(df, city)

    attribute = [
        'month', 'sea surface temp', 'sea bottom temp', 'sea salinity', 'CO2',
        'sea surface height', 'tavg_C', 'tmin_C', 'tmax_C', 'prcp_m',
        'mean_elevation4', 'std_elevation4',
    ]  # list(selected_feat)
    df_att = np.array(subdf[attribute])

    # The LSTM consumes windows of `lookback` rows; XGBoost predicts per row.
    x = build_train_attribute(df_att, lookback)
    prediction_lstm = sc_y.inverse_transform(lstm_model.predict(x))
    prediction_xgb = sc_y.inverse_transform(xgb_model.predict(df_att))
    y_unscaled = sc_y.inverse_transform(
        np.array(subdf['sea surface height']).reshape(-1, 1)
    )

    # The first LSTM window ends at row `lookback - 1`, so drop the earlier
    # XGBoost rows to line both prediction arrays up before averaging.
    prediction_average = (prediction_lstm + prediction_xgb[lookback - 1:]) / 2
    return prediction_average, subdf, y_unscaled
def draw(city, df=df):
    """Plot historical vs. predicted sea level for a city and return the forecasts.

    Args:
        city: City name forwarded to ``get_prediction``.
        df: Unused; kept so the signature stays compatible with existing callers.

    Returns:
        A tuple ``(fig, pred_1st, pred_2nd, pred_3rd)``: the matplotlib figure
        and the first three values of the last prediction row, which are
        plotted at 2021-01, 2021-02 and 2021-03.
    """
    pred, subdf, y_unscaled = get_prediction(city)
    latest = pred[-1]
    pred_1st = latest[0]
    pred_2nd = latest[1]
    pred_3rd = latest[2]

    future_dates = pd.to_datetime(["2021-01-01", "2021-02-01", "2021-03-01"])
    past_dates = pd.period_range("2001-01", "2021-01", freq='M').to_timestamp()

    # Build the figure with the object-oriented API instead of plt.figure():
    # pyplot keeps every figure it creates alive in global state until it is
    # explicitly closed, which leaks memory when this runs once per request,
    # and pyplot's global state is not safe to share between request threads.
    fig = plt.Figure()
    ax = fig.add_subplot(111)

    ax.plot(past_dates, pred[:, 0], 'r', linewidth=4)
    ax.plot(subdf['Date'], y_unscaled, 'green', linewidth=2)
    ax.plot(future_dates, latest, 'blue', linewidth=4)

    # Annotate each forecast point with its value. A dedicated loop name is
    # used so the `pred` array is not shadowed.
    for date, value in zip(future_dates, latest):
        ax.text(date, value, str(value), ha='left', va='top')

    ax.legend(('Past Predicted Value', 'Test', 'Future Predicted Value'))
    ax.set_xlabel('Time')
    # NOTE(review): the plotted values are inverse-transformed, so "Scaled"
    # in this label may be misleading -- confirm the intended wording.
    ax.set_ylabel('Scaled Mean Sea Level (m)')
    ax.set_title('Average Ensemble Model Performance Compare to Historical data')
    return fig, pred_1st, pred_2nd, pred_3rd
# Gradio UI: one city dropdown in, one plot plus the three monthly forecasts out.
# NOTE(review): the `gr.inputs` / `gr.outputs` namespaces are a legacy Gradio
# API and may be unavailable in newer Gradio releases -- confirm the pinned
# version before upgrading.
output = gr.outputs.Plot(type="auto")
output1 = gr.outputs.Textbox(type="auto", label="Mean Sea Level Prediction in 2021-01(m)")
output2 = gr.outputs.Textbox(type="auto", label="Mean Sea Level Prediction in 2021-02(m)")
# Third forecast is for March; the label previously repeated "2021-02".
output3 = gr.outputs.Textbox(type="auto", label="Mean Sea Level Prediction in 2021-03(m)")
dropdown = gr.inputs.Dropdown(
    choices=list(df.City.unique()),
    type="value",
    default=None,
    label=None,
    optional=False,
)
app = gr.Interface(
    fn=draw,
    inputs=[dropdown],
    outputs=[output, output1, output2, output3],
    description="This model is only capable of predicting mean sea level in next three month based on historical data from 2000 to 2020",
)
app.launch()