| from datetime import datetime, timedelta |
| import pandas as pd |
| import numpy as np |
|
|
| import model_utils as mu |
| from statsmodels.tsa.arima.model import ARIMA |
|
|
def model_run(df_all):
    """Forecast tomorrow's price with ARIMA and back-test the model.

    The input is handed to ``mu.data_transform`` together with tomorrow's
    date. From the transformed frame a date-indexed table of ``prices`` plus
    exogenous regressors is built and rows containing NaN are dropped. An
    ARIMA(2, 1, 2) model with exogenous regressors is fitted on every row
    except the last remaining one, and a one-step forecast is produced using
    the regressors of that last row.

    The model is then evaluated with an expanding-window, one-step-ahead
    back-test over the tail of the same table: the initial training share is
    0.9, 0.8 or 0.7 depending on how many rows the transformed frame has
    (>500, >200, otherwise).

    Side effects: prints a progress message and writes the back-test table
    to ``result_arima_kat.csv`` in the current working directory.

    Args:
        df_all: Raw input passed unchanged to ``mu.data_transform``
            (presumably a DataFrame of historical data -- confirm against
            the caller).

    Returns:
        A tuple ``(df_with_forecast, accuracy, result_arima)`` where
        ``df_with_forecast`` is a copy of the transformed frame whose last
        row has ``prices`` replaced by the forecast, ``accuracy`` is the R^2
        score of the back-test forecasts measured against the actual prices,
        and ``result_arima`` is a DataFrame with columns ``prediction`` and
        ``data`` (actual price) for every back-test step.
    """
    # Local import kept from the original so the module imports without
    # scikit-learn until the function is actually called.
    from sklearn.metrics import r2_score

    arima_order = (2, 1, 2)
    model_columns = [
        'prices', 'price_eth', 'GSPC', 'Day', 'Month',
        'TNX', 'Employment', 'google_trend', 'EURUSD',
    ]

    first_day_future = pd.to_datetime(datetime.now() + timedelta(days=1))
    # The second value returned by data_transform is not used here.
    reframed_lags, _ = mu.data_transform(df_all, first_day_future)
    print('I have transformed the dataset into the frame for supervised learning')

    # Build a date-indexed table of the target and its regressors.
    dates = pd.to_datetime({
        'year': reframed_lags['Year'],
        'month': reframed_lags['Month'],
        'day': reframed_lags['Day'],
    })
    df_with_date = (
        pd.concat([dates.rename('date'), reframed_lags[model_columns]], axis=1)
        .set_index('date')
        .dropna()
    )

    # --- Forecast for the last row, trained on everything before it. ---
    df_past = df_with_date.iloc[:-1]
    df_future = df_with_date.iloc[-1:]
    model_fit = ARIMA(
        df_past['prices'],
        exog=df_past.drop(columns='prices'),
        order=arima_order,
    ).fit()
    forecast = model_fit.forecast(
        steps=1, exog=df_future.drop(columns='prices')
    )
    next_price = np.asarray(forecast)[-1]

    df_with_forecast = reframed_lags.copy()
    # Mask (rather than positional) assignment: every row sharing the last
    # index label receives the forecast, as in the original implementation.
    is_last_label = df_with_forecast.index == df_with_forecast.index[-1]
    df_with_forecast.loc[is_last_label, 'prices'] = next_price

    # --- Expanding-window back-test. ---
    n_transformed = len(reframed_lags)
    if n_transformed > 500:
        train_size = 0.9
    elif n_transformed > 200:
        train_size = 0.8
    else:
        train_size = 0.7

    n_obs = len(df_with_date)
    first_test_pos = int(n_obs * train_size)
    window_length = int(n_obs - n_obs * train_size)
    # NOTE(review): the back-test runs over df_with_date including its last
    # row (the one forecast above); confirm that row holds a real price.

    predictions = []
    actuals = []
    for step in range(window_length):
        cutoff = first_test_pos + step
        train = df_with_date.iloc[:cutoff]
        test = df_with_date.iloc[cutoff:cutoff + 1]

        step_fit = ARIMA(
            train['prices'],
            exog=train.drop(columns='prices'),
            order=arima_order,
        ).fit()
        step_forecast = step_fit.forecast(
            steps=1, exog=test.drop(columns='prices')
        )
        predictions.extend(np.asarray(step_forecast).ravel())
        actuals.extend(test['prices'].to_numpy())

    predictions = np.asarray(predictions, dtype=float)
    actuals = np.asarray(actuals, dtype=float)

    # r2_score expects (y_true, y_pred); R^2 is not symmetric in its
    # arguments, so the actual prices must come first.
    accuracy = r2_score(actuals, predictions)

    result_arima = pd.DataFrame({'prediction': predictions, 'data': actuals})
    result_arima.to_csv('result_arima_kat.csv')
    return df_with_forecast, accuracy, result_arima
|
|