File size: 3,649 Bytes
e59b179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.models import Sequential
from keras.layers import Activation, Dense
from keras.layers import LSTM
from keras.layers import Dropout
from tqdm import tqdm

# Hyperparameters for the LSTM model. NOTE(review): these appear to configure the
# larger three-layer model that is commented out below; the active build_model()
# and train_model() use their own hard-coded values instead — verify which set is intended.
neurons = 512  # number of hidden units in the LSTM layer
activation_function = "tanh"  # activation function for LSTM and Dense layer
loss = (
    "mse"  # loss function for calculating the gradient, in this case Mean Squared Error
)
optimizer = "adam"  # optimizer for applying gradient descent
dropout = 0.25  # dropout ratio used after each LSTM layer to avoid overfitting
batch_size = 128


def preprocess(df):
    """Turn a raw OHLCV frame into a timestamp-indexed, float-typed frame.

    Expects columns ``ts`` (epoch milliseconds), ``low``, ``high``, ``close``,
    ``open`` and ``quoteVol``. Returns a new DataFrame indexed by the parsed
    ``timestamp`` with the five price/volume columns cast to float, plus a
    ``Date`` column holding the calendar date of each row. The input frame is
    not modified.
    """
    frame = df.copy()
    # Epoch milliseconds -> seconds, then parse into datetimes.
    frame["ts"] = frame["ts"].astype(np.int64) / 1000
    frame["timestamp"] = pd.to_datetime(frame["ts"], unit="s")

    price_cols = ["low", "high", "close", "open", "quoteVol"]
    frame = frame[["timestamp"] + price_cols]
    frame[price_cols] = frame[price_cols].astype(float)

    # Index by timestamp (drops the column) and keep the plain date alongside.
    frame = frame.set_index("timestamp")
    frame["Date"] = frame.index.date

    return frame


def normalize(closedf):
    """Min-max scale the price values into [0, 1].

    Drops the ``Date`` column, stacks every remaining value into a single
    column (shape ``(-1, 1)``), and fits a ``MinMaxScaler`` on it.

    Returns a ``(scaled_array, scaler)`` tuple so callers can later invert
    the transform with ``scaler.inverse_transform``.
    """
    values = closedf.drop("Date", axis=1)
    # NOTE(review): reshape(-1, 1) flattens ALL remaining columns into one
    # series before scaling — confirm callers pass a single-column frame.
    flattened = np.array(values).reshape(-1, 1)
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled = scaler.fit_transform(flattened)
    return scaled, scaler


def split_train_test(closedfsc, training_size, test_size):
    """Split a 2-D scaled array sequentially into train and test parts.

    The first ``training_size`` rows (all columns) become the training set;
    everything after that (first column only) becomes the test set.

    NOTE(review): ``test_size`` is accepted but never used — the split point
    is determined solely by ``training_size``. Kept for interface
    compatibility; confirm with callers before removing.
    """
    train_data = closedfsc[:training_size, :]
    test_data = closedfsc[training_size:, :1]
    return train_data, test_data


def create_dataset(dataset, time_step=1):
    """Build supervised (window, next-value) pairs from column 0 of ``dataset``.

    Each sample is a sliding window of ``time_step`` consecutive values and
    its target is the value immediately following the window.

    Returns ``(X, y)`` as numpy arrays with shapes
    ``(n, time_step)`` and ``(n,)`` where ``n = len(dataset) - time_step - 1``.

    NOTE(review): the extra ``- 1`` discards the final available window
    (a common tutorial off-by-one); preserved here to keep output shapes
    identical for existing callers.
    """
    n_samples = len(dataset) - time_step - 1
    windows = [dataset[i : i + time_step, 0] for i in range(n_samples)]
    targets = [dataset[i + time_step, 0] for i in range(n_samples)]
    return np.array(windows), np.array(targets)


# def build_model(inputs):
#     model = Sequential()
#     model.add(
#         LSTM(
#             neurons,
#             return_sequences=True,
#             input_shape=(inputs.shape[1], inputs.shape[2]),
#             activation=activation_function,
#         )
#     )
#     model.add(Dropout(dropout))
#     model.add(LSTM(neurons, return_sequences=True, activation=activation_function))
#     model.add(Dropout(dropout))
#     model.add(LSTM(neurons, activation=activation_function))
#     model.add(Dropout(dropout))
#     model.add(Dense(units=1))
#     model.add(Activation(activation_function))
#     model.compile(loss=loss, optimizer=optimizer, metrics=["mae"])
#     return model


def build_model(units=256, activation="relu", loss="mean_squared_error", optimizer="adam"):
    """Build and compile a single-layer LSTM regressor.

    The network is ``LSTM(units) -> Dense(1)`` and expects inputs shaped
    ``(batch, timesteps, 1)`` — ``input_shape=(None, 1)`` allows variable
    sequence lengths with a single scalar feature per step.

    All hyperparameters are now keyword arguments whose defaults reproduce
    the previous hard-coded values, so existing ``build_model()`` callers
    are unaffected while new callers can reuse the module-level config
    constants (``neurons``, ``loss``, ``optimizer``) if desired.

    Args:
        units: number of hidden units in the LSTM layer.
        activation: activation function for the LSTM layer.
        loss: loss function passed to ``model.compile``.
        optimizer: optimizer passed to ``model.compile``.

    Returns:
        A compiled ``keras.models.Sequential`` model.
    """
    model = Sequential()
    model.add(LSTM(units, input_shape=(None, 1), activation=activation))
    model.add(Dense(1))
    model.compile(loss=loss, optimizer=optimizer)
    return model


def train_model(
    model, x_train, y_train, X_test, y_test, epochs, progress_callback=None, batch_size=32
):
    """Train ``model`` one epoch at a time, recording losses after each epoch.

    Fitting epoch-by-epoch (rather than a single ``fit(epochs=epochs)`` call)
    lets us collect per-epoch train/validation losses and invoke an optional
    progress callback, while ``tqdm`` shows a progress bar.

    Args:
        model: a compiled Keras model (anything exposing ``fit``).
        x_train, y_train: training inputs and targets.
        X_test, y_test: validation inputs and targets.
        epochs: number of epochs to train for.
        progress_callback: optional ``callback(epoch, history)`` invoked after
            every epoch with the 0-based epoch index and the Keras History.
        batch_size: mini-batch size per epoch. Defaults to 32 — the previously
            hard-coded value — so existing callers see identical behavior.
            (NOTE(review): the module-level ``batch_size = 128`` constant was
            never used here; confirm which value is intended.)

    Returns:
        ``(model, train_losses, val_losses)`` where the loss lists each have
        ``epochs`` entries.
    """
    train_losses = []  # per-epoch training loss
    val_losses = []  # per-epoch validation loss
    for epoch in tqdm(range(epochs)):
        history = model.fit(
            x_train,
            y_train,
            epochs=1,
            verbose=0,
            validation_data=(X_test, y_test),
            batch_size=batch_size,
        )
        train_losses.append(history.history["loss"][0])
        val_losses.append(history.history["val_loss"][0])
        if progress_callback:
            progress_callback(epoch, history)
    return model, train_losses, val_losses