P2SAMAPA committed on
Commit
40ed09a
·
unverified ·
1 Parent(s): 7f01f9e

Update models.py

Browse files
Files changed (1) hide show
  1. models.py +109 -157
models.py CHANGED
@@ -1,179 +1,131 @@
1
  """
2
- Model architectures: Transformer, Random Forest, XGBoost
 
 
 
 
 
 
3
  """
4
 
5
  import tensorflow as tf
6
  from tensorflow.keras.models import Model
7
- from tensorflow.keras.layers import Input, Dense, Dropout, LayerNormalization, MultiHeadAttention, GlobalAveragePooling1D
 
 
 
 
8
  from tensorflow.keras.regularizers import l2
9
- from tensorflow.keras.callbacks import EarlyStopping
10
- from sklearn.ensemble import RandomForestClassifier
11
- import xgboost as xgb
12
  import numpy as np
13
 
14
 
15
class PositionalEncoding(tf.keras.layers.Layer):
    """Keras layer adding fixed sinusoidal position encodings to a sequence.

    The encoding table is built lazily in ``build`` from the incoming
    (batch, seq_len, d_model) shape and stored as a non-trainable weight so
    it serializes with the model. ``call`` simply adds the table to the input.
    """

    def __init__(self, max_seq_len=100, **kwargs):
        super().__init__(**kwargs)
        # Kept for get_config round-tripping; the actual table size comes
        # from the input shape seen in build().
        self.max_seq_len = max_seq_len

    def build(self, input_shape):
        steps, depth = input_shape[1], input_shape[2]

        # angles[t, k] = t * exp(-(2k) * ln(10000) / depth)
        pos = np.arange(0, steps, dtype=np.float32).reshape(-1, 1)
        freqs = np.exp(np.arange(0, depth, 2, dtype=np.float32) *
                       -(np.log(10000.0) / depth))
        angles = pos * freqs

        table = np.zeros((steps, depth), dtype=np.float32)
        # Even channels get sin; slice guards the odd-depth case.
        table[:, 0::2] = np.sin(angles)[:, :len(range(0, depth, 2))]
        if depth > 1:
            # Odd channels get cos; one fewer column when depth is odd.
            table[:, 1::2] = np.cos(angles)[:, :len(range(1, depth, 2))]

        self.pos_encoding = self.add_weight(
            name='positional_encoding',
            shape=(1, steps, depth),
            initializer=tf.keras.initializers.Constant(table),
            trainable=False
        )
        super().build(input_shape)

    def call(self, inputs):
        # Broadcasts the (1, steps, depth) table across the batch axis.
        return inputs + self.pos_encoding

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        base = super().get_config()
        base.update({'max_seq_len': self.max_seq_len})
        return base
57
-
58
-
59
def directional_loss(y_true, y_pred):
    """Mean absolute error that doubles the penalty when the predicted
    sign disagrees with the true sign (wrong market direction)."""
    err = tf.abs(y_true - y_pred)
    same_direction = tf.math.sign(y_true) == tf.math.sign(y_pred)
    # Matching sign -> plain |error|; mismatching sign -> 2 * |error|.
    weighted = tf.where(same_direction, err, err * 2.0)
    return tf.reduce_mean(weighted)
65
-
66
-
67
def build_transformer_model(input_shape, num_outputs, num_heads=2, ff_dim=64,
                            num_layers=1, dropout_rate=0.2):
    """Assemble an encoder-only Transformer regressor.

    Args:
        input_shape: (seq_len, num_features) of each input sample.
        num_outputs: width of the final linear output layer.
        num_heads / ff_dim / num_layers / dropout_rate: encoder hyperparameters.

    Returns:
        An uncompiled tf.keras Model.
    """
    seq_input = Input(shape=input_shape)
    hidden = PositionalEncoding()(seq_input)

    for _ in range(num_layers):
        # Self-attention sub-block, residual connection, post-norm.
        attended = MultiHeadAttention(
            num_heads=num_heads,
            key_dim=input_shape[1] // num_heads,
            dropout=dropout_rate
        )(hidden, hidden)
        hidden = LayerNormalization(epsilon=1e-6)(hidden + attended)

        # Position-wise feed-forward, projected back to the feature width.
        ff = Dense(ff_dim, activation='relu', kernel_regularizer=l2(0.01))(hidden)
        ff = Dropout(dropout_rate)(ff)
        ff = Dense(input_shape[1], kernel_regularizer=l2(0.01))(ff)
        hidden = LayerNormalization(epsilon=1e-6)(hidden + ff)

    # Pool over time, then a small regularized head.
    pooled = GlobalAveragePooling1D()(hidden)
    pooled = Dropout(dropout_rate)(pooled)
    pooled = Dense(ff_dim, activation='relu', kernel_regularizer=l2(0.01))(pooled)
    pooled = Dropout(dropout_rate)(pooled)
    outputs = Dense(num_outputs)(pooled)

    return Model(inputs=seq_input, outputs=outputs)
97
-
98
-
99
def train_transformer(X_train, y_train, X_val, y_val, epochs=100):
    """Compile and fit the Transformer on a train/validation split.

    Uses Adam (lr=0.001) with the custom directional loss, tracks MAE, and
    early-stops on val_loss (patience 20, best weights restored).

    Args:
        X_train, X_val: arrays shaped (samples, seq_len, num_features).
        y_train, y_val: 2-D target arrays; y_train.shape[1] sets the output width.
        epochs: maximum number of training epochs.

    Returns:
        (model, history): the fitted model and its Keras History.
    """
    net = build_transformer_model(
        input_shape=(X_train.shape[1], X_train.shape[2]),
        num_outputs=y_train.shape[1]
    )
    net.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss=directional_loss,
        metrics=['mae']
    )

    stopper = EarlyStopping(
        monitor='val_loss',
        patience=20,
        restore_best_weights=True,
        verbose=0
    )

    history = net.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=32,
        callbacks=[stopper],
        verbose=0
    )
    return net, history
129
-
130
-
131
def train_ensemble(X_train, y_train, X_val, y_val):
    """Fit the two tree-based ensemble members on the training split.

    The random forest trains on (X_train, y_train) alone; XGBoost also uses
    the validation split for early stopping (50 rounds, mlogloss).

    Returns:
        (rf_model, xgb_model): the two fitted classifiers.
    """
    forest_params = dict(
        n_estimators=500,
        max_depth=15,
        min_samples_split=10,
        min_samples_leaf=3,
        max_features='sqrt',
        random_state=42,
        n_jobs=-1,
    )
    forest = RandomForestClassifier(**forest_params)
    forest.fit(X_train, y_train)

    booster_params = dict(
        n_estimators=500,
        max_depth=8,
        learning_rate=0.03,
        subsample=0.8,
        colsample_bytree=0.8,
        min_child_weight=3,
        gamma=0.1,
        reg_alpha=0.1,
        reg_lambda=1.0,
        random_state=42,
        n_jobs=-1,
        early_stopping_rounds=50,
        eval_metric='mlogloss',
    )
    booster = xgb.XGBClassifier(**booster_params)
    booster.fit(
        X_train, y_train,
        eval_set=[(X_val, y_val)],
        verbose=False
    )

    return forest, booster
169
-
170
-
171
def predict_ensemble(rf_model, xgb_model, X_test):
    """Predict class labels by equal-weight averaging of both models'
    class-probability outputs, then taking the argmax per sample.

    Args:
        rf_model, xgb_model: fitted classifiers exposing predict_proba().
        X_test: feature matrix passed to both models.

    Returns:
        1-D integer array of predicted class indices.
    """
    mean_probs = (rf_model.predict_proba(X_test)
                  + xgb_model.predict_proba(X_test)) / 2
    return np.argmax(mean_probs, axis=1)
 
1
  """
2
+ Temporal Fusion Transformer (TFT-inspired) in Keras/TensorFlow.
3
+
4
+ Components:
5
+ - Gated Residual Network (GRN)
6
+ - Variable Selection Network (VSN)
7
+ - Multi-head Self-Attention
8
+ - Classification head (cross-entropy, softmax)
9
  """
10
 
11
  import tensorflow as tf
12
  from tensorflow.keras.models import Model
13
+ from tensorflow.keras.layers import (
14
+ Input, Dense, Dropout, LayerNormalization,
15
+ MultiHeadAttention, GlobalAveragePooling1D,
16
+ Multiply, Add
17
+ )
18
  from tensorflow.keras.regularizers import l2
19
+ from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
 
 
20
  import numpy as np
21
 
22
 
23
def gated_residual_network(x, units, dropout_rate=0.1, time_dist=True):
    """Gated Residual Network (GRN) block: Dense -> ELU -> Dropout -> Dense,
    gated GLU-style by a sigmoid computed from the block input, added to a
    (width-matched) skip connection and layer-normalized.

    Args:
        x: input tensor, (batch, time, ch) when time_dist else (batch, ch).
        units: output channel width.
        dropout_rate: dropout applied after the ELU.
        time_dist: wrap Dense layers in TimeDistributed for sequence inputs.
    """
    skip = x

    def wrap(layer):
        # Apply the layer per time step when operating on sequences.
        return tf.keras.layers.TimeDistributed(layer) if time_dist else layer

    hidden = wrap(Dense(units, kernel_regularizer=l2(1e-4)))(x)
    hidden = tf.keras.layers.ELU()(hidden)
    hidden = Dropout(dropout_rate)(hidden)
    hidden = wrap(Dense(units, kernel_regularizer=l2(1e-4)))(hidden)

    # GLU-style gate derived from the raw block input.
    gate = wrap(Dense(units, activation='sigmoid'))(x)
    hidden = Multiply()([hidden, gate])

    # Project the skip path only when the widths disagree.
    if skip.shape[-1] != units:
        skip = wrap(Dense(units, use_bias=False))(skip)

    return LayerNormalization(epsilon=1e-6)(Add()([hidden, skip]))
37
+
38
+
39
def variable_selection_network(inputs, num_features, units, dropout_rate=0.1):
    """Variable Selection Network (VSN): encode each input feature with its
    own GRN, then blend the encodings with per-time-step softmax weights.

    Args:
        inputs: (batch, time, num_features) tensor.
        num_features: number of scalar input series (last axis of inputs).
        units: channel width of each per-feature encoding and the output.

    Returns:
        (batch, time, units) tensor — the weighted sum over features.
    """
    # One GRN per scalar series; Lambda slices feature idx to (batch, time, 1).
    encoded_features = [
        gated_residual_network(
            tf.keras.layers.Lambda(
                lambda t, idx=i: tf.expand_dims(t[:, :, idx], axis=-1)
            )(inputs),
            units, dropout_rate, time_dist=True
        )
        for i in range(num_features)
    ]
    encoded = tf.stack(encoded_features, axis=2)  # (batch, time, feature, units)

    # Softmax selection weights over the feature axis, per time step.
    weights = gated_residual_network(inputs, num_features, dropout_rate, time_dist=True)
    weights = tf.keras.layers.TimeDistributed(
        Dense(num_features, activation='softmax')
    )(weights)

    # Broadcast weights over the channel axis and reduce out the feature axis.
    return tf.reduce_sum(encoded * tf.expand_dims(weights, axis=-1), axis=2)
53
+
54
+
55
def build_tft_model(seq_len, num_features, num_outputs,
                    d_model=64, num_heads=4, num_layers=2,
                    dropout_rate=0.15, ff_mult=2):
    """Assemble the TFT-style sequence classifier.

    Pipeline: variable selection -> temporal GRN -> learned positional
    embeddings -> num_layers x (self-attention + GRN feed-forward, post-norm)
    -> global average pooling -> GRN head -> softmax over num_outputs classes.

    Returns:
        An uncompiled tf.keras Model named 'TFT_ETF_Classifier'.
    """
    # Round the model width down so it splits evenly across heads.
    d_model = (d_model // num_heads) * num_heads

    inputs = Input(shape=(seq_len, num_features), name='seq_input')

    # Feature selection followed by a shared temporal GRN.
    net = variable_selection_network(inputs, num_features, d_model, dropout_rate)
    net = gated_residual_network(net, d_model, dropout_rate, time_dist=True)

    # Learned positional embeddings, broadcast across the batch axis.
    step_ids = tf.range(start=0, limit=seq_len, delta=1)
    pos = tf.keras.layers.Embedding(
        input_dim=seq_len, output_dim=d_model, name='pos_emb'
    )(step_ids)
    net = net + tf.expand_dims(pos, axis=0)

    # Encoder stack: post-norm attention + two-stage GRN feed-forward.
    for i in range(num_layers):
        attended = MultiHeadAttention(
            num_heads=num_heads, key_dim=d_model // num_heads,
            dropout=dropout_rate, name=f'attn_{i}'
        )(net, net)
        net = LayerNormalization(epsilon=1e-6)(net + attended)

        ff = gated_residual_network(net, d_model * ff_mult, dropout_rate, time_dist=True)
        ff = gated_residual_network(ff, d_model, dropout_rate, time_dist=True)
        net = LayerNormalization(epsilon=1e-6)(net + ff)

    # Pool over time and classify.
    net = GlobalAveragePooling1D()(net)
    net = Dropout(dropout_rate)(net)
    net = gated_residual_network(net, d_model, dropout_rate, time_dist=False)
    net = Dropout(dropout_rate)(net)
    outputs = Dense(num_outputs, activation='softmax', name='etf_probs')(net)

    return Model(inputs=inputs, outputs=outputs, name='TFT_ETF_Classifier')
88
+
89
+
90
def train_tft(X_train, y_train, X_val, y_val, epochs=200,
              d_model=64, num_heads=4, num_layers=2, dropout_rate=0.15):
    """
    Train the TFT classifier.

    Args:
        X_train, X_val: float arrays of shape (samples, seq_len, num_features).
        y_train, y_val: integer class labels (e.g. 0-4, argmax of 5-day fwd returns).
        epochs: maximum epochs; early stopping usually ends training sooner.
        d_model, num_heads, num_layers, dropout_rate: forwarded to build_tft_model.

    Returns:
        (model, history): the trained Keras model and its fit() History.

    Notes:
        Loss is sparse_categorical_crossentropy; the learning rate follows
        cosine decay with warm restarts. ReduceLROnPlateau was removed: Keras
        callbacks that read/write ``optimizer.lr`` as a float are incompatible
        with an optimizer driven by a LearningRateSchedule object and raise a
        TypeError at the first plateau. The schedule already anneals and
        periodically restarts the LR, so no plateau callback is needed.
    """
    seq_len = X_train.shape[1]
    num_features = X_train.shape[2]
    # Infer the class count from the labels (assumes labels are 0..K-1).
    num_outputs = int(np.max(y_train)) + 1

    model = build_tft_model(
        seq_len=seq_len, num_features=num_features, num_outputs=num_outputs,
        d_model=d_model, num_heads=num_heads, num_layers=num_layers,
        dropout_rate=dropout_rate
    )

    # Cosine decay with warm restarts handles LR annealing over training.
    lr_schedule = tf.keras.optimizers.schedules.CosineDecayRestarts(
        initial_learning_rate=5e-4, first_decay_steps=500,
        t_mul=2.0, m_mul=0.9, alpha=1e-5
    )
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    # NOTE: do not add ReduceLROnPlateau here — it cannot override a
    # LearningRateSchedule-based optimizer (see docstring).
    callbacks = [
        EarlyStopping(monitor='val_accuracy', patience=30,
                      restore_best_weights=True, mode='max', verbose=1)
    ]

    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=epochs, batch_size=64,
        callbacks=callbacks, verbose=1, shuffle=True
    )
    return model, history