|
|
|
|
|
|
|
|
import os |
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
import tensorflow as tf |
|
|
from sklearn.model_selection import train_test_split |
|
|
from sklearn.preprocessing import MinMaxScaler |
|
|
import joblib |
|
|
import matplotlib.pyplot as plt |
|
|
from datetime import timezone |
|
|
|
|
|
|
|
|
from config import ( |
|
|
NUM_FEATURES, SYMBOL, TIMEFRAME, DAYS_OF_DATA_TO_FETCH, LIMIT_PER_FETCH, |
|
|
WINDOW_SIZE, BASE_FEATURE_COLS, |
|
|
PREDICTION_HORIZON, PRICE_CHANGE_THRESHOLD, |
|
|
EPOCHS, BATCH_SIZE, |
|
|
MODEL_SAVE_DIR, MODEL_NAME, |
|
|
PRICE_VOL_SCALER_NAME, INDICATOR_SCALER_NAME, PRICE_VOL_SCALER_NAME, INDICATOR_SCALER_NAME, |
|
|
EXPECTED_SCALED_FEATURES_FOR_MODEL, EXPECTED_FEATURES_ORDER |
|
|
|
|
|
) |
|
|
|
|
|
|
|
|
from data_handler import fetch_ohlcv_data_ccxt, calculate_technical_indicators, calculate_targets, create_sequences |
|
|
from model_builder import build_lstm_model |
|
|
|
|
|
|
|
|
from sklearn.metrics import classification_report, confusion_matrix |
|
|
from sklearn.utils.class_weight import compute_class_weight |
|
|
|
|
|
|
|
|
def main():
    """Train and evaluate the LSTM rise/no-rise classifier end-to-end.

    Pipeline:
      1. Fetch OHLCV data via CCXT and derive technical indicators + targets.
      2. Fit and persist the two MinMaxScalers the serving API loads
         (ATR-normalised price/volume features vs. the other indicators).
      3. Build scaled, windowed sequences and a stratified train/test split.
      4. Train with early stopping, LR reduction and manual class weights.
      5. Evaluate (classification report, confusion matrix, threshold sweep),
         save the model and diagnostic plots under MODEL_SAVE_DIR.

    Returns early (with a printed message) on any empty intermediate
    DataFrame or missing expected column; raises nothing to the caller.
    """
    print("Iniciando script de treinamento da RNN...")
    os.makedirs(MODEL_SAVE_DIR, exist_ok=True)

    # --- 1. Data acquisition and feature engineering -----------------------
    try:
        ohlcv_df_raw = fetch_ohlcv_data_ccxt(SYMBOL, TIMEFRAME, DAYS_OF_DATA_TO_FETCH, LIMIT_PER_FETCH)
    except Exception as e:
        print(f"Falha ao buscar dados com CCXT: {e}")
        return

    if ohlcv_df_raw.empty:
        print("DataFrame de dados raw está vazio.")
        return

    ohlcv_df_with_ta = calculate_technical_indicators(ohlcv_df_raw)
    if ohlcv_df_with_ta.empty:
        print("DataFrame vazio após cálculo de indicadores.")
        return

    ohlcv_df_final_features = calculate_targets(ohlcv_df_with_ta, PREDICTION_HORIZON, PRICE_CHANGE_THRESHOLD)
    if ohlcv_df_final_features.empty:
        print("DataFrame vazio após cálculo de alvos.")
        return

    # --- 2. Sanity check: every configured base feature must be present ----
    missing_base_cols = [col for col in BASE_FEATURE_COLS if col not in ohlcv_df_final_features.columns]
    if missing_base_cols:
        print(f"ERRO FATAL: Colunas de BASE_FEATURE_COLS ({missing_base_cols}) não encontradas em ohlcv_df_final_features.")
        return

    # --- 3. Fit and persist the two scalers the API will load --------------
    # NOTE: fitted exactly once here. (A previous revision fitted and dumped
    # these files twice with two different column splits; the second dump
    # silently overwrote the first.)
    print("Preparando, fitando e salvando scalers...")

    # ATR-normalised price/volume features get their own scaler; every other
    # column in BASE_FEATURE_COLS goes through the indicator scaler. This
    # split must match what the serving API expects when it loads the files.
    price_vol_atr_norm_cols = [
        'open_div_atr', 'high_div_atr', 'low_div_atr', 'close_div_atr',
        'volume_div_atr', 'body_size_norm_atr'
    ]
    price_vol_atr_norm_cols = [col for col in price_vol_atr_norm_cols if col in BASE_FEATURE_COLS]
    other_indicator_cols = [col for col in BASE_FEATURE_COLS if col not in price_vol_atr_norm_cols]

    df_for_pv_scaler = ohlcv_df_final_features[price_vol_atr_norm_cols].copy()
    df_for_ind_scaler = ohlcv_df_final_features[other_indicator_cols].copy()

    if not df_for_pv_scaler.empty:
        pv_atr_scaler = MinMaxScaler()
        pv_atr_scaler.fit(df_for_pv_scaler)
        joblib.dump(pv_atr_scaler, os.path.join(MODEL_SAVE_DIR, PRICE_VOL_SCALER_NAME))
        print(f"Scaler de Preço/Volume (ATR Norm) (API: {price_vol_atr_norm_cols}) salvo.")
        scaled_pv_data = pv_atr_scaler.transform(df_for_pv_scaler)
    else:
        print(f"AVISO: Sem dados para o scaler de Preço/Volume (ATR Norm) ({price_vol_atr_norm_cols}).")
        scaled_pv_data = pd.DataFrame()

    if not df_for_ind_scaler.empty:
        other_ind_scaler = MinMaxScaler()
        other_ind_scaler.fit(df_for_ind_scaler)
        joblib.dump(other_ind_scaler, os.path.join(MODEL_SAVE_DIR, INDICATOR_SCALER_NAME))
        print(f"Scaler de Outros Indicadores (API: {other_indicator_cols}) salvo.")
        scaled_other_ind_data = other_ind_scaler.transform(df_for_ind_scaler)
    else:
        print(f"AVISO: Sem dados para o scaler de Outros Indicadores ({other_indicator_cols}).")
        scaled_other_ind_data = pd.DataFrame()

    # --- 4. Assemble the scaled feature frame in the model's column order --
    df_scaled_for_sequences = pd.DataFrame(index=ohlcv_df_final_features.index)
    if not df_for_pv_scaler.empty:
        for i, col_name in enumerate(price_vol_atr_norm_cols):
            df_scaled_for_sequences[f"{col_name}_scaled"] = scaled_pv_data[:, i]
    if not df_for_ind_scaler.empty:
        for i, col_name in enumerate(other_indicator_cols):
            df_scaled_for_sequences[f"{col_name}_scaled"] = scaled_other_ind_data[:, i]

    missing_scaled_cols = [col for col in EXPECTED_SCALED_FEATURES_FOR_MODEL if col not in df_scaled_for_sequences.columns]
    if missing_scaled_cols:
        print(f"ERRO FATAL: Colunas escaladas esperadas ({missing_scaled_cols}) não foram criadas para as sequências.")
        print(f"Colunas escaladas disponíveis: {df_scaled_for_sequences.columns.tolist()}")
        return

    # Enforce the exact feature order the model was built for.
    df_scaled_for_sequences = df_scaled_for_sequences[EXPECTED_SCALED_FEATURES_FOR_MODEL]

    df_for_sequences = df_scaled_for_sequences.join(ohlcv_df_final_features[['target']])
    df_for_sequences.dropna(inplace=True)
    if df_for_sequences.empty:
        print("DataFrame para sequências vazio após escalonamento/join.")
        return

    # --- 5. Windowing and stratified train/test split -----------------------
    X, y = create_sequences(df_for_sequences, "target", WINDOW_SIZE, EXPECTED_SCALED_FEATURES_FOR_MODEL)
    if X.shape[0] == 0:
        print("Nenhuma sequência criada.")
        return

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
    print(f"Treino: {X_train.shape[0]} amostras, Teste: {X_test.shape[0]} amostras.")

    # --- 6. Build and train the model ---------------------------------------
    print("Construindo modelo LSTM...")
    model_input_shape = (WINDOW_SIZE, NUM_FEATURES)
    model = build_lstm_model(model_input_shape)

    print("Iniciando treinamento do modelo...")
    reduce_lr_cb = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=7,
        min_lr=1e-7,
        verbose=1
    )
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=25,
        restore_best_weights=True
    )
    callbacks_list = [early_stopping_cb, reduce_lr_cb]

    unique_classes, counts = np.unique(y_train, return_counts=True)
    print(f"Distribuição das classes no treino: {dict(zip(unique_classes, counts))}")

    # Manually tuned weights boosting the minority "Rise" class. This was the
    # value that took effect in the previous revision (it overrode both a
    # hardcoded map and a compute_class_weight('balanced') result).
    class_weights_map = {0: 0.5, 1: 3.5}
    print(f"Pesos de Classe: {class_weights_map}")

    history = model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE,
                        validation_split=0.1,
                        callbacks=callbacks_list, class_weight=class_weights_map, verbose=1)

    # --- 7. Evaluation -------------------------------------------------------
    print("Avaliando modelo treinado no conjunto de teste...")
    loss, accuracy_keras = model.evaluate(X_test, y_test, verbose=0)
    print(f"Perda no Teste (Keras): {loss:.4f}")
    print(f"Acurácia no Teste (Keras, thr=0.5): {accuracy_keras:.4f}")

    # Predict once and reuse the probabilities for the 0.65-threshold report
    # and the threshold sweep below.
    y_pred_probs = model.predict(X_test)
    y_pred_classes = (y_pred_probs > 0.65).astype(int)

    print("\nRelatório de Classificação no Conjunto de Teste:")
    print(classification_report(y_test, y_pred_classes, target_names=['No Rise (0)', 'Rise (1)']))

    print("\nMatriz de Confusão no Conjunto de Teste:")
    cm = confusion_matrix(y_test, y_pred_classes)
    print(cm)

    import seaborn as sns
    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=['No Rise', 'Rise'], yticklabels=['No Rise', 'Rise'])
    plt.xlabel('Predito')
    plt.ylabel('Verdadeiro')
    plt.title('Matriz de Confusão')
    plt.savefig(os.path.join(MODEL_SAVE_DIR, "confusion_matrix.png"))

    # --- 8. Persist the trained model (single save) --------------------------
    print("Salvando modelo e scalers...")
    model_save_path = os.path.join(MODEL_SAVE_DIR, MODEL_NAME)
    model.save(model_save_path)
    print(f"Modelo TREINADO salvo em: {model_save_path}")

    # --- 9. Training-history plots -------------------------------------------
    plt.figure(figsize=(12, 6))
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='Acurácia Treino')
    plt.plot(history.history['val_accuracy'], label='Acurácia Validação')
    plt.title('Acurácia do Modelo')
    plt.xlabel('Época')
    plt.ylabel('Acurácia')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='Perda Treino')
    plt.plot(history.history['val_loss'], label='Perda Validação')
    plt.title('Perda do Modelo')
    plt.xlabel('Época')
    plt.ylabel('Perda')
    plt.legend()

    plot_path = os.path.join(MODEL_SAVE_DIR, "training_history.png")
    plt.savefig(plot_path)
    print(f"Gráfico do histórico de treinamento salvo em: {plot_path}")
    plt.show()

    # --- 10. Threshold sweep on the test set ---------------------------------
    print("\nAnálise com diferentes thresholds de predição no conjunto de teste:")
    thresholds_to_test = [0.50, 0.55, 0.60, 0.65, 0.70, 0.75]
    for thresh in thresholds_to_test:
        print(f"\n--- Resultados com Threshold: {thresh:.2f} ---")
        y_pred_classes = (y_pred_probs > thresh).astype(int)
        print(classification_report(y_test, y_pred_classes, target_names=['No Rise (0)', 'Rise (1)'], zero_division=0))
        print("Matriz de Confusão:")
        print(confusion_matrix(y_test, y_pred_classes))

    print("Script de treinamento concluído.")
|
|
|
|
|
|
|
|
|
|
|
def _configure_gpus():
    """Enable memory growth on every visible GPU.

    Must run BEFORE any TensorFlow op initializes the GPUs, otherwise
    set_memory_growth raises RuntimeError (which is caught and printed).
    The previous revision ran this AFTER main(), i.e. after training had
    already initialized the devices, making it a no-op at best.
    """
    print("Iniciando configuração de GPU")
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
        except RuntimeError as e:
            # Memory growth must be set before GPUs have been initialized.
            print(e)


if __name__ == "__main__":
    _configure_gpus()
    main()