| import pandas as pd | |
| import numpy as np | |
| from sklearn.linear_model import LogisticRegression | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.metrics import accuracy_score, classification_report | |
| import joblib | |
| import os | |
| import logging | |
def load_features_target(horizon):
    """Load the feature matrix and the target for one prediction horizon.

    Reads ``data/features_{horizon}m.csv`` and ``data/target_{horizon}m.csv``
    (paths relative to the current working directory), using the first CSV
    column as the index in both cases.

    Args:
        horizon: Value interpolated into the two file names.

    Returns:
        A ``(X, y)`` tuple. ``X`` is the features DataFrame. ``y`` is a
        Series when the target file has exactly one data column; otherwise
        it is the DataFrame as read.

    Raises:
        FileNotFoundError: If either CSV file does not exist.
    """
    print(f"{horizon} - HORIZON")
    X = pd.read_csv(f"data/features_{horizon}m.csv", index_col=0)
    # Squeeze only the column axis. A bare ``squeeze()`` collapses every
    # length-1 axis, so a one-row target file would come back as a scalar
    # instead of a Series and break callers that slice or measure it.
    y = pd.read_csv(f"data/target_{horizon}m.csv", index_col=0).squeeze("columns")
    print(f"Tô aqui: {X, y}")
    return X, y
def temporal_train_split(X, y, test_size=0.2):
    """Split ``X`` and ``y`` by position, without shuffling.

    The leading ``int(len(X) * (1 - test_size))`` rows become the training
    part and the remaining rows become the test part. The same cut point is
    applied to both ``X`` and ``y``.
    NOTE(review): presumably rows are already in chronological order;
    nothing here sorts them.

    Args:
        X: Object supporting ``len()`` and ``.iloc`` slicing.
        y: Object supporting ``.iloc`` slicing, aligned with ``X``.
        test_size: Fraction of rows reserved for the test part.

    Returns:
        ``(X_train, X_test, y_train, y_test)``, or four ``None`` values when
        ``X`` is empty.
    """
    total = len(X)
    if not total:
        return (None,) * 4

    cut = int(total * (1 - test_size))
    head = slice(None, cut)
    tail = slice(cut, None)
    return X.iloc[head], X.iloc[tail], y.iloc[head], y.iloc[tail]
def train_model(horizon, model_type='random_forest', save_model=True):
    """Train, evaluate and optionally persist a classifier for one horizon.

    Loads the data through ``load_features_target``, splits it with
    ``temporal_train_split``, fits the requested model, prints accuracy and
    a classification report for the test part, and dumps the fitted model
    with joblib.

    Args:
        horizon: Horizon identifier passed to ``load_features_target`` and
            used in the saved file name.
        model_type: ``'logistic'`` selects ``LogisticRegression``; any other
            value selects ``RandomForestClassifier``. The value is also used
            verbatim in the saved file name.
        save_model: When true, write the fitted model to
            ``models/{model_type}_{horizon}m.pkl`` (directory created if
            missing).

    Returns:
        The fitted model, or ``None`` when the dataset or the training part
        is empty.
    """
    X, y = load_features_target(horizon)
    print(f"\n========== Horizonte {horizon} ==========")
    print("Shape X:", X.shape)
    print("Shape y:", y.shape)

    if len(X) == 0:
        print("❌ Dataset vazio. Pulando treino.")
        return None

    X_train, X_test, y_train, y_test = temporal_train_split(X, y)
    if X_train is None or len(X_train) == 0:
        print("❌ Sem dados de treino.")
        return None

    if model_type == 'logistic':
        model = LogisticRegression(max_iter=1000, random_state=42)
    else:
        model = RandomForestClassifier(
            n_estimators=100,
            random_state=42
        )

    print("Treinando modelo...")
    model.fit(X_train, y_train)

    if len(X_test) == 0:
        # No early return here: an unevaluated model must still honour
        # ``save_model`` below.
        print("⚠️ Sem dados de teste. Modelo treinado mas sem avaliação.")
    else:
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        print(f"Acurácia: {acc:.4f}")

        classes = model.classes_
        if len(classes) == 2:
            # Pin the label set to the classes seen in training. Without
            # ``labels``, a test window holding a single class makes
            # ``target_names`` mismatch the inferred labels and raises
            # ValueError.
            report = classification_report(
                y_test,
                y_pred,
                labels=classes,
                target_names=['Queda', 'Sobe']
            )
        else:
            # Not a two-class model: the two display names do not apply,
            # so let scikit-learn label the rows itself.
            report = classification_report(y_test, y_pred)
        print(report)

    if save_model:
        os.makedirs("models", exist_ok=True)
        path = f"models/{model_type}_{horizon}m.pkl"
        joblib.dump(model, path)
        print("Modelo salvo em:", path)

    return model
def train_all_horizons(horizons=(10, 24, 60, 120)):
    """Train a random forest and a logistic regression for each horizon.

    Runs ``train_model`` for every horizon with ``'random_forest'`` first,
    then again for every horizon with ``'logistic'``, printing a header
    before each pass.

    Args:
        horizons: Iterable of horizon identifiers to train. Defaults to the
            original fixed set ``(10, 24, 60, 120)``.

    Returns:
        None.
    """
    # Materialise once so a one-shot iterable (e.g. a generator) is still
    # available for the second pass.
    horizons = tuple(horizons)

    print("\n===== Random Forest =====")
    for h in horizons:
        train_model(h, 'random_forest')

    print("\n===== Logistic Regression =====")
    for h in horizons:
        train_model(h, 'logistic')