import logging
import os

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report


def load_features_target(horizon):
    """Load the feature matrix and target for one prediction horizon.

    Reads ``data/features_{horizon}m.csv`` and ``data/target_{horizon}m.csv``,
    using the first CSV column as the index in both cases.

    Args:
        horizon: Value interpolated into the two file names.

    Returns:
        A ``(X, y)`` tuple. ``X`` is the features DataFrame. ``y`` is the
        target with its column axis squeezed, i.e. a Series when the target
        file holds a single data column.
    """
    print(f"{horizon} - HORIZON")
    X = pd.read_csv(f"data/features_{horizon}m.csv", index_col=0)
    # Squeeze only the column axis: a bare ``squeeze()`` would collapse a
    # one-row target into a scalar, which has no ``.iloc`` and breaks the
    # positional split performed later.
    y = pd.read_csv(
        f"data/target_{horizon}m.csv", index_col=0
    ).squeeze("columns")
    # NOTE(review): this looks like leftover debug output; it prints the repr
    # of both objects. Kept so the script's output is unchanged.
    print(f"Tô aqui: {X, y}")
    return X, y


def temporal_train_split(X, y, test_size=0.2):
    """Split ``X`` and ``y`` positionally, without shuffling.

    The first ``int(n * (1 - test_size))`` rows form the training part and
    the remaining rows form the test part, so the original row order is kept.

    Args:
        X: Object supporting ``len`` and ``.iloc`` slicing.
        y: Object supporting ``.iloc`` slicing, aligned row-wise with ``X``.
        test_size: Fraction of rows assigned to the test part.

    Returns:
        ``(X_train, X_test, y_train, y_test)``, or four ``None`` values when
        ``X`` has no rows.
    """
    n = len(X)
    if n == 0:
        return None, None, None, None

    split_idx = int(n * (1 - test_size))
    train_rows = slice(None, split_idx)
    test_rows = slice(split_idx, None)
    return (
        X.iloc[train_rows],
        X.iloc[test_rows],
        y.iloc[train_rows],
        y.iloc[test_rows],
    )


def train_model(horizon, model_type='random_forest', save_model=True):
    """Train, evaluate and optionally persist a classifier for one horizon.

    Args:
        horizon: Horizon passed to ``load_features_target`` and used in the
            saved model's file name.
        model_type: ``'logistic'`` selects ``LogisticRegression``; any other
            value selects ``RandomForestClassifier``.
        save_model: When true, the fitted model is written with
            ``joblib.dump`` to ``models/{model_type}_{horizon}m.pkl`` after a
            successful evaluation.

    Returns:
        The fitted model, or ``None`` when the dataset is empty or the
        training part is empty.
    """
    X, y = load_features_target(horizon)

    print(f"\n========== Horizonte {horizon} ==========")
    print("Shape X:", X.shape)
    print("Shape y:", y.shape)

    if len(X) == 0:
        print("❌ Dataset vazio. Pulando treino.")
        return None

    X_train, X_test, y_train, y_test = temporal_train_split(X, y)

    if X_train is None or len(X_train) == 0:
        print("❌ Sem dados de treino.")
        return None

    if model_type == 'logistic':
        model = LogisticRegression(max_iter=1000, random_state=42)
    else:
        model = RandomForestClassifier(
            n_estimators=100,
            random_state=42
        )

    print("Treinando modelo...")
    model.fit(X_train, y_train)

    if len(X_test) == 0:
        # NOTE(review): this early return also skips saving, even when
        # save_model is true. Behavior kept as in the original.
        print("⚠️ Sem dados de teste. Modelo treinado mas sem avaliação.")
        return model

    y_pred = model.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    print(f"Acurácia: {acc:.4f}")

    # Pass the label set explicitly. Without ``labels``, classification_report
    # infers the labels from y_test/y_pred and raises ValueError when their
    # count differs from ``target_names`` (e.g. the test tail holds a single
    # class). The union covers classes seen in training and in the test part.
    labels = np.union1d(model.classes_, y_test)
    # The two display names only make sense for exactly two labels; they are
    # assigned in sorted-label order, as in the original call.
    target_names = ['Queda', 'Sobe'] if len(labels) == 2 else None
    report = classification_report(
        y_test,
        y_pred,
        labels=labels,
        target_names=target_names
    )
    print(report)

    if save_model:
        os.makedirs("models", exist_ok=True)
        path = f"models/{model_type}_{horizon}m.pkl"
        joblib.dump(model, path)
        print("Modelo salvo em:", path)

    return model


def train_all_horizons():
    """Train both model types for every horizon in the fixed horizon list.

    Runs ``train_model`` with ``'random_forest'`` for all horizons first,
    then with ``'logistic'`` for all horizons.
    """
    horizons = [10, 24, 60, 120]

    print("\n===== Random Forest =====")
    for h in horizons:
        train_model(h, 'random_forest')

    print("\n===== Logistic Regression =====")
    for h in horizons:
        train_model(h, 'logistic')