File size: 2,495 Bytes
57789e6 43056e4 651b3a9 af171b5 57789e6 af171b5 57789e6 43056e4 af171b5 43056e4 57789e6 af171b5 43056e4 57789e6 af171b5 57789e6 af171b5 43056e4 57789e6 af171b5 43056e4 af171b5 43056e4 af171b5 43056e4 af171b5 43056e4 af171b5 57789e6 af171b5 43056e4 af171b5 43056e4 af171b5 43056e4 af171b5 43056e4 af171b5 43056e4 af171b5 43056e4 af171b5 43056e4 af171b5 43056e4 af171b5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 | import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib
import os
import logging
def load_features_target(horizon):
    """Load the feature matrix and target vector for one horizon.

    Reads ``data/features_{horizon}m.csv`` and ``data/target_{horizon}m.csv``
    (paths relative to the current working directory), using the first CSV
    column of each file as the index.

    Args:
        horizon: Value interpolated into the two file names.

    Returns:
        A ``(X, y)`` tuple. ``X`` is the features DataFrame. ``y`` is a
        Series when the target file has a single data column; a target file
        with several data columns is returned as a DataFrame unchanged.
    """
    print(f"{horizon} - HORIZON")
    X = pd.read_csv(f"data/features_{horizon}m.csv", index_col=0)
    target_frame = pd.read_csv(f"data/target_{horizon}m.csv", index_col=0)
    # Squeeze only the column axis: a bare ``squeeze()`` would also collapse
    # the row axis and turn a one-row target into a scalar, which has no
    # ``.iloc`` and breaks the downstream split.
    y = target_frame.squeeze("columns")
    print(f"Tô aqui: {X, y}")
    return X, y
def temporal_train_split(X, y, test_size=0.2):
    """Split ``X`` and ``y`` by position into a leading and a trailing part.

    No shuffling is done: the first ``int(len(X) * (1 - test_size))`` rows
    form the training part and the remaining rows form the test part.

    Args:
        X: Object supporting ``len`` and ``.iloc`` slicing.
        y: Object supporting ``.iloc`` slicing, aligned with ``X`` by position.
        test_size: Fraction of rows assigned to the trailing (test) part.

    Returns:
        ``(X_train, X_test, y_train, y_test)``, or four ``None`` values when
        ``X`` is empty.
    """
    total_rows = len(X)
    if not total_rows:
        return (None,) * 4

    cut = int(total_rows * (1 - test_size))
    leading = slice(None, cut)
    trailing = slice(cut, None)
    return X.iloc[leading], X.iloc[trailing], y.iloc[leading], y.iloc[trailing]
def train_model(horizon, model_type='random_forest', save_model=True):
    """Train, evaluate and optionally persist a classifier for one horizon.

    Loads the data with ``load_features_target``, splits it by position with
    ``temporal_train_split`` (default test fraction), fits the selected
    estimator on the leading part, and prints accuracy plus a classification
    report computed on the trailing part.

    Args:
        horizon: Horizon value; selects the input files and is embedded in
            the saved model's file name.
        model_type: ``'logistic'`` selects ``LogisticRegression``; any other
            value selects ``RandomForestClassifier``.
        save_model: When true, the evaluated model is written with joblib to
            ``models/{model_type}_{horizon}m.pkl``.

    Returns:
        The fitted estimator, or ``None`` when there is nothing to train on.
        If the test part is empty the fitted model is returned right after
        training, without evaluation and without being saved.
    """
    X, y = load_features_target(horizon)
    print(f"\n========== Horizonte {horizon} ==========")
    print("Shape X:", X.shape)
    print("Shape y:", y.shape)

    if len(X) == 0:
        print("❌ Dataset vazio. Pulando treino.")
        return None

    X_train, X_test, y_train, y_test = temporal_train_split(X, y)
    if X_train is None or len(X_train) == 0:
        print("❌ Sem dados de treino.")
        return None

    if model_type == 'logistic':
        model = LogisticRegression(max_iter=1000, random_state=42)
    else:
        model = RandomForestClassifier(
            n_estimators=100,
            random_state=42,
        )

    print("Treinando modelo...")
    model.fit(X_train, y_train)

    if len(X_test) == 0:
        print("⚠️ Sem dados de teste. Modelo treinado mas sem avaliação.")
        return model

    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"Acurácia: {acc:.4f}")

    # Without ``labels``, classification_report infers the classes from
    # y_test/y_pred and raises ValueError if their count differs from
    # len(target_names). A chronological test window can easily hold a
    # single class, which would crash here before the model is saved.
    # Pinning the labels to the two classes the model was fitted on keeps the
    # name mapping (lower class -> 'Queda', higher class -> 'Sobe') stable.
    report_options = {}
    if len(model.classes_) == 2:
        report_options = {
            "labels": model.classes_,
            "target_names": ['Queda', 'Sobe'],
        }
    report = classification_report(y_test, y_pred, **report_options)
    print(report)

    if save_model:
        os.makedirs("models", exist_ok=True)
        path = f"models/{model_type}_{horizon}m.pkl"
        joblib.dump(model, path)
        print("Modelo salvo em:", path)

    return model
def train_all_horizons(horizons=(10, 24, 60, 120)):
    """Train both model families for every horizon.

    Runs a Random Forest pass over all horizons first, then a Logistic
    Regression pass, delegating each fit to ``train_model`` with its default
    ``save_model`` setting.

    Args:
        horizons: Iterable of horizon values forwarded to ``train_model``.
            Defaults to the previously hard-coded ``(10, 24, 60, 120)``.
    """
    # Materialise once: the horizons are iterated once per model family, so a
    # one-shot iterator would be exhausted after the first pass.
    horizons = tuple(horizons)

    passes = (
        ("\n===== Random Forest =====", 'random_forest'),
        ("\n===== Logistic Regression =====", 'logistic'),
    )
    for banner, model_type in passes:
        print(banner)
        for h in horizons:
            train_model(h, model_type)