AllStreet / src /model_training.py
kauabarros-24
CHORE: Create simple web page
af171b5
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib
import os
import logging
def load_features_target(horizon):
    """Load the feature matrix and target vector stored for one horizon.

    Reads ``data/features_{horizon}m.csv`` and ``data/target_{horizon}m.csv``
    (paths relative to the current working directory), using the first CSV
    column of each file as the index.

    Args:
        horizon: Value interpolated into the two file names.

    Returns:
        A ``(X, y)`` tuple. ``X`` is the features DataFrame. ``y`` is a
        Series whenever the target file holds a single data column,
        regardless of how many rows it has; a target file with several
        columns is returned as a DataFrame unchanged.

    Raises:
        Whatever ``pandas.read_csv`` raises, e.g. when a file is missing.
    """
    print(f"{horizon} - HORIZON")
    X = pd.read_csv(f"data/features_{horizon}m.csv", index_col=0)
    # Squeeze only the column axis: a bare squeeze() would collapse a
    # one-row, one-column file into a scalar, which has no .iloc / len().
    y = pd.read_csv(f"data/target_{horizon}m.csv", index_col=0).squeeze("columns")
    # Log shapes at debug level instead of dumping the full data to stdout.
    logging.getLogger(__name__).debug(
        "Loaded horizon %s: X shape=%s, y shape=%s", horizon, X.shape, y.shape
    )
    return X, y
def temporal_train_split(X, y, test_size=0.2):
    """Split ``X`` and ``y`` by position into a leading and a trailing part.

    Rows are never shuffled: the first ``int(len(X) * (1 - test_size))`` rows
    form the training part and every remaining row forms the test part.

    Args:
        X: Object supporting ``len()`` and ``.iloc`` slicing (features).
        y: Object supporting ``.iloc`` slicing (target), sliced at the same
            positions as ``X``.
        test_size: Fraction of rows reserved for the trailing (test) part.

    Returns:
        ``(X_train, X_test, y_train, y_test)``, or four ``None`` values when
        ``X`` has no rows.
    """
    total_rows = len(X)
    if not total_rows:
        return (None,) * 4

    # Truncation means any fractional row goes to the test part.
    cutoff = int(total_rows * (1 - test_size))
    features_head, features_tail = X.iloc[:cutoff], X.iloc[cutoff:]
    target_head, target_tail = y.iloc[:cutoff], y.iloc[cutoff:]
    return features_head, features_tail, target_head, target_tail
def _report_test_metrics(model, X_test, y_test, class_labels):
    """Print accuracy and a per-class report for the held-out rows."""
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"Acurácia: {acc:.4f}")

    target_names = ['Queda', 'Sobe']
    if len(class_labels) == len(target_names):
        # Pass labels explicitly: without them classification_report raises
        # ValueError whenever the test slice and predictions contain fewer
        # classes than there are target names.
        report = classification_report(
            y_test,
            y_pred,
            labels=class_labels,
            target_names=target_names
        )
    else:
        # The target is not two-class, so the two display names cannot be
        # mapped onto it; fall back to the raw label values.
        report = classification_report(y_test, y_pred)
    print(report)


def train_model(horizon, model_type='random_forest', save_model=True):
    """Train, evaluate and optionally persist a classifier for one horizon.

    Loads the data with ``load_features_target``, splits it chronologically
    with ``temporal_train_split``, fits the requested model on the leading
    part and prints accuracy plus a classification report for the trailing
    part.

    Args:
        horizon: Horizon identifier forwarded to ``load_features_target`` and
            embedded in the saved file name.
        model_type: ``'logistic'`` selects ``LogisticRegression``; any other
            value selects ``RandomForestClassifier``.
        save_model: When true, the fitted model is dumped with joblib to
            ``models/{model_type}_{horizon}m.pkl``. This also applies when
            there were no test rows to evaluate on.

    Returns:
        The fitted model, or ``None`` when there is no data to train on.
    """
    X, y = load_features_target(horizon)
    print(f"\n========== Horizonte {horizon} ==========")
    print("Shape X:", X.shape)
    print("Shape y:", y.shape)

    if len(X) == 0:
        print("❌ Dataset vazio. Pulando treino.")
        return None

    X_train, X_test, y_train, y_test = temporal_train_split(X, y)
    if X_train is None or len(X_train) == 0:
        print("❌ Sem dados de treino.")
        return None

    if model_type == 'logistic':
        model = LogisticRegression(max_iter=1000, random_state=42)
    else:
        model = RandomForestClassifier(
            n_estimators=100,
            random_state=42
        )

    print("Treinando modelo...")
    model.fit(X_train, y_train)

    if len(X_test) == 0:
        print("⚠️ Sem dados de teste. Modelo treinado mas sem avaliação.")
    else:
        # Labels come from the whole target (sorted by np.unique) so the
        # report keeps both classes even if the test slice has only one.
        _report_test_metrics(model, X_test, y_test, class_labels=np.unique(y))

    if save_model:
        os.makedirs("models", exist_ok=True)
        path = f"models/{model_type}_{horizon}m.pkl"
        joblib.dump(model, path)
        print("Modelo salvo em:", path)

    return model
def train_all_horizons():
    """Train every model family on every configured horizon.

    Runs ``train_model`` for horizons 10, 24, 60 and 120 with the random
    forest first and the logistic regression second, printing a banner before
    each family. Return values of ``train_model`` are discarded.
    """
    horizons = (10, 24, 60, 120)
    # (banner, model_type) pairs, in execution order.
    families = (
        ("\n===== Random Forest =====", 'random_forest'),
        ("\n===== Logistic Regression =====", 'logistic'),
    )
    for banner, model_type in families:
        print(banner)
        for horizon in horizons:
            train_model(horizon, model_type)