File size: 2,495 Bytes
57789e6
 
 
 
 
43056e4
651b3a9
af171b5
 
 
57789e6
 
af171b5
57789e6
43056e4
af171b5
43056e4
57789e6
af171b5
43056e4
57789e6
af171b5
 
 
 
57789e6
af171b5
 
 
 
 
 
 
43056e4
57789e6
af171b5
43056e4
af171b5
43056e4
af171b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43056e4
 
 
af171b5
 
 
 
 
 
 
43056e4
af171b5
 
 
 
 
57789e6
af171b5
43056e4
af171b5
43056e4
af171b5
 
 
 
 
 
 
43056e4
af171b5
43056e4
af171b5
 
 
 
 
 
 
 
 
 
43056e4
 
af171b5
43056e4
af171b5
 
 
43056e4
 
af171b5
 
 
43056e4
af171b5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib
import os
import logging



def load_features_target(horizon):
    """Load the feature table and target values for one horizon.

    Reads ``data/features_{horizon}m.csv`` and ``data/target_{horizon}m.csv``
    (paths relative to the current working directory), using the first CSV
    column as the index in both files.

    Args:
        horizon: Value interpolated into the two file names.

    Returns:
        A ``(X, y)`` tuple. ``X`` is the features DataFrame. ``y`` is the
        target table with its column axis squeezed, i.e. a Series whenever
        the target file holds a single data column.
    """
    print(f"{horizon} - HORIZON")
    X = pd.read_csv(f"data/features_{horizon}m.csv", index_col=0)
    # Squeeze the column axis only: a bare .squeeze() would also collapse a
    # one-row file into a scalar, which has no .shape / .iloc for callers.
    y = pd.read_csv(f"data/target_{horizon}m.csv", index_col=0).squeeze("columns")
    # NOTE(review): this dumps both full objects to stdout and looks like a
    # leftover debug trace -- confirm whether it is still wanted.
    print(f"Tô aqui: {X, y}")
    return X, y


def temporal_train_split(X, y, test_size=0.2):
    """Split ``X`` and ``y`` positionally into a leading and a trailing part.

    No shuffling is done: the first rows become the training part and the
    remaining rows become the test part, so row order is preserved.

    Args:
        X: Object supporting ``len()`` and ``.iloc`` slicing.
        y: Object supporting ``.iloc`` slicing, sliced at the same position
            as ``X``.
        test_size: Fraction of the rows placed in the trailing (test) part.
            Must lie in ``[0, 1]``.

    Returns:
        ``(X_train, X_test, y_train, y_test)``, or four ``None`` values when
        ``X`` is empty.

    Raises:
        ValueError: If ``test_size`` is outside ``[0, 1]``.
    """
    # Out-of-range fractions would produce a negative or oversized split
    # index and therefore nonsensical slices.
    if not 0 <= test_size <= 1:
        raise ValueError(
            f"test_size must be between 0 and 1, got {test_size!r}"
        )

    n = len(X)

    if n == 0:
        return None, None, None, None

    # Round away binary floating-point noise before truncating; otherwise
    # e.g. 10 * (1 - 0.9) == 0.9999999999999998 truncates to 0 training rows.
    split_idx = int(round(n * (1 - test_size), 9))

    X_train = X.iloc[:split_idx]
    X_test = X.iloc[split_idx:]

    y_train = y.iloc[:split_idx]
    y_test = y.iloc[split_idx:]

    return X_train, X_test, y_train, y_test


def train_model(horizon, model_type='random_forest', save_model=True):
    """Train, evaluate and optionally persist a classifier for one horizon.

    The data comes from ``load_features_target(horizon)`` and is divided by
    ``temporal_train_split`` with its default test fraction. Progress and
    metrics are written to stdout.

    Args:
        horizon: Horizon identifier forwarded to the loader and used in the
            saved file name.
        model_type: ``'logistic'`` selects ``LogisticRegression``; any other
            value selects ``RandomForestClassifier``.
        save_model: When true, the fitted model is written with ``joblib`` to
            ``models/{model_type}_{horizon}m.pkl``.

    Returns:
        The fitted model, or ``None`` when there is no data to train on.
    """
    X, y = load_features_target(horizon)

    print(f"\n========== Horizonte {horizon} ==========")
    print("Shape X:", X.shape)
    print("Shape y:", y.shape)

    if len(X) == 0:
        print("❌ Dataset vazio. Pulando treino.")
        return None

    X_train, X_test, y_train, y_test = temporal_train_split(X, y)

    if X_train is None or len(X_train) == 0:
        print("❌ Sem dados de treino.")
        return None

    if model_type == 'logistic':
        model = LogisticRegression(max_iter=1000, random_state=42)
    else:
        # Every other model_type value falls back to the random forest.
        model = RandomForestClassifier(
            n_estimators=100,
            random_state=42
        )

    print("Treinando modelo...")

    model.fit(X_train, y_train)

    if len(X_test) == 0:
        # Evaluation is skipped, but the model is still saved below: an empty
        # test split must not silently discard a model the caller asked to keep.
        print("⚠️ Sem dados de teste. Modelo treinado mas sem avaliação.")
    else:
        y_pred = model.predict(X_test)

        acc = accuracy_score(y_test, y_pred)

        print(f"Acurácia: {acc:.4f}")

        class_names = ['Queda', 'Sobe']
        if len(model.classes_) == len(class_names):
            # Pin the report to the classes seen in training so a test window
            # containing a single class does not make target_names mismatch
            # the detected label count and raise. Names follow sorted label
            # order, exactly as they do without an explicit labels argument.
            report = classification_report(
                y_test,
                y_pred,
                labels=model.classes_,
                target_names=class_names,
                zero_division=0
            )
        else:
            # The two display names cannot be mapped onto a non-binary model;
            # report with the raw labels instead of failing.
            report = classification_report(y_test, y_pred, zero_division=0)

        print(report)

    if save_model:
        os.makedirs("models", exist_ok=True)

        path = f"models/{model_type}_{horizon}m.pkl"

        joblib.dump(model, path)

        print("Modelo salvo em:", path)

    return model


def train_all_horizons():
    """Run ``train_model`` for every horizon, once per model family.

    All horizons are trained with the random forest first, then all of them
    again with logistic regression. A banner is printed before each pass.
    Nothing is returned.
    """
    horizon_values = [10, 24, 60, 120]

    # (banner printed before the pass, model_type handed to train_model)
    passes = (
        ("\n===== Random Forest =====", 'random_forest'),
        ("\n===== Logistic Regression =====", 'logistic'),
    )

    for banner, kind in passes:
        print(banner)

        for value in horizon_values:
            train_model(value, kind)