# File size: 2,795 Bytes

import pandas as pd
import numpy as np
import os


def load_data(ticker, data_dir='data'):
    """Load the raw price CSV for *ticker* and return it with a datetime index.

    The file is looked up at ``<data_dir>/raw_<ticker>.csv``, where every
    ``.`` in the ticker is replaced by ``_``. The first CSV column becomes
    the index, is parsed as datetime, and rows whose index cannot be parsed
    are discarded.

    Args:
        ticker: Ticker symbol used to build the file name.
        data_dir: Directory that contains the raw CSV file.

    Returns:
        A DataFrame indexed by datetime that contains a ``Close`` column.

    Raises:
        FileNotFoundError: If the expected CSV file does not exist.
        ValueError: If the loaded data has no ``Close`` column.
    """
    print("=================================")
    print("Iniciando carregamento de dados")

    csv_path = os.path.join(data_dir, f"raw_{ticker.replace('.', '_')}.csv")
    print(f"Carregando arquivo: {csv_path}")

    if not os.path.exists(csv_path):
        raise FileNotFoundError(f"Arquivo não encontrado: {csv_path}")

    frame = pd.read_csv(csv_path, index_col=0)

    print("Convertendo índice para datetime...")
    # Unparseable index values become NaT so they can be filtered out below.
    frame.index = pd.to_datetime(frame.index, errors='coerce')

    print("Removendo datas inválidas...")
    valid_rows = frame.index.notna()
    frame = frame[valid_rows]

    if "Close" in frame.columns:
        print("Dataset carregado:", frame.shape)
        return frame

    # Show what was actually loaded to make the failure easier to diagnose.
    print("Colunas disponíveis:", frame.columns)
    raise ValueError("Coluna 'Close' não encontrada")


def create_features_and_target(df, horizon_days):
    """Build lagged-return / moving-average features and a binary target.

    Features added (all derived from the ``Close`` column):

    * ``ret_{5,10,20,50}d``: percentage change over the lag, shifted by one
      row.
    * ``ma20`` / ``ma50``: rolling means of ``Close``.
    * ``close_ma20_ratio`` / ``close_ma50_ratio``: ``Close`` divided by the
      corresponding rolling mean.

    The ``target`` column is 1 when the close ``horizon_days`` rows ahead is
    strictly greater than the current close, otherwise 0. Rows for which no
    future close exists (the last ``horizon_days`` rows) are dropped instead
    of being labelled 0, because their outcome is unknown.

    Args:
        df: DataFrame with a ``Close`` column. It is not modified.
        horizon_days: Number of rows to look ahead when building the target.

    Returns:
        A new DataFrame containing the original columns, the features and
        an integer ``target`` column, with every row holding a NaN removed.
    """
    print(f"Criando features para horizonte {horizon_days} dias")

    df = df.copy()

    close = df["Close"]

    # Past returns, shifted one row so the feature excludes the current row.
    for lag in [5, 10, 20, 50]:
        df[f"ret_{lag}d"] = close.pct_change(lag).shift(1)

    # Moving averages.
    df["ma20"] = close.rolling(20).mean()
    df["ma50"] = close.rolling(50).mean()

    # Price relative to each moving average.
    df["close_ma20_ratio"] = close / df["ma20"]
    df["close_ma50_ratio"] = close / df["ma50"]

    # Future target.
    future_close = close.shift(-horizon_days)

    # A comparison against NaN evaluates to False, which would silently
    # label rows with an unknown future as "down". Keep those rows as NaN so
    # dropna() below removes them.
    has_outcome = future_close.notna() & close.notna()
    df["target"] = (future_close > close).astype(float).where(has_outcome)

    df.dropna(inplace=True)

    # Safe to cast now that no NaN targets remain.
    df["target"] = df["target"].astype(int)

    print("Features criadas:", df.shape)

    return df


def prepare_data_for_all_horizons(ticker, horizons=(30, 90, 180, 360), data_dir="data"):
    """Generate and save feature/target CSV files for each horizon.

    Loads the raw data for *ticker* once via ``load_data`` and, for every
    horizon, calls ``create_features_and_target`` and writes two files into
    *data_dir*: ``features_<h>d.csv`` and ``target_<h>d.csv``. The output
    file names do not include the ticker, so a later run for another ticker
    overwrites them.

    Args:
        ticker: Ticker symbol passed to ``load_data``.
        horizons: Iterable of look-ahead horizons. The default is an
            immutable tuple so it cannot be mutated between calls.
        data_dir: Directory used both to read the raw CSV and to write the
            outputs. It is created if it does not exist.

    Returns:
        None. Results are written to disk.
    """
    print("=================================")
    print(f"Preparando dados para {ticker}")
    print("=================================")

    os.makedirs(data_dir, exist_ok=True)

    df = load_data(ticker, data_dir=data_dir)

    print("Total de dados:", len(df))

    # Raw price columns and the label itself are never used as model inputs.
    non_feature_cols = {
        "target",
        "Close",
        "Open",
        "High",
        "Low",
        "Volume",
        "Adj Close",
    }

    for h in horizons:

        print("---------------------------------")
        print(f"Horizonte: {h} dias")

        df_h = create_features_and_target(df, h)

        feature_cols = [
            col for col in df_h.columns if col not in non_feature_cols
        ]

        X = df_h[feature_cols]
        y = df_h["target"]

        # A forward slash keeps the printed/saved paths identical to the
        # previous hard-coded "data/..." strings when data_dir is the default.
        X_path = f"{data_dir}/features_{h}d.csv"
        y_path = f"{data_dir}/target_{h}d.csv"

        X.to_csv(X_path)
        y.to_csv(y_path, header=["target"])

        print("Features salvas:", X_path)
        print("Target salvo:", y_path)
        print("Amostras:", X.shape[0])


if __name__ == "__main__":
    # Script entry point: build the datasets for the default ticker.
    default_ticker = "PETR4.SA"
    prepare_data_for_all_horizons(default_ticker)