| | import pandas as pd |
| | import numpy as np |
| | import os |
| |
|
| |
|
def load_data(ticker, data_dir='data'):
    """Load the raw price CSV for *ticker* and return it indexed by date.

    The file is expected at ``<data_dir>/raw_<ticker>.csv``, where dots in
    the ticker are replaced by underscores (``"PETR4.SA"`` ->
    ``raw_PETR4_SA.csv``). The first CSV column is used as the index and
    parsed as datetimes; rows whose index cannot be parsed are discarded.

    Args:
        ticker: Ticker symbol used to build the file name.
        data_dir: Directory that contains the raw CSV file.

    Returns:
        A DataFrame with a datetime index and a numeric ``Close`` column
        (values that cannot be parsed as numbers become NaN).

    Raises:
        FileNotFoundError: If the expected CSV file does not exist.
        ValueError: If the file has no ``Close`` column.
    """
    print("=================================")
    print("Iniciando carregamento de dados")

    ticker_clean = ticker.replace('.', '_')
    filename = f"raw_{ticker_clean}.csv"
    path = os.path.join(data_dir, filename)

    print(f"Carregando arquivo: {path}")

    if not os.path.exists(path):
        raise FileNotFoundError(f"Arquivo não encontrado: {path}")

    df = pd.read_csv(path, index_col=0)

    print("Convertendo índice para datetime...")
    # errors='coerce' turns unparseable index entries into NaT instead of
    # raising, so they can be filtered out below.
    df.index = pd.to_datetime(df.index, errors='coerce')

    print("Removendo datas inválidas...")
    # Explicit copy so the column assignment below works on an independent
    # frame rather than on a slice of the one returned by read_csv.
    df = df.loc[~df.index.isna()].copy()

    if "Close" not in df.columns:
        print("Colunas disponíveis:", df.columns)
        raise ValueError("Coluna 'Close' não encontrada")

    # Rows dropped above (e.g. extra header lines) may have carried text in
    # the Close column, which makes pandas read the whole column as strings.
    # Coerce it back to numbers so arithmetic on Close works; this is a
    # no-op when the column is already numeric.
    df["Close"] = pd.to_numeric(df["Close"], errors="coerce")

    print("Dataset carregado:", df.shape)

    return df
| |
|
| |
|
def create_features_and_target(df, horizon_days):
    """Build Close-based features and a binary up/down target.

    Features added (all derived from the ``Close`` column):
      * ``ret_{5,10,20,50}d``: percentage change over the lag, shifted by
        one row.
      * ``ma20`` / ``ma50``: rolling means of Close over 20 and 50 rows.
      * ``close_ma20_ratio`` / ``close_ma50_ratio``: Close divided by the
        corresponding rolling mean.

    The target is 1 when Close ``horizon_days`` rows ahead is strictly
    greater than the current Close, otherwise 0.

    Args:
        df: DataFrame containing a numeric ``Close`` column. It is not
            modified; the function works on a copy.
        horizon_days: Number of rows to look ahead when building the target.

    Returns:
        A new DataFrame with the feature columns and an integer ``target``
        column. Rows containing any NaN are removed, including rows for
        which the future Close is not available.
    """
    print(f"Criando features para horizonte {horizon_days} dias")

    df = df.copy()

    close = df["Close"]

    for lag in [5, 10, 20, 50]:
        df[f"ret_{lag}d"] = close.pct_change(lag).shift(1)

    df["ma20"] = close.rolling(20).mean()
    df["ma50"] = close.rolling(50).mean()

    df["close_ma20_ratio"] = close / df["ma20"]
    df["close_ma50_ratio"] = close / df["ma50"]

    future_close = close.shift(-horizon_days)

    # Comparing against a missing future price evaluates to False, which
    # would silently label those rows as 0. Mask them with NaN so that
    # dropna() removes rows whose outcome is still unknown.
    went_up = (future_close > close).astype(float)
    df["target"] = went_up.where(future_close.notna())

    df.dropna(inplace=True)

    # The mask forced a float column; restore the integer 0/1 labels.
    df["target"] = df["target"].astype(int)

    print("Features criadas:", df.shape)

    return df
| |
|
| |
|
def prepare_data_for_all_horizons(ticker, horizons=(30, 90, 180, 360), data_dir="data"):
    """Generate and save feature/target CSV files for each horizon.

    Loads the raw data for *ticker* once via ``load_data`` and, for every
    horizon, builds features and target with ``create_features_and_target``
    and writes them to ``<data_dir>/features_<h>d.csv`` and
    ``<data_dir>/target_<h>d.csv``.

    Args:
        ticker: Ticker symbol whose raw CSV is loaded.
        horizons: Iterable of look-ahead horizons, one output pair per entry.
        data_dir: Directory holding the raw CSV and receiving the outputs.
            It is created if it does not exist.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    print("=================================")
    print(f"Preparando dados para {ticker}")
    print("=================================")

    os.makedirs(data_dir, exist_ok=True)

    df = load_data(ticker, data_dir=data_dir)

    print("Total de dados:", len(df))

    # Raw price columns and the label itself are never used as features.
    # Built once here because it does not change between horizons.
    non_feature_cols = {
        "target",
        "Close",
        "Open",
        "High",
        "Low",
        "Volume",
        "Adj Close",
    }

    for h in horizons:

        print("---------------------------------")
        print(f"Horizonte: {h} dias")

        df_h = create_features_and_target(df, h)

        feature_cols = [
            col for col in df_h.columns
            if col not in non_feature_cols
        ]

        X = df_h[feature_cols]
        y = df_h["target"]

        X_path = os.path.join(data_dir, f"features_{h}d.csv")
        y_path = os.path.join(data_dir, f"target_{h}d.csv")

        X.to_csv(X_path)
        y.to_csv(y_path, header=["target"])

        print("Features salvas:", X_path)
        print("Target salvo:", y_path)
        print("Amostras:", X.shape[0])
|
| |
|
if __name__ == "__main__":
    # Script entry point: prepare the datasets for the default ticker.
    default_ticker = "PETR4.SA"
    prepare_data_for_all_horizons(default_ticker)