"""Build per-horizon feature and target CSV files from a raw price CSV."""

import os

import numpy as np
import pandas as pd

# Columns that are never exported as model features: the label itself and
# the raw price/volume columns.
_NON_FEATURE_COLUMNS = frozenset(
    {"target", "Close", "Open", "High", "Low", "Volume", "Adj Close"}
)


def load_data(ticker, data_dir='data'):
    """Load the raw CSV for *ticker* and return it indexed by datetime.

    The file is expected at ``<data_dir>/raw_<ticker>.csv``, with every
    ``.`` in the ticker replaced by ``_``. The first CSV column is used as
    the index and parsed as datetimes; rows whose index cannot be parsed
    are discarded.

    Args:
        ticker: Ticker symbol, e.g. ``"PETR4.SA"``.
        data_dir: Directory that contains the raw CSV file.

    Returns:
        A DataFrame with a datetime index and at least a ``Close`` column.

    Raises:
        FileNotFoundError: If the CSV file does not exist.
        ValueError: If the file has no ``Close`` column.
    """
    print("=================================")
    print("Iniciando carregamento de dados")

    ticker_clean = ticker.replace('.', '_')
    filename = f"raw_{ticker_clean}.csv"
    path = os.path.join(data_dir, filename)
    print(f"Carregando arquivo: {path}")

    if not os.path.exists(path):
        raise FileNotFoundError(f"Arquivo não encontrado: {path}")

    df = pd.read_csv(path, index_col=0)

    print("Convertendo índice para datetime...")
    # errors='coerce' turns unparseable index values into NaT instead of
    # raising, so they can be filtered out just below.
    df.index = pd.to_datetime(df.index, errors='coerce')

    print("Removendo datas inválidas...")
    df = df[~df.index.isna()]

    if "Close" not in df.columns:
        print("Colunas disponíveis:", df.columns)
        raise ValueError("Coluna 'Close' não encontrada")

    print("Dataset carregado:", df.shape)
    return df


def create_features_and_target(df, horizon_days):
    """Add lagged-return / moving-average features and a binary target.

    The target is 1 when ``Close`` shifted ``horizon_days`` rows into the
    future is strictly greater than the current ``Close``, else 0. Rows for
    which the future close is not available are dropped rather than being
    labelled 0.

    Args:
        df: DataFrame with a ``Close`` column; it is not modified.
        horizon_days: Number of rows to look ahead; must be >= 1.

    Returns:
        A copy of *df* with the feature columns and an integer ``target``
        column added, with every row containing a missing value removed.

    Raises:
        ValueError: If *horizon_days* is smaller than 1.
    """
    if horizon_days < 1:
        # A zero or negative shift would compare the close with itself or
        # with a past value, which is not a forecast target.
        raise ValueError("horizon_days must be >= 1")

    print(f"Criando features para horizonte {horizon_days} dias")

    df = df.copy()
    close = df["Close"]

    # Past returns, shifted by one extra row so the current row's close is
    # not part of the return feature.
    for lag in [5, 10, 20, 50]:
        df[f"ret_{lag}d"] = close.pct_change(lag).shift(1)

    # Moving averages.
    df["ma20"] = close.rolling(20).mean()
    df["ma50"] = close.rolling(50).mean()

    # Price relative to its moving averages.
    df["close_ma20_ratio"] = close / df["ma20"]
    df["close_ma50_ratio"] = close / df["ma50"]

    # Future target. Comparing against NaN yields False, which would label
    # the last `horizon_days` rows as 0 even though their outcome is
    # unknown; mask those rows to NaN so dropna() removes them.
    future_close = close.shift(-horizon_days)
    went_up = (future_close > close).astype(float)
    df["target"] = went_up.where(future_close.notna())

    df.dropna(inplace=True)
    df["target"] = df["target"].astype(int)

    print("Features criadas:", df.shape)
    return df


def prepare_data_for_all_horizons(
    ticker, horizons=(30, 90, 180, 360), data_dir="data"
):
    """Generate and save feature/target CSV files for each horizon.

    For every horizon ``h`` the files ``features_<h>d.csv`` and
    ``target_<h>d.csv`` are written into *data_dir*.

    NOTE: the output file names do not include the ticker, so running this
    for a second ticker with the same *data_dir* overwrites the files.

    Args:
        ticker: Ticker symbol passed to :func:`load_data`.
        horizons: Iterable of look-ahead horizons, in rows.
        data_dir: Directory holding the raw CSV and receiving the outputs.

    Raises:
        FileNotFoundError: If the raw CSV for *ticker* does not exist.
        ValueError: If the raw CSV has no ``Close`` column or a horizon is
            smaller than 1.
    """
    print("=================================")
    print(f"Preparando dados para {ticker}")
    print("=================================")

    os.makedirs(data_dir, exist_ok=True)

    df = load_data(ticker, data_dir)
    print("Total de dados:", len(df))

    for h in horizons:
        print("---------------------------------")
        print(f"Horizonte: {h} dias")

        df_h = create_features_and_target(df, h)

        feature_cols = [
            col for col in df_h.columns if col not in _NON_FEATURE_COLUMNS
        ]
        X = df_h[feature_cols]
        y = df_h["target"]

        X_path = os.path.join(data_dir, f"features_{h}d.csv")
        y_path = os.path.join(data_dir, f"target_{h}d.csv")

        X.to_csv(X_path)
        y.to_csv(y_path, header=["target"])

        print("Features salvas:", X_path)
        print("Target salvo:", y_path)
        print("Amostras:", X.shape[0])


if __name__ == "__main__":
    prepare_data_for_all_horizons("PETR4.SA")