import pandas as pd
import numpy as np
import os
def load_data(ticker, data_dir='data'):
    """Load the raw price CSV for *ticker* and return it indexed by date.

    The file is read from ``<data_dir>/raw_<ticker>.csv``, where every ``.``
    in the ticker is replaced by ``_`` (``"PETR4.SA"`` -> ``raw_PETR4_SA.csv``).
    The first CSV column becomes the index and is parsed as datetimes; rows
    whose index value cannot be parsed as a date are dropped.

    Args:
        ticker: Ticker symbol used to build the file name.
        data_dir: Directory that contains the raw CSV file.

    Returns:
        A DataFrame with a datetime index and at least a ``Close`` column.
        The price/volume columns that are present (``Open``, ``High``,
        ``Low``, ``Close``, ``Adj Close``, ``Volume``) are numeric.

    Raises:
        FileNotFoundError: If the CSV file does not exist.
        ValueError: If the file has no ``Close`` column.
    """
    print("=================================")
    print("Iniciando carregamento de dados")

    ticker_clean = ticker.replace('.', '_')
    filename = f"raw_{ticker_clean}.csv"
    path = os.path.join(data_dir, filename)
    print(f"Carregando arquivo: {path}")

    if not os.path.exists(path):
        raise FileNotFoundError(f"Arquivo não encontrado: {path}")

    df = pd.read_csv(path, index_col=0)

    print("Convertendo índice para datetime...")
    df.index = pd.to_datetime(df.index, errors='coerce')

    print("Removendo datas inválidas...")
    # Explicit copy so the column assignments below act on an independent frame.
    df = df.loc[~df.index.isna()].copy()

    if "Close" not in df.columns:
        print("Colunas disponíveis:", df.columns)
        raise ValueError("Coluna 'Close' não encontrada")

    # Non-date rows (e.g. extra header lines) make pandas read the price
    # columns as strings. Those rows are gone now, but the dtype is not fixed
    # automatically, and later arithmetic on string prices would fail.
    # Columns that are already numeric are left untouched.
    price_columns = ("Open", "High", "Low", "Close", "Adj Close", "Volume")
    for col in price_columns:
        if col in df.columns and not pd.api.types.is_numeric_dtype(df[col]):
            df[col] = pd.to_numeric(df[col], errors='coerce')

    print("Dataset carregado:", df.shape)
    return df
def create_features_and_target(df, horizon_days):
    """Build lagged-return / moving-average features and a binary target.

    Works on a copy of *df*; the caller's frame is not modified.

    Added columns:
        ``ret_{5,10,20,50}d``: percentage change of ``Close`` over that many
            rows, shifted down by one additional row.
        ``ma20`` / ``ma50``: rolling mean of ``Close`` over 20 / 50 rows.
        ``close_ma20_ratio`` / ``close_ma50_ratio``: ``Close`` divided by the
            corresponding moving average.
        ``target``: 1 if ``Close`` ``horizon_days`` rows ahead is strictly
            greater than the current ``Close``, otherwise 0.

    Args:
        df: DataFrame containing a numeric ``Close`` column.
        horizon_days: Number of rows to look ahead for the target. This is a
            row offset, not a calendar-day offset.

    Returns:
        A new DataFrame with the feature and ``target`` columns added and
        every row containing a NaN in any column removed. Rows whose future
        close is unknown (the last ``horizon_days`` rows) are removed as well.
    """
    print(f"Criando features para horizonte {horizon_days} dias")
    df = df.copy()
    close = df["Close"]

    # Past returns, shifted one extra row.
    for lag in (5, 10, 20, 50):
        df[f"ret_{lag}d"] = close.pct_change(lag).shift(1)

    # Moving averages.
    df["ma20"] = close.rolling(20).mean()
    df["ma50"] = close.rolling(50).mean()

    # Price relative to its moving averages.
    df["close_ma20_ratio"] = close / df["ma20"]
    df["close_ma50_ratio"] = close / df["ma50"]

    # Future target. Where the future close is missing (the tail of the
    # series), the comparison would evaluate to False and silently become a
    # 0 label. Mask those rows to NaN so dropna() removes them instead of
    # keeping a fabricated label.
    future_close = close.shift(-horizon_days)
    target = (future_close > close).astype(float)
    df["target"] = target.where(future_close.notna())

    df.dropna(inplace=True)
    # All remaining targets are 0.0 / 1.0, so restore the integer dtype.
    df["target"] = df["target"].astype(int)

    print("Features criadas:", df.shape)
    return df
def prepare_data_for_all_horizons(ticker, horizons=(30, 90, 180, 360), data_dir="data"):
    """Build and save feature/target CSV files for each prediction horizon.

    Loads the raw data for *ticker* via ``load_data`` and, for every horizon,
    calls ``create_features_and_target`` and writes two files into
    *data_dir*: ``features_<h>d.csv`` and ``target_<h>d.csv``.

    The output file names do not include the ticker, so running this for a
    second ticker in the same directory overwrites the previous files.

    Args:
        ticker: Ticker symbol passed to ``load_data``.
        horizons: Iterable of look-ahead horizons (row offsets).
        data_dir: Directory holding the raw CSV and receiving the outputs.
            It is created if it does not exist.

    Returns:
        None. Results are written to disk.
    """
    print("=================================")
    print(f"Preparando dados para {ticker}")
    print("=================================")

    os.makedirs(data_dir, exist_ok=True)
    df = load_data(ticker, data_dir=data_dir)
    print("Total de dados:", len(df))

    # Raw price columns and the label are not model inputs; everything else
    # produced by create_features_and_target is treated as a feature.
    non_feature_cols = {
        "target",
        "Close",
        "Open",
        "High",
        "Low",
        "Volume",
        "Adj Close",
    }

    for h in horizons:
        print("---------------------------------")
        print(f"Horizonte: {h} dias")

        df_h = create_features_and_target(df, h)
        feature_cols = [col for col in df_h.columns if col not in non_feature_cols]

        X = df_h[feature_cols]
        y = df_h["target"]

        # Forward slashes keep the printed paths identical to the previous
        # hard-coded "data/..." form when data_dir is left at its default.
        X_path = f"{data_dir}/features_{h}d.csv"
        y_path = f"{data_dir}/target_{h}d.csv"
        X.to_csv(X_path)
        y.to_csv(y_path, header=["target"])

        print("Features salvas:", X_path)
        print("Target salvo:", y_path)
        print("Amostras:", X.shape[0])
if __name__ == "__main__":
    # Script entry point: prepare the datasets for PETR4.SA using the
    # function's default horizons.
    prepare_data_for_all_horizons(ticker="PETR4.SA")