# AllStreet / src/feature_engineering.py
# kauabarros-24
# CHORE: Create simple web page
# af171b5
import pandas as pd
import numpy as np
import os
def load_data(ticker, data_dir='data'):
    """Read ``raw_<ticker>.csv`` from *data_dir* into a datetime-indexed frame.

    Dots in *ticker* are replaced by underscores to build the file name.
    The first CSV column becomes the index and is parsed as datetimes; rows
    whose index value cannot be parsed are discarded.

    Args:
        ticker: Ticker symbol used to derive the file name.
        data_dir: Directory that contains the raw CSV file.

    Returns:
        The loaded DataFrame, guaranteed to contain a ``Close`` column.

    Raises:
        FileNotFoundError: If the expected CSV file does not exist.
        ValueError: If the file has no ``Close`` column.
    """
    print("=================================")
    print("Iniciando carregamento de dados")

    csv_path = os.path.join(
        data_dir, "raw_{}.csv".format(ticker.replace('.', '_'))
    )
    print(f"Carregando arquivo: {csv_path}")
    if not os.path.exists(csv_path):
        raise FileNotFoundError(f"Arquivo não encontrado: {csv_path}")

    frame = pd.read_csv(csv_path, index_col=0)

    print("Convertendo índice para datetime...")
    # Unparseable index values become NaT instead of raising.
    frame.index = pd.to_datetime(frame.index, errors='coerce')

    print("Removendo datas inválidas...")
    frame = frame[frame.index.notna()]

    if "Close" in frame.columns:
        print("Dataset carregado:", frame.shape)
        return frame

    # Show what was actually found to make the failure easier to diagnose.
    print("Colunas disponíveis:", frame.columns)
    raise ValueError("Coluna 'Close' não encontrada")
def create_features_and_target(df, horizon_days):
    """Build return / moving-average features and a binary up-or-not target.

    Columns added, all derived from ``Close``:

    * ``ret_5d``, ``ret_10d``, ``ret_20d``, ``ret_50d``: percentage change
      over the given number of rows, shifted by one row so the value on a
      row only uses closes up to the previous row.
    * ``ma20``, ``ma50``: rolling means of the close.
    * ``close_ma20_ratio``, ``close_ma50_ratio``: close divided by each mean.
    * ``target``: 1 when the close ``horizon_days`` rows ahead is strictly
      greater than the current close, otherwise 0.

    Rows whose future close is unavailable (e.g. the last ``horizon_days``
    rows) have no valid label and are dropped instead of being labelled 0.

    Args:
        df: DataFrame containing a ``Close`` column. It is not modified.
        horizon_days: Number of rows ahead at which the future close is read.

    Returns:
        A new DataFrame with the columns above appended and every row that
        contains a missing value removed. ``target`` has an integer dtype.
    """
    print(f"Criando features para horizonte {horizon_days} dias")

    df = df.copy()
    close = df["Close"]

    # Past returns.
    for lag in (5, 10, 20, 50):
        df[f"ret_{lag}d"] = close.pct_change(lag).shift(1)

    # Moving averages.
    df["ma20"] = close.rolling(20).mean()
    df["ma50"] = close.rolling(50).mean()

    # Price-to-average ratios.
    df["close_ma20_ratio"] = close / df["ma20"]
    df["close_ma50_ratio"] = close / df["ma50"]

    # Future target.
    future_close = close.shift(-horizon_days)
    # ``NaN > x`` is False, so a plain ``astype(int)`` would label rows with
    # no future close as 0 and they would survive dropna(). Keep the label
    # as NaN for those rows so they are removed with the other incomplete rows.
    df["target"] = (future_close > close).astype(float).where(future_close.notna())

    # astype returns a new frame, so no column is assigned on a dropna result.
    df = df.dropna().astype({"target": int})

    print("Features criadas:", df.shape)
    return df
def prepare_data_for_all_horizons(ticker, horizons=(30, 90, 180, 360), data_dir="data"):
    """Generate and save feature/target CSV files for each prediction horizon.

    Loads the raw data for *ticker* from *data_dir*, then for every horizon
    builds the features and target and writes them to
    ``features_<h>d.csv`` and ``target_<h>d.csv`` inside *data_dir*.

    The output file names do not include the ticker, so running this for a
    second ticker with the same *data_dir* overwrites the earlier files.

    Args:
        ticker: Ticker symbol whose raw CSV is loaded.
        horizons: Iterable of horizons passed one by one to
            ``create_features_and_target``.
        data_dir: Directory used both for the raw input and for the output
            files; created if it does not exist.

    Returns:
        None. Results are written to disk.
    """
    print("=================================")
    print(f"Preparando dados para {ticker}")
    print("=================================")

    os.makedirs(data_dir, exist_ok=True)
    df = load_data(ticker, data_dir)
    print("Total de dados:", len(df))

    # Raw price columns and the label itself are not model inputs.
    excluded = {"target", "Close", "Open", "High", "Low", "Volume", "Adj Close"}

    for h in horizons:
        print("---------------------------------")
        print(f"Horizonte: {h} dias")

        df_h = create_features_and_target(df, h)
        feature_cols = [col for col in df_h.columns if col not in excluded]

        X = df_h[feature_cols]
        y = df_h["target"]

        X_path = os.path.join(data_dir, f"features_{h}d.csv")
        y_path = os.path.join(data_dir, f"target_{h}d.csv")
        X.to_csv(X_path)
        y.to_csv(y_path, header=["target"])

        print("Features salvas:", X_path)
        print("Target salvo:", y_path)
        print("Amostras:", X.shape[0])
if __name__ == "__main__":
    # Script entry point: build the feature/target files for PETR4.SA.
    prepare_data_for_all_horizons(ticker="PETR4.SA")