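"""Train a conditional WGAN-GP on precomputed latent vectors.

Both the generator and the critic are conditioned on a ticker ID through an
embedding layer. The script loads latent vectors and per-sequence ticker IDs
from data/processed/, standardizes the latents, trains the GAN, and saves the
model weights, the scaler parameters, the per-epoch losses, and a JSON config.
"""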
import os
import sys
import json

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader

# Make the project root importable so that `src.logger` can be resolved.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

# Prefer the project logger; fall back to the standard library if unavailable.
try:
    from src.logger import get_logger
    logger = get_logger(__name__)
except Exception:
    import logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)


class LatentTickerDataset(Dataset):
    """Pairs of (latent vector, ticker ID) loaded from .npy files."""

    def __init__(self, latent_path, ticker_path):
        self.latents = np.load(latent_path)
        self.tickers = np.load(ticker_path)
        assert self.latents.shape[0] == self.tickers.shape[0], "Latents and tickers length mismatch"

    def __len__(self):
        return self.latents.shape[0]

    def __getitem__(self, idx):
        x = self.latents[idx].astype(np.float32)
        y = int(self.tickers[idx])
        return x, y


class ConditionalGenerator(nn.Module):
    """Maps a noise vector plus a ticker embedding to a latent vector."""

    def __init__(self, noise_dim, embed_dim, num_tickers, latent_dim, hidden_dim=128):
        super().__init__()
        self.ticker_emb = nn.Embedding(num_tickers, embed_dim)
        input_dim = noise_dim + embed_dim
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, latent_dim),
        )

    def forward(self, z, ticker_ids):
        emb = self.ticker_emb(ticker_ids)
        x = torch.cat([z, emb], dim=1)
        return self.net(x)


class ConditionalDiscriminator(nn.Module):
    """WGAN critic: scores a latent vector conditioned on its ticker embedding."""

    def __init__(self, latent_dim, embed_dim, num_tickers, hidden_dim=128):
        super().__init__()
        self.ticker_emb = nn.Embedding(num_tickers, embed_dim)
        input_dim = latent_dim + embed_dim
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, 1),
        )

    def forward(self, x, ticker_ids):
        emb = self.ticker_emb(ticker_ids)
        x_cat = torch.cat([x, emb], dim=1)
        return self.net(x_cat)


def gradient_penalty_cond(D, real, fake, ticker_ids, device):
    """Compute the WGAN-GP gradient penalty for the conditional critic D(x, ticker_ids).

    Interpolates between real and fake samples and penalizes deviations of the
    critic's gradient norm from 1 at those points.
    """
    batch_size = real.size(0)
    alpha = torch.rand(batch_size, 1, device=device)
    interpolates = (alpha * real + (1 - alpha) * fake).requires_grad_(True)
    d_interpolates = D(interpolates, ticker_ids)
    grad_outputs = torch.ones_like(d_interpolates)
    gradients = torch.autograd.grad(
        outputs=d_interpolates,
        inputs=interpolates,
        grad_outputs=grad_outputs,
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )[0]
    gradients = gradients.view(batch_size, -1)
    gp = ((gradients.norm(2, dim=1) - 1) ** 2).mean()
    return gp


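# A minimal sketch (not part of the original training flow) of how synthetic
# latents could be drawn from a trained generator. The helper name and its
# `scaler_params` argument (the {"mean", "scale"} dict saved below to
# resources/latent_scaler.npy) are assumptions for illustration.
def sample_latents(generator, scaler_params, ticker_ids, noise_dim, device):
    """Draw synthetic latent vectors for the given ticker IDs.

    Returns samples mapped back to the original (unstandardized) latent scale,
    e.g. ``sample_latents(G, scaler_save, [0] * 100, noise_dim, device)``.
    """
    generator.eval()
    with torch.no_grad():
        ids = torch.as_tensor(ticker_ids, dtype=torch.long, device=device)
        z = torch.randn(len(ids), noise_dim, device=device)
        fake = generator(z, ids).cpu().numpy()
    mean = np.asarray(scaler_params["mean"], dtype=np.float32)
    scale = np.asarray(scaler_params["scale"], dtype=np.float32)
    return fake * scale + mean

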
if __name__ == "__main__":
    # Project-relative input and output locations.
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    latent_path = os.path.join(base_dir, "data", "processed", "latent_vectors.npy")
    ticker_path = os.path.join(base_dir, "data", "processed", "sequence_tickers.npy")
    models_dir = os.path.join(base_dir, "models")
    resources_dir = os.path.join(base_dir, "resources")
    os.makedirs(models_dir, exist_ok=True)
    os.makedirs(resources_dir, exist_ok=True)

logger.info("Loading latent vectors from: %s", latent_path) |
|
|
latent_vectors = np.load(latent_path) |
|
|
logger.info("Loaded latent vectors shape: %s", latent_vectors.shape) |
|
|
|
|
|
logger.info("Loading sequence ticker IDs from: %s", ticker_path) |
|
|
sequence_tickers = np.load(ticker_path) |
|
|
logger.info("Loaded ticker IDs shape: %s", sequence_tickers.shape) |
|
|
|
|
|
    # Standardize the latents and persist the scaler parameters. The dict is
    # pickled inside the .npy file, so load it later with allow_pickle=True.
    scaler = StandardScaler()
    latent_scaled = scaler.fit_transform(latent_vectors)
    scaler_save = {"mean": scaler.mean_.tolist(), "scale": scaler.scale_.tolist()}
    np.save(os.path.join(resources_dir, "latent_scaler.npy"), scaler_save)
    logger.info("Saved latent scaler params to resources.")

    # Build the dataset, then swap in the standardized latents computed above.
    dataset = LatentTickerDataset(latent_path, ticker_path)
    dataset.latents = latent_scaled
    batch_size = 256
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, drop_last=True, num_workers=2)

    # Training hyperparameters.
    noise_dim = 64
    hidden_dim = 128
    n_epochs = 300
    lr = 1e-4
    lambda_gp = 10
    n_critic = 5
    embed_dim = 16

    # Dimensions inferred from the data.
    latent_dim = latent_scaled.shape[1]
    num_tickers = int(sequence_tickers.max()) + 1

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    G = ConditionalGenerator(noise_dim=noise_dim, embed_dim=embed_dim,
                             num_tickers=num_tickers, latent_dim=latent_dim,
                             hidden_dim=hidden_dim).to(device)
    D = ConditionalDiscriminator(latent_dim=latent_dim, embed_dim=embed_dim,
                                 num_tickers=num_tickers, hidden_dim=hidden_dim).to(device)

    opt_G = optim.Adam(G.parameters(), lr=lr, betas=(0.5, 0.9))
    opt_D = optim.Adam(D.parameters(), lr=lr, betas=(0.5, 0.9))

    losses = {"epoch": [], "D_loss": [], "G_loss": []}

logger.info("Starting Conditional WGAN-GP training...") |
|
|
for epoch in range(n_epochs): |
|
|
D_losses_epoch = [] |
|
|
G_losses_epoch = [] |
|
|
|
|
|
for real_batch, tickers_batch in tqdm(loader, desc=f"Epoch {epoch+1}/{n_epochs}", leave=False): |
|
|
real = real_batch.to(device) |
|
|
tickers = tickers_batch.to(device).long() |
|
|
bsize = real.size(0) |
|
|
|
|
|
for _ in range(n_critic): |
|
|
z = torch.randn(bsize, noise_dim).to(device) |
|
|
fake = G(z, tickers) |
|
|
|
|
|
d_real = D(real, tickers) |
|
|
d_fake = D(fake.detach(), tickers) |
|
|
|
|
|
gp = gradient_penalty_cond(D, real, fake.detach(), tickers, device) |
|
|
d_loss = -(d_real.mean() - d_fake.mean()) + lambda_gp * gp |
|
|
|
|
|
opt_D.zero_grad() |
|
|
d_loss.backward() |
|
|
opt_D.step() |
|
|
|
|
|
z = torch.randn(bsize, noise_dim).to(device) |
|
|
fake = G(z, tickers) |
|
|
g_loss = -D(fake, tickers).mean() |
|
|
|
|
|
opt_G.zero_grad() |
|
|
g_loss.backward() |
|
|
opt_G.step() |
|
|
|
|
|
D_losses_epoch.append(d_loss.item()) |
|
|
G_losses_epoch.append(g_loss.item()) |
|
|
|
|
|
mean_D = float(np.mean(D_losses_epoch)) if len(D_losses_epoch) else 0.0 |
|
|
mean_G = float(np.mean(G_losses_epoch)) if len(G_losses_epoch) else 0.0 |
|
|
|
|
|
losses["epoch"].append(epoch + 1) |
|
|
losses["D_loss"].append(mean_D) |
|
|
losses["G_loss"].append(mean_G) |
|
|
|
|
|
logger.info(f"[{epoch+1}/{n_epochs}] D_loss={mean_D:.4f}, G_loss={mean_G:.4f}") |
|
|
|
|
|
|
|
|
    losses_df = pd.DataFrame(losses)
    losses_csv_path = os.path.join(resources_dir, "latent_gan_losses.csv")
    losses_df.to_csv(losses_csv_path, index=False)
    logger.info("Saved training losses to %s", losses_csv_path)

    torch.save(G.state_dict(), os.path.join(models_dir, "latent_gan_generator_conditional.pth"))
    torch.save(D.state_dict(), os.path.join(models_dir, "latent_gan_discriminator_conditional.pth"))
    logger.info("Saved GAN models to models/")

    with open(os.path.join(resources_dir, "gan_config.json"), "w") as f:
        json.dump({
            "model": "WGAN-GP-conditional",
            "noise_dim": noise_dim,
            "latent_dim": latent_dim,
            "hidden_dim": hidden_dim,
            "epochs": n_epochs,
            "batch_size": batch_size,
            "lr": lr,
            "lambda_gp": lambda_gp,
            "n_critic": n_critic,
            "embed_dim": embed_dim,
            "num_tickers": num_tickers
        }, f, indent=4)

    logger.info("Saved GAN config to resources/gan_config.json")
    logger.info("Training completed successfully.")
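
# Example usage after training (a sketch, not executed here): reload the saved
# generator and scaler, then draw synthetic latents with the sample_latents
# helper defined above; paths and variable names follow this script.
#
#   G2 = ConditionalGenerator(noise_dim, embed_dim, num_tickers, latent_dim, hidden_dim)
#   G2.load_state_dict(torch.load(os.path.join(models_dir, "latent_gan_generator_conditional.pth")))
#   params = np.load(os.path.join(resources_dir, "latent_scaler.npy"), allow_pickle=True).item()
#   synthetic = sample_latents(G2.to(device), params, [0] * 100, noise_dim, device)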