|
|
import datetime as dt
import os
import time
from typing import List, Tuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler
from torch import nn
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GCNConv
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_it_sector_data_from_csvs(
    infy_csv: str,
    tcs_csv: str,
    nifty_it_csv: str,
) -> Tuple[np.ndarray, np.ndarray, List[pd.Timestamp], List[str]]:
    """Load IT sector data from separate CSV files and build cleaned feature + target tensors.

    Methodology alignment
    ---------------------
    - Data Collection: uses OHLCV-style fields from the NSE IT sector files.
    - Preprocessing / Cleaning:
        * Parse dates and sort chronologically.
        * Filter to equity series (EQ).
        * Remove duplicates and rows with missing / invalid key values.
        * Filter out non-trading days (zero / negative volume).
        * Forward-fill remaining gaps.
    - Derived Indicators:
        * Daily returns.
        * 5-day moving average of close.
        * 20-day rolling volatility of returns.

    Parameters
    ----------
    infy_csv, tcs_csv, nifty_it_csv : str
        Paths to CSV files with at least Date/Open/High/Low/Close/Volume
        columns (Series is optional for the index file and defaults to "EQ").

    Returns
    -------
    features : np.ndarray
        Shape [num_dates, num_companies, num_features]. Per company per day:
        z-scored close, z-scored volume, raw return, z-scored MA5,
        z-scored 20-day volatility.
    targets : np.ndarray
        Shape [num_dates, num_companies]. Daily returns per company
        (prediction target).
    dates : list of pd.Timestamp
        Trading dates.
    companies : list of str
        Sorted company tickers (node names in the graph).
    """

    def _zscore(frame: pd.DataFrame) -> pd.DataFrame:
        # Column-wise standardization with population std (ddof=0), matching
        # sklearn's StandardScaler; zero-variance columns get scale 1 so the
        # result is just centered, never divides by zero.
        values = frame.values.astype(np.float64)
        mean = values.mean(axis=0)
        std = values.std(axis=0)
        std[std == 0.0] = 1.0
        return pd.DataFrame((values - mean) / std, index=frame.index, columns=frame.columns)

    infy_df = pd.read_csv(infy_csv)
    tcs_df = pd.read_csv(tcs_csv)
    index_df = pd.read_csv(nifty_it_csv)

    # Tag each frame with its graph-node identity before concatenation.
    infy_df["Company"] = "INFY"
    tcs_df["Company"] = "TCS"
    index_df["Company"] = "NIFTY_IT"

    # Index files typically lack a Series column; default to "EQ" so the
    # equity filter below keeps their rows.
    if "Series" not in index_df.columns:
        index_df["Series"] = "EQ"

    # Keep only the columns shared by all three sources, in one fixed order.
    cols = ["Date", "Company", "Series", "Open", "High", "Low", "Close", "Volume"]
    infy_df = infy_df[cols]
    tcs_df = tcs_df[cols]
    index_df = index_df[cols]

    df = pd.concat([infy_df, tcs_df, index_df], ignore_index=True)
    df["Date"] = pd.to_datetime(df["Date"])

    # Cleaning: equity series only, drop incomplete rows, drop non-trading
    # days (zero/negative volume), de-duplicate, sort chronologically.
    if "Series" in df.columns:
        df = df[df["Series"] == "EQ"]
    df = df.dropna(subset=["Company", "Close", "Volume", "Open", "High", "Low"])
    df = df[df["Volume"] > 0]
    df = df.drop_duplicates(subset=["Date", "Company"])
    df = df.sort_values(["Date", "Company"])

    companies = sorted(df["Company"].unique().tolist())

    # Wide [date x company] matrices; forward-fill per-company gaps.
    close = df.pivot_table(index="Date", columns="Company", values="Close")[companies].ffill()
    volume = df.pivot_table(index="Date", columns="Company", values="Volume")[companies].ffill()

    # Derived indicators.
    returns = close.pct_change().replace([np.inf, -np.inf], np.nan).fillna(0.0)
    ma5 = close.rolling(window=5, min_periods=1).mean().ffill()
    vol20 = (
        returns.rolling(window=20, min_periods=1)
        .std()
        .replace([np.inf, -np.inf], np.nan)
        .fillna(0.0)
        .ffill()
    )

    # Normalize level-dependent series; raw returns stay unscaled because
    # they are already roughly centered and are also the prediction target.
    close_scaled = _zscore(close)
    volume_scaled = _zscore(volume)
    ma5_scaled = _zscore(ma5)
    vol20_scaled = _zscore(vol20)

    dates = close.index.to_list()
    num_dates = len(dates)
    num_companies = len(companies)

    num_features = 5
    features = np.zeros((num_dates, num_companies, num_features), dtype=np.float32)
    for j, c in enumerate(companies):
        features[:, j, 0] = close_scaled[c].values
        features[:, j, 1] = volume_scaled[c].values
        features[:, j, 2] = returns[c].values
        features[:, j, 3] = ma5_scaled[c].values
        features[:, j, 4] = vol20_scaled[c].values

    targets = returns.values.astype(np.float32)

    return features, targets, dates, companies
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_correlation_graph(returns: np.ndarray, threshold: float = 0.2) -> torch.Tensor:
    """Build an undirected graph of companies based on return correlations.

    Parameters
    ----------
    returns : np.ndarray
        Array of shape [num_dates, num_companies] with daily returns.
    threshold : float
        Minimum absolute correlation to create an edge.

    Returns
    -------
    edge_index : torch.Tensor
        Tensor of shape [2, num_edges] in COO format for PyTorch Geometric.
        Both directions (i, j) and (j, i) are emitted. If no pair clears the
        threshold, a fully connected graph (minus self-loops) is returned so
        downstream GCN layers always have edges to propagate over.
    """
    corr = np.corrcoef(returns.T)
    num_nodes = corr.shape[0]

    # Vectorized thresholding instead of an O(n^2) Python loop. NaN
    # correlations (e.g. from a zero-variance column) compare False and are
    # therefore excluded, matching the loop-based behavior.
    mask = np.abs(corr) >= threshold
    np.fill_diagonal(mask, False)

    if not mask.any():
        # Fallback: fully connected graph without self-loops.
        mask = ~np.eye(num_nodes, dtype=bool)

    # argwhere scans row-major, giving the same edge order as nested loops.
    edges = np.argwhere(mask)
    edge_index = torch.from_numpy(edges.T.copy()).long().contiguous()
    return edge_index
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TimeSeriesGraphDataset(torch.utils.data.Dataset):
    """Sliding-window dataset yielding one graph snapshot per trading day.

    Item ``idx`` corresponds to absolute time step ``t = start_t + idx`` and
    packages a Data object with:
    - x: per-node feature windows, shape [num_nodes, window_size, num_features]
    - edge_index: the static company correlation graph
    - y: target returns at day ``t``, shape [num_nodes]
    """

    def __init__(
        self,
        features: np.ndarray,
        targets: np.ndarray,
        edge_index: torch.Tensor,
        window_size: int,
        start_t: int,
        end_t: int,
    ) -> None:
        super().__init__()
        self.features = features
        self.targets = targets
        self.edge_index = edge_index
        self.window_size = window_size
        self.start_t = start_t
        self.end_t = end_t

    def __len__(self) -> int:
        # One sample per time step in [start_t, end_t).
        return self.end_t - self.start_t

    def __getitem__(self, idx: int) -> Data:
        t = self.start_t + idx

        # History covers the window_size days strictly before day t.
        history = self.features[t - self.window_size : t]

        # [window, nodes, feat] -> [nodes, window, feat]: each node carries
        # its own time series, as expected by the model's shared LSTM.
        per_node = history.transpose(1, 0, 2)

        return Data(
            x=torch.from_numpy(per_node),
            edge_index=self.edge_index,
            y=torch.from_numpy(self.targets[t]),
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class GNNTimeSeriesModel(nn.Module):
    """LSTM + GCN hybrid for multi-node time-series regression.

    Methodology alignment
    ---------------------
    - Temporal Feature Extraction: shared LSTM encodes each stock's past W days.
    - GNN Application: GCN layers propagate information over the inter-stock graph.
    - Prediction: per-node regression head outputs next-day return.
    """

    def __init__(
        self,
        window_size: int,
        num_features: int,
        hidden_lstm: int = 64,
        hidden_gnn: int = 64,
        dropout: float = 0.2,
    ) -> None:
        super().__init__()
        self.window_size = window_size
        self.num_features = num_features

        # Shared temporal encoder: the same LSTM weights are applied to every
        # node's window (nodes act as the LSTM "batch" dimension).
        self.lstm = nn.LSTM(
            input_size=num_features,
            hidden_size=hidden_lstm,
            num_layers=1,
            batch_first=False,
        )

        # Two GCN layers over the inter-stock graph, then a linear head that
        # regresses one scalar (next-day return) per node.
        self.conv1 = GCNConv(hidden_lstm, hidden_gnn)
        self.conv2 = GCNConv(hidden_gnn, hidden_gnn)
        self.lin = nn.Linear(hidden_gnn, 1)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x: torch.Tensor, edge_index: torch.Tensor) -> torch.Tensor:
        """Forward pass.

        Parameters
        ----------
        x : torch.Tensor
            Shape [num_nodes_total_in_batch, window, num_features].
        edge_index : torch.Tensor
            Graph edges for the batched graph.

        Returns
        -------
        torch.Tensor
            Predicted return per node, shape [num_nodes_total_in_batch].

        Raises
        ------
        ValueError
            If the window or feature dimension of `x` does not match the
            model configuration.
        """
        num_nodes_total, window, num_feat = x.shape
        # Validate with an explicit exception instead of `assert`, which is
        # silently stripped when Python runs with -O.
        if window != self.window_size or num_feat != self.num_features:
            raise ValueError(
                "Input window/feature dims do not match model configuration."
            )

        # LSTM expects [seq_len, batch, features] with batch_first=False;
        # nodes play the role of the batch.
        x_seq = x.permute(1, 0, 2)
        _, (h_n, _) = self.lstm(x_seq)

        # Final hidden state of the (single) top layer: [num_nodes, hidden_lstm].
        h_last = h_n[-1]

        x_g = self.conv1(h_last, edge_index)
        x_g = torch.relu(x_g)
        x_g = self.dropout(x_g)

        x_g = self.conv2(x_g, edge_index)
        x_g = torch.relu(x_g)
        x_g = self.dropout(x_g)

        # [num_nodes, 1] -> [num_nodes]
        out = self.lin(x_g).squeeze(-1)
        return out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def train_one_epoch(
    model: nn.Module,
    loader: DataLoader,
    optimizer: torch.optim.Optimizer,
    device: torch.device,
) -> float:
    """Run one training epoch and return the dataset-averaged MSE loss.

    Parameters
    ----------
    model : nn.Module
        Model with call signature ``model(x, edge_index) -> predictions``.
    loader : DataLoader
        Yields batched PyG graphs exposing ``x``, ``edge_index`` and ``y``.
    optimizer : torch.optim.Optimizer
        Optimizer stepping the model parameters.
    device : torch.device
        Device each batch is moved to before the forward pass.

    Returns
    -------
    float
        Average training loss per graph over the whole dataset.
    """
    model.train()
    criterion = nn.MSELoss()
    total_loss = 0.0

    for batch in loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        out = model(batch.x, batch.edge_index)
        loss = criterion(out, batch.y)
        loss.backward()
        optimizer.step()
        # Weight by graphs in the batch so the epoch average is per-graph.
        total_loss += loss.item() * batch.num_graphs

    # Guard against an empty dataset (consistent with `evaluate`).
    avg_loss = total_loss / max(len(loader.dataset), 1)
    return avg_loss
|
|
|
|
|
|
|
|
|
def evaluate(
    model: nn.Module,
    loader: DataLoader,
    device: torch.device,
):
    """Evaluate `model` on `loader` without gradient tracking.

    Parameters
    ----------
    model : nn.Module
        Model with call signature ``model(x, edge_index) -> predictions``.
    loader : DataLoader
        Yields batched PyG graphs exposing ``x``, ``edge_index`` and ``y``.
    device : torch.device
        Device each batch is moved to before the forward pass.

    Returns
    -------
    tuple
        ``(avg_loss, mse, mae, directional_accuracy, y_true, y_pred)``. The
        metric values are NaN when no finite prediction/target pairs exist;
        otherwise ``y_true`` / ``y_pred`` are the finite-masked arrays.
    """
    model.eval()
    criterion = nn.MSELoss()
    total_loss = 0.0
    all_y_true = []
    all_y_pred = []

    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)
            out = model(batch.x, batch.edge_index)
            loss = criterion(out, batch.y)
            total_loss += loss.item() * batch.num_graphs
            all_y_true.append(batch.y.cpu().numpy())
            all_y_pred.append(out.cpu().numpy())

    # Single guarded average; an empty loader would otherwise divide by zero
    # (previously only the degenerate branch was guarded).
    avg_loss = total_loss / max(len(loader.dataset), 1)

    # np.concatenate raises on an empty list, so guard the empty-loader case.
    y_true = np.concatenate(all_y_true) if all_y_true else np.empty(0, dtype=np.float32)
    y_pred = np.concatenate(all_y_pred) if all_y_pred else np.empty(0, dtype=np.float32)

    # Drop NaN/inf pairs so the sklearn metrics do not raise on bad values.
    mask = np.isfinite(y_true) & np.isfinite(y_pred)
    if mask.sum() == 0:
        nan = float("nan")
        return avg_loss, nan, nan, nan, y_true, y_pred

    y_true_clean = y_true[mask]
    y_pred_clean = y_pred[mask]

    mse = mean_squared_error(y_true_clean, y_pred_clean)
    mae = mean_absolute_error(y_true_clean, y_pred_clean)

    # Fraction of predictions whose sign matches the actual return's sign.
    directional_accuracy = float((np.sign(y_true_clean) == np.sign(y_pred_clean)).mean())

    return avg_loss, mse, mae, directional_accuracy, y_true_clean, y_pred_clean
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def compute_naive_baseline_metrics(targets: np.ndarray, train_start: int, train_end: int, val_start: int, val_end: int, test_start: int, test_end: int):
    """Score a zero-return ("no change") baseline and plot it on the test split.

    This is the "before GNN" reference model: every next-day return is
    predicted to be exactly 0. Prints the MSE for each split and saves a
    predicted-vs-actual scatter plot for the test split.
    """
    # Flatten each chronological split across days and companies.
    splits = {
        "train": targets[train_start:train_end].reshape(-1),
        "val": targets[val_start:val_end].reshape(-1),
        "test": targets[test_start:test_end].reshape(-1),
    }

    # The baseline predicts zero everywhere.
    preds = {name: np.zeros_like(actual) for name, actual in splits.items()}
    mses = {name: mean_squared_error(splits[name], preds[name]) for name in splits}

    y_test = splits["test"]
    y_test_pred = preds["test"]

    # Scatter of actual vs (constant-zero) predicted test returns.
    plt.figure(figsize=(6, 6))
    plt.scatter(y_test, y_test_pred, alpha=0.3, s=10)
    plt.xlabel("Actual returns")
    plt.ylabel("Predicted returns (baseline: 0)")
    plt.title("Baseline (No GNN) Predicted vs Actual Returns")
    lims = [min(y_test.min(), y_test_pred.min()), max(y_test.max(), y_test_pred.max())]
    plt.plot(lims, lims, "r--", linewidth=1)
    plt.tight_layout()
    plt.savefig("baseline_pred_vs_actual.png", dpi=200)
    plt.close()

    print(f"Baseline Train MSE: {mses['train']:.6f}, Val MSE: {mses['val']:.6f}, Test MSE: {mses['test']:.6f}")
    print("Saved baseline scatter plot to baseline_pred_vs_actual.png")
|
|
|
|
|
|
|
|
|
def realtime_predict_last_window(
    model: nn.Module,
    features: np.ndarray,
    edge_index: torch.Tensor,
    window_size: int,
    device: torch.device,
):
    """Generate a real-time style prediction for the latest available day.

    Treats the most recent `window_size` days in `features` as if they were
    "live" data and returns the model's per-node next-day return prediction.

    Raises
    ------
    ValueError
        If fewer than `window_size` days of features are available.
    """
    model.eval()
    num_dates, num_nodes, num_feat = features.shape
    if num_dates < window_size:
        raise ValueError("Not enough data points for real-time window prediction.")

    # Tail window reordered [window, nodes, feat] -> [nodes, window, feat],
    # matching the layout produced by TimeSeriesGraphDataset.
    tail = features[num_dates - window_size : num_dates]
    per_node = tail.transpose(1, 0, 2)

    snapshot = Data(
        x=torch.from_numpy(per_node).to(device),
        edge_index=edge_index.to(device),
    )

    with torch.no_grad():
        predictions = model(snapshot.x, snapshot.edge_index).cpu().numpy()

    return predictions
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """End-to-end pipeline: load data, train the GNN, evaluate, and predict.

    Steps: load/clean the IT-sector CSVs, score a naive zero-return
    baseline, build the correlation graph from training returns, train the
    LSTM+GCN model keeping the best validation checkpoint, evaluate on the
    held-out test split, save a scatter plot, and emit a real-time style
    prediction for the latest window.
    """
    # Input CSVs are expected in the working directory.
    infy_csv = "infy_stock.csv"
    tcs_csv = "tcs_stock.csv"
    nifty_it_csv = "nifty_it_index.csv"
    for p in [infy_csv, tcs_csv, nifty_it_csv]:
        if not os.path.exists(p):
            raise FileNotFoundError(f"Could not find required CSV file: {p}")

    print("Loading and preprocessing data from CSVs...")
    features, targets, dates, companies = load_it_sector_data_from_csvs(
        infy_csv=infy_csv,
        tcs_csv=tcs_csv,
        nifty_it_csv=nifty_it_csv,
    )
    num_dates, num_companies, num_features = features.shape
    print(f"Num dates: {num_dates}, Num companies (nodes): {num_companies}, Num features: {num_features}")

    # Each sample needs `window_size` days of history before its target day.
    window_size = 20
    if num_dates <= window_size + 1:
        raise ValueError("Not enough dates to create time windows. Reduce window_size or use more data.")

    # Usable target time steps span [window_size, num_dates - 1].
    first_t = window_size
    last_t = num_dates - 1
    total_samples = last_t - first_t + 1

    # Chronological 70/15/15 split — no shuffling across time.
    train_samples = int(total_samples * 0.7)
    val_samples = int(total_samples * 0.15)
    test_samples = total_samples - train_samples - val_samples

    train_start_t = first_t
    train_end_t = train_start_t + train_samples
    val_start_t = train_end_t
    val_end_t = val_start_t + val_samples
    test_start_t = val_end_t
    test_end_t = last_t + 1

    print(f"Total usable samples: {total_samples}")
    print(f"Train: {train_samples}, Val: {val_samples}, Test: {test_samples}")

    # "Before GNN" reference: predict zero return everywhere.
    compute_naive_baseline_metrics(
        targets,
        train_start=train_start_t,
        train_end=train_end_t,
        val_start=val_start_t,
        val_end=val_end_t,
        test_start=test_start_t,
        test_end=test_end_t,
    )

    # Build the company graph from TRAINING returns only, so no
    # validation/test information leaks into the graph structure.
    train_returns = targets[train_start_t:train_end_t]
    edge_index = build_correlation_graph(train_returns, threshold=0.2)
    print("Edge index shape:", edge_index.shape)

    train_dataset = TimeSeriesGraphDataset(
        features=features,
        targets=targets,
        edge_index=edge_index,
        window_size=window_size,
        start_t=train_start_t,
        end_t=train_end_t,
    )

    val_dataset = TimeSeriesGraphDataset(
        features=features,
        targets=targets,
        edge_index=edge_index,
        window_size=window_size,
        start_t=val_start_t,
        end_t=val_end_t,
    )

    test_dataset = TimeSeriesGraphDataset(
        features=features,
        targets=targets,
        edge_index=edge_index,
        window_size=window_size,
        start_t=test_start_t,
        end_t=test_end_t,
    )

    # Shuffle only the training loader; evaluation stays chronological.
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device:", device)

    model = GNNTimeSeriesModel(
        window_size=window_size,
        num_features=num_features,
        hidden_lstm=64,
        hidden_gnn=64,
        dropout=0.2,
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

    num_epochs = 30
    best_val_loss = float("inf")
    best_state_dict = None

    print("Starting training...")
    for epoch in range(1, num_epochs + 1):
        train_loss = train_one_epoch(model, train_loader, optimizer, device)
        val_loss, val_mse, val_mae, val_dir_acc, _, _ = evaluate(model, val_loader, device)

        # Keep a CPU copy of the best-by-validation-loss weights.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_state_dict = {k: v.cpu().clone() for k, v in model.state_dict().items()}

        print(
            f"Epoch {epoch:03d} | "
            f"Train Loss: {train_loss:.6f} | "
            f"Val Loss: {val_loss:.6f}, Val MSE: {val_mse:.6f}, Val MAE: {val_mae:.6f}, "
            f"Val DirAcc: {val_dir_acc:.4f}"
        )

    # Restore the best checkpoint before the final test evaluation.
    if best_state_dict is not None:
        model.load_state_dict(best_state_dict)

    print("Evaluating on test set...")
    test_loss, test_mse, test_mae, test_dir_acc, y_true, y_pred = evaluate(model, test_loader, device)
    print(
        f"Test Loss: {test_loss:.6f}, Test MSE: {test_mse:.6f}, "
        f"Test MAE: {test_mae:.6f}, Test DirAcc: {test_dir_acc:.4f}"
    )

    # Predicted-vs-actual scatter on the test split.
    plt.figure(figsize=(6, 6))
    plt.scatter(y_true, y_pred, alpha=0.3, s=10)
    plt.xlabel("Actual returns")
    plt.ylabel("Predicted returns")
    plt.title("GNN Predicted vs Actual Daily Returns (All IT Stocks)")
    lims = [min(y_true.min(), y_pred.min()), max(y_true.max(), y_pred.max())]
    plt.plot(lims, lims, "r--", linewidth=1)
    plt.tight_layout()
    plt.savefig("gnn_it_sector_pred_vs_actual.png", dpi=200)
    plt.close()
    print("Saved scatter plot to gnn_it_sector_pred_vs_actual.png")

    # Real-time style prediction from the latest available window.
    latest_pred = realtime_predict_last_window(
        model=model,
        features=features,
        edge_index=edge_index,
        window_size=window_size,
        device=device,
    )
    print("Real-time style next-day return prediction per node (order of companies):")
    for comp, val in zip(companies, latest_pred):
        print(f"  {comp}: {val:.6f}")
|
|
|
|
|
|
|
|
|
# Run the full pipeline only when executed as a script (not on import).
if __name__ == "__main__":
    main()
|
|
|
|