# Synthetic_Stock_Data / src /ae_evaluate.py
# Raheel Abdul Rehman
# Prod Publish
# bbf5d55
import os
import sys
import torch
import json
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from src.model import QuarterlyStockDataset, LSTMAutoEncoder # uses updated version with embeddings
def flatten_feature_windows(batches):
    """Stack per-batch arrays and collapse every leading axis into rows.

    Args:
        batches: list of NumPy arrays that share all dimensions except the
            first (one array per evaluated batch).

    Returns:
        A 2-D array of shape ``(-1, last_dim)`` where ``last_dim`` is the
        size of the final axis of the stacked input.

    Raises:
        ValueError: if ``batches`` is empty, i.e. nothing was evaluated.
    """
    if not batches:
        # np.concatenate would also raise here, but with a message that
        # does not point at the real cause (an empty test split).
        raise ValueError(
            "No batches to evaluate: the test split produced zero sequences."
        )
    stacked = np.concatenate(batches, axis=0)
    return stacked.reshape(-1, stacked.shape[-1])


def save_metrics(metrics, metrics_path):
    """Write ``metrics`` as indented JSON to ``metrics_path``.

    The parent directory is created when missing so that a fresh checkout
    without the output folder does not fail after the whole evaluation ran.

    Args:
        metrics: JSON-serialisable mapping of metric name to value.
        metrics_path: destination file path.
    """
    parent_dir = os.path.dirname(metrics_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(metrics_path, 'w', encoding='utf-8') as f:
        json.dump(metrics, f, indent=4)


def main():
    """Evaluate the saved LSTM autoencoder on the test split and store metrics.

    Reads the processed parquet file, keeps rows with ``Date`` on or after
    2024-01-01, reconstructs every test batch with the checkpointed model and
    writes MAE / RMSE / R2 between inputs and reconstructions to
    ``resources/metrics.json`` under the project root.
    """
    # Project root is the parent of the directory that contains this script.
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    processed_data_path = os.path.join(base_dir, 'data', 'processed', 'stock_data.parquet')
    model_path = os.path.join(base_dir, 'models', 'lstm_autoencoder.pth')
    metrics_path = os.path.join(base_dir, 'resources', 'metrics.json')
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    df = pd.read_parquet(processed_data_path)
    # NOTE(review): comparing against a string assumes 'Date' is a datetime
    # (or ISO-formatted string) column - confirm against the preprocessing step.
    test_df = df[df['Date'] >= '2024-01-01']

    sequence_length = 90
    test_dataset = QuarterlyStockDataset(test_df, sequence_length=sequence_length)
    # shuffle=False keeps the evaluation order deterministic.
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

    # Hard-coded architecture; load_state_dict below fails on a shape mismatch
    # if these differ from the values the checkpoint was trained with.
    best_params = {
        'hidden_size': 64,
        'latent_dim': 32,
        'num_layers': 2,
        'embed_dim': 16,
    }
    # Counted on the full frame (not only the test split).
    # NOTE(review): presumably this must equal the ticker count used in training.
    num_tickers = df['Ticker_Encoded'].nunique()

    model = LSTMAutoEncoder(
        input_dim=5,  # NOTE(review): presumably the number of features per timestep - confirm
        num_tickers=num_tickers,
        embed_dim=best_params['embed_dim'],
        hidden_size=best_params['hidden_size'],
        latent_dim=best_params['latent_dim'],
        num_layers=best_params['num_layers'],
    ).to(device)
    # SECURITY NOTE: torch.load unpickles the file; only load checkpoints from
    # a trusted source (or pass weights_only=True on torch versions that
    # support it).
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    all_actual, all_recon = [], []
    with torch.no_grad():
        for batch_x, batch_ticker in test_loader:
            batch_x, batch_ticker = batch_x.to(device), batch_ticker.to(device)
            recon = model(batch_x, batch_ticker)
            # Move to CPU before converting; NumPy cannot read CUDA tensors.
            all_actual.append(batch_x.cpu().numpy())
            all_recon.append(recon.cpu().numpy())

    # Collapse everything but the last axis so sklearn receives 2-D inputs.
    X_test_flat = flatten_feature_windows(all_actual)
    X_recon_flat = flatten_feature_windows(all_recon)

    mae = mean_absolute_error(X_test_flat, X_recon_flat)
    rmse = np.sqrt(mean_squared_error(X_test_flat, X_recon_flat))
    r2 = r2_score(X_test_flat, X_recon_flat)

    # Cast to built-in float so the values are JSON-serialisable.
    metrics = {
        "MAE": float(mae),
        "RMSE": float(rmse),
        "R2": float(r2),
    }
    save_metrics(metrics, metrics_path)
    print(f"✅ Test metrics saved at: {metrics_path}")


if __name__ == "__main__":
    main()