"""Evaluate the trained LSTM autoencoder on the held-out test split.

Loads the processed stock data, keeps rows dated 2024-01-01 or later,
reconstructs every test batch with the saved model and writes MAE, RMSE
and R2 of the reconstruction to ``resources/metrics.json``.
"""
import os
import sys
import torch
import json
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Make the project root importable so that `src.model` resolves when this
# script is run directly from its own directory.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from src.model import QuarterlyStockDataset, LSTMAutoEncoder  # uses updated version with embeddings

if __name__ == "__main__":
    # Resolve all paths relative to the project root (parent of this
    # script's directory).
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    processed_data_path = os.path.join(base_dir, 'data', 'processed', 'stock_data.parquet')
    model_path = os.path.join(base_dir, 'models', 'lstm_autoencoder.pth')
    metrics_path = os.path.join(base_dir, 'resources', 'metrics.json')

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    df = pd.read_parquet(processed_data_path)
    # Test split: everything dated on or after 2024-01-01.
    test_df = df[df['Date'] >= '2024-01-01']

    sequence_length = 90
    test_dataset = QuarterlyStockDataset(test_df, sequence_length=sequence_length)
    # shuffle=False keeps the evaluation order deterministic.
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

    # NOTE(review): these presumably must match the hyperparameters the
    # checkpoint was trained with, otherwise load_state_dict will fail on
    # mismatched tensor shapes -- confirm against the training script.
    best_params = {
        'hidden_size': 64,
        'latent_dim': 32,
        'num_layers': 2,
        'embed_dim': 16,
    }

    # Counted on the full frame (not only the test split).
    # NOTE(review): presumably so the embedding table has the same size as
    # during training -- confirm.
    num_tickers = df['Ticker_Encoded'].nunique()

    model = LSTMAutoEncoder(
        input_dim=5,  # NOTE(review): assumed to equal the per-step feature count -- confirm
        num_tickers=num_tickers,
        embed_dim=best_params['embed_dim'],
        hidden_size=best_params['hidden_size'],
        latent_dim=best_params['latent_dim'],
        num_layers=best_params['num_layers'],
    ).to(device)

    # SECURITY NOTE: torch.load unpickles the file; only load checkpoints
    # from a trusted source (or pass weights_only=True on torch versions
    # that support it).
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    all_actual, all_recon = [], []
    with torch.no_grad():
        for batch_x, batch_ticker in test_loader:
            batch_x, batch_ticker = batch_x.to(device), batch_ticker.to(device)
            recon = model(batch_x, batch_ticker)
            all_actual.append(batch_x.cpu().numpy())
            all_recon.append(recon.cpu().numpy())

    # Fail with a clear message instead of np.concatenate's generic
    # "need at least one array to concatenate" when the split is empty.
    if not all_actual:
        raise ValueError(
            "Test split produced no batches; cannot compute metrics "
            f"(rows in test_df: {len(test_df)}, sequence_length: {sequence_length})."
        )

    X_test = np.concatenate(all_actual, axis=0)
    X_recon = np.concatenate(all_recon, axis=0)

    # Collapse every leading axis so each row is one vector along the last
    # axis; scikit-learn's metrics expect 2-D (n_samples, n_outputs) input.
    X_test_flat = X_test.reshape(-1, X_test.shape[-1])
    X_recon_flat = X_recon.reshape(-1, X_recon.shape[-1])

    # Multi-output metrics use scikit-learn's default aggregation
    # (uniform average over the output columns).
    mae = mean_absolute_error(X_test_flat, X_recon_flat)
    rmse = np.sqrt(mean_squared_error(X_test_flat, X_recon_flat))
    r2 = r2_score(X_test_flat, X_recon_flat)

    # Cast to built-in float: NumPy scalars are not JSON serializable.
    metrics = {
        "MAE": float(mae),
        "RMSE": float(rmse),
        "R2": float(r2),
    }

    # Ensure the output directory exists so a completed evaluation is not
    # lost to FileNotFoundError on a fresh checkout.
    os.makedirs(os.path.dirname(metrics_path), exist_ok=True)
    with open(metrics_path, 'w', encoding='utf-8') as f:
        json.dump(metrics, f, indent=4)

    print(f"✅ Test metrics saved at: {metrics_path}")