|
|
import os |
|
|
import sys |
|
|
import torch |
|
|
import json |
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
from torch.utils.data import DataLoader |
|
|
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score |
|
|
import matplotlib.pyplot as plt |
|
|
|
|
|
# Put the directory one level above this script's folder on the module search
# path, so the project-local `src` package below can be imported when the
# script is launched directly.
sys.path.append(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)

# Project-local import; it relies on the sys.path entry added just above.
from src.model import QuarterlyStockDataset, LSTMAutoEncoder  # noqa: E402
|
|
|
|
|
def save_metrics(metrics, metrics_path):
    """Write ``metrics`` to ``metrics_path`` as indented JSON.

    The parent directory is created when it does not exist yet, so a missing
    output folder does not fail the run after the evaluation has already
    been computed.

    Args:
        metrics: JSON-serializable mapping of metric name to value.
        metrics_path: Destination file path; an existing file is overwritten.

    Raises:
        OSError: If the directory or the file cannot be created or written.
        TypeError: If ``metrics`` holds a value ``json`` cannot serialize.
    """
    parent_dir = os.path.dirname(metrics_path)
    if parent_dir:  # a bare filename has no directory component to create
        os.makedirs(parent_dir, exist_ok=True)
    with open(metrics_path, 'w', encoding='utf-8') as f:
        json.dump(metrics, f, indent=4)


def main():
    """Evaluate the saved LSTM autoencoder on rows dated 2024-01-01 onwards.

    Reads the processed parquet file, runs every test batch through the
    checkpointed model, compares inputs with their reconstructions using
    MAE, RMSE and R2, and stores the three numbers in
    ``resources/metrics.json`` under the project root.

    Raises:
        ValueError: If the test split yields no batches, so there is nothing
            to score.
    """
    # Project root: the directory one level above this script's folder.
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    processed_data_path = os.path.join(base_dir, 'data', 'processed', 'stock_data.parquet')
    model_path = os.path.join(base_dir, 'models', 'lstm_autoencoder.pth')
    metrics_path = os.path.join(base_dir, 'resources', 'metrics.json')

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    df = pd.read_parquet(processed_data_path)

    # Hold-out split by date.
    # NOTE(review): presumably 'Date' is a datetime column so the string
    # comparison is chronological - confirm against the preprocessing step.
    test_df = df[df['Date'] >= '2024-01-01']

    sequence_length = 90
    test_dataset = QuarterlyStockDataset(test_df, sequence_length=sequence_length)
    # shuffle=False keeps the batch order deterministic between runs.
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

    # These values define the layer shapes; load_state_dict() below fails if
    # they differ from the ones the checkpoint was saved with.
    best_params = {
        'hidden_size': 64,
        'latent_dim': 32,
        'num_layers': 2,
        'embed_dim': 16,
    }

    # Counted on the full frame, not only the test split.
    # NOTE(review): presumably this must equal the ticker count used at
    # training time - confirm.
    num_tickers = df['Ticker_Encoded'].nunique()

    model = LSTMAutoEncoder(
        input_dim=5,  # presumably the number of features per time step - TODO confirm
        num_tickers=num_tickers,
        embed_dim=best_params['embed_dim'],
        hidden_size=best_params['hidden_size'],
        latent_dim=best_params['latent_dim'],
        num_layers=best_params['num_layers'],
    ).to(device)

    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # from a trusted source; consider weights_only=True if the installed
    # torch version supports it.
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    all_actual, all_recon = [], []

    # Inference only: no gradients are needed.
    with torch.no_grad():
        for batch_x, batch_ticker in test_loader:
            batch_x, batch_ticker = batch_x.to(device), batch_ticker.to(device)
            recon = model(batch_x, batch_ticker)
            all_actual.append(batch_x.cpu().numpy())
            all_recon.append(recon.cpu().numpy())

    if not all_actual:
        # np.concatenate([]) would fail with an unhelpful message; name the
        # actual cause instead.
        raise ValueError(
            "Test split produced no batches: no rows dated 2024-01-01 or "
            "later, or too few rows to build a sequence of length "
            f"{sequence_length}."
        )

    X_test = np.concatenate(all_actual, axis=0)
    X_recon = np.concatenate(all_recon, axis=0)

    # Collapse every leading axis so each row is one vector along the last
    # axis; the sklearn metrics below expect at most 2-D input.
    X_test_flat = X_test.reshape(-1, X_test.shape[-1])
    X_recon_flat = X_recon.reshape(-1, X_recon.shape[-1])

    mae = mean_absolute_error(X_test_flat, X_recon_flat)
    rmse = np.sqrt(mean_squared_error(X_test_flat, X_recon_flat))
    r2 = r2_score(X_test_flat, X_recon_flat)

    # Cast to built-in float so json can serialize the values.
    metrics = {
        "MAE": float(mae),
        "RMSE": float(rmse),
        "R2": float(r2),
    }

    save_metrics(metrics, metrics_path)
    print(f"✅ Test metrics saved at: {metrics_path}")


if __name__ == "__main__":
    main()
|
|
|