File size: 2,610 Bytes
bbf5d55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import os
import sys
import torch
import json
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from src.model import QuarterlyStockDataset, LSTMAutoEncoder  # uses updated version with embeddings

if __name__ == "__main__":
    # Evaluate the saved LSTM autoencoder on the held-out split and write the
    # reconstruction metrics (MAE, RMSE, R2) to resources/metrics.json.

    # Project root is the parent of the directory containing this script.
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    processed_data_path = os.path.join(base_dir, 'data', 'processed', 'stock_data.parquet')
    model_path = os.path.join(base_dir, 'models', 'lstm_autoencoder.pth')
    metrics_path = os.path.join(base_dir, 'resources', 'metrics.json')

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    df = pd.read_parquet(processed_data_path)

    # Only rows dated on or after the cutoff are used for evaluation.
    test_start = '2024-01-01'
    test_df = df[df['Date'] >= test_start]

    sequence_length = 90
    test_dataset = QuarterlyStockDataset(test_df, sequence_length=sequence_length)
    # shuffle=False keeps the evaluation order deterministic.
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

    # NOTE(review): presumably the hyperparameters the checkpoint was trained
    # with; load_state_dict below raises if the architecture does not match.
    best_params = {
        'hidden_size': 64,
        'latent_dim': 32,
        'num_layers': 2,
        'embed_dim': 16,
    }

    # Counted on the full frame (not just the test split) so the embedding
    # table size does not depend on which tickers appear after the cutoff.
    num_tickers = df['Ticker_Encoded'].nunique()

    model = LSTMAutoEncoder(
        input_dim=5,  # assumes the dataset yields 5 features per step - TODO confirm
        num_tickers=num_tickers,
        embed_dim=best_params['embed_dim'],
        hidden_size=best_params['hidden_size'],
        latent_dim=best_params['latent_dim'],
        num_layers=best_params['num_layers'],
    ).to(device)

    # map_location lets a checkpoint saved on GPU load on a CPU-only machine.
    # NOTE: torch.load may unpickle arbitrary objects depending on the
    # installed torch version; only load checkpoints from a trusted source.
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    all_actual, all_recon = [], []

    # Inference only: no gradients are needed.
    with torch.no_grad():
        for batch_x, batch_ticker in test_loader:
            batch_x, batch_ticker = batch_x.to(device), batch_ticker.to(device)
            recon = model(batch_x, batch_ticker)
            all_actual.append(batch_x.cpu().numpy())
            all_recon.append(recon.cpu().numpy())

    # Fail with an actionable message instead of np.concatenate's
    # "need at least one array to concatenate" when the split is empty.
    if not all_actual:
        raise ValueError(
            "Test set produced no batches: no sequences could be built from "
            f"rows with Date >= {test_start!r} in {processed_data_path}"
        )

    X_test = np.concatenate(all_actual, axis=0)
    X_recon = np.concatenate(all_recon, axis=0)
    # Collapse every leading axis so each row is one feature vector and each
    # column is one feature; sklearn metrics expect 2-D (samples, outputs).
    X_test_flat = X_test.reshape(-1, X_test.shape[-1])
    X_recon_flat = X_recon.reshape(-1, X_recon.shape[-1])

    mae = mean_absolute_error(X_test_flat, X_recon_flat)
    rmse = np.sqrt(mean_squared_error(X_test_flat, X_recon_flat))
    r2 = r2_score(X_test_flat, X_recon_flat)

    # Cast to built-in float: NumPy scalars are not JSON serializable.
    metrics = {
        "MAE": float(mae),
        "RMSE": float(rmse),
        "R2": float(r2)
    }

    # Ensure the output directory exists so the write cannot fail after the
    # whole evaluation has already run.
    os.makedirs(os.path.dirname(metrics_path), exist_ok=True)
    with open(metrics_path, 'w', encoding='utf-8') as f:
        json.dump(metrics, f, indent=4)
    print(f"✅ Test metrics saved at: {metrics_path}")