Spaces:
Sleeping
Sleeping
| import pandas as pd # type: ignore | |
| import numpy as np # type: ignore | |
| import torch # type: ignore | |
| from torch.utils.data import Dataset # type: ignore | |
| from sklearn.preprocessing import StandardScaler # type: ignore | |
class VFVDataset(Dataset):
    """Sliding windows of standardized close-to-close returns read from a CSV.

    The constructor reads the ``Close`` column, converts it to simple
    returns, standardizes the returns with a ``StandardScaler`` and slices
    the result into overlapping windows of ``window_size`` consecutive
    values. Each dataset item is one window (a 1-D ``float32`` tensor).

    Attributes set by ``__init__``:
        scaler: the fitted ``StandardScaler`` (kept so callers can invert
            the scaling).
        windows: ``float32`` tensor of shape ``(n_windows, window_size)``.
    """

    def __init__(self, csv_path, window_size: int = 15) -> None:
        """Load prices from ``csv_path`` and build the window tensor.

        Args:
            csv_path: anything ``pandas.read_csv`` accepts as its first
                argument.
            window_size: number of consecutive returns per window; must be
                at least 1.

        Raises:
            ValueError: if ``window_size`` is smaller than 1, or if the CSV
                yields fewer than two numeric ``Close`` prices (no return
                can be computed).
        """
        super().__init__()
        if window_size < 1:
            raise ValueError(f"window_size must be >= 1, got {window_size!r}")

        # 1. Load the CSV, skipping file lines 1 and 2 (0-indexed). Per the
        #    original author's note these are the extra 'Ticker' and
        #    'Datetime' header rows of the export -- verify against the file.
        df = pd.read_csv(csv_path, skiprows=[1, 2])

        # 2. Force 'Close' to numeric; values that fail to parse become NaN
        #    and are dropped.
        prices = pd.to_numeric(df['Close'], errors='coerce').dropna().values

        # 3. Simple returns: P_t / P_{t-1} - 1. The first element has no
        #    predecessor and is dropped.
        #    NOTE(review): the earlier comment here described log returns,
        #    log(P_t / P_{t-1}), but pct_change() has always produced simple
        #    returns. Numerical behaviour is kept as-is; use
        #    np.diff(np.log(prices)) if log returns are what is wanted.
        returns = pd.Series(prices).pct_change().dropna().values
        if returns.size == 0:
            # Fail with a clear message instead of an error from inside
            # StandardScaler about an empty array.
            raise ValueError(
                "Need at least two numeric 'Close' prices to compute returns; "
                f"found {len(prices)}."
            )

        # 4. Normalize to z-scores (mean 0, std 1). StandardScaler expects a
        #    2-D (n_samples, n_features) array, hence reshape and flatten.
        self.scaler = StandardScaler()
        returns_scaled = self.scaler.fit_transform(returns.reshape(-1, 1)).flatten()

        # 5. Slice into overlapping windows.
        self.windows = self._make_windows(returns_scaled, window_size)
        print(f"Dataset Loaded: {len(self.windows)} windows of {window_size} minutes.")

    @staticmethod
    def _make_windows(values, window_size):
        """Return stride-1 windows of ``values`` as a ``float32`` tensor.

        Window ``i`` is ``values[i : i + window_size]`` for ``i`` in
        ``range(len(values) - window_size)``. This keeps the historical
        window count, which leaves out the final complete window.
        NOTE(review): confirm whether that omission is intentional.

        When no window fits, an empty ``(0, window_size)`` tensor is
        returned so downstream code still sees a 2-D tensor.
        """
        series = torch.as_tensor(np.asarray(values), dtype=torch.float32)
        n_windows = series.shape[0] - window_size
        if n_windows <= 0:
            return torch.empty((0, window_size), dtype=torch.float32)
        # unfold() yields overlapping views; contiguous() materializes them
        # so the windows do not alias one another's storage.
        return series.unfold(0, window_size, 1)[:n_windows].contiguous()

    def __len__(self):
        """Return the number of windows."""
        return len(self.windows)

    def __getitem__(self, idx):
        """Return the window(s) at ``idx`` from the window tensor."""
        return self.windows[idx]