Spaces:
Sleeping
Sleeping
File size: 1,784 Bytes
c52261f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 | import torch
import pandas as pd
from typing import Tuple, Union
from numpy.typing import ArrayLike
class NAMDataset(torch.utils.data.Dataset):
    """Map-style dataset yielding ``(features, targets, weights)`` triples.

    All three members are stored as float tensors. Targets and weights
    always carry an explicit task dimension: one-dimensional inputs are
    expanded to shape ``(n_samples, 1)``.
    """

    def __init__(self,
                 X: Union[ArrayLike, pd.DataFrame],
                 y: Union[ArrayLike, pd.DataFrame],
                 w: Union[ArrayLike, pd.DataFrame, None] = None):
        """Dataset for NAMs that handles default weights.

        Args:
            X (Union[ArrayLike, pd.DataFrame]): Feature array.
            y (Union[ArrayLike, pd.DataFrame]): Target array. ``NaN``
                entries mark missing labels.
            w (Union[ArrayLike, pd.DataFrame]): Weight array. When ``None``
                (the default), every sample gets weight 1.0 except those
                whose label is ``NaN``, which get weight 0.0.
        """
        # pandas containers are converted explicitly so that torch.tensor
        # always receives a plain array.
        if isinstance(X, (pd.DataFrame, pd.Series)):
            X = X.to_numpy()
        if isinstance(y, (pd.DataFrame, pd.Series)):
            y = y.to_numpy()
        if isinstance(w, (pd.DataFrame, pd.Series)):
            w = w.to_numpy()

        self.X = torch.tensor(X, requires_grad=False, dtype=torch.float)
        self.y = torch.tensor(y, requires_grad=False, dtype=torch.float)

        # Compare against None explicitly: the truth value of a
        # multi-element array is ambiguous and raises ValueError, and a
        # one-element array holding 0 would be mistaken for "no weights".
        if w is None:
            # 1.0 where a label is present, 0.0 where it is NaN.
            self.w = (~torch.isnan(self.y)).to(torch.float)
        else:
            self.w = torch.tensor(w, requires_grad=False, dtype=torch.float)

        if self.y.dim() > 1:
            # In multitask setting, set missing labels to 0. The loss
            # contributions from these examples will get zeroed out
            # downstream but leaving nan values will cause a crash.
            self.y[torch.isnan(self.y)] = 0.0
        else:
            # Create task dimension in single task setting for consistency.
            # Note: NaN labels are left untouched in this branch.
            self.y = self.y.unsqueeze(1)
            self.w = self.w.unsqueeze(1)

    def __len__(self) -> int:
        """Return the number of samples (rows of ``X``)."""
        return len(self.X)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, ...]:
        """Return the ``(X, y, w)`` tensors for the sample(s) at ``idx``."""
        return self.X[idx], self.y[idx], self.w[idx]