File size: 1,784 Bytes
c52261f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import torch

import pandas as pd

from typing import Tuple, Union
from numpy.typing import ArrayLike


class NAMDataset(torch.utils.data.Dataset):

    def __init__(self,
                 X: Union[ArrayLike, pd.DataFrame],
                 y: Union[ArrayLike, pd.DataFrame],
                 w: Union[ArrayLike, pd.DataFrame] = None):
        """Dataset for NAMs that handles default weights.

        Args:
            X (Union[ArrayLike, pd.DataFrame]): Feature array.
            y (Union[ArrayLike, pd.DataFrame]): Target array.
            w (Union[ArrayLike, pd.DataFrame]): Weight array.
        """
        if isinstance(X, pd.DataFrame):
            X = X.to_numpy()
        if isinstance(y, (pd.DataFrame, pd.Series)):
            y = y.to_numpy()
            
        self.X = torch.tensor(X, requires_grad=False, dtype=torch.float)
        self.y = torch.tensor(y, requires_grad=False, dtype=torch.float)

        if not w:
            self.w = torch.clone(self.y)
            self.w[~torch.isnan(self.w)] = 1.0
            self.w[torch.isnan(self.w)] = 0.0
        else:
            self.w = torch.tensor(w, requires_grad=False, dtype=torch.float)

        if len(self.y.shape) > 1:
            # In multitask setting, set missing labels to 0. The loss
            # contributions from these examples will get zeroed out downstream
            # but leaving nan values will cause a crash.
            self.y[self.y != self.y] = 0.0
        else:
            # Create task dimension in single task setting for consistency.
            self.y = self.y.unsqueeze(1)
            self.w = self.w.unsqueeze(1)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx: int) -> Tuple[ArrayLike, ...]:
        return self.X[idx], self.y[idx], self.w[idx]