Upload folder using huggingface_hub

Browse files

Files changed (9) hide show

evaluation/AASIST/.ipynb_checkpoints/AASIST_util-checkpoint.py +1038 -0
evaluation/AASIST/AASIST_util.py +1065 -0
evaluation/AASIST/S1_best.pth +3 -0
evaluation/AASIST/S2_best.pth +3 -0
evaluation/AASIST/S3_best.pth +3 -0
evaluation/AASIST/S4_best.pth +3 -0
evaluation/AASIST/S5_best.pth +3 -0
evaluation/AASIST/__pycache__/AASIST_util.cpython-310.pyc +0 -0
evaluation/AASIST/__pycache__/AASIST_util.cpython-39.pyc +0 -0

evaluation/AASIST/.ipynb_checkpoints/AASIST_util-checkpoint.py ADDED Viewed

	@@ -0,0 +1,1038 @@

+"""
+AASIST
+Copyright (c) 2021-present NAVER Corp.
+MIT license
+"""
+import random
+from typing import Union
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch import Tensor
+import sys
+import os
+import argparse
+import torch.optim as optim
+import torchaudio
+from torch.utils.data import Dataset, DataLoader
+from tqdm import tqdm
+import torchaudio.transforms as T
+from collections import defaultdict
+import torch.multiprocessing
+torch.multiprocessing.set_sharing_strategy('file_system')
+def extract_system_id(wavname):
+    """Extrait l'identifiant du système à partir du nom du fichier."""
+    return wavname.split('-')[0]
+def pad(x, max_len=64600):
+    """ Padding ou découpage d'un signal audio """
+    x_len = x.shape[0]
+    if x_len >= max_len:
+        return x[:max_len]
+    num_repeats = int(max_len / x_len) + 1
+    padded_x = np.tile(x, (num_repeats))[:max_len]
+    return padded_x
+def pad_random(x: np.ndarray, max_len: int = 64600):
+    """ Découpe aléatoire si trop long, padding si trop court """
+    x_len = x.shape[0]
+    if x_len >= max_len:
+        stt = np.random.randint(x_len - max_len)
+        return x[stt:stt + max_len]
+    num_repeats = int(max_len / x_len) + 1
+    padded_x = np.tile(x, (num_repeats))[:max_len]
+    return padded_x
+# ==========================================================
+# Data loading (Dataset)
+# ==========================================================
+class MyDataset(Dataset):
+    def __init__(self, wavdir, mos_list="", target_sample_rate=16000):
+        self.mos_lookup = {}
+        if mos_list:
+            with open(mos_list, 'r') as f:
+                for line in f:
+                    parts = line.strip().split(',')
+                    wavname = parts[0]
+                    mos = float(parts[1])
+                    self.mos_lookup[wavname] = mos
+        self.wavdir = wavdir
+        wavnames=os.listdir(self.wavdir)
+        self.wavnames = [f_name for f_name in wavnames if f_name.endswith(".wav")]
+        self.target_sample_rate = target_sample_rate
+    def __getitem__(self, idx):
+        wavname = self.wavnames[idx]
+        wavpath = os.path.join(self.wavdir, wavname)
+        wav, sample_rate = torchaudio.load(wavpath)
+        if sample_rate != self.target_sample_rate:
+            resampler = T.Resample(orig_freq=sample_rate, new_freq=self.target_sample_rate)
+            wav = resampler(wav)
+        if wavname in self.mos_lookup:
+            score = self.mos_lookup[wavname]
+        else:
+            score = 0 #TODO: it should be manage more properly
+        return wav, score, wavname
+    def __len__(self):
+        return len(self.wavnames)
+    def collate_fn(self, batch):
+        """ Padding et tronquage des séquences audio pour normaliser à 64600 frames """
+        wavs, scores, wavnames = zip(*batch)
+        max_len = 64600
+        output_wavs = []
+        for wav in wavs:
+            wav_np = wav.squeeze(0).cpu().numpy()  # Enlève la dimension channel (1,) et met sur CPU
+            padded_wav = pad_random(wav_np, max_len)
+            padded_wav = torch.tensor(padded_wav, dtype=torch.float32).unsqueeze(0)  # Remettre la dimension (1, time)
+            output_wavs.append(padded_wav)
+        output_wavs = torch.stack(output_wavs, dim=0)  # [batch_size, 1, 64600]
+        scores = torch.tensor(scores, dtype=torch.float32)
+        return output_wavs, scores, wavnames
+class GraphAttentionLayer(nn.Module):
+    def __init__(self, in_dim, out_dim, **kwargs):
+        super().__init__()
+        # attention map
+        self.att_proj = nn.Linear(in_dim, out_dim)
+        self.att_weight = self._init_new_params(out_dim, 1)
+        # project
+        self.proj_with_att = nn.Linear(in_dim, out_dim)
+        self.proj_without_att = nn.Linear(in_dim, out_dim)
+        # batch norm
+        self.bn = nn.BatchNorm1d(out_dim)
+        # dropout for inputs
+        self.input_drop = nn.Dropout(p=0.2)
+        # activate
+        self.act = nn.SELU(inplace=True)
+        # temperature
+        self.temp = 1.
+        if "temperature" in kwargs:
+            self.temp = kwargs["temperature"]
+    def forward(self, x):
+        '''
+        x   :(#bs, #node, #dim)
+        '''
+        # apply input dropout
+        x = self.input_drop(x)
+        # derive attention map
+        att_map = self._derive_att_map(x)
+        # projection
+        x = self._project(x, att_map)
+        # apply batch norm
+        x = self._apply_BN(x)
+        x = self.act(x)
+        return x
+    def _pairwise_mul_nodes(self, x):
+        '''
+        Calculates pairwise multiplication of nodes.
+        - for attention map
+        x           :(#bs, #node, #dim)
+        out_shape   :(#bs, #node, #node, #dim)
+        '''
+        nb_nodes = x.size(1)
+        x = x.unsqueeze(2).expand(-1, -1, nb_nodes, -1)
+        x_mirror = x.transpose(1, 2)
+        return x * x_mirror
+    def _derive_att_map(self, x):
+        '''
+        x           :(#bs, #node, #dim)
+        out_shape   :(#bs, #node, #node, 1)
+        '''
+        att_map = self._pairwise_mul_nodes(x)
+        # size: (#bs, #node, #node, #dim_out)
+        att_map = torch.tanh(self.att_proj(att_map))
+        # size: (#bs, #node, #node, 1)
+        att_map = torch.matmul(att_map, self.att_weight)
+        # apply temperature
+        att_map = att_map / self.temp
+        att_map = F.softmax(att_map, dim=-2)
+        return att_map
+    def _project(self, x, att_map):
+        x1 = self.proj_with_att(torch.matmul(att_map.squeeze(-1), x))
+        x2 = self.proj_without_att(x)
+        return x1 + x2
+    def _apply_BN(self, x):
+        org_size = x.size()
+        x = x.view(-1, org_size[-1])
+        x = self.bn(x)
+        x = x.view(org_size)
+        return x
+    def _init_new_params(self, *size):
+        out = nn.Parameter(torch.FloatTensor(*size))
+        nn.init.xavier_normal_(out)
+        return out
+class HtrgGraphAttentionLayer(nn.Module):
+    """Graph attention layer over two node sets plus a single master node.
+    Both node sets are projected separately, concatenated into one graph and
+    attended with separate scoring vectors for the type1-type1, type2-type2
+    and cross-type blocks of the attention map. A master node is updated from
+    all nodes through its own attention.
+    """
+    def __init__(self, in_dim, out_dim, **kwargs):
+        super().__init__()
+        # per-type input projections (dimension is kept at in_dim)
+        self.proj_type1 = nn.Linear(in_dim, in_dim)
+        self.proj_type2 = nn.Linear(in_dim, in_dim)
+        # attention map
+        self.att_proj = nn.Linear(in_dim, out_dim)
+        self.att_projM = nn.Linear(in_dim, out_dim)
+        # scoring vectors: 11 / 22 for same-type pairs, 12 for cross-type pairs,
+        # M for the master node
+        self.att_weight11 = self._init_new_params(out_dim, 1)
+        self.att_weight22 = self._init_new_params(out_dim, 1)
+        self.att_weight12 = self._init_new_params(out_dim, 1)
+        self.att_weightM = self._init_new_params(out_dim, 1)
+        # project
+        self.proj_with_att = nn.Linear(in_dim, out_dim)
+        self.proj_without_att = nn.Linear(in_dim, out_dim)
+        self.proj_with_attM = nn.Linear(in_dim, out_dim)
+        self.proj_without_attM = nn.Linear(in_dim, out_dim)
+        # batch norm
+        self.bn = nn.BatchNorm1d(out_dim)
+        # dropout for inputs
+        self.input_drop = nn.Dropout(p=0.2)
+        # activate
+        self.act = nn.SELU(inplace=True)
+        # temperature (1.0 unless passed as the "temperature" keyword)
+        self.temp = 1.
+        if "temperature" in kwargs:
+            self.temp = kwargs["temperature"]
+    def forward(self, x1, x2, master=None):
+        '''
+        x1  :(#bs, #node, #dim)
+        x2  :(#bs, #node, #dim)
+        master: optional master node; when None it is the mean over all nodes
+        returns (x1, x2, master): the updated node sets, split back at the
+        original type1/type2 boundary, and the updated master node
+        '''
+        num_type1 = x1.size(1)
+        num_type2 = x2.size(1)
+        x1 = self.proj_type1(x1)
+        x2 = self.proj_type2(x2)
+        # type1 nodes come first along the node axis, type2 nodes after them
+        x = torch.cat([x1, x2], dim=1)
+        if master is None:
+            master = torch.mean(x, dim=1, keepdim=True)
+        # apply input dropout
+        x = self.input_drop(x)
+        # derive attention map
+        att_map = self._derive_att_map(x, num_type1, num_type2)
+        # directional edge for master node
+        master = self._update_master(x, master)
+        # projection
+        x = self._project(x, att_map)
+        # apply batch norm
+        x = self._apply_BN(x)
+        x = self.act(x)
+        x1 = x.narrow(1, 0, num_type1)
+        x2 = x.narrow(1, num_type1, num_type2)
+        return x1, x2, master
+    def _update_master(self, x, master):
+        '''Update the master node from all nodes via its own attention map.'''
+        att_map = self._derive_att_map_master(x, master)
+        master = self._project_master(x, master, att_map)
+        return master
+    def _pairwise_mul_nodes(self, x):
+        '''
+        Calculates pairwise multiplication of nodes.
+        - for attention map
+        x           :(#bs, #node, #dim)
+        out_shape   :(#bs, #node, #node, #dim)
+        '''
+        nb_nodes = x.size(1)
+        x = x.unsqueeze(2).expand(-1, -1, nb_nodes, -1)
+        x_mirror = x.transpose(1, 2)
+        return x * x_mirror
+    def _derive_att_map_master(self, x, master):
+        '''
+        Attention of the master node over all nodes.
+        x           :(#bs, #node, #dim)
+        out_shape   :(#bs, #node, 1), softmax-normalised over the node axis
+        '''
+        # master is broadcast against every node
+        att_map = x * master
+        att_map = torch.tanh(self.att_projM(att_map))
+        att_map = torch.matmul(att_map, self.att_weightM)
+        # apply temperature
+        att_map = att_map / self.temp
+        att_map = F.softmax(att_map, dim=-2)
+        return att_map
+    def _derive_att_map(self, x, num_type1, num_type2):
+        '''
+        x           :(#bs, #node, #dim)
+        out_shape   :(#bs, #node, #node, 1)
+        '''
+        att_map = self._pairwise_mul_nodes(x)
+        # size: (#bs, #node, #node, #dim_out)
+        att_map = torch.tanh(self.att_proj(att_map))
+        # size: (#bs, #node, #node, 1)
+        att_board = torch.zeros_like(att_map[:, :, :, 0]).unsqueeze(-1)
+        # fill the four blocks of the map; both cross-type blocks share att_weight12
+        att_board[:, :num_type1, :num_type1, :] = torch.matmul(
+            att_map[:, :num_type1, :num_type1, :], self.att_weight11)
+        att_board[:, num_type1:, num_type1:, :] = torch.matmul(
+            att_map[:, num_type1:, num_type1:, :], self.att_weight22)
+        att_board[:, :num_type1, num_type1:, :] = torch.matmul(
+            att_map[:, :num_type1, num_type1:, :], self.att_weight12)
+        att_board[:, num_type1:, :num_type1, :] = torch.matmul(
+            att_map[:, num_type1:, :num_type1, :], self.att_weight12)
+        att_map = att_board
+        # apply temperature
+        att_map = att_map / self.temp
+        att_map = F.softmax(att_map, dim=-2)
+        return att_map
+    def _project(self, x, att_map):
+        '''Sum of the attention-weighted projection and the plain projection of x.'''
+        x1 = self.proj_with_att(torch.matmul(att_map.squeeze(-1), x))
+        x2 = self.proj_without_att(x)
+        return x1 + x2
+    def _project_master(self, x, master, att_map):
+        '''Project the attention-weighted sum of nodes and add the projected master.'''
+        # att_map becomes (#bs, 1, #node) so the matmul yields a single master row
+        x1 = self.proj_with_attM(torch.matmul(
+            att_map.squeeze(-1).unsqueeze(1), x))
+        x2 = self.proj_without_attM(master)
+        return x1 + x2
+    def _apply_BN(self, x):
+        '''Apply BatchNorm1d over the last axis by flattening all leading axes.'''
+        org_size = x.size()
+        x = x.view(-1, org_size[-1])
+        x = self.bn(x)
+        x = x.view(org_size)
+        return x
+    def _init_new_params(self, *size):
+        '''Create a Parameter of the given size initialised with xavier_normal_.'''
+        out = nn.Parameter(torch.FloatTensor(*size))
+        nn.init.xavier_normal_(out)
+        return out
+class GraphPool(nn.Module):
+    def __init__(self, k: float, in_dim: int, p: Union[float, int]):
+        super().__init__()
+        self.k = k
+        self.sigmoid = nn.Sigmoid()
+        self.proj = nn.Linear(in_dim, 1)
+        self.drop = nn.Dropout(p=p) if p > 0 else nn.Identity()
+        self.in_dim = in_dim
+    def forward(self, h):
+        Z = self.drop(h)
+        weights = self.proj(Z)
+        scores = self.sigmoid(weights)
+        new_h = self.top_k_graph(scores, h, self.k)
+        return new_h
+    def top_k_graph(self, scores, h, k):
+        """
+        args
+        =====
+        scores: attention-based weights (#bs, #node, 1)
+        h: graph data (#bs, #node, #dim)
+        k: ratio of remaining nodes, (float)
+        returns
+        =====
+        h: graph pool applied data (#bs, #node', #dim)
+        """
+        _, n_nodes, n_feat = h.size()
+        n_nodes = max(int(n_nodes * k), 1)
+        _, idx = torch.topk(scores, n_nodes, dim=1)
+        idx = idx.expand(-1, -1, n_feat)
+        h = h * scores
+        h = torch.gather(h, 1, idx)
+        return h
+class CONV(nn.Module):
+    @staticmethod
+    def to_mel(hz):
+        return 2595 * np.log10(1 + hz / 700)
+    @staticmethod
+    def to_hz(mel):
+        return 700 * (10**(mel / 2595) - 1)
+    def __init__(self,
+                 out_channels,
+                 kernel_size,
+                 sample_rate=16000,
+                 in_channels=1,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 bias=False,
+                 groups=1,
+                 mask=False):
+        super().__init__()
+        if in_channels != 1:
+            msg = "SincConv only support one input channel (here, in_channels = {%i})" % (
+                in_channels)
+            raise ValueError(msg)
+        self.out_channels = out_channels
+        self.kernel_size = kernel_size
+        self.sample_rate = sample_rate
+        # Forcing the filters to be odd (i.e, perfectly symmetrics)
+        if kernel_size % 2 == 0:
+            self.kernel_size = self.kernel_size + 1
+        self.stride = stride
+        self.padding = padding
+        self.dilation = dilation
+        self.mask = mask
+        if bias:
+            raise ValueError('SincConv does not support bias.')
+        if groups > 1:
+            raise ValueError('SincConv does not support groups.')
+        NFFT = 512
+        f = int(self.sample_rate / 2) * np.linspace(0, 1, int(NFFT / 2) + 1)
+        fmel = self.to_mel(f)
+        fmelmax = np.max(fmel)
+        fmelmin = np.min(fmel)
+        filbandwidthsmel = np.linspace(fmelmin, fmelmax, self.out_channels + 1)
+        filbandwidthsf = self.to_hz(filbandwidthsmel)
+        self.mel = filbandwidthsf
+        self.hsupp = torch.arange(-(self.kernel_size - 1) / 2,
+                                  (self.kernel_size - 1) / 2 + 1)
+        self.band_pass = torch.zeros(self.out_channels, self.kernel_size)
+        for i in range(len(self.mel) - 1):
+            fmin = self.mel[i]
+            fmax = self.mel[i + 1]
+            hHigh = (2*fmax/self.sample_rate) * \
+                np.sinc(2*fmax*self.hsupp/self.sample_rate)
+            hLow = (2*fmin/self.sample_rate) * \
+                np.sinc(2*fmin*self.hsupp/self.sample_rate)
+            hideal = hHigh - hLow
+            self.band_pass[i, :] = Tensor(np.hamming(
+                self.kernel_size)) * Tensor(hideal)
+    def forward(self, x, mask=False):
+        band_pass_filter = self.band_pass.clone().to(x.device)
+        if mask:
+            A = np.random.uniform(0, 20)
+            A = int(A)
+            A0 = random.randint(0, band_pass_filter.shape[0] - A)
+            band_pass_filter[A0:A0 + A, :] = 0
+        else:
+            band_pass_filter = band_pass_filter
+        self.filters = (band_pass_filter).view(self.out_channels, 1,
+                                               self.kernel_size)
+        return F.conv1d(x,
+                        self.filters,
+                        stride=self.stride,
+                        padding=self.padding,
+                        dilation=self.dilation,
+                        bias=None,
+                        groups=1)
+class Residual_block(nn.Module):
+    """2-D residual block: conv-BN-SELU-conv plus a skip path, then max-pooling.
+    nb_filts: pair ``[in_channels, out_channels]``.
+    first: when true, no ``bn1`` layer is created for the block input.
+    """
+    def __init__(self, nb_filts, first=False):
+        super().__init__()
+        self.first = first
+        if not self.first:
+            self.bn1 = nn.BatchNorm2d(num_features=nb_filts[0])
+        # conv1 pads the first spatial axis by 1 with a kernel of height 2 ...
+        self.conv1 = nn.Conv2d(in_channels=nb_filts[0],
+                               out_channels=nb_filts[1],
+                               kernel_size=(2, 3),
+                               padding=(1, 1),
+                               stride=1)
+        self.selu = nn.SELU(inplace=True)
+        self.bn2 = nn.BatchNorm2d(num_features=nb_filts[1])
+        # ... and conv2 (height 2, no padding on that axis) shrinks it back.
+        self.conv2 = nn.Conv2d(in_channels=nb_filts[1],
+                               out_channels=nb_filts[1],
+                               kernel_size=(2, 3),
+                               padding=(0, 1),
+                               stride=1)
+        # The skip path needs its own conv only when the channel count changes.
+        if nb_filts[0] != nb_filts[1]:
+            self.downsample = True
+            self.conv_downsample = nn.Conv2d(in_channels=nb_filts[0],
+                                             out_channels=nb_filts[1],
+                                             padding=(0, 1),
+                                             kernel_size=(1, 3),
+                                             stride=1)
+        else:
+            self.downsample = False
+        # Pools the last axis by a factor of 3.
+        self.mp = nn.MaxPool2d((1, 3))  # self.mp = nn.MaxPool2d((1,4))
+    def forward(self, x):
+        """Apply the block to ``x`` and return the pooled residual output."""
+        identity = x
+        if not self.first:
+            out = self.bn1(x)
+            out = self.selu(out)
+        else:
+            out = x
+        # NOTE(review): conv1 is applied to x, not to the bn1/SELU result computed
+        # just above, so that result is discarded (bn1 is still called on x).
+        # Presumably any saved checkpoints were trained with this data flow —
+        # confirm before changing it.
+        out = self.conv1(x)
+        out = self.bn2(out)
+        out = self.selu(out)
+        out = self.conv2(out)
+        if self.downsample:
+            identity = self.conv_downsample(identity)
+        out += identity
+        out = self.mp(out)
+        return out
+class Model(nn.Module):
+    """Sinc-conv front end, residual encoder and spectral/temporal graph attention.
+    ``d_args`` is a dict read for the keys "filts", "gat_dims", "pool_ratios",
+    "temperatures", "first_conv" and, optionally, "output_cls" (number of
+    output classes, 2 when absent).
+    ``forward`` returns ``(last_hidden, output)`` where ``output`` has been
+    passed through a softmax over the class axis.
+    """
+    def __init__(self, d_args):
+        super().__init__()
+        self.d_args = d_args
+        filts = d_args["filts"]
+        gat_dims = d_args["gat_dims"]
+        pool_ratios = d_args["pool_ratios"]
+        temperatures = d_args["temperatures"]
+        # fixed sinc band-pass front end on the raw waveform
+        self.conv_time = CONV(out_channels=filts[0],
+                              kernel_size=d_args["first_conv"],
+                              in_channels=1)
+        self.first_bn = nn.BatchNorm2d(num_features=1)
+        self.drop = nn.Dropout(0.5, inplace=True)
+        self.drop_way = nn.Dropout(0.2, inplace=True)
+        self.selu = nn.SELU(inplace=True)
+        # six residual blocks; the last three share the filts[4] configuration
+        self.encoder = nn.Sequential(
+            nn.Sequential(Residual_block(nb_filts=filts[1], first=True)),
+            nn.Sequential(Residual_block(nb_filts=filts[2])),
+            nn.Sequential(Residual_block(nb_filts=filts[3])),
+            nn.Sequential(Residual_block(nb_filts=filts[4])),
+            nn.Sequential(Residual_block(nb_filts=filts[4])),
+            nn.Sequential(Residual_block(nb_filts=filts[4])))
+        # learnable offset added to the spectral nodes; the 23 is hard-coded and
+        # presumably must match the number of spectral nodes the encoder yields
+        # for the configured front end — TODO confirm
+        self.pos_S = nn.Parameter(torch.randn(1, 23, filts[-1][-1]))
+        # learnable master nodes, one per inference branch
+        self.master1 = nn.Parameter(torch.randn(1, 1, gat_dims[0]))
+        self.master2 = nn.Parameter(torch.randn(1, 1, gat_dims[0]))
+        self.GAT_layer_S = GraphAttentionLayer(filts[-1][-1],
+                                               gat_dims[0],
+                                               temperature=temperatures[0])
+        self.GAT_layer_T = GraphAttentionLayer(filts[-1][-1],
+                                               gat_dims[0],
+                                               temperature=temperatures[1])
+        # two stacked heterogeneous layers per branch (ST1x = branch 1, ST2x = branch 2)
+        self.HtrgGAT_layer_ST11 = HtrgGraphAttentionLayer(
+            gat_dims[0], gat_dims[1], temperature=temperatures[2])
+        self.HtrgGAT_layer_ST12 = HtrgGraphAttentionLayer(
+            gat_dims[1], gat_dims[1], temperature=temperatures[2])
+        self.HtrgGAT_layer_ST21 = HtrgGraphAttentionLayer(
+            gat_dims[0], gat_dims[1], temperature=temperatures[2])
+        self.HtrgGAT_layer_ST22 = HtrgGraphAttentionLayer(
+            gat_dims[1], gat_dims[1], temperature=temperatures[2])
+        self.pool_S = GraphPool(pool_ratios[0], gat_dims[0], 0.3)
+        self.pool_T = GraphPool(pool_ratios[1], gat_dims[0], 0.3)
+        self.pool_hS1 = GraphPool(pool_ratios[2], gat_dims[1], 0.3)
+        self.pool_hT1 = GraphPool(pool_ratios[2], gat_dims[1], 0.3)
+        self.pool_hS2 = GraphPool(pool_ratios[2], gat_dims[1], 0.3)
+        self.pool_hT2 = GraphPool(pool_ratios[2], gat_dims[1], 0.3)
+        # classifier input = [T_max, T_avg, S_max, S_avg, master], hence 5 * gat_dims[1]
+        if "output_cls" in d_args:
+            self.out_layer = nn.Linear(5 * gat_dims[1], d_args["output_cls"])
+        else:
+            self.out_layer = nn.Linear(5 * gat_dims[1], 2)
+    def forward(self, x, Freq_aug=False):
+        """Return ``(last_hidden, output)`` for a batch of waveforms.
+        x: input batch; a channel axis is inserted at dim 1 before the sinc
+           convolution (assumes x is (#bs, #samples) — TODO confirm).
+        Freq_aug: forwarded to the sinc layer as its ``mask`` flag.
+        """
+        x = x.unsqueeze(1)
+        x = self.conv_time(x, mask=Freq_aug)
+        # treat the filter-bank output as a one-channel 2-D map
+        x = x.unsqueeze(dim=1)
+        x = F.max_pool2d(torch.abs(x), (3, 3))
+        x = self.first_bn(x)
+        x = self.selu(x)
+        # get embeddings using encoder
+        # (#bs, #filt, #spec, #seq)
+        e = self.encoder(x)
+        # spectral GAT (GAT-S)
+        e_S, _ = torch.max(torch.abs(e), dim=3)  # max along time
+        e_S = e_S.transpose(1, 2) + self.pos_S
+        gat_S = self.GAT_layer_S(e_S)
+        out_S = self.pool_S(gat_S)  # (#bs, #node, #dim)
+        # temporal GAT (GAT-T)
+        e_T, _ = torch.max(torch.abs(e), dim=2)  # max along freq
+        e_T = e_T.transpose(1, 2)
+        gat_T = self.GAT_layer_T(e_T)
+        out_T = self.pool_T(gat_T)
+        # learnable master node
+        # NOTE(review): these expanded tensors are never read — the calls below
+        # pass self.master1 / self.master2 directly and then rebind the names.
+        master1 = self.master1.expand(x.size(0), -1, -1)
+        master2 = self.master2.expand(x.size(0), -1, -1)
+        # inference 1
+        out_T1, out_S1, master1 = self.HtrgGAT_layer_ST11(
+            out_T, out_S, master=self.master1)
+        out_S1 = self.pool_hS1(out_S1)
+        out_T1 = self.pool_hT1(out_T1)
+        out_T_aug, out_S_aug, master_aug = self.HtrgGAT_layer_ST12(
+            out_T1, out_S1, master=master1)
+        # residual connection around the second heterogeneous layer
+        out_T1 = out_T1 + out_T_aug
+        out_S1 = out_S1 + out_S_aug
+        master1 = master1 + master_aug
+        # inference 2
+        out_T2, out_S2, master2 = self.HtrgGAT_layer_ST21(
+            out_T, out_S, master=self.master2)
+        out_S2 = self.pool_hS2(out_S2)
+        out_T2 = self.pool_hT2(out_T2)
+        out_T_aug, out_S_aug, master_aug = self.HtrgGAT_layer_ST22(
+            out_T2, out_S2, master=master2)
+        out_T2 = out_T2 + out_T_aug
+        out_S2 = out_S2 + out_S_aug
+        master2 = master2 + master_aug
+        out_T1 = self.drop_way(out_T1)
+        out_T2 = self.drop_way(out_T2)
+        out_S1 = self.drop_way(out_S1)
+        out_S2 = self.drop_way(out_S2)
+        master1 = self.drop_way(master1)
+        master2 = self.drop_way(master2)
+        # merge the two branches with an element-wise maximum
+        out_T = torch.max(out_T1, out_T2)
+        out_S = torch.max(out_S1, out_S2)
+        master = torch.max(master1, master2)
+        # readout: max of absolute values and mean over the node axis
+        T_max, _ = torch.max(torch.abs(out_T), dim=1)
+        T_avg = torch.mean(out_T, dim=1)
+        S_max, _ = torch.max(torch.abs(out_S), dim=1)
+        S_avg = torch.mean(out_S, dim=1)
+        last_hidden = torch.cat(
+            [T_max, T_avg, S_max, S_avg, master.squeeze(1)], dim=1)
+        last_hidden = self.drop(last_hidden)
+        output = self.out_layer(last_hidden)
+        # class scores are returned as probabilities, not logits
+        output=F.softmax(output,dim=1)
+        return last_hidden, output
+def obtain_asv_error_rates(tar_asv, non_asv, spoof_asv, asv_threshold):
+    # False alarm and miss rates for ASV
+    Pfa_asv = sum(non_asv >= asv_threshold) / non_asv.size
+    Pmiss_asv = sum(tar_asv < asv_threshold) / tar_asv.size
+    # Rate of rejecting spoofs in ASV
+    if spoof_asv.size == 0:
+        Pmiss_spoof_asv = None
+        Pfa_spoof_asv = None
+    else:
+        Pmiss_spoof_asv = np.sum(spoof_asv < asv_threshold) / spoof_asv.size
+        Pfa_spoof_asv = np.sum(spoof_asv >= asv_threshold) / spoof_asv.size
+    return Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, Pfa_spoof_asv
+def obtain_asv_error_rates(tar_asv, non_asv, spoof_asv, asv_threshold):
+    # False alarm and miss rates for ASV
+    Pfa_asv = sum(non_asv >= asv_threshold) / non_asv.size
+    Pmiss_asv = sum(tar_asv < asv_threshold) / tar_asv.size
+    # Rate of rejecting spoofs in ASV
+    if spoof_asv.size == 0:
+        Pmiss_spoof_asv = None
+        Pfa_spoof_asv = None
+    else:
+        Pmiss_spoof_asv = np.sum(spoof_asv < asv_threshold) / spoof_asv.size
+        Pfa_spoof_asv = np.sum(spoof_asv >= asv_threshold) / spoof_asv.size
+    return Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, Pfa_spoof_asv
+def compute_det_curve(target_scores, nontarget_scores):
+    n_scores = target_scores.size + nontarget_scores.size
+    all_scores = np.concatenate((target_scores, nontarget_scores))
+    labels = np.concatenate(
+        (np.ones(target_scores.size), np.zeros(nontarget_scores.size)))
+    # Sort labels based on scores
+    indices = np.argsort(all_scores, kind='mergesort')
+    labels = labels[indices]
+    # Compute false rejection and false acceptance rates
+    tar_trial_sums = np.cumsum(labels)
+    nontarget_trial_sums = nontarget_scores.size - \
+        (np.arange(1, n_scores + 1) - tar_trial_sums)
+    # false rejection rates
+    frr = np.concatenate(
+        (np.atleast_1d(0), tar_trial_sums / target_scores.size))
+    far = np.concatenate((np.atleast_1d(1), nontarget_trial_sums /
+                          nontarget_scores.size))  # false acceptance rates
+    # Thresholds are the sorted scores
+    thresholds = np.concatenate(
+        (np.atleast_1d(all_scores[indices[0]] - 0.001), all_scores[indices]))
+    return frr, far, thresholds
+def compute_Pmiss_Pfa_Pspoof_curves(tar_scores, non_scores, spf_scores):
+    # Concatenate all scores and designate arbitrary labels 1=target, 0=nontarget, -1=spoof
+    all_scores = np.concatenate((tar_scores, non_scores, spf_scores))
+    labels = np.concatenate((np.ones(tar_scores.size), np.zeros(non_scores.size), -1*np.ones(spf_scores.size)))
+    # Sort labels based on scores
+    indices = np.argsort(all_scores, kind='mergesort')
+    labels = labels[indices]
+    # Cumulative sums
+    tar_sums    = np.cumsum(labels==1)
+    non_sums    = np.cumsum(labels==0)
+    spoof_sums  = np.cumsum(labels==-1)
+    Pmiss       = np.concatenate((np.atleast_1d(0), tar_sums / tar_scores.size))
+    Pfa_non     = np.concatenate((np.atleast_1d(1), 1 - (non_sums / non_scores.size)))
+    Pfa_spoof   = np.concatenate((np.atleast_1d(1), 1 - (spoof_sums / spf_scores.size)))
+    thresholds  = np.concatenate((np.atleast_1d(all_scores[indices[0]] - 0.001), all_scores[indices]))  # Thresholds are the sorted scores
+    return Pmiss, Pfa_non, Pfa_spoof, thresholds
+def compute_eer(target_scores, nontarget_scores):
+    """ Returns equal error rate (EER) and the corresponding threshold. """
+    frr, far, thresholds = compute_det_curve(target_scores, nontarget_scores)
+    abs_diffs = np.abs(frr - far)
+    min_index = np.argmin(abs_diffs)
+    eer = np.mean((frr[min_index], far[min_index]))
+    return eer, frr, far, thresholds
+def compute_mindcf(frr, far, thresholds, Pspoof, Cmiss, Cfa):
+    min_c_det = float("inf")
+    min_c_det_threshold = thresholds
+    p_target = 1- Pspoof
+    for i in range(0, len(frr)):
+        # Weighted sum of false negative and false positive errors.
+        c_det = Cmiss * frr[i] * p_target + Cfa * far[i] * (1 - p_target)
+        if c_det < min_c_det:
+            min_c_det = c_det
+            min_c_det_threshold = thresholds[i]
+    # See Equations (3) and (4).  Now we normalize the cost.
+    c_def = min(Cmiss * p_target, Cfa * (1 - p_target))
+    min_dcf = min_c_det / c_def
+    return min_dcf, min_c_det_threshold
+def compute_tDCF(bonafide_score_cm, spoof_score_cm, Pfa_asv, Pmiss_asv,
+                 Pmiss_spoof_asv, cost_model, print_cost):
+    """Return the normalised t-DCF curve of a countermeasure (CM) and its thresholds.
+    bonafide_score_cm, spoof_score_cm: numpy arrays of CM scores.
+    Pfa_asv, Pmiss_asv, Pmiss_spoof_asv: ASV error rates at a fixed ASV threshold.
+    cost_model: dict read for the keys 'Ptar', 'Pnon', 'Pspoof', 'Cmiss_asv',
+        'Cfa_asv', 'Cmiss_cm' and 'Cfa_cm'.
+    print_cost: when true, print the cost model and the implied t-DCF function.
+    Returns ``(tDCF_norm, CM_thresholds)``.
+    Invalid priors, a missing spoof miss rate, nan/inf scores, fewer than
+    three distinct scores or negative weights terminate the process through
+    ``sys.exit``.
+    """
+    # Sanity check of cost parameters
+    if cost_model['Cfa_asv'] < 0 or cost_model['Cmiss_asv'] < 0 or \
+            cost_model['Cfa_cm'] < 0 or cost_model['Cmiss_cm'] < 0:
+        print('WARNING: Usually the cost values should be positive!')
+    if cost_model['Ptar'] < 0 or cost_model['Pnon'] < 0 or cost_model['Pspoof'] < 0 or \
+            np.abs(cost_model['Ptar'] + cost_model['Pnon'] + cost_model['Pspoof'] - 1) > 1e-10:
+        sys.exit(
+            'ERROR: Your prior probabilities should be positive and sum up to one.'
+        )
+    # Unless we evaluate worst-case model, we need to have some spoof tests against asv
+    if Pmiss_spoof_asv is None:
+        sys.exit(
+            'ERROR: you should provide miss rate of spoof tests against your ASV system.'
+        )
+    # Sanity check of scores
+    combined_scores = np.concatenate((bonafide_score_cm, spoof_score_cm))
+    if np.isnan(combined_scores).any() or np.isinf(combined_scores).any():
+        sys.exit('ERROR: Your scores contain nan or inf.')
+    # Sanity check that inputs are scores and not decisions
+    n_uniq = np.unique(combined_scores).size
+    if n_uniq < 3:
+        sys.exit(
+            'ERROR: You should provide soft CM scores - not binary decisions')
+    # Obtain miss and false alarm rates of CM
+    Pmiss_cm, Pfa_cm, CM_thresholds = compute_det_curve(
+        bonafide_score_cm, spoof_score_cm)
+    # Constants - see ASVspoof 2019 evaluation plan
+    # C1 weights the CM miss rate, C2 weights the CM false-alarm rate
+    C1 = cost_model['Ptar'] * (cost_model['Cmiss_cm'] - cost_model['Cmiss_asv'] * Pmiss_asv) - \
+        cost_model['Pnon'] * cost_model['Cfa_asv'] * Pfa_asv
+    C2 = cost_model['Cfa_cm'] * cost_model['Pspoof'] * (1 - Pmiss_spoof_asv)
+    # Sanity check of the weights
+    if C1 < 0 or C2 < 0:
+        sys.exit(
+            'You should never see this error but I cannot evalute tDCF with negative weights - please check whether your ASV error rates are correctly computed?'
+        )
+    # Obtain t-DCF curve for all thresholds
+    tDCF = C1 * Pmiss_cm + C2 * Pfa_cm
+    # Normalized t-DCF
+    # NOTE(review): the checks above let C1 == 0 or C2 == 0 through, in which
+    # case this divides by zero — confirm whether that can occur in practice.
+    tDCF_norm = tDCF / np.minimum(C1, C2)
+    # Everything should be fine if reaching here.
+    if print_cost:
+        print('t-DCF evaluation from [Nbona={}, Nspoof={}] trials\n'.format(
+            bonafide_score_cm.size, spoof_score_cm.size))
+        print('t-DCF MODEL')
+        print('   Ptar         = {:8.5f} (Prior probability of target user)'.
+              format(cost_model['Ptar']))
+        print(
+            '   Pnon         = {:8.5f} (Prior probability of nontarget user)'.
+            format(cost_model['Pnon']))
+        print(
+            '   Pspoof       = {:8.5f} (Prior probability of spoofing attack)'.
+            format(cost_model['Pspoof']))
+        print(
+            '   Cfa_asv      = {:8.5f} (Cost of ASV falsely accepting a nontarget)'
+            .format(cost_model['Cfa_asv']))
+        print(
+            '   Cmiss_asv    = {:8.5f} (Cost of ASV falsely rejecting target speaker)'
+            .format(cost_model['Cmiss_asv']))
+        print(
+            '   Cfa_cm       = {:8.5f} (Cost of CM falsely passing a spoof to ASV system)'
+            .format(cost_model['Cfa_cm']))
+        print(
+            '   Cmiss_cm     = {:8.5f} (Cost of CM falsely blocking target utterance which never reaches ASV)'
+            .format(cost_model['Cmiss_cm']))
+        print(
+            '\n   Implied normalized t-DCF function (depends on t-DCF parameters and ASV errors), s=CM threshold)'
+        )
+        # The printed formula is normalised by whichever weight is smaller.
+        if C2 == np.minimum(C1, C2):
+            print(
+                '   tDCF_norm(s) = {:8.5f} x Pmiss_cm(s) + Pfa_cm(s)\n'.format(
+                    C1 / C2))
+        else:
+            print(
+                '   tDCF_norm(s) = Pmiss_cm(s) + {:8.5f} x Pfa_cm(s)\n'.format(
+                    C2 / C1))
+    return tDCF_norm, CM_thresholds
+def calculate_CLLR(target_llrs, nontarget_llrs):
+    """
+    Calculate the CLLR of the scores.
+    Parameters:
+    target_llrs (list or numpy array): Log-likelihood ratios for target trials.
+    nontarget_llrs (list or numpy array): Log-likelihood ratios for non-target trials.
+    Returns:
+    float: The calculated CLLR value.
+    """
+    def negative_log_sigmoid(lodds):
+        """
+        Calculate the negative log of the sigmoid function.
+        Parameters:
+        lodds (numpy array): Log-odds values.
+        Returns:
+        numpy array: The negative log of the sigmoid values.
+        """
+        return np.log1p(np.exp(-lodds))
+    # Convert the input lists to numpy arrays if they are not already
+    target_llrs = np.array(target_llrs)
+    nontarget_llrs = np.array(nontarget_llrs)
+    # Calculate the CLLR value
+    cllr = 0.5 * (np.mean(negative_log_sigmoid(target_llrs)) + np.mean(negative_log_sigmoid(-nontarget_llrs))) / np.log(2)
+    return cllr
+def compute_Pmiss_Pfa_Pspoof_curves(tar_scores, non_scores, spf_scores):
+    # Concatenate all scores and designate arbitrary labels 1=target, 0=nontarget, -1=spoof
+    all_scores = np.concatenate((tar_scores, non_scores, spf_scores))
+    labels = np.concatenate((np.ones(tar_scores.size), np.zeros(non_scores.size), -1*np.ones(spf_scores.size)))
+    # Sort labels based on scores
+    indices = np.argsort(all_scores, kind='mergesort')
+    labels = labels[indices]
+    # Cumulative sums
+    tar_sums    = np.cumsum(labels==1)
+    non_sums    = np.cumsum(labels==0)
+    spoof_sums  = np.cumsum(labels==-1)
+    Pmiss       = np.concatenate((np.atleast_1d(0), tar_sums / tar_scores.size))
+    Pfa_non     = np.concatenate((np.atleast_1d(1), 1 - (non_sums / non_scores.size)))
+    Pfa_spoof   = np.concatenate((np.atleast_1d(1), 1 - (spoof_sums / spf_scores.size)))
+    thresholds  = np.concatenate((np.atleast_1d(all_scores[indices[0]] - 0.001), all_scores[indices]))  # Thresholds are the sorted scores
+    return Pmiss, Pfa_non, Pfa_spoof, thresholds
+def compute_teer(Pmiss_CM, Pfa_CM, tau_CM, Pmiss_ASV, Pfa_non_ASV, Pfa_spf_ASV, tau_ASV):
+    # Scan every ASV threshold, pair it with the CM threshold at which the tandem
+    # miss and false-alarm rates are closest, and return (in percent) the value
+    # Pfa_spf_ASV * Pfa_CM at the pair that best satisfies the intersection
+    # criterion Pfa_non_ASV / Pfa_spf_ASV == Pfa_CM / (1 - Pmiss_CM).
+    #
+    # Pmiss_CM, Pfa_CM, tau_CM: CM error curves and thresholds, indexed together
+    #   by the same CM index (presumably equal-length numpy arrays - verify
+    #   against the caller).
+    # Pmiss_ASV, Pfa_non_ASV, Pfa_spf_ASV: ASV error curves, indexed by the
+    #   position of the threshold in tau_ASV.
+    #
+    # NOTE(review): the final `return` is indented inside the `for rho_idx` loop,
+    # so the function returns after the first prior (rho = 0) and the rows of
+    # tEER_val for rho = 0.5 and rho = 1 are never filled. Confirm whether this
+    # early return is intended.
+    # NOTE(review): xpoint_tEER is only assigned inside the "allowed region"
+    # branch; if no ASV threshold satisfies that condition the return statement
+    # raises UnboundLocalError.
+    # Different spoofing prevalence priors (rho) parameters values
+    rho_vals            = [0,0.5,1]
+    tEER_val    = np.empty([len(rho_vals),len(tau_ASV)], dtype=float)
+    for rho_idx, rho_spf in enumerate(rho_vals):
+        # Table to store the CM threshold index, per each of the ASV operating points
+        tEER_idx_CM = np.empty(len(tau_ASV), dtype=int)
+        tEER_path   = np.empty([len(rho_vals),len(tau_ASV),2], dtype=float)
+        # Tables to store the t-EER, total Pfa and total miss values along the t-EER path
+        Pmiss_total = np.empty(len(tau_ASV), dtype=float)
+        Pfa_total   = np.empty(len(tau_ASV), dtype=float)
+        # Running minimum of the t-EER and the (ASV, CM) thresholds where it occurs
+        min_tEER    = np.inf
+        argmin_tEER = np.empty(2)
+        # best intersection point
+        xpoint_crit_best = np.inf
+        xpoint = np.empty(2)
+        # Loop over all possible ASV thresholds
+        for tau_ASV_idx, tau_ASV_val in enumerate(tau_ASV):
+            # Tandem miss and fa rates as defined in the manuscript
+            # (both are vectors over the CM thresholds, for the current ASV threshold)
+            Pmiss_tdm = Pmiss_CM + (1 - Pmiss_CM) * Pmiss_ASV[tau_ASV_idx]
+            Pfa_tdm   = (1 - rho_spf) * (1 - Pmiss_CM) * Pfa_non_ASV[tau_ASV_idx] + rho_spf * Pfa_CM * Pfa_spf_ASV[tau_ASV_idx]
+            # Store only the INDEX of the CM threshold (for the current ASV threshold)
+            # where tandem miss and tandem false alarm are closest to each other
+            h = Pmiss_tdm - Pfa_tdm
+            tmp = np.argmin(abs(h))
+            tEER_idx_CM[tau_ASV_idx] = tmp
+            # Allowed region: the ASV miss rate is below the rho-weighted ASV false-alarm rate
+            if Pmiss_ASV[tau_ASV_idx] < (1 - rho_spf) * Pfa_non_ASV[tau_ASV_idx] + rho_spf * Pfa_spf_ASV[tau_ASV_idx]:
+                Pmiss_total[tau_ASV_idx] = Pmiss_tdm[tmp]
+                Pfa_total[tau_ASV_idx] = Pfa_tdm[tmp]
+                # t-EER estimate: mean of the two (nearly equal) tandem error rates
+                tEER_val[rho_idx,tau_ASV_idx] = np.mean([Pfa_total[tau_ASV_idx], Pmiss_total[tau_ASV_idx]])
+                tEER_path[rho_idx,tau_ASV_idx, 0] = tau_ASV_val
+                tEER_path[rho_idx,tau_ASV_idx, 1] = tau_CM[tmp]
+                if tEER_val[rho_idx,tau_ASV_idx] < min_tEER:
+                    min_tEER = tEER_val[rho_idx,tau_ASV_idx]
+                    argmin_tEER[0] = tau_ASV_val
+                    argmin_tEER[1] = tau_CM[tmp]
+                # Check how close we are to the INTERSECTION POINT for different prior (rho) values:
+                # NOTE(review): both ratios divide by rates that can be zero
+                # (Pfa_spf_ASV, 1 - Pmiss_CM); with numpy inputs that yields inf/nan
+                # plus a RuntimeWarning rather than an exception.
+                LHS = Pfa_non_ASV[tau_ASV_idx]/Pfa_spf_ASV[tau_ASV_idx]
+                RHS = Pfa_CM[tmp]/(1 - Pmiss_CM[tmp])
+                crit = abs(LHS - RHS)
+                if crit < xpoint_crit_best:
+                    xpoint_crit_best = crit
+                    xpoint[0] = tau_ASV_val
+                    xpoint[1] = tau_CM[tmp]
+                    xpoint_tEER = Pfa_spf_ASV[tau_ASV_idx]*Pfa_CM[tmp]
+            else:
+                # Not in allowed region
+                tEER_path[rho_idx,tau_ASV_idx, 0] = np.nan
+                tEER_path[rho_idx,tau_ASV_idx, 1] = np.nan
+                Pmiss_total[tau_ASV_idx] = np.nan
+                Pfa_total[tau_ASV_idx] = np.nan
+                tEER_val[rho_idx,tau_ASV_idx] = np.nan
+        # Only the intersection-point value is returned (as a percentage); the
+        # tEER_val / tEER_path / argmin_tEER tables are not exposed to the caller.
+        return xpoint_tEER*100

evaluation/AASIST/AASIST_util.py ADDED Viewed

	@@ -0,0 +1,1065 @@

+"""
+AASIST
+Copyright (c) 2021-present NAVER Corp.
+MIT license
+"""
+import random
+from typing import Union
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch import Tensor
+import sys
+import os
+import argparse
+import torch.optim as optim
+import torchaudio
+from torch.utils.data import Dataset, DataLoader
+from tqdm import tqdm
+import torchaudio.transforms as T
+from collections import defaultdict
+import torch.multiprocessing
+torch.multiprocessing.set_sharing_strategy('file_system')
+def load_aasist_model(ckpt_path, device):
+    model_config = {
+        "architecture": "AASIST",
+        "nb_samp": 64600,
+        "first_conv": 128,
+        "filts": [70, [1, 32], [32, 32], [32, 64], [64, 64]],
+        "gat_dims": [64, 32],
+        "pool_ratios": [0.5, 0.7, 0.5, 0.5],
+        "temperatures": [2.0, 2.0, 100.0, 100.0],
+        "output_cls": 25
+    }
+    net = Model(model_config).to(device)
+    checkpoint = torch.load(ckpt_path, map_location=device)
+    net.load_state_dict(checkpoint)
+    net.eval()
+    return net
+def aasist_evaluate(models, audio):
+    score = []
+    for model in models:
+        _, probb = model(audio)
+        score.append(probb[0, 0:1].item())
+    return np.mean(score)
+def extract_system_id(wavname):
+    """Extrait l'identifiant du système à partir du nom du fichier."""
+    return wavname.split('-')[0]
+def pad(x, max_len=64600):
+    """ Padding ou découpage d'un signal audio """
+    x_len = x.shape[0]
+    if x_len >= max_len:
+        return x[:max_len]
+    num_repeats = int(max_len / x_len) + 1
+    padded_x = np.tile(x, (num_repeats))[:max_len]
+    return padded_x
+def pad_random(x: np.ndarray, max_len: int = 64600):
+    """ Découpe aléatoire si trop long, padding si trop court """
+    x_len = x.shape[0]
+    if x_len >= max_len:
+        stt = np.random.randint(x_len - max_len)
+        return x[stt:stt + max_len]
+    num_repeats = int(max_len / x_len) + 1
+    padded_x = np.tile(x, (num_repeats))[:max_len]
+    return padded_x
+# ==========================================================
+# Data loading (Dataset)
+# ==========================================================
+class MyDataset(Dataset):
+    def __init__(self, wavdir, mos_list="", target_sample_rate=16000):
+        self.mos_lookup = {}
+        if mos_list:
+            with open(mos_list, 'r') as f:
+                for line in f:
+                    parts = line.strip().split(',')
+                    wavname = parts[0]
+                    mos = float(parts[1])
+                    self.mos_lookup[wavname] = mos
+        self.wavdir = wavdir
+        wavnames=os.listdir(self.wavdir)
+        self.wavnames = [f_name for f_name in wavnames if f_name.endswith(".wav")]
+        self.target_sample_rate = target_sample_rate
+    def __getitem__(self, idx):
+        wavname = self.wavnames[idx]
+        wavpath = os.path.join(self.wavdir, wavname)
+        wav, sample_rate = torchaudio.load(wavpath)
+        if sample_rate != self.target_sample_rate:
+            resampler = T.Resample(orig_freq=sample_rate, new_freq=self.target_sample_rate)
+            wav = resampler(wav)
+        if wavname in self.mos_lookup:
+            score = self.mos_lookup[wavname]
+        else:
+            score = 0 #TODO: it should be manage more properly
+        return wav, score, wavname
+    def __len__(self):
+        return len(self.wavnames)
+    def collate_fn(self, batch):
+        """ Padding et tronquage des séquences audio pour normaliser à 64600 frames """
+        wavs, scores, wavnames = zip(*batch)
+        max_len = 64600
+        output_wavs = []
+        for wav in wavs:
+            wav_np = wav.squeeze(0).cpu().numpy()  # Enlève la dimension channel (1,) et met sur CPU
+            padded_wav = pad_random(wav_np, max_len)
+            padded_wav = torch.tensor(padded_wav, dtype=torch.float32).unsqueeze(0)  # Remettre la dimension (1, time)
+            output_wavs.append(padded_wav)
+        output_wavs = torch.stack(output_wavs, dim=0)  # [batch_size, 1, 64600]
+        scores = torch.tensor(scores, dtype=torch.float32)
+        return output_wavs, scores, wavnames
+class GraphAttentionLayer(nn.Module):
+    def __init__(self, in_dim, out_dim, **kwargs):
+        super().__init__()
+        # attention map
+        self.att_proj = nn.Linear(in_dim, out_dim)
+        self.att_weight = self._init_new_params(out_dim, 1)
+        # project
+        self.proj_with_att = nn.Linear(in_dim, out_dim)
+        self.proj_without_att = nn.Linear(in_dim, out_dim)
+        # batch norm
+        self.bn = nn.BatchNorm1d(out_dim)
+        # dropout for inputs
+        self.input_drop = nn.Dropout(p=0.2)
+        # activate
+        self.act = nn.SELU(inplace=True)
+        # temperature
+        self.temp = 1.
+        if "temperature" in kwargs:
+            self.temp = kwargs["temperature"]
+    def forward(self, x):
+        '''
+        x   :(#bs, #node, #dim)
+        '''
+        # apply input dropout
+        x = self.input_drop(x)
+        # derive attention map
+        att_map = self._derive_att_map(x)
+        # projection
+        x = self._project(x, att_map)
+        # apply batch norm
+        x = self._apply_BN(x)
+        x = self.act(x)
+        return x
+    def _pairwise_mul_nodes(self, x):
+        '''
+        Calculates pairwise multiplication of nodes.
+        - for attention map
+        x           :(#bs, #node, #dim)
+        out_shape   :(#bs, #node, #node, #dim)
+        '''
+        nb_nodes = x.size(1)
+        x = x.unsqueeze(2).expand(-1, -1, nb_nodes, -1)
+        x_mirror = x.transpose(1, 2)
+        return x * x_mirror
+    def _derive_att_map(self, x):
+        '''
+        x           :(#bs, #node, #dim)
+        out_shape   :(#bs, #node, #node, 1)
+        '''
+        att_map = self._pairwise_mul_nodes(x)
+        # size: (#bs, #node, #node, #dim_out)
+        att_map = torch.tanh(self.att_proj(att_map))
+        # size: (#bs, #node, #node, 1)
+        att_map = torch.matmul(att_map, self.att_weight)
+        # apply temperature
+        att_map = att_map / self.temp
+        att_map = F.softmax(att_map, dim=-2)
+        return att_map
+    def _project(self, x, att_map):
+        x1 = self.proj_with_att(torch.matmul(att_map.squeeze(-1), x))
+        x2 = self.proj_without_att(x)
+        return x1 + x2
+    def _apply_BN(self, x):
+        org_size = x.size()
+        x = x.view(-1, org_size[-1])
+        x = self.bn(x)
+        x = x.view(org_size)
+        return x
+    def _init_new_params(self, *size):
+        out = nn.Parameter(torch.FloatTensor(*size))
+        nn.init.xavier_normal_(out)
+        return out
+class HtrgGraphAttentionLayer(nn.Module):
+    # Attention layer over two node sets (x1, x2) plus a single "master" node.
+    # Both sets are first projected with their own linear layer, concatenated
+    # along the node axis, and attended jointly; separate attention vectors are
+    # used for pairs inside set 1, inside set 2, and across the two sets. The
+    # master node is updated from the concatenated nodes through its own
+    # attention branch.
+    def __init__(self, in_dim, out_dim, **kwargs):
+        super().__init__()
+        # per-type input projections (feature size is kept at in_dim)
+        self.proj_type1 = nn.Linear(in_dim, in_dim)
+        self.proj_type2 = nn.Linear(in_dim, in_dim)
+        # attention map
+        self.att_proj = nn.Linear(in_dim, out_dim)
+        self.att_projM = nn.Linear(in_dim, out_dim)
+        # attention vectors: type1-type1, type2-type2, cross-type, and master
+        self.att_weight11 = self._init_new_params(out_dim, 1)
+        self.att_weight22 = self._init_new_params(out_dim, 1)
+        self.att_weight12 = self._init_new_params(out_dim, 1)
+        self.att_weightM = self._init_new_params(out_dim, 1)
+        # project
+        self.proj_with_att = nn.Linear(in_dim, out_dim)
+        self.proj_without_att = nn.Linear(in_dim, out_dim)
+        self.proj_with_attM = nn.Linear(in_dim, out_dim)
+        self.proj_without_attM = nn.Linear(in_dim, out_dim)
+        # batch norm
+        self.bn = nn.BatchNorm1d(out_dim)
+        # dropout for inputs
+        self.input_drop = nn.Dropout(p=0.2)
+        # activate
+        self.act = nn.SELU(inplace=True)
+        # temperature
+        self.temp = 1.
+        if "temperature" in kwargs:
+            self.temp = kwargs["temperature"]
+    def forward(self, x1, x2, master=None):
+        '''
+        x1  :(#bs, #node, #dim)
+        x2  :(#bs, #node, #dim)
+        '''
+        # Returns (x1, x2, master): the two updated node sets, split back to
+        # their original node counts, and the updated master node.
+        num_type1 = x1.size(1)
+        num_type2 = x2.size(1)
+        x1 = self.proj_type1(x1)
+        x2 = self.proj_type2(x2)
+        x = torch.cat([x1, x2], dim=1)
+        # without a provided master node, start from the mean over all nodes
+        if master is None:
+            master = torch.mean(x, dim=1, keepdim=True)
+        # apply input dropout
+        x = self.input_drop(x)
+        # derive attention map
+        att_map = self._derive_att_map(x, num_type1, num_type2)
+        # directional edge for master node
+        # (computed from x before it is projected to out_dim below)
+        master = self._update_master(x, master)
+        # projection
+        x = self._project(x, att_map)
+        # apply batch norm
+        x = self._apply_BN(x)
+        x = self.act(x)
+        # split the joint node axis back into the two node sets
+        x1 = x.narrow(1, 0, num_type1)
+        x2 = x.narrow(1, num_type1, num_type2)
+        return x1, x2, master
+    def _update_master(self, x, master):
+        # attention of the master node over all nodes, then projection
+        att_map = self._derive_att_map_master(x, master)
+        master = self._project_master(x, master, att_map)
+        return master
+    def _pairwise_mul_nodes(self, x):
+        '''
+        Calculates pairwise multiplication of nodes.
+        - for attention map
+        x           :(#bs, #node, #dim)
+        out_shape   :(#bs, #node, #node, #dim)
+        '''
+        nb_nodes = x.size(1)
+        x = x.unsqueeze(2).expand(-1, -1, nb_nodes, -1)
+        x_mirror = x.transpose(1, 2)
+        return x * x_mirror
+    def _derive_att_map_master(self, x, master):
+        '''
+        x           :(#bs, #node, #dim)
+        out_shape   :(#bs, #node, #node, 1)
+        '''
+        # NOTE(review): the out_shape above looks copied from _derive_att_map.
+        # Here x * master is a broadcast product with no pairwise expansion, so
+        # for a master of shape (#bs or 1, 1, #dim) the result appears to be
+        # (#bs, #node, 1) - confirm.
+        att_map = x * master
+        att_map = torch.tanh(self.att_projM(att_map))
+        att_map = torch.matmul(att_map, self.att_weightM)
+        # apply temperature
+        att_map = att_map / self.temp
+        att_map = F.softmax(att_map, dim=-2)
+        return att_map
+    def _derive_att_map(self, x, num_type1, num_type2):
+        '''
+        x           :(#bs, #node, #dim)
+        out_shape   :(#bs, #node, #node, 1)
+        '''
+        att_map = self._pairwise_mul_nodes(x)
+        # size: (#bs, #node, #node, #dim_out)
+        att_map = torch.tanh(self.att_proj(att_map))
+        # size: (#bs, #node, #node, 1)
+        # The score board is filled block by block: each quadrant of the
+        # (node x node) grid uses the attention vector of its node-type pair.
+        # num_type2 is not used here; the second set is addressed as "num_type1:".
+        att_board = torch.zeros_like(att_map[:, :, :, 0]).unsqueeze(-1)
+        # type1 x type1
+        att_board[:, :num_type1, :num_type1, :] = torch.matmul(
+            att_map[:, :num_type1, :num_type1, :], self.att_weight11)
+        # type2 x type2
+        att_board[:, num_type1:, num_type1:, :] = torch.matmul(
+            att_map[:, num_type1:, num_type1:, :], self.att_weight22)
+        # both cross-type quadrants share att_weight12
+        att_board[:, :num_type1, num_type1:, :] = torch.matmul(
+            att_map[:, :num_type1, num_type1:, :], self.att_weight12)
+        att_board[:, num_type1:, :num_type1, :] = torch.matmul(
+            att_map[:, num_type1:, :num_type1, :], self.att_weight12)
+        att_map = att_board
+        # att_map = torch.matmul(att_map, self.att_weight12)
+        # apply temperature
+        att_map = att_map / self.temp
+        att_map = F.softmax(att_map, dim=-2)
+        return att_map
+    def _project(self, x, att_map):
+        # attention-weighted aggregation plus a direct projection of the nodes
+        x1 = self.proj_with_att(torch.matmul(att_map.squeeze(-1), x))
+        x2 = self.proj_without_att(x)
+        return x1 + x2
+    def _project_master(self, x, master, att_map):
+        # the attention weights are reshaped to a row vector so the matmul
+        # collapses the node axis into a single aggregated master node
+        x1 = self.proj_with_attM(torch.matmul(
+            att_map.squeeze(-1).unsqueeze(1), x))
+        x2 = self.proj_without_attM(master)
+        return x1 + x2
+    def _apply_BN(self, x):
+        # BatchNorm1d is applied on a 2-D view (rows = all leading dims merged),
+        # then the original shape is restored
+        org_size = x.size()
+        x = x.view(-1, org_size[-1])
+        x = self.bn(x)
+        x = x.view(org_size)
+        return x
+    def _init_new_params(self, *size):
+        # new learnable tensor of the given size, Xavier-normal initialised
+        out = nn.Parameter(torch.FloatTensor(*size))
+        nn.init.xavier_normal_(out)
+        return out
+class GraphPool(nn.Module):
+    def __init__(self, k: float, in_dim: int, p: Union[float, int]):
+        super().__init__()
+        self.k = k
+        self.sigmoid = nn.Sigmoid()
+        self.proj = nn.Linear(in_dim, 1)
+        self.drop = nn.Dropout(p=p) if p > 0 else nn.Identity()
+        self.in_dim = in_dim
+    def forward(self, h):
+        Z = self.drop(h)
+        weights = self.proj(Z)
+        scores = self.sigmoid(weights)
+        new_h = self.top_k_graph(scores, h, self.k)
+        return new_h
+    def top_k_graph(self, scores, h, k):
+        """
+        args
+        =====
+        scores: attention-based weights (#bs, #node, 1)
+        h: graph data (#bs, #node, #dim)
+        k: ratio of remaining nodes, (float)
+        returns
+        =====
+        h: graph pool applied data (#bs, #node', #dim)
+        """
+        _, n_nodes, n_feat = h.size()
+        n_nodes = max(int(n_nodes * k), 1)
+        _, idx = torch.topk(scores, n_nodes, dim=1)
+        idx = idx.expand(-1, -1, n_feat)
+        h = h * scores
+        h = torch.gather(h, 1, idx)
+        return h
+class CONV(nn.Module):
+    # 1-D convolution front-end with a fixed bank of band-pass filters.
+    # The filters are differences of two windowed sinc low-pass responses whose
+    # cut-off frequencies are equally spaced on the mel scale; they are computed
+    # once in __init__ and are not trainable (band_pass is a plain tensor
+    # attribute, neither an nn.Parameter nor a registered buffer).
+    @staticmethod
+    def to_mel(hz):
+        # Hz -> mel
+        return 2595 * np.log10(1 + hz / 700)
+    @staticmethod
+    def to_hz(mel):
+        # mel -> Hz (inverse of to_mel)
+        return 700 * (10**(mel / 2595) - 1)
+    def __init__(self,
+                 out_channels,
+                 kernel_size,
+                 sample_rate=16000,
+                 in_channels=1,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 bias=False,
+                 groups=1,
+                 mask=False):
+        # out_channels: number of band-pass filters.
+        # kernel_size: filter length; an even value is increased by one.
+        # in_channels / bias / groups: only 1 / False / 1 are accepted,
+        #   anything else raises ValueError.
+        super().__init__()
+        if in_channels != 1:
+            msg = "SincConv only support one input channel (here, in_channels = {%i})" % (
+                in_channels)
+            raise ValueError(msg)
+        self.out_channels = out_channels
+        self.kernel_size = kernel_size
+        self.sample_rate = sample_rate
+        # Forcing the filters to be odd (i.e, perfectly symmetrics)
+        if kernel_size % 2 == 0:
+            self.kernel_size = self.kernel_size + 1
+        self.stride = stride
+        self.padding = padding
+        self.dilation = dilation
+        # stored but not read by forward(), which takes its own `mask` argument
+        self.mask = mask
+        if bias:
+            raise ValueError('SincConv does not support bias.')
+        if groups > 1:
+            raise ValueError('SincConv does not support groups.')
+        # Frequency grid from 0 to sample_rate / 2, converted to mel to find
+        # out_channels + 1 band edges equally spaced on the mel scale
+        NFFT = 512
+        f = int(self.sample_rate / 2) * np.linspace(0, 1, int(NFFT / 2) + 1)
+        fmel = self.to_mel(f)
+        fmelmax = np.max(fmel)
+        fmelmin = np.min(fmel)
+        filbandwidthsmel = np.linspace(fmelmin, fmelmax, self.out_channels + 1)
+        filbandwidthsf = self.to_hz(filbandwidthsmel)
+        # despite its name, self.mel holds the band edges converted back to Hz
+        self.mel = filbandwidthsf
+        # symmetric sample positions centred on zero (kernel_size values)
+        self.hsupp = torch.arange(-(self.kernel_size - 1) / 2,
+                                  (self.kernel_size - 1) / 2 + 1)
+        self.band_pass = torch.zeros(self.out_channels, self.kernel_size)
+        for i in range(len(self.mel) - 1):
+            fmin = self.mel[i]
+            fmax = self.mel[i + 1]
+            # band-pass = low-pass at fmax minus low-pass at fmin
+            hHigh = (2*fmax/self.sample_rate) * \
+                np.sinc(2*fmax*self.hsupp/self.sample_rate)
+            hLow = (2*fmin/self.sample_rate) * \
+                np.sinc(2*fmin*self.hsupp/self.sample_rate)
+            hideal = hHigh - hLow
+            # taper the ideal response with a Hamming window
+            self.band_pass[i, :] = Tensor(np.hamming(
+                self.kernel_size)) * Tensor(hideal)
+    def forward(self, x, mask=False):
+        # Convolve x with the filter bank. With mask=True, a random contiguous
+        # run of A filters (A drawn from 0..19) is zeroed on a copy of the bank
+        # before the convolution.
+        # The bank is copied and moved to x's device on every call.
+        band_pass_filter = self.band_pass.clone().to(x.device)
+        if mask:
+            A = np.random.uniform(0, 20)
+            A = int(A)
+            A0 = random.randint(0, band_pass_filter.shape[0] - A)
+            band_pass_filter[A0:A0 + A, :] = 0
+        else:
+            # no-op branch: the filter bank is used unchanged
+            band_pass_filter = band_pass_filter
+        # conv1d weight layout: (out_channels, in_channels=1, kernel_size);
+        # the tensor is also kept on the module as self.filters
+        self.filters = (band_pass_filter).view(self.out_channels, 1,
+                                               self.kernel_size)
+        return F.conv1d(x,
+                        self.filters,
+                        stride=self.stride,
+                        padding=self.padding,
+                        dilation=self.dilation,
+                        bias=None,
+                        groups=1)
+class Residual_block(nn.Module):
+    # 2-D convolutional residual block: conv1 -> bn2 -> SELU -> conv2, plus a
+    # skip connection, followed by max-pooling along the last axis.
+    # nb_filts is a pair [in_channels, out_channels]; when the two differ the
+    # skip path goes through a (1, 3) convolution to match the channel count.
+    # first=True skips creating/applying the leading batch norm (bn1).
+    def __init__(self, nb_filts, first=False):
+        super().__init__()
+        self.first = first
+        if not self.first:
+            self.bn1 = nn.BatchNorm2d(num_features=nb_filts[0])
+        # conv1 pads the first spatial axis by 1 with a kernel of 2 (size grows
+        # by one); conv2 has no padding on that axis (size shrinks by one), so
+        # the output lines up with the identity path again
+        self.conv1 = nn.Conv2d(in_channels=nb_filts[0],
+                               out_channels=nb_filts[1],
+                               kernel_size=(2, 3),
+                               padding=(1, 1),
+                               stride=1)
+        self.selu = nn.SELU(inplace=True)
+        self.bn2 = nn.BatchNorm2d(num_features=nb_filts[1])
+        self.conv2 = nn.Conv2d(in_channels=nb_filts[1],
+                               out_channels=nb_filts[1],
+                               kernel_size=(2, 3),
+                               padding=(0, 1),
+                               stride=1)
+        if nb_filts[0] != nb_filts[1]:
+            self.downsample = True
+            self.conv_downsample = nn.Conv2d(in_channels=nb_filts[0],
+                                             out_channels=nb_filts[1],
+                                             padding=(0, 1),
+                                             kernel_size=(1, 3),
+                                             stride=1)
+        else:
+            self.downsample = False
+        self.mp = nn.MaxPool2d((1, 3))  # self.mp = nn.MaxPool2d((1,4))
+    def forward(self, x):
+        identity = x
+        if not self.first:
+            out = self.bn1(x)
+            out = self.selu(out)
+        else:
+            out = x
+        # NOTE(review): conv1 is applied to `x`, not to `out`, so the bn1 + SELU
+        # result computed above never reaches the convolution (bn1 still updates
+        # its running statistics in training mode). Do not "fix" this without
+        # retraining: presumably the shipped checkpoints were trained with this
+        # exact forward pass - confirm before changing.
+        out = self.conv1(x)
+        # print('out',out.shape)
+        out = self.bn2(out)
+        out = self.selu(out)
+        # print('out',out.shape)
+        out = self.conv2(out)
+        #print('conv2 out',out.shape)
+        if self.downsample:
+            # match the channel count of the skip path
+            identity = self.conv_downsample(identity)
+        out += identity
+        out = self.mp(out)
+        return out
+class Model(nn.Module):
+    # AASIST network: sinc-filter front-end (CONV), a stack of six residual
+    # blocks, a spectral and a temporal graph attention branch, two parallel
+    # heterogeneous graph attention stacks with learnable master nodes, and a
+    # linear output layer followed by a softmax.
+    #
+    # d_args keys read here: "filts", "gat_dims", "pool_ratios", "temperatures",
+    # "first_conv", and optionally "output_cls" (number of output classes,
+    # 2 when absent).
+    def __init__(self, d_args):
+        super().__init__()
+        self.d_args = d_args
+        filts = d_args["filts"]
+        gat_dims = d_args["gat_dims"]
+        pool_ratios = d_args["pool_ratios"]
+        temperatures = d_args["temperatures"]
+        # filts[0] is the number of sinc filters; filts[1:] are the
+        # [in_channels, out_channels] pairs of the residual blocks
+        self.conv_time = CONV(out_channels=filts[0],
+                              kernel_size=d_args["first_conv"],
+                              in_channels=1)
+        self.first_bn = nn.BatchNorm2d(num_features=1)
+        self.drop = nn.Dropout(0.5, inplace=True)
+        self.drop_way = nn.Dropout(0.2, inplace=True)
+        self.selu = nn.SELU(inplace=True)
+        self.encoder = nn.Sequential(
+            nn.Sequential(Residual_block(nb_filts=filts[1], first=True)),
+            nn.Sequential(Residual_block(nb_filts=filts[2])),
+            nn.Sequential(Residual_block(nb_filts=filts[3])),
+            nn.Sequential(Residual_block(nb_filts=filts[4])),
+            nn.Sequential(Residual_block(nb_filts=filts[4])),
+            nn.Sequential(Residual_block(nb_filts=filts[4])))
+        # learnable positional term added to the spectral nodes; the node count
+        # is hard-coded to 23, so the encoder output must broadcast against it
+        # (presumably tied to filts[0] = 70 and the pooling sizes - verify)
+        self.pos_S = nn.Parameter(torch.randn(1, 23, filts[-1][-1]))
+        # learnable master nodes, one per heterogeneous stack
+        self.master1 = nn.Parameter(torch.randn(1, 1, gat_dims[0]))
+        self.master2 = nn.Parameter(torch.randn(1, 1, gat_dims[0]))
+        self.GAT_layer_S = GraphAttentionLayer(filts[-1][-1],
+                                               gat_dims[0],
+                                               temperature=temperatures[0])
+        self.GAT_layer_T = GraphAttentionLayer(filts[-1][-1],
+                                               gat_dims[0],
+                                               temperature=temperatures[1])
+        # all four heterogeneous layers use temperatures[2]; temperatures[3]
+        # and pool_ratios[3] are not read in this class
+        self.HtrgGAT_layer_ST11 = HtrgGraphAttentionLayer(
+            gat_dims[0], gat_dims[1], temperature=temperatures[2])
+        self.HtrgGAT_layer_ST12 = HtrgGraphAttentionLayer(
+            gat_dims[1], gat_dims[1], temperature=temperatures[2])
+        self.HtrgGAT_layer_ST21 = HtrgGraphAttentionLayer(
+            gat_dims[0], gat_dims[1], temperature=temperatures[2])
+        self.HtrgGAT_layer_ST22 = HtrgGraphAttentionLayer(
+            gat_dims[1], gat_dims[1], temperature=temperatures[2])
+        self.pool_S = GraphPool(pool_ratios[0], gat_dims[0], 0.3)
+        self.pool_T = GraphPool(pool_ratios[1], gat_dims[0], 0.3)
+        self.pool_hS1 = GraphPool(pool_ratios[2], gat_dims[1], 0.3)
+        self.pool_hT1 = GraphPool(pool_ratios[2], gat_dims[1], 0.3)
+        self.pool_hS2 = GraphPool(pool_ratios[2], gat_dims[1], 0.3)
+        self.pool_hT2 = GraphPool(pool_ratios[2], gat_dims[1], 0.3)
+        # the classifier input is the concatenation of five gat_dims[1]-sized vectors
+        if "output_cls" in d_args:
+            self.out_layer = nn.Linear(5 * gat_dims[1], d_args["output_cls"])
+        else:
+            self.out_layer = nn.Linear(5 * gat_dims[1], 2)
+    def forward(self, x, Freq_aug=False):
+        # Returns (last_hidden, output): the concatenated embedding fed to the
+        # classifier and the softmax probabilities over the output classes.
+        # Freq_aug is forwarded to the sinc front-end as its `mask` flag.
+        # NOTE(review): a channel axis is inserted here, so x is presumably
+        # (#bs, #samples) on entry - confirm against the caller.
+        x = x.unsqueeze(1)
+        x = self.conv_time(x, mask=Freq_aug)
+        # treat the filter responses as a one-channel 2-D map
+        x = x.unsqueeze(dim=1)
+        x = F.max_pool2d(torch.abs(x), (3, 3))
+        x = self.first_bn(x)
+        x = self.selu(x)
+        # get embeddings using encoder
+        # (#bs, #filt, #spec, #seq)
+        e = self.encoder(x)
+        # spectral GAT (GAT-S)
+        e_S, _ = torch.max(torch.abs(e), dim=3)  # max along time
+        e_S = e_S.transpose(1, 2) + self.pos_S
+        gat_S = self.GAT_layer_S(e_S)
+        out_S = self.pool_S(gat_S)  # (#bs, #node, #dim)
+        # temporal GAT (GAT-T)
+        e_T, _ = torch.max(torch.abs(e), dim=2)  # max along freq
+        e_T = e_T.transpose(1, 2)
+        gat_T = self.GAT_layer_T(e_T)
+        out_T = self.pool_T(gat_T)
+        # learnable master node
+        # NOTE(review): these expanded copies are never used - the calls below
+        # pass self.master1 / self.master2 directly and then rebind the names.
+        master1 = self.master1.expand(x.size(0), -1, -1)
+        master2 = self.master2.expand(x.size(0), -1, -1)
+        # inference 1
+        out_T1, out_S1, master1 = self.HtrgGAT_layer_ST11(
+            out_T, out_S, master=self.master1)
+        out_S1 = self.pool_hS1(out_S1)
+        out_T1 = self.pool_hT1(out_T1)
+        out_T_aug, out_S_aug, master_aug = self.HtrgGAT_layer_ST12(
+            out_T1, out_S1, master=master1)
+        # residual connections around the second heterogeneous layer
+        out_T1 = out_T1 + out_T_aug
+        out_S1 = out_S1 + out_S_aug
+        master1 = master1 + master_aug
+        # inference 2
+        out_T2, out_S2, master2 = self.HtrgGAT_layer_ST21(
+            out_T, out_S, master=self.master2)
+        out_S2 = self.pool_hS2(out_S2)
+        out_T2 = self.pool_hT2(out_T2)
+        out_T_aug, out_S_aug, master_aug = self.HtrgGAT_layer_ST22(
+            out_T2, out_S2, master=master2)
+        out_T2 = out_T2 + out_T_aug
+        out_S2 = out_S2 + out_S_aug
+        master2 = master2 + master_aug
+        out_T1 = self.drop_way(out_T1)
+        out_T2 = self.drop_way(out_T2)
+        out_S1 = self.drop_way(out_S1)
+        out_S2 = self.drop_way(out_S2)
+        master1 = self.drop_way(master1)
+        master2 = self.drop_way(master2)
+        # merge the two stacks with an element-wise maximum
+        out_T = torch.max(out_T1, out_T2)
+        out_S = torch.max(out_S1, out_S2)
+        master = torch.max(master1, master2)
+        # readout: max of absolute values and mean over the node axis, per branch
+        T_max, _ = torch.max(torch.abs(out_T), dim=1)
+        T_avg = torch.mean(out_T, dim=1)
+        S_max, _ = torch.max(torch.abs(out_S), dim=1)
+        S_avg = torch.mean(out_S, dim=1)
+        last_hidden = torch.cat(
+            [T_max, T_avg, S_max, S_avg, master.squeeze(1)], dim=1)
+        # self.drop is an in-place dropout, so in training mode the returned
+        # last_hidden is the dropped-out tensor
+        last_hidden = self.drop(last_hidden)
+        output = self.out_layer(last_hidden)
+        # class probabilities (softmax over the class axis), not raw logits
+        output=F.softmax(output,dim=1)
+        return last_hidden, output
+def obtain_asv_error_rates(tar_asv, non_asv, spoof_asv, asv_threshold):
+    # False alarm and miss rates for ASV
+    Pfa_asv = sum(non_asv >= asv_threshold) / non_asv.size
+    Pmiss_asv = sum(tar_asv < asv_threshold) / tar_asv.size
+    # Rate of rejecting spoofs in ASV
+    if spoof_asv.size == 0:
+        Pmiss_spoof_asv = None
+        Pfa_spoof_asv = None
+    else:
+        Pmiss_spoof_asv = np.sum(spoof_asv < asv_threshold) / spoof_asv.size
+        Pfa_spoof_asv = np.sum(spoof_asv >= asv_threshold) / spoof_asv.size
+    return Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, Pfa_spoof_asv
+def obtain_asv_error_rates(tar_asv, non_asv, spoof_asv, asv_threshold):
+    # False alarm and miss rates for ASV
+    Pfa_asv = sum(non_asv >= asv_threshold) / non_asv.size
+    Pmiss_asv = sum(tar_asv < asv_threshold) / tar_asv.size
+    # Rate of rejecting spoofs in ASV
+    if spoof_asv.size == 0:
+        Pmiss_spoof_asv = None
+        Pfa_spoof_asv = None
+    else:
+        Pmiss_spoof_asv = np.sum(spoof_asv < asv_threshold) / spoof_asv.size
+        Pfa_spoof_asv = np.sum(spoof_asv >= asv_threshold) / spoof_asv.size
+    return Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, Pfa_spoof_asv
+def compute_det_curve(target_scores, nontarget_scores):
+    n_scores = target_scores.size + nontarget_scores.size
+    all_scores = np.concatenate((target_scores, nontarget_scores))
+    labels = np.concatenate(
+        (np.ones(target_scores.size), np.zeros(nontarget_scores.size)))
+    # Sort labels based on scores
+    indices = np.argsort(all_scores, kind='mergesort')
+    labels = labels[indices]
+    # Compute false rejection and false acceptance rates
+    tar_trial_sums = np.cumsum(labels)
+    nontarget_trial_sums = nontarget_scores.size - \
+        (np.arange(1, n_scores + 1) - tar_trial_sums)
+    # false rejection rates
+    frr = np.concatenate(
+        (np.atleast_1d(0), tar_trial_sums / target_scores.size))
+    far = np.concatenate((np.atleast_1d(1), nontarget_trial_sums /
+                          nontarget_scores.size))  # false acceptance rates
+    # Thresholds are the sorted scores
+    thresholds = np.concatenate(
+        (np.atleast_1d(all_scores[indices[0]] - 0.001), all_scores[indices]))
+    return frr, far, thresholds
+def compute_Pmiss_Pfa_Pspoof_curves(tar_scores, non_scores, spf_scores):
+    # Concatenate all scores and designate arbitrary labels 1=target, 0=nontarget, -1=spoof
+    all_scores = np.concatenate((tar_scores, non_scores, spf_scores))
+    labels = np.concatenate((np.ones(tar_scores.size), np.zeros(non_scores.size), -1*np.ones(spf_scores.size)))
+    # Sort labels based on scores
+    indices = np.argsort(all_scores, kind='mergesort')
+    labels = labels[indices]
+    # Cumulative sums
+    tar_sums    = np.cumsum(labels==1)
+    non_sums    = np.cumsum(labels==0)
+    spoof_sums  = np.cumsum(labels==-1)
+    Pmiss       = np.concatenate((np.atleast_1d(0), tar_sums / tar_scores.size))
+    Pfa_non     = np.concatenate((np.atleast_1d(1), 1 - (non_sums / non_scores.size)))
+    Pfa_spoof   = np.concatenate((np.atleast_1d(1), 1 - (spoof_sums / spf_scores.size)))
+    thresholds  = np.concatenate((np.atleast_1d(all_scores[indices[0]] - 0.001), all_scores[indices]))  # Thresholds are the sorted scores
+    return Pmiss, Pfa_non, Pfa_spoof, thresholds
+def compute_eer(target_scores, nontarget_scores):
+    """ Returns equal error rate (EER) and the corresponding threshold. """
+    frr, far, thresholds = compute_det_curve(target_scores, nontarget_scores)
+    abs_diffs = np.abs(frr - far)
+    min_index = np.argmin(abs_diffs)
+    eer = np.mean((frr[min_index], far[min_index]))
+    return eer, frr, far, thresholds
def compute_mindcf(frr, far, thresholds, Pspoof, Cmiss, Cfa):
    """Find the minimum normalized detection cost over all operating points.

    Args:
        frr: Sequence of false-rejection rates, one per operating point.
        far: Sequence of false-acceptance rates, aligned with ``frr``.
        thresholds: Sequence of thresholds, aligned with ``frr``.
        Pspoof: Prior probability of a spoof; the target prior is ``1 - Pspoof``.
        Cmiss: Cost of a miss.
        Cfa: Cost of a false alarm.

    Returns:
        Tuple ``(min_dcf, threshold)``: the smallest detection cost divided by
        ``min(Cmiss * p_target, Cfa * (1 - p_target))`` and the threshold at
        which it occurs (the first one on ties).  If no operating point has a
        finite cost, ``thresholds`` itself is returned as the second item.
    """
    p_target = 1 - Pspoof
    p_other = 1 - p_target

    best_cost = float("inf")
    best_threshold = thresholds
    for idx, miss_rate in enumerate(frr):
        # Weighted sum of false negative and false positive errors.
        cost = Cmiss * miss_rate * p_target + Cfa * far[idx] * p_other
        if cost < best_cost:
            best_cost, best_threshold = cost, thresholds[idx]

    # Normalize by the cost of the cheaper trivial decision.
    default_cost = min(Cmiss * p_target, Cfa * p_other)
    return best_cost / default_cost, best_threshold
def compute_tDCF(bonafide_score_cm, spoof_score_cm, Pfa_asv, Pmiss_asv,
                 Pmiss_spoof_asv, cost_model, print_cost):
    """Compute the normalized t-DCF curve of a countermeasure (CM).

    Args:
        bonafide_score_cm: NumPy array of CM scores for bona fide trials.
        spoof_score_cm: NumPy array of CM scores for spoof trials.
        Pfa_asv: False-alarm rate of the ASV system.
        Pmiss_asv: Miss rate of the ASV system.
        Pmiss_spoof_asv: Miss rate of spoof trials against the ASV system;
            must not be ``None``.
        cost_model: Dict with the keys ``Ptar``, ``Pnon``, ``Pspoof``,
            ``Cfa_asv``, ``Cmiss_asv``, ``Cfa_cm`` and ``Cmiss_cm``.
        print_cost: When truthy, print the cost model and the implied
            normalized t-DCF function.

    Returns:
        Tuple ``(tDCF_norm, CM_thresholds)``: the normalized t-DCF value at
        every CM threshold and the thresholds from ``compute_det_curve``.

    The process is terminated through ``sys.exit`` when the priors are
    invalid, ``Pmiss_spoof_asv`` is missing, the scores contain nan/inf or
    fewer than three distinct values, or a t-DCF weight is negative.
    """
    # Sanity check of cost parameters (warning only).
    cost_keys = ('Cfa_asv', 'Cmiss_asv', 'Cfa_cm', 'Cmiss_cm')
    if any(cost_model[key] < 0 for key in cost_keys):
        print('WARNING: Usually the cost values should be positive!')

    # Priors must be non-negative and sum to one.
    has_negative_prior = any(
        cost_model[key] < 0 for key in ('Ptar', 'Pnon', 'Pspoof'))
    prior_total = cost_model['Ptar'] + cost_model['Pnon'] + cost_model['Pspoof']
    if has_negative_prior or np.abs(prior_total - 1) > 1e-10:
        sys.exit(
            'ERROR: Your prior probabilities should be positive and sum up to one.'
        )

    # Unless we evaluate worst-case model, we need spoof tests against ASV.
    if Pmiss_spoof_asv is None:
        sys.exit(
            'ERROR: you should provide miss rate of spoof tests against your ASV system.'
        )

    # Sanity check of scores.
    pooled_scores = np.concatenate((bonafide_score_cm, spoof_score_cm))
    if not np.isfinite(pooled_scores).all():
        sys.exit('ERROR: Your scores contain nan or inf.')

    # Inputs must be soft scores, not hard decisions.
    if np.unique(pooled_scores).size < 3:
        sys.exit(
            'ERROR: You should provide soft CM scores - not binary decisions')

    # Miss and false alarm rates of the CM.
    Pmiss_cm, Pfa_cm, CM_thresholds = compute_det_curve(
        bonafide_score_cm, spoof_score_cm)

    # Constants - see ASVspoof 2019 evaluation plan.
    C1 = cost_model['Ptar'] * (cost_model['Cmiss_cm'] - cost_model['Cmiss_asv'] * Pmiss_asv) - \
        cost_model['Pnon'] * cost_model['Cfa_asv'] * Pfa_asv
    C2 = cost_model['Cfa_cm'] * cost_model['Pspoof'] * (1 - Pmiss_spoof_asv)

    # Sanity check of the weights.
    if C1 < 0 or C2 < 0:
        sys.exit(
            'You should never see this error but I cannot evalute tDCF with negative weights - please check whether your ASV error rates are correctly computed?'
        )

    # t-DCF at every threshold, normalized by the smaller of the two weights.
    smaller_weight = np.minimum(C1, C2)
    tDCF_norm = (C1 * Pmiss_cm + C2 * Pfa_cm) / smaller_weight

    if not print_cost:
        return tDCF_norm, CM_thresholds

    print('t-DCF evaluation from [Nbona={}, Nspoof={}] trials\n'.format(
        bonafide_score_cm.size, spoof_score_cm.size))
    print('t-DCF MODEL')
    report_rows = (
        ('   Ptar         = {:8.5f} (Prior probability of target user)',
         'Ptar'),
        ('   Pnon         = {:8.5f} (Prior probability of nontarget user)',
         'Pnon'),
        ('   Pspoof       = {:8.5f} (Prior probability of spoofing attack)',
         'Pspoof'),
        ('   Cfa_asv      = {:8.5f} (Cost of ASV falsely accepting a nontarget)',
         'Cfa_asv'),
        ('   Cmiss_asv    = {:8.5f} (Cost of ASV falsely rejecting target speaker)',
         'Cmiss_asv'),
        ('   Cfa_cm       = {:8.5f} (Cost of CM falsely passing a spoof to ASV system)',
         'Cfa_cm'),
        ('   Cmiss_cm     = {:8.5f} (Cost of CM falsely blocking target utterance which never reaches ASV)',
         'Cmiss_cm'),
    )
    for template, key in report_rows:
        print(template.format(cost_model[key]))
    print(
        '\n   Implied normalized t-DCF function (depends on t-DCF parameters and ASV errors), s=CM threshold)'
    )
    if C2 == np.minimum(C1, C2):
        print(
            '   tDCF_norm(s) = {:8.5f} x Pmiss_cm(s) + Pfa_cm(s)\n'.format(
                C1 / C2))
    else:
        print(
            '   tDCF_norm(s) = Pmiss_cm(s) + {:8.5f} x Pfa_cm(s)\n'.format(
                C2 / C1))
    return tDCF_norm, CM_thresholds
def calculate_CLLR(target_llrs, nontarget_llrs):
    """
    Calculate the CLLR of the scores.

    The cost is
    ``0.5 * (mean(log(1 + exp(-t))) + mean(log(1 + exp(n)))) / log(2)``
    for target LLRs ``t`` and non-target LLRs ``n``.

    Parameters:
    target_llrs (list or numpy array): Log-likelihood ratios for target trials.
    nontarget_llrs (list or numpy array): Log-likelihood ratios for non-target trials.

    Returns:
    float: The calculated CLLR value.
    """
    # Accept plain lists as well as arrays.
    target_llrs = np.asarray(target_llrs, dtype=float)
    nontarget_llrs = np.asarray(nontarget_llrs, dtype=float)

    # -log(sigmoid(x)) = log(1 + exp(-x)) = logaddexp(0, -x).  logaddexp
    # avoids the overflow of exp() for strongly misleading LLRs, which would
    # otherwise turn the whole result into inf.
    target_cost = np.mean(np.logaddexp(0.0, -target_llrs))
    nontarget_cost = np.mean(np.logaddexp(0.0, nontarget_llrs))

    # Dividing by log(2) expresses the cost in bits.
    return 0.5 * (target_cost + nontarget_cost) / np.log(2)
def compute_Pmiss_Pfa_Pspoof_curves(tar_scores, non_scores, spf_scores):
    """Return miss and false-acceptance rates at every pooled score threshold.

    All scores are pooled and sorted ascending with a stable merge sort.  The
    threshold list is the sorted scores, preceded by one value 0.001 below
    the minimum.

    Args:
        tar_scores: NumPy array with the scores of target trials.
        non_scores: NumPy array with the scores of non-target trials.
        spf_scores: NumPy array with the scores of spoof trials.

    Returns:
        ``(Pmiss, Pfa_non, Pfa_spoof, thresholds)``, each of length
        ``total number of scores + 1``.
    """
    # Arbitrary labels: 1 = target, 0 = non-target, -1 = spoof.
    scores = np.concatenate((tar_scores, non_scores, spf_scores))
    classes = np.concatenate((
        np.ones(tar_scores.size),
        np.zeros(non_scores.size),
        -1 * np.ones(spf_scores.size),
    ))

    # Reorder the labels by ascending score.
    rank = np.argsort(scores, kind='mergesort')
    classes = classes[rank]

    # Running fraction of each class at or below every sorted score.
    tar_frac = np.cumsum(classes == 1) / tar_scores.size
    non_frac = np.cumsum(classes == 0) / non_scores.size
    spf_frac = np.cumsum(classes == -1) / spf_scores.size

    zero, one = np.atleast_1d(0), np.atleast_1d(1)
    Pmiss = np.concatenate((zero, tar_frac))
    Pfa_non = np.concatenate((one, 1 - non_frac))
    Pfa_spoof = np.concatenate((one, 1 - spf_frac))

    # Thresholds are the sorted scores, plus one just under the smallest.
    floor = np.atleast_1d(scores[rank[0]] - 0.001)
    thresholds = np.concatenate((floor, scores[rank]))
    return Pmiss, Pfa_non, Pfa_spoof, thresholds
def compute_teer(Pmiss_CM, Pfa_CM, tau_CM, Pmiss_ASV, Pfa_non_ASV, Pfa_spf_ASV, tau_ASV):
    """Return the tandem EER at the ASV/CM intersection point, in percent.

    For every admissible ASV operating point the tandem miss and false-alarm
    curves over all CM thresholds are formed and the CM threshold where they
    are closest is picked.  Among those (ASV, CM) pairs, the one minimizing
    ``|Pfa_non_ASV / Pfa_spf_ASV - Pfa_CM / (1 - Pmiss_CM)|`` is kept, and
    ``Pfa_spf_ASV * Pfa_CM`` at that pair, times 100, is returned.

    Args:
        Pmiss_CM: NumPy array of CM miss rates, one per CM threshold.
        Pfa_CM: NumPy array of CM false-alarm rates, aligned with ``Pmiss_CM``.
        tau_CM: CM thresholds.  Kept for interface compatibility; the
            returned value does not depend on it.
        Pmiss_ASV: ASV miss rates, one per ASV threshold.
        Pfa_non_ASV: ASV false-acceptance rates for non-targets.
        Pfa_spf_ASV: ASV false-acceptance rates for spoofs.
        tau_ASV: ASV thresholds; only its length is used, to enumerate the
            ASV operating points.

    Returns:
        The selected ``Pfa_spf_ASV * Pfa_CM`` product multiplied by 100.

    Raises:
        ValueError: If no ASV operating point yields a usable intersection
            candidate (for example when ``tau_ASV`` is empty).
    """
    # The previous implementation looped over rho in [0, 0.5, 1] but returned
    # at the end of the first pass, so only rho = 0 ever produced the result.
    # That behavior is kept; the formulas stay in their general form.
    rho_spf = 0

    best_crit = np.inf
    xpoint_tEER = None

    # Loop over all possible ASV thresholds.
    for asv_idx in range(len(tau_ASV)):
        miss_asv = Pmiss_ASV[asv_idx]
        fa_non_asv = Pfa_non_ASV[asv_idx]
        fa_spf_asv = Pfa_spf_ASV[asv_idx]

        # Skip operating points outside the allowed region.
        if not miss_asv < (1 - rho_spf) * fa_non_asv + rho_spf * fa_spf_asv:
            continue

        # Tandem miss and fa rates across all CM thresholds.
        Pmiss_tdm = Pmiss_CM + (1 - Pmiss_CM) * miss_asv
        Pfa_tdm = (1 - rho_spf) * (1 - Pmiss_CM) * fa_non_asv + rho_spf * Pfa_CM * fa_spf_asv

        # CM threshold index where the tandem miss and fa rates are closest.
        cm_idx = np.argmin(np.abs(Pmiss_tdm - Pfa_tdm))

        # Distance from the intersection-point condition; a nan/inf distance
        # never compares below the current best and is therefore ignored.
        crit = abs(fa_non_asv / fa_spf_asv - Pfa_CM[cm_idx] / (1 - Pmiss_CM[cm_idx]))
        if crit < best_crit:
            best_crit = crit
            xpoint_tEER = fa_spf_asv * Pfa_CM[cm_idx]

    if xpoint_tEER is None:
        raise ValueError(
            'No ASV operating point yields a valid t-EER intersection candidate.')
    return xpoint_tEER * 100

evaluation/AASIST/S1_best.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b36eddfdb4fa2c1dbdf00e57e34b83e841218872da6c6d6f97f9616182a9f876
+size 1277933

evaluation/AASIST/S2_best.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a333d6c7d7a40cfdb25f69d4ac2dd2bc3731ba71ec3adf58e2dd837bbe1eef93
+size 1277933

evaluation/AASIST/S3_best.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3eaf2873d0b367721d96ea2407539f19e52700eb0c3c8f6dcf16e9603b02739f
+size 1277933

evaluation/AASIST/S4_best.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:29cf1672b9bdde392de88aa875ca7ea915d750d4ac3d8ed5c93c5e691a3939dd
+size 1277933

evaluation/AASIST/S5_best.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:49c34c6bcadfee296cce9b3ff3ea9e0d7852f39c7cef3ad8b02c16ac213c2427
+size 1277933

evaluation/AASIST/__pycache__/AASIST_util.cpython-310.pyc ADDED Viewed

Binary file (24.6 kB). View file

evaluation/AASIST/__pycache__/AASIST_util.cpython-39.pyc ADDED Viewed

Binary file (24.5 kB). View file