trisongz Speech-Arena-2025 committed on
Commit
63dd064
·
0 Parent(s):

Duplicate from Speech-Arena-2025/DF_Arena_1B_V_1

Browse files

Co-authored-by: Speech Arena <Speech-Arena-2025@users.noreply.huggingface.co>

.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
File without changes
LICENSE.txt ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ===========================================
2
+
3
+ Portions of this software are derived from third-party projects distributed under the MIT License.
4
+ These portions remain under their original MIT terms (see Section 1 below).
5
+
6
+ All original contributions and modifications are provided under a
7
+ Non-Commercial License as described in Section 2 below.
8
+
9
+ For commercial use, a separate commercial license agreement is required (see Section 3).
10
+
11
+ ----------------------------------------------------------------------
12
+ Section 1: Upstream Code (MIT License)
13
+ ----------------------------------------------------------------------
14
+
15
+ Permission is hereby granted, free of charge, to any person obtaining a copy
16
+ of this software and associated documentation files (the “Software”), to deal
17
+ in the Software without restriction, including without limitation the rights
18
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
19
+ copies of the Software, and to permit persons to whom the Software is
20
+ furnished to do so, subject to the following conditions:
21
+
22
+ The above copyright notice and this permission notice shall be included in
23
+ all copies or substantial portions of the Software.
24
+
25
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
31
+ THE SOFTWARE.
32
+
33
+ ----------------------------------------------------------------------
34
+ Section 2: Original Contributions (Non-Commercial License)
35
+ ----------------------------------------------------------------------
36
+
37
+ Permission is hereby granted to use, copy, modify, and distribute the original
38
+ contributions, in source or binary form, for research
39
+ and non-commercial purposes only, subject to the following conditions:
40
+
41
+ 1. Any distribution of this software must include this license text in full.
42
+ 2. Any derivative work must clearly indicate the modifications made and retain
43
+ the non-commercial restriction.
44
+ 3. No part of this software may be sold, licensed, or used in a commercial
45
+ product or service without prior written permission.
46
+ 4. Non-commercial use includes academic research, teaching, and personal experimentation.
47
+
48
+ THE ORIGINAL CONTRIBUTIONS ARE PROVIDED “AS IS” WITHOUT WARRANTY OF ANY KIND,
49
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
50
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
51
+
52
+ ----------------------------------------------------------------------
53
+ Section 3: Commercial Licensing
54
+ ----------------------------------------------------------------------
55
+
56
+ Commercial use of this software, including but not limited to use in products,
57
+ services, or for-profit research, requires a separate commercial license.
58
+
59
+ To inquire about commercial licensing, please contact:
60
+
61
+ Email: ajinkya.kulkarni@idiap.ch
62
+
63
+ ----------------------------------------------------------------------
64
+ END OF LICENSE
65
+ ----------------------------------------------------------------------
README.md ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ tags:
5
+ - audio
6
+ - audio-classification
7
+ - antispoofing
8
+ - deepfake-detection
9
+ - speech
10
+ license: other
11
+ pipeline_tag: audio-classification
12
+ ---
13
+
14
+ # DF Arena 1B - Antispoofing Model
15
+
16
+ We are excited to release the DF Arena 1B Universal Antispoofing model 🔥 trained on traditional speech antispoofing datasets in addition to singing and environmental deepfake data.
17
+ Check out the release on [DF Arena leaderboard](https://huggingface.co/spaces/Speech-Arena-2025/Speech-DF-Arena)
18
+
19
+ # Training Data
20
+
21
+ - **ASVspoof 2019, 2024**
22
+ - **Codecfake**
23
+ - **LibriSeVoc**
24
+ - **DFADD**
25
+ - **CTRSVDD**
26
+ - **SpoofCeleb**
27
+ - **MLAAD**
28
+ - **EnvSDD**
29
+
30
+ ## Usage
31
+ ```python
32
+ from transformers import pipeline
33
+ import librosa
34
+
35
+ #load model
36
+ pipe = pipeline("antispoofing", model="Speech-Arena-2025/DF_Arena_1B_V_1", trust_remote_code=True, device='cuda')
37
+ audio, sr = librosa.load("sample.wav", sr=16000)
38
+ result = pipe(audio)
39
+ print(result)
40
+ # Output:
41
+ {'label': 'spoof', 'logits': [[1.5515458583831787, -1.2254822254180908]], 'score': 0.9414217472076416, 'all_scores': {'spoof': 0.9414217472076416, 'bonafide': 0.05857823044061661}}
42
+ ```
43
+
44
+ # Evaluation
45
+
46
+ | Dataset | EER (%) | F1-score | Accuracy (%) |
47
+ |-------------------------|----------|-----------|---------------|
48
+ | dfadd | 0.00 | 0.9993 | 99.97 |
49
+ | add_2023_round_2 | 11.54 | 0.9188 | 88.46 |
50
+ | codecfake | 8.37 | 0.8695 | 91.63 |
51
+ | asvspoof_2021_la | 4.66 | 0.8037 | 95.34 |
52
+ | in_the_wild | 0.91 | 0.9928 | 99.10 |
53
+ | asvspoof_2019 | 1.14 | 0.9473 | 98.86 |
54
+ | add_2022_track_1 | 22.21 | 0.6678 | 77.79 |
55
+ | fake_or_real | 2.92 | 0.9711 | 97.11 |
56
+ | asvspoof_2024 | 17.25 | 0.6615 | 82.75 |
57
+ | add_2022_track_3 | 2.20 | 0.9357 | 97.80 |
58
+ | add_2023_round_1 | 5.08 | 0.9639 | 94.92 |
59
+ | librisevoc | 0.15 | 0.9958 | 99.84 |
60
+ | asvspoof_2021_df | 1.75 | 0.7577 | 98.25 |
61
+ | sonar | 1.09 | 0.9903 | 98.89 |
62
+ | Average | 5.919 | 0.8863 | 94.079 |
63
+ | Pooled | 9.52 | 0.81 | 90.47 |
64
+
65
+
66
+
67
+
68
+
69
+
70
+
71
+
72
+
73
+
74
+ ## License
75
+
76
+ We use a non-commercial license which can be found [here](./LICENSE.txt)
77
+
78
+ ## Contact
79
+
80
+ For questions or issues, please open an issue on the model repository or contact us at ajinkya.kulkarni@idiap.ch.
81
+
82
+ Stay tuned for upcoming versions of our models!
83
+
84
+ ## Citation
85
+
86
+ If you use this model in your work, it can be cited as:
87
+
88
+ ```bibtex
89
+ @misc{kulkarni2026compactsslbackbonesmatter,
90
+ title={Do Compact SSL Backbones Matter for Audio Deepfake Detection? A Controlled Study with RAPTOR},
91
+ author={Ajinkya Kulkarni and Sandipana Dowerah and Atharva Kulkarni and Tanel Alumäe and Mathew Magimai Doss},
92
+ year={2026},
93
+ eprint={2603.06164},
94
+ archivePrefix={arXiv},
95
+ primaryClass={cs.SD},
96
+ url={https://arxiv.org/abs/2603.06164},
97
+ }
98
+ ```
backbone.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+ from torch import Tensor
6
+ from transformers import Wav2Vec2Model, Wav2Vec2Config
7
+ from .conformer import FinalConformer
8
+
9
class DF_Arena_1B(nn.Module):
    """Antispoofing backbone: XLS-R-1B SSL encoder -> per-layer attentive pooling
    and gating -> FinalConformer classifier producing 2-way (spoof/bonafide) logits.

    All hidden states of the wav2vec2 encoder are kept; each layer is pooled
    over time, gated by a learned scalar, summed across layers, normalized,
    and fed to a Conformer classification head.
    """

    def __init__(self):
        super().__init__()
        # SSL front-end. Only the *config* is fetched here; the model is built with
        # random weights — presumably the real weights come from the repo checkpoint
        # (pytorch_model.bin) via the state dict. TODO confirm.
        self.ssl_model = Wav2Vec2Model(Wav2Vec2Config.from_pretrained("facebook/wav2vec2-xls-r-1b"))
        self.ssl_model.config.output_hidden_states = True
        # Normalization + activation applied to the layer-summed feature map.
        self.first_bn = nn.BatchNorm2d(num_features=1)
        self.selu = nn.SELU(inplace=True)
        # Maps a pooled 1280-dim layer embedding to a scalar gate.
        self.fc0 = nn.Linear(1280, 1)  # 1280 for 1b, 1920 for 2b
        self.sig = nn.Sigmoid()

        # Classification head over the fused feature sequence.
        self.conformer = FinalConformer(emb_size=1280, heads=4, ffmult=4, exp_fac=2, kernel_size=31, n_encoders=4)

        # Learnable attention weights (scores each time frame for pooling).
        self.attn_scores = nn.Linear(1280, 1, bias=False)

    def get_attenF1Dpooling(self, x):
        """Attention-pool over the time axis: (B, T, D) -> (B, 1, D)."""
        # print(x.shape, 'x shape in attnF1Dpooling')
        logits = self.attn_scores(x)
        weights = torch.softmax(logits, dim=1)  # (B, T, 1)
        pooled = torch.sum(weights * x, dim=1, keepdim=True)  # (B, 1, D)
        return pooled

    def get_attenF1D(self, layerResult):
        """Pool every SSL hidden state.

        Returns:
            layery:      (B, L, D) one pooled embedding per encoder layer.
            fullfeature: (B, L, ...) the raw per-layer features stacked on dim 1.
        """
        poollayerResult = []
        fullf = []
        for layer in layerResult:
            # layer shape: (B, D, T)
            # layery = layer.permute(0, 2, 1) # (B, T, D)
            # NOTE(review): HF `hidden_states` are (B, T, D); the (B, D, T) comments
            # above look stale since the permute is commented out — confirm.
            layery = self.get_attenF1Dpooling(layer)  # (B, 1, D)
            poollayerResult.append(layery)
            fullf.append(layer.unsqueeze(1))  # (B, 1, D, T)

        layery = torch.cat(poollayerResult, dim=1)  # (B, L, D)
        fullfeature = torch.cat(fullf, dim=1)  # (B, L, D, T)
        return layery, fullfeature

    def forward(self, x):
        """Run the full antispoofing stack.

        Assumes `x` is a single unbatched waveform (T,) — `unsqueeze(0)` adds the
        batch dim. NOTE(review): this conflicts with the (batch, time) comment in
        `modeling_antispoofing.py`; confirm batched inputs are never passed.
        """
        out_ssl = self.ssl_model(x.unsqueeze(0))  # hidden_states: one tensor per encoder layer (24 for XLS-R-1B) plus the embedding output
        y0, fullfeature = self.get_attenF1D(out_ssl.hidden_states)
        # Per-layer scalar gate in (0, 1): (B, L, D) -> (B, L, 1) -> (B, L, 1, 1).
        y0 = self.fc0(y0)
        y0 = self.sig(y0)
        y0 = y0.view(y0.shape[0], y0.shape[1], y0.shape[2], -1)
        # Gate each layer's feature map, then sum over the layer axis.
        fullfeature = fullfeature * y0
        fullfeature = torch.sum(fullfeature, 1)
        fullfeature = fullfeature.unsqueeze(dim=1)  # add channel dim for BatchNorm2d
        fullfeature = self.first_bn(fullfeature)
        fullfeature = self.selu(fullfeature)

        # Conformer head returns (logits, attention weights); weights discarded.
        output, _ = self.conformer(fullfeature.squeeze(1))

        return output
config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": ["DF-Arena-1B-V0.1"],
3
+ "model_type": "antispoofing",
4
+
5
+ "num_labels": 2,
6
+ "id2label": {
7
+ "1": "bonafide",
8
+ "0": "spoof"
9
+ },
10
+ "label2id": {
11
+ "bonafide": 1,
12
+ "spoof": 0
13
+ },
14
+
15
+ "auto_map": {
16
+ "AutoConfig": "configuration_antispoofing.DF_Arena_1B_Config",
17
+ "AutoModel": "modeling_antispoofing.DF_Arena_1B_Antispoofing",
18
+ "AutoFeatureExtractor": "feature_extraction_antispoofing.AntispoofingFeatureExtractor"
19
+ },
20
+ "custom_pipelines": {
21
+ "antispoofing": {
22
+ "impl": "pipeline_antispoofing.AntispoofingPipeline",
23
+ "pt": ["AutoModel"]
24
+ }
25
+ }
26
+ }
configuration_antispoofing.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import PretrainedConfig
2
+
3
class DF_Arena_1B_Config(PretrainedConfig):
    """Configuration for the DF Arena 1B antispoofing model.

    Args:
        num_labels: Number of output classes (2: spoof / bonafide).
        sample_rate: Expected waveform sample rate in Hz.
        out_dim: Encoder output dimensionality. Kept at the historical default of
            1024 for backward compatibility, but now configurable — the 1B XLS-R
            backbone actually emits 1280-dim features, so callers can override it.
    """

    model_type = "antispoofing"

    def __init__(self, num_labels=2, sample_rate=16000, out_dim=1024, **kwargs):
        super().__init__(**kwargs)
        self.num_labels = num_labels
        self.sample_rate = sample_rate
        # Previously hard-coded to 1024; now a parameter with the same default.
        self.out_dim = out_dim
conformer.py ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import torch
3
+ from torch import nn, einsum
4
+ import torch.nn.functional as F
5
+ import torch
6
+ import torch.nn as nn
7
+ from torch.nn.modules.transformer import _get_clones
8
+ from torch import Tensor
9
+ from einops import rearrange
10
+ from einops.layers.torch import Rearrange
11
+
12
+ # helper functions
13
+
14
def exists(val):
    """Return True when *val* carries a value (i.e. is not None)."""
    return val is not None

def default(val, d):
    """Return *val* when it is set, otherwise the fallback *d*."""
    return d if val is None else val

def calc_same_padding(kernel_size):
    """(left, right) padding pair yielding 'same'-length output for a 1D conv."""
    left = kernel_size // 2
    right = left - (kernel_size + 1) % 2
    return (left, right)
23
+
24
+ # helper classes
25
+
26
class Swish(nn.Module):
    """Swish / SiLU activation: x * sigmoid(x)."""

    def forward(self, x):
        return torch.sigmoid(x) * x
29
+
30
class GLU(nn.Module):
    """Gated Linear Unit: split the input in two halves along `dim` and
    multiply the first half by the sigmoid of the second."""

    def __init__(self, dim):
        super().__init__()
        self.dim = dim

    def forward(self, x):
        value, gate = x.chunk(2, dim=self.dim)
        return value * torch.sigmoid(gate)
38
+
39
class DepthWiseConv1d(nn.Module):
    """Depthwise 1D convolution (one filter group per input channel) with an
    explicit (left, right) padding applied before the conv."""

    def __init__(self, chan_in, chan_out, kernel_size, padding):
        super().__init__()
        self.padding = padding
        self.conv = nn.Conv1d(chan_in, chan_out, kernel_size, groups=chan_in)

    def forward(self, x):
        padded = F.pad(x, self.padding)
        return self.conv(padded)
48
+
49
+ # attention, feedforward, and conv module
50
+
51
class Scale(nn.Module):
    """Wrap a callable and multiply its output by a constant factor."""

    def __init__(self, scale, fn):
        super().__init__()
        self.fn = fn
        self.scale = scale

    def forward(self, x, **kwargs):
        out = self.fn(x, **kwargs)
        return out * self.scale
59
+
60
class PreNorm(nn.Module):
    """Apply LayerNorm to the input before delegating to the wrapped module."""

    def __init__(self, dim, fn):
        super().__init__()
        self.fn = fn
        self.norm = nn.LayerNorm(dim)

    def forward(self, x, **kwargs):
        normed = self.norm(x)
        return self.fn(normed, **kwargs)
69
+
70
class Attention(nn.Module):
    # Head Token attention: https://arxiv.org/pdf/2210.05958.pdf
    """Multi-head self-attention augmented with learnable per-head tokens.

    One extra token per head is synthesized from the mean of that head's
    features, attended alongside the sequence, and merged back into the
    first (cls) token before the output projection.

    NOTE(review): the `mask` argument is accepted but never used, and
    `attn_drop` is constructed but its application is commented out below.
    """

    def __init__(self, dim, heads=8, dim_head=64, qkv_bias=False, dropout=0., proj_drop=0.):
        super().__init__()
        self.num_heads = heads
        inner_dim = dim_head * heads
        self.scale = dim_head ** -0.5  # 1/sqrt(d_head) attention scaling

        self.qkv = nn.Linear(dim, inner_dim * 3, bias=qkv_bias)

        self.attn_drop = nn.Dropout(dropout)
        self.proj = nn.Linear(inner_dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

        # Head-token machinery: project each head's mean feature up to `dim`.
        self.act = nn.GELU()
        self.ht_proj = nn.Linear(dim_head, dim,bias=True)
        self.ht_norm = nn.LayerNorm(dim_head)
        # Learnable positional embedding for the synthesized head tokens.
        self.pos_embed = nn.Parameter(torch.zeros(1, self.num_heads, dim))

    def forward(self, x, mask=None):
        """Return (output of shape (B, N, C), raw attention weights)."""
        B, N, C = x.shape

        # head token: build one token per head from the head-wise mean of x
        head_pos = self.pos_embed.expand(x.shape[0], -1, -1)
        x_ = x.reshape(B, -1, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3)
        x_ = x_.mean(dim=2) # now the shape is [B, h, 1, d//h]
        x_ = self.ht_proj(x_).reshape(B, -1, self.num_heads, C // self.num_heads)
        x_ = self.act(self.ht_norm(x_)).flatten(2)
        x_ = x_ + head_pos
        x = torch.cat([x, x_], dim=1)  # sequence grows to N + num_heads tokens

        # normal mhsa over the extended sequence
        qkv = self.qkv(x).reshape(B, N+self.num_heads, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2]  # make torchscript happy (cannot use tensor as tuple)

        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)
        # attn = self.attn_drop(attn)

        x = (attn @ v).transpose(1, 2).reshape(B, N+self.num_heads, C)
        x = self.proj(x)

        # merge head tokens into cls token, then drop them from the sequence
        cls, patch, ht = torch.split(x, [1, N-1, self.num_heads], dim=1)
        cls = cls + torch.mean(ht, dim=1, keepdim=True) + torch.mean(patch, dim=1, keepdim=True)
        x = torch.cat([cls, patch], dim=1)

        x = self.proj_drop(x)

        return x, attn
120
+
121
+
122
class FeedForward(nn.Module):
    """Position-wise feed-forward block:
    Linear(dim -> dim*mult) -> Swish -> Dropout -> Linear(dim*mult -> dim) -> Dropout.
    """

    def __init__(
        self,
        dim,
        mult = 4,
        dropout = 0.
    ):
        super().__init__()
        hidden = dim * mult
        layers = [
            nn.Linear(dim, hidden),
            Swish(),
            nn.Dropout(dropout),
            nn.Linear(hidden, dim),
            nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)
140
+
141
class ConformerConvModule(nn.Module):
    """Conformer convolution module.

    Pipeline: LayerNorm -> pointwise conv (2x expansion for GLU) -> GLU gate ->
    depthwise conv -> BatchNorm (skipped when causal) -> Swish -> pointwise
    projection back to `dim` -> Dropout. Operates on (batch, time, channels).
    """

    def __init__(
        self,
        dim,
        causal = False,
        expansion_factor = 2,
        kernel_size = 31,
        dropout = 0.
    ):
        super().__init__()

        inner_dim = dim * expansion_factor
        # Causal mode pads only on the left so no future frames leak in.
        if causal:
            padding = (kernel_size - 1, 0)
        else:
            padding = calc_same_padding(kernel_size)

        self.net = nn.Sequential(
            nn.LayerNorm(dim),
            Rearrange('b n c -> b c n'),
            nn.Conv1d(dim, inner_dim * 2, 1),
            GLU(dim=1),
            DepthWiseConv1d(inner_dim, inner_dim, kernel_size=kernel_size, padding=padding),
            nn.Identity() if causal else nn.BatchNorm1d(inner_dim),
            Swish(),
            nn.Conv1d(inner_dim, dim, 1),
            Rearrange('b c n -> b n c'),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        return self.net(x)
170
+
171
+ # Conformer Block
172
+
173
class ConformerBlock(nn.Module):
    """One Conformer encoder block (Macaron style):
    half-step FF -> MHSA -> conv module -> half-step FF -> final LayerNorm.
    Each sub-layer is residual; `forward` also returns the MHSA attention weights.
    """

    def __init__(
        self,
        *,
        dim,
        dim_head = 64,
        heads = 8,
        ff_mult = 4,
        conv_expansion_factor = 2,
        conv_kernel_size = 31,
        attn_dropout = 0.,
        ff_dropout = 0.,
        conv_dropout = 0.,
        conv_causal = False
    ):
        super().__init__()
        self.ff1 = FeedForward(dim = dim, mult = ff_mult, dropout = ff_dropout)
        self.attn = Attention(dim = dim, dim_head = dim_head, heads = heads, dropout = attn_dropout)
        self.conv = ConformerConvModule(dim = dim, causal = conv_causal, expansion_factor = conv_expansion_factor, kernel_size = conv_kernel_size, dropout = conv_dropout)
        self.ff2 = FeedForward(dim = dim, mult = ff_mult, dropout = ff_dropout)

        # Re-wrap the sub-modules: pre-normalization everywhere, and the two
        # feed-forwards contribute half-steps (scaled by 0.5, Macaron design).
        self.attn = PreNorm(dim, self.attn)
        self.ff1 = Scale(0.5, PreNorm(dim, self.ff1))
        self.ff2 = Scale(0.5, PreNorm(dim, self.ff2))

        self.post_norm = nn.LayerNorm(dim)

    def forward(self, x, mask = None):
        """Return (block output, attention weights from the MHSA sub-layer)."""
        x = self.ff1(x) + x
        attn_x, attn_weight = self.attn(x, mask = mask)
        x = attn_x + x
        x = self.conv(x) + x
        x = self.ff2(x) + x
        x = self.post_norm(x)
        return x, attn_weight
208
+
209
+ # Conformer
210
+
211
class Conformer(nn.Module):
    """Sequential stack of `depth` ConformerBlocks.

    Args mirror ConformerBlock; per-block attention weights are discarded.

    NOTE(review): `attn_dropout`, `ff_dropout` and `conv_dropout` are accepted
    but not forwarded to the blocks (preserved from the original code —
    fixing that would change training behavior).
    """

    def __init__(
        self,
        dim,
        *,
        depth,
        dim_head = 64,
        heads = 8,
        ff_mult = 4,
        conv_expansion_factor = 2,
        conv_kernel_size = 31,
        attn_dropout = 0.,
        ff_dropout = 0.,
        conv_dropout = 0.,
        conv_causal = False
    ):
        super().__init__()
        self.dim = dim
        self.layers = nn.ModuleList([])

        for _ in range(depth):
            self.layers.append(ConformerBlock(
                dim = dim,
                dim_head = dim_head,
                heads = heads,
                ff_mult = ff_mult,
                conv_expansion_factor = conv_expansion_factor,
                conv_kernel_size = conv_kernel_size,
                conv_causal = conv_causal
            ))

    def forward(self, x):
        """Apply every block in order and return the final features.

        Bug fix: ConformerBlock.forward returns (x, attn_weight). The original
        code assigned the whole tuple to x (`x = block(x)`), which crashes on
        the second block and returned a tuple for depth == 1. Unpack instead.
        """
        for block in self.layers:
            x, _ = block(x)

        return x
249
+
250
+
251
+
252
def sinusoidal_embedding(n_channels, dim):
    """Standard transformer sinusoidal positional encoding.

    Returns a tensor of shape (1, n_channels, dim): sin on even feature
    indices, cos on odd ones, with the usual 10000^(2i/dim) frequency scale.
    """
    angle = lambda p, i: p / (10000 ** (2 * (i // 2) / dim))
    table = [[angle(p, i) for i in range(dim)] for p in range(n_channels)]
    pe = torch.FloatTensor(table)
    pe[:, 0::2] = torch.sin(pe[:, 0::2])
    pe[:, 1::2] = torch.cos(pe[:, 1::2])
    return pe.unsqueeze(0)
258
+
259
class FinalConformer(nn.Module):
    """Conformer encoder + classification head.

    Adds fixed sinusoidal positional embeddings, prepends a learnable
    class token, runs `n_encoders` cloned ConformerBlocks, and projects the
    class-token embedding to 2 logits (spoof / bonafide).
    """

    def __init__(self, emb_size=128, heads=4, ffmult=4, exp_fac=2, kernel_size=16, n_encoders=1):
        super(FinalConformer, self).__init__()
        self.dim_head=int(emb_size/heads)
        self.dim=emb_size
        self.heads=heads
        self.kernel_size=kernel_size
        self.n_encoders=n_encoders
        # Fixed (requires_grad=False) table; caps supported length at 10000 frames.
        self.positional_emb = nn.Parameter(sinusoidal_embedding(10000, emb_size), requires_grad=False)
        self.encoder_blocks=_get_clones(ConformerBlock( dim = emb_size, dim_head=self.dim_head, heads= heads,
                        ff_mult = ffmult, conv_expansion_factor = exp_fac, conv_kernel_size = kernel_size),
                                        n_encoders)
        # Learnable [CLS]-style token shared across the batch.
        self.class_token = nn.Parameter(torch.rand(1, emb_size))
        self.fc5 = nn.Linear(emb_size, 2)

    def forward(self, x):  # x shape [bs, time, features]
        """Return (logits of shape [bs, 2], list of per-block attention weights)."""
        x = x + self.positional_emb[:, :x.size(1), :]
        # Prepend the class token to every sequence in the batch.
        x = torch.stack([torch.vstack((self.class_token, x[i])) for i in range(len(x))])  # [bs, 1+time, emb_size]
        list_attn_weight = []
        for layer in self.encoder_blocks:
            x, attn_weight = layer(x)  # [bs, 1+time, emb_size]
            list_attn_weight.append(attn_weight)
        embedding=x[:,0,:]  # class-token embedding, [bs, emb_size]
        out=self.fc5(embedding)  # [bs, 2]
        return out, list_attn_weight
284
+
feature_extraction_antispoofing.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import SequenceFeatureExtractor
2
+ import numpy as np
3
+ import torch
4
+
5
class AntispoofingFeatureExtractor(SequenceFeatureExtractor):
    """Waveform feature extractor: crops or tiles raw audio to a fixed
    64600 samples (~4 s at 16 kHz) and returns it as a torch tensor."""

    def __init__(
        self,
        feature_size=1,
        sampling_rate=16000,
        padding_value=0.0,
        return_attention_mask=True,
        **kwargs
    ):
        super().__init__(
            feature_size=feature_size,
            sampling_rate=sampling_rate,
            padding_value=padding_value,
            **kwargs
        )
        self.return_attention_mask = return_attention_mask

    def __call__(self, audio, sampling_rate=None, return_tensors=True, **kwargs):
        # NOTE(review): `sampling_rate` and `return_tensors` are accepted but
        # ignored — the audio is assumed to already be 16 kHz mono; confirm at
        # call sites (the pipeline/README load with sr=16000).
        audio = self.pad(audio, 64600)
        audio = torch.Tensor(audio)
        return {
            "input_values": audio

        }

    def pad(self, x, max_len):
        """Crop a 1-D waveform to max_len, or repeat the whole clip until it fills max_len."""
        x_len = x.shape[0]
        if x_len >= max_len:
            return x[:max_len]
        # +1 guarantees the tiled signal is at least max_len long before cropping.
        num_repeats = int(max_len / x_len)+1
        # tile to shape (1, num_repeats*x_len), crop, then squeeze back to 1-D.
        padded_x = np.tile(x, (1, num_repeats))[:, :max_len][0]
        return padded_x
modeling_antispoofing.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from transformers import PreTrainedModel
4
+ from .configuration_antispoofing import DF_Arena_1B_Config
5
+ from .backbone import DF_Arena_1B
6
+ from .feature_extraction_antispoofing import AntispoofingFeatureExtractor
7
+
8
class DF_Arena_1B_Antispoofing(PreTrainedModel):
    """Hugging Face `PreTrainedModel` wrapper around the DF_Arena_1B backbone.

    Exposes the backbone through the standard `from_pretrained` machinery;
    `forward` simply delegates the waveform to the backbone and wraps the
    resulting 2-class logits in a dict.
    """

    config_class = DF_Arena_1B_Config

    def __init__(self, config: DF_Arena_1B_Config):
        super().__init__(config)
        # Bundled extractor (not used in forward; preprocessing happens in the pipeline).
        self.feature_extractor = AntispoofingFeatureExtractor()
        # your backbone here (CNN/TDNN/Wav2Vec front-end, etc.)
        self.backbone = DF_Arena_1B()
        self.post_init()

    def forward(self, input_values, attention_mask=None):
        # input_values: (batch, time) float32 waveform @ config.sample_rate
        # NOTE(review): `attention_mask` is accepted but unused; the backbone's
        # own forward unsqueezes a batch dim, so confirm the expected shape.
        logits = self.backbone(input_values)
        return {"logits": logits}
pipeline_antispoofing.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import Pipeline
2
+ import torch
3
+ from .feature_extraction_antispoofing import AntispoofingFeatureExtractor
4
class AntispoofingPipeline(Pipeline):
    """Custom `transformers` pipeline for the "antispoofing" task.

    preprocess: pad/crop the waveform to the fixed model length;
    _forward: run the model; postprocess: softmax the 2-class logits into
    a labelled prediction dict.
    """

    def __init__(self, model, **kwargs):
        super().__init__(model=model, **kwargs)
        self.feature_extractor = AntispoofingFeatureExtractor()

    def _sanitize_parameters(self, **kwargs):
        # Split user kwargs into (preprocess, forward, postprocess) dicts,
        # per the transformers Pipeline contract.
        preprocess_kwargs = {}
        postprocess_kwargs = {}

        if "sampling_rate" in kwargs:
            preprocess_kwargs["sampling_rate"] = kwargs["sampling_rate"]

        # NOTE(review): postprocess_kwargs is always empty here; the literal {}
        # in the middle is the (empty) forward-kwargs slot.
        return preprocess_kwargs, {}, postprocess_kwargs

    def preprocess(self, audio, sampling_rate=16000):
        # `sampling_rate` is accepted but the extractor ignores it (assumes 16 kHz).
        audio = self.feature_extractor(audio)['input_values']
        inputs = {"input_values": audio}

        return inputs

    def _forward(self, model_inputs):
        outputs = self.model(**model_inputs)
        return outputs

    def postprocess(self, model_outputs):
        """Convert raw logits into {label, logits, score, all_scores}."""
        logits = model_outputs['logits']
        probs = torch.nn.functional.softmax(logits, dim=-1)
        predicted_class = torch.argmax(probs, dim=-1).item()
        confidence = probs[0][predicted_class].item()

        return {
            "label": self.model.config.id2label[predicted_class],
            "logits": logits.tolist(),
            "score": confidence,
            "all_scores": {
                self.model.config.id2label[i]: probs[0][i].item()
                for i in range(len(probs[0]))
            }
        }
preprocessor_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "feature_extractor_type": "AntispoofingFeatureExtractor",
3
+ "processor_class": "feature_extraction_antispoofing.AntispoofingFeatureExtractor"
4
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:780bc14fd4c15e65d58efdef728427cf03cd29cd60be528e97badf8c89087988
3
+ size 4591794734