| """Standalone evaluation for Res2TCNGuard. |
| |
| The network definition lives in ``_net.py`` (in this repo). This script loads |
| the pretrained checkpoint ``best_1.495.pth`` and scores audio, returning a |
| bona-fide score where **higher = more bona fide**. |
| |
| Dependencies: torch, numpy (plus soundfile + scipy for the file demo below). |
| |
| python evaluate.py path/to/audio.wav |
| """ |
| from __future__ import annotations |
|
|
| import numpy as np |
| import torch |
|
|
| from _net import TestModel |
|
|
| CUT = 64600 |
| SAMPLE_RATE = 16000 |
|
|
|
|
| def pad_fixed(x: np.ndarray, max_len: int = CUT) -> np.ndarray: |
| """Deterministic window: first ``max_len`` samples; tile-repeat if shorter. |
| |
| This is exactly the windowing used to produce the Arena scores (no random |
| crop), so results are reproducible. |
| """ |
| x = np.asarray(x, dtype=np.float32).reshape(-1) |
| n = x.shape[0] |
| if n >= max_len: |
| return x[:max_len] |
| reps = max_len // n + 1 |
| return np.tile(x, reps)[:max_len].astype(np.float32) |
|
|
|
|
| def load_model(ckpt: str = "best_1.495.pth", device: str = "cpu") -> TestModel: |
| model = TestModel() |
| sd = torch.load(ckpt, map_location="cpu") |
| sd = sd.get("state_dict", sd) |
| model.load_state_dict(sd, strict=True) |
| return model.eval().to(device) |
|
|
|
|
| @torch.no_grad() |
| def score(model: TestModel, audio: np.ndarray, device: str = "cpu") -> float: |
| """Score one utterance (float32 mono 16 kHz waveform). Higher = bona fide.""" |
| x = torch.from_numpy(pad_fixed(audio))[None].to(device) |
| _, logits = model(x) |
| return float(logits[0, 1]) |
|
|
|
|
| if __name__ == "__main__": |
| import sys |
| from math import gcd |
|
|
| import soundfile as sf |
| from scipy.signal import resample_poly |
|
|
| audio, sr = sf.read(sys.argv[1]) |
| if audio.ndim == 2: |
| audio = audio.mean(axis=1) |
| audio = audio.astype(np.float32) |
| if sr != SAMPLE_RATE: |
| g = gcd(int(sr), SAMPLE_RATE) |
| audio = resample_poly(audio, SAMPLE_RATE // g, int(sr) // g).astype(np.float32) |
|
|
| model = load_model(device="cpu") |
| print(f"bona-fide score: {score(model, audio):.6f} (higher = more bona fide)") |
|
|