Add application file
Browse files- AASIST_ASVspoof5_Exp4_CL.conf +40 -0
- Web/index.html +162 -0
- Web/recorder.js +357 -0
- Web/script.js +498 -0
- Web/styles.css +25 -0
- calculate_modules.py +333 -0
- docker-compose.yml +21 -0
- model_utils.py +671 -0
AASIST_ASVspoof5_Exp4_CL.conf
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"database_path": "/lium/corpus/vrac/asini/deepfake_dataset/ASVspoof5_2024/",
|
| 3 |
+
"train_path": "ASVspoof5.train.metadata.txt",
|
| 4 |
+
"dev_path": "ASVspoof5.dev.metadata.txt",
|
| 5 |
+
"model_path": "./models/weights/AASIST/Exp4_CL/best.pth",
|
| 6 |
+
"score_file_dir":"exp_result/AASIST_ASVspoof5_Exp4_eval_train_ep50_bs64/eval_scores_using_best_dev_model_onTrain.txt",
|
| 7 |
+
"split_num":5,
|
| 8 |
+
"accumulating":"False",
|
| 9 |
+
"re_init_optim":"False",
|
| 10 |
+
"train_wav_path":"flac_T/",
|
| 11 |
+
"dev_wav_path":"flac_D/",
|
| 12 |
+
"debug_mode": "False",
|
| 13 |
+
"batch_size": 64,
|
| 14 |
+
"num_epochs": 20,
|
| 15 |
+
"loss": "CCE",
|
| 16 |
+
"track": "LA",
|
| 17 |
+
"eval_all_best": "True",
|
| 18 |
+
"eval_output": "eval_scores_using_best_dev_model.txt",
|
| 19 |
+
"cudnn_deterministic_toggle": "True",
|
| 20 |
+
"cudnn_benchmark_toggle": "False",
|
| 21 |
+
"model_config": {
|
| 22 |
+
"architecture": "AASIST",
|
| 23 |
+
"nb_samp": 64600,
|
| 24 |
+
"first_conv": 128,
|
| 25 |
+
"filts": [70, [1, 32], [32, 32], [32, 64], [64, 64]],
|
| 26 |
+
"gat_dims": [64, 32],
|
| 27 |
+
"pool_ratios": [0.5, 0.7, 0.5, 0.5],
|
| 28 |
+
"temperatures": [2.0, 2.0, 100.0, 100.0],
|
| 29 |
+
"output_cls": 9
|
| 30 |
+
},
|
| 31 |
+
"optim_config": {
|
| 32 |
+
"optimizer": "adam",
|
| 33 |
+
"amsgrad": "False",
|
| 34 |
+
"base_lr": 0.0001,
|
| 35 |
+
"lr_min": 0.000005,
|
| 36 |
+
"betas": [0.9, 0.999],
|
| 37 |
+
"weight_decay": 0.0001,
|
| 38 |
+
"scheduler": "cosine"
|
| 39 |
+
}
|
| 40 |
+
}
|
Web/index.html
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
|
| 4 |
+
<head>
|
| 5 |
+
<meta charset="UTF-8">
|
| 6 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 7 |
+
<title>Audio Analysis API</title>
|
| 8 |
+
<link rel="stylesheet" href="styles.css">
|
| 9 |
+
|
| 10 |
+
<!-- Bootstrap CSS -->
|
| 11 |
+
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
|
| 12 |
+
<style>
|
| 13 |
+
body {
|
| 14 |
+
background-color: #f8f9fa;
|
| 15 |
+
padding: 20px;
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
+
.container {
|
| 19 |
+
max-width: 800px;
|
| 20 |
+
margin: 0 auto;
|
| 21 |
+
background: #fff;
|
| 22 |
+
padding: 30px;
|
| 23 |
+
border-radius: 10px;
|
| 24 |
+
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
h1 {
|
| 28 |
+
text-align: center;
|
| 29 |
+
margin-bottom: 20px;
|
| 30 |
+
color: #333;
|
| 31 |
+
font-weight: bold;
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
h2 {
|
| 35 |
+
color: #555;
|
| 36 |
+
margin-bottom: 20px;
|
| 37 |
+
font-size: 1.5rem;
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
.btn {
|
| 41 |
+
margin: 5px;
|
| 42 |
+
font-weight: 500;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
#recordingsList {
|
| 46 |
+
margin-top: 20px;
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
.response {
|
| 50 |
+
margin-top: 20px;
|
| 51 |
+
padding: 15px;
|
| 52 |
+
background-color: #e9ecef;
|
| 53 |
+
border-radius: 5px;
|
| 54 |
+
color: #333;
|
| 55 |
+
font-size: 1.1rem;
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
.metadata {
|
| 59 |
+
margin-top: 20px;
|
| 60 |
+
padding: 15px;
|
| 61 |
+
background-color: #f1f3f4;
|
| 62 |
+
border-radius: 5px;
|
| 63 |
+
color: #333;
|
| 64 |
+
font-size: 1.1rem;
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
.list-group-item {
|
| 68 |
+
display: flex;
|
| 69 |
+
justify-content: space-between;
|
| 70 |
+
align-items: center;
|
| 71 |
+
}
|
| 72 |
+
|
| 73 |
+
.list-group-item a {
|
| 74 |
+
text-decoration: none;
|
| 75 |
+
color: #0d6efd;
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
.list-group-item a:hover {
|
| 79 |
+
text-decoration: underline;
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
#controls {
|
| 83 |
+
margin-bottom: 20px;
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
#formats {
|
| 87 |
+
font-size: 0.9rem;
|
| 88 |
+
color: #666;
|
| 89 |
+
margin-bottom: 10px;
|
| 90 |
+
}
|
| 91 |
+
</style>
|
| 92 |
+
</head>
|
| 93 |
+
|
| 94 |
+
<body>
|
| 95 |
+
<div class="container">
|
| 96 |
+
<h1>Audio Analysis API</h1>
|
| 97 |
+
<h2>Upload or Record Audio Files</h2>
|
| 98 |
+
|
| 99 |
+
<!-- Form for Uploading Files -->
|
| 100 |
+
<form id="upload-form" class="mb-4">
|
| 101 |
+
<div class="mb-3">
|
| 102 |
+
<input type="file" id="audio-file" class="form-control" accept="audio/*" multiple />
|
| 103 |
+
</div>
|
| 104 |
+
<button type="button" id="upload-button" class="btn btn-primary w-100">Upload & Analyze</button>
|
| 105 |
+
</form>
|
| 106 |
+
|
| 107 |
+
<hr>
|
| 108 |
+
|
| 109 |
+
<!-- Buttons for Recording Audio -->
|
| 110 |
+
<div id="controls" class="mb-4 text-center">
|
| 111 |
+
<button id="recordButton" class="btn btn-success">Record</button>
|
| 112 |
+
<button id="pauseButton" class="btn btn-warning" disabled>Pause</button>
|
| 113 |
+
<button id="stopButton" class="btn btn-danger" disabled>Stop</button>
|
| 114 |
+
</div>
|
| 115 |
+
<div id="formats" class="mb-3 text-center">Format: Start recording to see sample rate</div>
|
| 116 |
+
<p class="text-center"><strong>Recordings:</strong></p>
|
| 117 |
+
<ol id="recordingsList" class="list-group"></ol>
|
| 118 |
+
|
| 119 |
+
<!-- Metadata Display -->
|
| 120 |
+
<div class="metadata mt-4">
|
| 121 |
+
<h3>File Metadata</h3>
|
| 122 |
+
|
| 123 |
+
<!-- Dropdown Filters -->
|
| 124 |
+
<div class="mb-3 d-flex flex-wrap gap-3">
|
| 125 |
+
<i>Choisir un Label</i>
|
| 126 |
+
<select id="filter-label" class="form-select">
|
| 127 |
+
<option value="">All Labels</option>
|
| 128 |
+
</select>
|
| 129 |
+
<i>Choisir un System</i>
|
| 130 |
+
<select id="filter-system" class="form-select">
|
| 131 |
+
<option value="">All Systems</option>
|
| 132 |
+
</select>
|
| 133 |
+
<i>Choisir un Codec</i>
|
| 134 |
+
<select id="filter-codec" class="form-select">
|
| 135 |
+
<option value="">All Codecs</option>
|
| 136 |
+
</select>
|
| 137 |
+
<i>Choisir un Genre</i>
|
| 138 |
+
<select id="filter-genre" class="form-select">
|
| 139 |
+
<option value="">All Genres</option>
|
| 140 |
+
</select>
|
| 141 |
+
<i>Choisir une Année</i>
|
| 142 |
+
<select id="filter-year" class="form-select">
|
| 143 |
+
<option value="">All Years</option>
|
| 144 |
+
</select>
|
| 145 |
+
</div>
|
| 146 |
+
|
| 147 |
+
<div id="metadata-display"></div>
|
| 148 |
+
</div>
|
| 149 |
+
|
| 150 |
+
<!-- Response Display -->
|
| 151 |
+
<div class="response mt-4">
|
| 152 |
+
<h3>Analysis Results</h3>
|
| 153 |
+
<div id="response"></div>
|
| 154 |
+
</div>
|
| 155 |
+
</div>
|
| 156 |
+
|
| 157 |
+
<!-- Load Recorder.js and your script.js -->
|
| 158 |
+
<script src="recorder.js"></script>
|
| 159 |
+
<script src="script.js"></script>
|
| 160 |
+
</body>
|
| 161 |
+
|
| 162 |
+
</html>
|
Web/recorder.js
ADDED
|
@@ -0,0 +1,357 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.Recorder = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){
|
| 2 |
+
"use strict";
|
| 3 |
+
|
| 4 |
+
module.exports = require("./recorder").Recorder;
|
| 5 |
+
|
| 6 |
+
},{"./recorder":2}],2:[function(require,module,exports){
|
| 7 |
+
'use strict';
|
| 8 |
+
|
| 9 |
+
var _createClass = (function () {
|
| 10 |
+
function defineProperties(target, props) {
|
| 11 |
+
for (var i = 0; i < props.length; i++) {
|
| 12 |
+
var descriptor = props[i];descriptor.enumerable = descriptor.enumerable || false;descriptor.configurable = true;if ("value" in descriptor) descriptor.writable = true;Object.defineProperty(target, descriptor.key, descriptor);
|
| 13 |
+
}
|
| 14 |
+
}return function (Constructor, protoProps, staticProps) {
|
| 15 |
+
if (protoProps) defineProperties(Constructor.prototype, protoProps);if (staticProps) defineProperties(Constructor, staticProps);return Constructor;
|
| 16 |
+
};
|
| 17 |
+
})();
|
| 18 |
+
|
| 19 |
+
Object.defineProperty(exports, "__esModule", {
|
| 20 |
+
value: true
|
| 21 |
+
});
|
| 22 |
+
exports.Recorder = undefined;
|
| 23 |
+
|
| 24 |
+
var _inlineWorker = require('inline-worker');
|
| 25 |
+
|
| 26 |
+
var _inlineWorker2 = _interopRequireDefault(_inlineWorker);
|
| 27 |
+
|
| 28 |
+
function _interopRequireDefault(obj) {
|
| 29 |
+
return obj && obj.__esModule ? obj : { default: obj };
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
function _classCallCheck(instance, Constructor) {
|
| 33 |
+
if (!(instance instanceof Constructor)) {
|
| 34 |
+
throw new TypeError("Cannot call a class as a function");
|
| 35 |
+
}
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
var Recorder = exports.Recorder = (function () {
|
| 39 |
+
function Recorder(source, cfg) {
|
| 40 |
+
var _this = this;
|
| 41 |
+
|
| 42 |
+
_classCallCheck(this, Recorder);
|
| 43 |
+
|
| 44 |
+
this.config = {
|
| 45 |
+
bufferLen: 4096,
|
| 46 |
+
numChannels: 2,
|
| 47 |
+
mimeType: 'audio/wav'
|
| 48 |
+
};
|
| 49 |
+
this.recording = false;
|
| 50 |
+
this.callbacks = {
|
| 51 |
+
getBuffer: [],
|
| 52 |
+
exportWAV: []
|
| 53 |
+
};
|
| 54 |
+
|
| 55 |
+
Object.assign(this.config, cfg);
|
| 56 |
+
this.context = source.context;
|
| 57 |
+
this.node = (this.context.createScriptProcessor || this.context.createJavaScriptNode).call(this.context, this.config.bufferLen, this.config.numChannels, this.config.numChannels);
|
| 58 |
+
|
| 59 |
+
this.node.onaudioprocess = function (e) {
|
| 60 |
+
if (!_this.recording) return;
|
| 61 |
+
|
| 62 |
+
var buffer = [];
|
| 63 |
+
for (var channel = 0; channel < _this.config.numChannels; channel++) {
|
| 64 |
+
buffer.push(e.inputBuffer.getChannelData(channel));
|
| 65 |
+
}
|
| 66 |
+
_this.worker.postMessage({
|
| 67 |
+
command: 'record',
|
| 68 |
+
buffer: buffer
|
| 69 |
+
});
|
| 70 |
+
};
|
| 71 |
+
|
| 72 |
+
source.connect(this.node);
|
| 73 |
+
this.node.connect(this.context.destination); //this should not be necessary
|
| 74 |
+
|
| 75 |
+
var self = {};
|
| 76 |
+
this.worker = new _inlineWorker2.default(function () {
|
| 77 |
+
var recLength = 0,
|
| 78 |
+
recBuffers = [],
|
| 79 |
+
sampleRate = undefined,
|
| 80 |
+
numChannels = undefined;
|
| 81 |
+
|
| 82 |
+
self.onmessage = function (e) {
|
| 83 |
+
switch (e.data.command) {
|
| 84 |
+
case 'init':
|
| 85 |
+
init(e.data.config);
|
| 86 |
+
break;
|
| 87 |
+
case 'record':
|
| 88 |
+
record(e.data.buffer);
|
| 89 |
+
break;
|
| 90 |
+
case 'exportWAV':
|
| 91 |
+
exportWAV(e.data.type);
|
| 92 |
+
break;
|
| 93 |
+
case 'getBuffer':
|
| 94 |
+
getBuffer();
|
| 95 |
+
break;
|
| 96 |
+
case 'clear':
|
| 97 |
+
clear();
|
| 98 |
+
break;
|
| 99 |
+
}
|
| 100 |
+
};
|
| 101 |
+
|
| 102 |
+
function init(config) {
|
| 103 |
+
sampleRate = config.sampleRate;
|
| 104 |
+
numChannels = config.numChannels;
|
| 105 |
+
initBuffers();
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
function record(inputBuffer) {
|
| 109 |
+
for (var channel = 0; channel < numChannels; channel++) {
|
| 110 |
+
recBuffers[channel].push(inputBuffer[channel]);
|
| 111 |
+
}
|
| 112 |
+
recLength += inputBuffer[0].length;
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
function exportWAV(type) {
|
| 116 |
+
var buffers = [];
|
| 117 |
+
for (var channel = 0; channel < numChannels; channel++) {
|
| 118 |
+
buffers.push(mergeBuffers(recBuffers[channel], recLength));
|
| 119 |
+
}
|
| 120 |
+
var interleaved = undefined;
|
| 121 |
+
if (numChannels === 2) {
|
| 122 |
+
interleaved = interleave(buffers[0], buffers[1]);
|
| 123 |
+
} else {
|
| 124 |
+
interleaved = buffers[0];
|
| 125 |
+
}
|
| 126 |
+
var dataview = encodeWAV(interleaved);
|
| 127 |
+
var audioBlob = new Blob([dataview], { type: type });
|
| 128 |
+
|
| 129 |
+
self.postMessage({ command: 'exportWAV', data: audioBlob });
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
function getBuffer() {
|
| 133 |
+
var buffers = [];
|
| 134 |
+
for (var channel = 0; channel < numChannels; channel++) {
|
| 135 |
+
buffers.push(mergeBuffers(recBuffers[channel], recLength));
|
| 136 |
+
}
|
| 137 |
+
self.postMessage({ command: 'getBuffer', data: buffers });
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
function clear() {
|
| 141 |
+
recLength = 0;
|
| 142 |
+
recBuffers = [];
|
| 143 |
+
initBuffers();
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
function initBuffers() {
|
| 147 |
+
for (var channel = 0; channel < numChannels; channel++) {
|
| 148 |
+
recBuffers[channel] = [];
|
| 149 |
+
}
|
| 150 |
+
}
|
| 151 |
+
|
| 152 |
+
function mergeBuffers(recBuffers, recLength) {
|
| 153 |
+
var result = new Float32Array(recLength);
|
| 154 |
+
var offset = 0;
|
| 155 |
+
for (var i = 0; i < recBuffers.length; i++) {
|
| 156 |
+
result.set(recBuffers[i], offset);
|
| 157 |
+
offset += recBuffers[i].length;
|
| 158 |
+
}
|
| 159 |
+
return result;
|
| 160 |
+
}
|
| 161 |
+
|
| 162 |
+
function interleave(inputL, inputR) {
|
| 163 |
+
var length = inputL.length + inputR.length;
|
| 164 |
+
var result = new Float32Array(length);
|
| 165 |
+
|
| 166 |
+
var index = 0,
|
| 167 |
+
inputIndex = 0;
|
| 168 |
+
|
| 169 |
+
while (index < length) {
|
| 170 |
+
result[index++] = inputL[inputIndex];
|
| 171 |
+
result[index++] = inputR[inputIndex];
|
| 172 |
+
inputIndex++;
|
| 173 |
+
}
|
| 174 |
+
return result;
|
| 175 |
+
}
|
| 176 |
+
|
| 177 |
+
function floatTo16BitPCM(output, offset, input) {
|
| 178 |
+
for (var i = 0; i < input.length; i++, offset += 2) {
|
| 179 |
+
var s = Math.max(-1, Math.min(1, input[i]));
|
| 180 |
+
output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
|
| 181 |
+
}
|
| 182 |
+
}
|
| 183 |
+
|
| 184 |
+
function writeString(view, offset, string) {
|
| 185 |
+
for (var i = 0; i < string.length; i++) {
|
| 186 |
+
view.setUint8(offset + i, string.charCodeAt(i));
|
| 187 |
+
}
|
| 188 |
+
}
|
| 189 |
+
|
| 190 |
+
function encodeWAV(samples) {
|
| 191 |
+
var buffer = new ArrayBuffer(44 + samples.length * 2);
|
| 192 |
+
var view = new DataView(buffer);
|
| 193 |
+
|
| 194 |
+
/* RIFF identifier */
|
| 195 |
+
writeString(view, 0, 'RIFF');
|
| 196 |
+
/* RIFF chunk length */
|
| 197 |
+
view.setUint32(4, 36 + samples.length * 2, true);
|
| 198 |
+
/* RIFF type */
|
| 199 |
+
writeString(view, 8, 'WAVE');
|
| 200 |
+
/* format chunk identifier */
|
| 201 |
+
writeString(view, 12, 'fmt ');
|
| 202 |
+
/* format chunk length */
|
| 203 |
+
view.setUint32(16, 16, true);
|
| 204 |
+
/* sample format (raw) */
|
| 205 |
+
view.setUint16(20, 1, true);
|
| 206 |
+
/* channel count */
|
| 207 |
+
view.setUint16(22, numChannels, true);
|
| 208 |
+
/* sample rate */
|
| 209 |
+
view.setUint32(24, sampleRate, true);
|
| 210 |
+
/* byte rate (sample rate * block align) */
|
| 211 |
+
view.setUint32(28, sampleRate * 4, true);
|
| 212 |
+
/* block align (channel count * bytes per sample) */
|
| 213 |
+
view.setUint16(32, numChannels * 2, true);
|
| 214 |
+
/* bits per sample */
|
| 215 |
+
view.setUint16(34, 16, true);
|
| 216 |
+
/* data chunk identifier */
|
| 217 |
+
writeString(view, 36, 'data');
|
| 218 |
+
/* data chunk length */
|
| 219 |
+
view.setUint32(40, samples.length * 2, true);
|
| 220 |
+
|
| 221 |
+
floatTo16BitPCM(view, 44, samples);
|
| 222 |
+
|
| 223 |
+
return view;
|
| 224 |
+
}
|
| 225 |
+
}, self);
|
| 226 |
+
|
| 227 |
+
this.worker.postMessage({
|
| 228 |
+
command: 'init',
|
| 229 |
+
config: {
|
| 230 |
+
sampleRate: this.context.sampleRate,
|
| 231 |
+
numChannels: this.config.numChannels
|
| 232 |
+
}
|
| 233 |
+
});
|
| 234 |
+
|
| 235 |
+
this.worker.onmessage = function (e) {
|
| 236 |
+
var cb = _this.callbacks[e.data.command].pop();
|
| 237 |
+
if (typeof cb == 'function') {
|
| 238 |
+
cb(e.data.data);
|
| 239 |
+
}
|
| 240 |
+
};
|
| 241 |
+
}
|
| 242 |
+
|
| 243 |
+
_createClass(Recorder, [{
|
| 244 |
+
key: 'record',
|
| 245 |
+
value: function record() {
|
| 246 |
+
this.recording = true;
|
| 247 |
+
}
|
| 248 |
+
}, {
|
| 249 |
+
key: 'stop',
|
| 250 |
+
value: function stop() {
|
| 251 |
+
this.recording = false;
|
| 252 |
+
}
|
| 253 |
+
}, {
|
| 254 |
+
key: 'clear',
|
| 255 |
+
value: function clear() {
|
| 256 |
+
this.worker.postMessage({ command: 'clear' });
|
| 257 |
+
}
|
| 258 |
+
}, {
|
| 259 |
+
key: 'getBuffer',
|
| 260 |
+
value: function getBuffer(cb) {
|
| 261 |
+
cb = cb || this.config.callback;
|
| 262 |
+
if (!cb) throw new Error('Callback not set');
|
| 263 |
+
|
| 264 |
+
this.callbacks.getBuffer.push(cb);
|
| 265 |
+
|
| 266 |
+
this.worker.postMessage({ command: 'getBuffer' });
|
| 267 |
+
}
|
| 268 |
+
}, {
|
| 269 |
+
key: 'exportWAV',
|
| 270 |
+
value: function exportWAV(cb, mimeType) {
|
| 271 |
+
mimeType = mimeType || this.config.mimeType;
|
| 272 |
+
cb = cb || this.config.callback;
|
| 273 |
+
if (!cb) throw new Error('Callback not set');
|
| 274 |
+
|
| 275 |
+
this.callbacks.exportWAV.push(cb);
|
| 276 |
+
|
| 277 |
+
this.worker.postMessage({
|
| 278 |
+
command: 'exportWAV',
|
| 279 |
+
type: mimeType
|
| 280 |
+
});
|
| 281 |
+
}
|
| 282 |
+
}], [{
|
| 283 |
+
key: 'forceDownload',
|
| 284 |
+
value: function forceDownload(blob, filename) {
|
| 285 |
+
var url = (window.URL || window.webkitURL).createObjectURL(blob);
|
| 286 |
+
var link = window.document.createElement('a');
|
| 287 |
+
link.href = url;
|
| 288 |
+
link.download = filename || 'output.wav';
|
| 289 |
+
var click = document.createEvent("Event");
|
| 290 |
+
click.initEvent("click", true, true);
|
| 291 |
+
link.dispatchEvent(click);
|
| 292 |
+
}
|
| 293 |
+
}]);
|
| 294 |
+
|
| 295 |
+
return Recorder;
|
| 296 |
+
})();
|
| 297 |
+
|
| 298 |
+
exports.default = Recorder;
|
| 299 |
+
|
| 300 |
+
},{"inline-worker":3}],3:[function(require,module,exports){
|
| 301 |
+
"use strict";
|
| 302 |
+
|
| 303 |
+
module.exports = require("./inline-worker");
|
| 304 |
+
},{"./inline-worker":4}],4:[function(require,module,exports){
|
| 305 |
+
(function (global){
|
| 306 |
+
"use strict";
|
| 307 |
+
|
| 308 |
+
var _createClass = (function () { function defineProperties(target, props) { for (var key in props) { var prop = props[key]; prop.configurable = true; if (prop.value) prop.writable = true; } Object.defineProperties(target, props); } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; })();
|
| 309 |
+
|
| 310 |
+
var _classCallCheck = function (instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } };
|
| 311 |
+
|
| 312 |
+
var WORKER_ENABLED = !!(global === global.window && global.URL && global.Blob && global.Worker);
|
| 313 |
+
|
| 314 |
+
var InlineWorker = (function () {
|
| 315 |
+
function InlineWorker(func, self) {
|
| 316 |
+
var _this = this;
|
| 317 |
+
|
| 318 |
+
_classCallCheck(this, InlineWorker);
|
| 319 |
+
|
| 320 |
+
if (WORKER_ENABLED) {
|
| 321 |
+
var functionBody = func.toString().trim().match(/^function\s*\w*\s*\([\w\s,]*\)\s*{([\w\W]*?)}$/)[1];
|
| 322 |
+
var url = global.URL.createObjectURL(new global.Blob([functionBody], { type: "text/javascript" }));
|
| 323 |
+
|
| 324 |
+
return new global.Worker(url);
|
| 325 |
+
}
|
| 326 |
+
|
| 327 |
+
this.self = self;
|
| 328 |
+
this.self.postMessage = function (data) {
|
| 329 |
+
setTimeout(function () {
|
| 330 |
+
_this.onmessage({ data: data });
|
| 331 |
+
}, 0);
|
| 332 |
+
};
|
| 333 |
+
|
| 334 |
+
setTimeout(function () {
|
| 335 |
+
func.call(self);
|
| 336 |
+
}, 0);
|
| 337 |
+
}
|
| 338 |
+
|
| 339 |
+
_createClass(InlineWorker, {
|
| 340 |
+
postMessage: {
|
| 341 |
+
value: function postMessage(data) {
|
| 342 |
+
var _this = this;
|
| 343 |
+
|
| 344 |
+
setTimeout(function () {
|
| 345 |
+
_this.self.onmessage({ data: data });
|
| 346 |
+
}, 0);
|
| 347 |
+
}
|
| 348 |
+
}
|
| 349 |
+
});
|
| 350 |
+
|
| 351 |
+
return InlineWorker;
|
| 352 |
+
})();
|
| 353 |
+
|
| 354 |
+
module.exports = InlineWorker;
|
| 355 |
+
}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {})
|
| 356 |
+
},{}]},{},[1])(1)
|
| 357 |
+
});
|
Web/script.js
ADDED
|
@@ -0,0 +1,498 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
const uploadButton = document.getElementById('upload-button');
|
| 2 |
+
const audioFileInput = document.getElementById('audio-file');
|
| 3 |
+
const recordButton = document.getElementById('recordButton');
|
| 4 |
+
const stopButton = document.getElementById('stopButton');
|
| 5 |
+
const pauseButton = document.getElementById('pauseButton');
|
| 6 |
+
const responseDiv = document.getElementById('response');
|
| 7 |
+
const metadataDisplay = document.getElementById('metadata-display');
|
| 8 |
+
|
| 9 |
+
let gumStream;
|
| 10 |
+
let rec;
|
| 11 |
+
let input;
|
| 12 |
+
let audioContext;
|
| 13 |
+
|
| 14 |
+
function startAudioContext() {
|
| 15 |
+
if (!audioContext) {
|
| 16 |
+
audioContext = new (window.AudioContext || window.webkitAudioContext)();
|
| 17 |
+
} else if (audioContext.state === 'suspended') {
|
| 18 |
+
audioContext.resume().then(() => {
|
| 19 |
+
console.log('AudioContext repris');
|
| 20 |
+
});
|
| 21 |
+
}
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
+
// Fonction pour rééchantillonner l'audio à 16 kHz
|
| 25 |
+
async function resampleAudio(blob, targetSampleRate = 16000) {
|
| 26 |
+
return new Promise((resolve, reject) => {
|
| 27 |
+
const reader = new FileReader();
|
| 28 |
+
reader.onload = async () => {
|
| 29 |
+
const audioContext = new (window.AudioContext || window.webkitAudioContext)();
|
| 30 |
+
const buffer = await audioContext.decodeAudioData(reader.result);
|
| 31 |
+
|
| 32 |
+
// Créer un nouvel AudioContext avec le taux d'échantillonnage cible
|
| 33 |
+
const offlineContext = new OfflineAudioContext(
|
| 34 |
+
buffer.numberOfChannels,
|
| 35 |
+
buffer.length * (targetSampleRate / buffer.sampleRate),
|
| 36 |
+
targetSampleRate
|
| 37 |
+
);
|
| 38 |
+
|
| 39 |
+
// Créer une source audio avec le buffer original
|
| 40 |
+
const source = offlineContext.createBufferSource();
|
| 41 |
+
source.buffer = buffer;
|
| 42 |
+
|
| 43 |
+
// Connecter la source au contexte offline
|
| 44 |
+
source.connect(offlineContext.destination);
|
| 45 |
+
source.start();
|
| 46 |
+
|
| 47 |
+
// Rendre l'audio
|
| 48 |
+
const resampledBuffer = await offlineContext.startRendering();
|
| 49 |
+
|
| 50 |
+
// Convertir le buffer rééchantillonné en WAV
|
| 51 |
+
const wavBlob = bufferToWav(resampledBuffer);
|
| 52 |
+
resolve(wavBlob);
|
| 53 |
+
};
|
| 54 |
+
reader.onerror = reject;
|
| 55 |
+
reader.readAsArrayBuffer(blob);
|
| 56 |
+
});
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
// Fonction pour convertir un AudioBuffer en WAV
|
| 60 |
+
function bufferToWav(buffer) {
|
| 61 |
+
const numChannels = buffer.numberOfChannels;
|
| 62 |
+
const sampleRate = buffer.sampleRate;
|
| 63 |
+
const length = buffer.length * numChannels * 2; // 2 bytes par échantillon
|
| 64 |
+
const data = new Float32Array(length);
|
| 65 |
+
|
| 66 |
+
// Interleave les canaux
|
| 67 |
+
for (let channel = 0; channel < numChannels; channel++) {
|
| 68 |
+
const channelData = buffer.getChannelData(channel);
|
| 69 |
+
for (let i = 0; i < channelData.length; i++) {
|
| 70 |
+
data[i * numChannels + channel] = channelData[i];
|
| 71 |
+
}
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
// Encoder en WAV
|
| 75 |
+
const wavBlob = encodeWAV(data, sampleRate, numChannels);
|
| 76 |
+
return wavBlob;
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
// Fonction pour encoder des données audio en WAV
|
| 80 |
+
function encodeWAV(samples, sampleRate, numChannels) {
|
| 81 |
+
const buffer = new ArrayBuffer(44 + samples.length * 2);
|
| 82 |
+
const view = new DataView(buffer);
|
| 83 |
+
|
| 84 |
+
// Écrire l'en-tête WAV
|
| 85 |
+
writeString(view, 0, 'RIFF');
|
| 86 |
+
view.setUint32(4, 36 + samples.length * 2, true);
|
| 87 |
+
writeString(view, 8, 'WAVE');
|
| 88 |
+
writeString(view, 12, 'fmt ');
|
| 89 |
+
view.setUint32(16, 16, true);
|
| 90 |
+
view.setUint16(20, 1, true); // Format PCM
|
| 91 |
+
view.setUint16(22, numChannels, true);
|
| 92 |
+
view.setUint32(24, sampleRate, true);
|
| 93 |
+
view.setUint32(28, sampleRate * numChannels * 2, true);
|
| 94 |
+
view.setUint16(32, numChannels * 2, true);
|
| 95 |
+
view.setUint16(34, 16, true); // Bits par échantillon
|
| 96 |
+
writeString(view, 36, 'data');
|
| 97 |
+
view.setUint32(40, samples.length * 2, true);
|
| 98 |
+
|
| 99 |
+
// Écrire les échantillons audio
|
| 100 |
+
floatTo16BitPCM(view, 44, samples);
|
| 101 |
+
|
| 102 |
+
return new Blob([view], { type: 'audio/wav' });
|
| 103 |
+
}
|
| 104 |
+
|
| 105 |
+
// Fonction utilitaire pour écrire une chaîne dans un DataView
|
| 106 |
+
function writeString(view, offset, string) {
|
| 107 |
+
for (let i = 0; i < string.length; i++) {
|
| 108 |
+
view.setUint8(offset + i, string.charCodeAt(i));
|
| 109 |
+
}
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
// Fonction utilitaire pour convertir des échantillons flottants en PCM 16 bits
|
| 113 |
+
// Convert float samples in [-1, 1] to signed 16-bit little-endian PCM,
// writing them into the DataView starting at `offset`.
function floatTo16BitPCM(view, offset, input) {
    let pos = offset;
    for (const sample of input) {
        // Clamp out-of-range samples, then scale asymmetrically so that
        // -1 maps to -0x8000 and +1 maps to +0x7FFF.
        const clamped = Math.min(1, Math.max(-1, sample));
        view.setInt16(pos, clamped < 0 ? clamped * 0x8000 : clamped * 0x7FFF, true);
        pos += 2;
    }
}
|
| 119 |
+
|
| 120 |
+
// Function to fetch metadata from the text file
|
| 121 |
+
// Fetch and parse the semicolon-separated metadata file.
// Returns an array of {header: value} objects, or [] on any failure.
async function fetchMetadata() {
    try {
        const response = await fetch('../metadata.txt'); // must be served alongside the page
        if (!response.ok) {
            throw new Error('Failed to fetch metadata');
        }

        const text = await response.text();
        console.log('Metadata file content:', text); // Debugging

        // One trimmed, non-empty line per record; first line holds the headers.
        const lines = text.split('\n').map(line => line.trim()).filter(line => line !== '');
        if (lines.length < 2) {
            throw new Error('Metadata file is empty or malformed');
        }

        const headers = lines[0].split(';').map(h => h.trim().toLowerCase());

        // Build one object per data line, keyed by the lowercased headers.
        const metadata = lines.slice(1).map(line => {
            const values = line.split(';').map(value => value.trim());
            const entry = {};
            headers.forEach((header, index) => {
                entry[header] = values[index] || 'N/A'; // default when a column is missing
            });
            return entry;
        });

        console.log('Parsed Metadata:', metadata); // Debugging
        return metadata;
    } catch (error) {
        console.error('Error fetching metadata:', error);
        return []; // callers can always iterate the result safely
    }
}
|
| 157 |
+
|
| 158 |
+
// Fill every filter drop-down with its fixed vocabulary.
// NOTE(review): label values are "spoof"/"genuine" while `system` uses
// "bonafide" — confirm these match the metadata file's actual values.
function populateFilters() {
    const predefinedValues = {
        label: ["spoof", "genuine"],
        // "bonafide" plus attack identifiers A01..A19
        system: ["bonafide"].concat(Array.from({ length: 19 }, (_, i) => `A${String(i + 1).padStart(2, '0')}`)),
        codec: ["FLAC", "WAV", "MP3"],
        genre: ["male", "female"],
        year: ["2020", "2021", "2022", "2023", "2024", "2025"]
    };

    for (const [key, values] of Object.entries(predefinedValues)) {
        populateDropdown(`filter-${key}`, values);
    }
}
|
| 171 |
+
|
| 172 |
+
// Replace the options of the <select> with id `id`: a default "All"
// entry followed by one capitalized option per value. Re-filters the
// metadata whenever the selection changes.
function populateDropdown(id, values) {
    const select = document.getElementById(id);
    select.innerHTML = '<option value="">All</option>'; // default: no filtering

    for (const value of values) {
        const option = document.createElement("option");
        option.value = value;
        option.textContent = value.charAt(0).toUpperCase() + value.slice(1); // initial capital
        select.appendChild(option);
    }

    select.addEventListener("change", filterMetadata);
}
|
| 185 |
+
|
| 186 |
+
// Re-fetch the metadata and re-render it with the current drop-down
// selections applied.
// BUG FIX: the original read all five selects and computed a filtered
// list here, then discarded it and passed the unfiltered metadata to
// displayMetadata — which re-applies the same filters internally in
// filteredOnly mode. The redundant (dead) computation is removed;
// rendering behavior is unchanged.
function filterMetadata() {
    fetchMetadata().then(metadata => {
        displayMetadata(null, metadata, true); // filtering mode
    });
}
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
// Render a Bootstrap table of metadata entries into `metadataDisplay`.
//
// Two modes:
//  - filteredOnly === true: ignore `files`, keep the entries matching
//    the current drop-down selections;
//  - otherwise: keep only the entries whose `filedir` matches one of
//    the selected File objects' names.
//
// IMPROVEMENT: the drop-down-reading code was duplicated verbatim from
// filterMetadata; it is factored into a local helper here.
function displayMetadata(files, metadata, filteredOnly = false) {
    metadataDisplay.innerHTML = ''; // clear previous rendering

    // Without filtering and without a selection there is nothing to show.
    if (!filteredOnly && (!files || files.length === 0)) {
        metadataDisplay.innerHTML = '<p>No files selected.</p>';
        return;
    }

    let filteredMetadata;
    if (filteredOnly) {
        // Keep entries matching every non-empty drop-down selection.
        const readFilter = id => document.getElementById(id).value.toLowerCase();
        const wanted = {
            label: readFilter('filter-label'),
            system: readFilter('filter-system'),
            codec: readFilter('filter-codec'),
            genre: readFilter('filter-genre'),
            year: readFilter('filter-year'),
        };
        filteredMetadata = metadata.filter(entry =>
            Object.keys(wanted).every(key =>
                wanted[key] === '' || entry[key].toLowerCase() === wanted[key]));
    } else {
        // Keep only the metadata rows of the selected files.
        const selectedFiles = Array.from(files).map(file => file.name.trim().toLowerCase());
        filteredMetadata = metadata.filter(entry =>
            selectedFiles.includes(entry.filedir.trim().toLowerCase()));
    }

    if (filteredMetadata.length === 0) {
        metadataDisplay.innerHTML = '<p>No metadata found.</p>';
        return;
    }

    // Build the Bootstrap table.
    const table = document.createElement('table');
    table.classList.add('table', 'table-striped', 'table-bordered');

    // Header row: capitalized metadata field names.
    const headerRow = document.createElement('tr');
    Object.keys(filteredMetadata[0]).forEach(headerText => {
        const header = document.createElement('th');
        header.textContent = headerText.charAt(0).toUpperCase() + headerText.slice(1);
        headerRow.appendChild(header);
    });
    table.appendChild(headerRow);

    // One row per metadata entry.
    filteredMetadata.forEach(entry => {
        const row = document.createElement('tr');
        Object.values(entry).forEach(value => {
            const cell = document.createElement('td');
            cell.textContent = value;
            row.appendChild(cell);
        });
        table.appendChild(row);
    });

    metadataDisplay.appendChild(table);
}
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
// Initial page setup: populate the filter drop-downs, then render the
// full metadata table.
// BUG FIX: the original called displayMetadata(metadata), which passed
// the metadata array as the `files` parameter; the non-filtered branch
// then dereferenced `file.name` on plain metadata objects. Passing
// (null, metadata, true) renders all entries via the filtering path
// (all drop-downs start at "All", so nothing is excluded).
document.addEventListener('DOMContentLoaded', async () => {
    populateFilters(); // load the fixed values into the drop-downs
    const metadata = await fetchMetadata();
    displayMetadata(null, metadata, true);
});
|
| 281 |
+
|
| 282 |
+
// Upload one or more audio files to the prediction API, show their
// metadata while the request runs, then render one result line per file.
async function uploadAudio(files) {
    if (!files || files.length === 0) {
        alert('Please select or record files first!');
        return;
    }

    const filesArray = Array.from(files);
    const formData = new FormData();
    filesArray.forEach(file => formData.append('files', file));

    responseDiv.textContent = 'Uploading and analyzing audio...';

    try {
        // Show only the metadata of the selected files.
        const metadataObj = await fetchMetadata();
        displayMetadata(filesArray, metadataObj);

        const response = await fetch('http://127.0.0.1:8000/predict/', {
            method: 'POST',
            body: formData,
        });

        if (!response.ok) {
            const errorData = await response.json();
            throw new Error(`Server error: ${errorData.message || response.statusText}`);
        }

        const data = await response.json();
        responseDiv.innerHTML = '';

        // One result line per analyzed file.
        data.forEach((result) => {
            const resultDiv = document.createElement('div');
            resultDiv.innerHTML = `File: <b>${result.filename}</b>, Label: <b>${result.label}</b>, Confidence: <b>${result.confidence}</b>`;
            responseDiv.appendChild(resultDiv);
        });
    } catch (error) {
        console.error('Error:', error);
        responseDiv.textContent = 'Error: ' + error.message;
    }
}
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
|
| 329 |
+
// Upload button: validate the file selection, then delegate to uploadAudio.
uploadButton.addEventListener('click', () => {
    const selected = audioFileInput.files;
    if (!selected || selected.length === 0) {
        alert('Please select files first!');
        return;
    }
    uploadAudio(selected);
});
|
| 337 |
+
|
| 338 |
+
// Start Recording
|
| 339 |
+
// Start Recording: request microphone access, wire it into Recorder.js
// and begin capturing a mono take.
recordButton.addEventListener('click', async () => {
    startAudioContext(); // create or resume the shared AudioContext

    console.log('Recording started');

    try {
        gumStream = await navigator.mediaDevices.getUserMedia({ audio: true, video: false });
        console.log('Microphone access granted');

        input = audioContext.createMediaStreamSource(gumStream);
        console.log('Audio source created');

        rec = new Recorder(input, { numChannels: 1 }); // mono capture
        console.log('Recorder initialized');

        rec.record();
        console.log('Recording started');

        // Only Stop/Pause make sense while a take is in progress.
        recordButton.disabled = true;
        stopButton.disabled = false;
        pauseButton.disabled = false;
    } catch (error) {
        console.error('Error accessing microphone:', error);
        alert('Error accessing microphone: ' + error.message);
    }
});
|
| 369 |
+
|
| 370 |
+
|
| 371 |
+
// Stop the capture, release the microphone, export the take as WAV,
// resample it to 16 kHz and send it to the prediction API.
function stopRecording() {
    console.log('stopRecording called');

    // Reset the transport buttons.
    stopButton.disabled = true;
    recordButton.disabled = false;
    pauseButton.disabled = true;
    pauseButton.innerHTML = 'Pause';

    rec.stop();
    console.log('Recording stopped');

    // Release the microphone track.
    gumStream.getAudioTracks()[0].stop();
    console.log('Microphone access stopped');

    rec.exportWAV(async (blob) => {
        console.log('Audio exported as WAV');

        // Guard against an empty capture.
        if (blob.size === 0) {
            console.error('Le fichier audio est vide.');
            responseDiv.textContent = 'Erreur : Le fichier audio est vide.';
            return;
        }

        try {
            // The model expects 16 kHz input.
            const resampledBlob = await resampleAudio(blob, 16000);
            console.log('Audio rééchantillonné à 16 kHz');

            await sendAudioToAPI(resampledBlob);
        } catch (error) {
            console.error('Erreur lors du rééchantillonnage :', error);
            responseDiv.textContent = 'Erreur : ' + error.message;
        }
    });
}
|
| 413 |
+
|
| 414 |
+
// POST a recorded WAV blob to the prediction endpoint and display the
// first returned prediction (label + confidence).
async function sendAudioToAPI(blob) {
    console.log('Sending audio to API');

    const formData = new FormData();
    formData.append('files', blob, 'recorded-audio.wav'); // field name expected by the API

    try {
        const response = await fetch('http://127.0.0.1:8000/predict/', {
            method: 'POST',
            body: formData,
        });
        console.log('API response status:', response.status);

        if (!response.ok) {
            throw new Error(`HTTP error! status: ${response.status}`);
        }

        const data = await response.json();
        console.log('API response data:', data);

        // Show the verdict for the single uploaded recording.
        if (data.length > 0) {
            responseDiv.innerHTML = `Label: <b>${data[0].label}</b>, Confidence: <b>${data[0].confidence}</b>`;
        } else {
            responseDiv.textContent = 'Error: No data returned from the API.';
        }
    } catch (error) {
        console.error('Error sending audio to API:', error);
        responseDiv.textContent = 'Error: ' + error.message;
    }
}
|
| 447 |
+
|
| 448 |
+
// Pause Recording
|
| 449 |
+
// Pause/Resume button: toggles the active recording and relabels itself.
pauseButton.addEventListener('click', () => {
    const wasRecording = rec.recording;
    if (wasRecording) {
        rec.stop();   // pause
    } else {
        rec.record(); // resume
    }
    pauseButton.textContent = wasRecording ? 'Resume' : 'Pause';
});
|
| 460 |
+
|
| 461 |
+
|
| 462 |
+
// Stop button delegates to stopRecording().
stopButton.addEventListener('click', () => stopRecording());
|
| 465 |
+
|
| 466 |
+
// Ajouter un écouteur d'événement pour un clic utilisateur sur le bouton d'enregistrement
|
| 467 |
+
// BUG FIX: a second, byte-identical 'click' handler for recordButton was
// registered here (an exact duplicate of the one registered above).
// addEventListener attaches distinct function objects separately, so each
// click ran the whole start-recording sequence twice — requesting the
// microphone and constructing a Recorder two times. The duplicate
// registration has been removed; the first handler remains in effect.
|
| 497 |
+
|
| 498 |
+
|
Web/styles.css
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* Table that lists the metadata of the selected / filtered audio files. */
.metadata-table {
    width: 100%;
    border-collapse: collapse;
    margin-top: 10px;
}

/* Shared cell styling for header and body cells. */
.metadata-table th,
.metadata-table td {
    border: 1px solid #ddd;
    padding: 8px;
    text-align: left;
}

/* Header row stands out from the data rows. */
.metadata-table th {
    background-color: #f2f2f2;
    font-weight: bold;
}

/* Zebra striping for readability. */
.metadata-table tr:nth-child(even) {
    background-color: #f9f9f9;
}

/* Highlight the row under the cursor. */
.metadata-table tr:hover {
    background-color: #f1f1f1;
}
|
calculate_modules.py
ADDED
|
@@ -0,0 +1,333 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import numpy as np
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def obtain_asv_error_rates(tar_asv, non_asv, spoof_asv, asv_threshold):
    """Compute ASV error rates at a fixed decision threshold.

    Scores at or above ``asv_threshold`` count as accepted.

    Parameters
    ----------
    tar_asv : np.ndarray
        ASV scores of target (genuine speaker) trials.
    non_asv : np.ndarray
        ASV scores of nontarget (zero-effort impostor) trials.
    spoof_asv : np.ndarray
        ASV scores of spoofed trials; may be empty.
    asv_threshold : float
        Operating threshold of the ASV system.

    Returns
    -------
    tuple
        ``(Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, Pfa_spoof_asv)``; the two
        spoof rates are ``None`` when no spoof trials were provided.
    """
    # False alarm and miss rates for ASV.
    # (np.sum instead of builtin sum: consistent with the spoof branch
    # below and avoids a slow Python-level reduction over ndarrays.)
    Pfa_asv = np.sum(non_asv >= asv_threshold) / non_asv.size
    Pmiss_asv = np.sum(tar_asv < asv_threshold) / tar_asv.size

    # Rate of rejecting / accepting spoofs in ASV, if any were scored.
    if spoof_asv.size == 0:
        Pmiss_spoof_asv = None
        Pfa_spoof_asv = None
    else:
        Pmiss_spoof_asv = np.sum(spoof_asv < asv_threshold) / spoof_asv.size
        Pfa_spoof_asv = np.sum(spoof_asv >= asv_threshold) / spoof_asv.size

    return Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, Pfa_spoof_asv
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# NOTE(review): this is a byte-identical duplicate of obtain_asv_error_rates
# defined above; at import time this second definition silently shadows the
# first. One of the two should be deleted.
def obtain_asv_error_rates(tar_asv, non_asv, spoof_asv, asv_threshold):
    """Compute ASV error rates at a fixed decision threshold.

    Scores at or above ``asv_threshold`` count as accepted.

    Returns ``(Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, Pfa_spoof_asv)``; the
    two spoof rates are ``None`` when ``spoof_asv`` is empty.
    """
    # False alarm and miss rates for ASV (np.sum for consistency/speed).
    Pfa_asv = np.sum(non_asv >= asv_threshold) / non_asv.size
    Pmiss_asv = np.sum(tar_asv < asv_threshold) / tar_asv.size

    # Rate of rejecting / accepting spoofs in ASV, if any were scored.
    if spoof_asv.size == 0:
        Pmiss_spoof_asv = None
        Pfa_spoof_asv = None
    else:
        Pmiss_spoof_asv = np.sum(spoof_asv < asv_threshold) / spoof_asv.size
        Pfa_spoof_asv = np.sum(spoof_asv >= asv_threshold) / spoof_asv.size

    return Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, Pfa_spoof_asv
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def compute_det_curve(target_scores, nontarget_scores):
    """Compute the detection error trade-off (DET) curve.

    Parameters
    ----------
    target_scores : np.ndarray
        Scores of target (positive-class) trials.
    nontarget_scores : np.ndarray
        Scores of nontarget (negative-class) trials.

    Returns
    -------
    tuple
        ``(frr, far, thresholds)``: false rejection rates, false
        acceptance rates, and the thresholds at which they were measured
        (the sorted scores, prefixed by one value just below the minimum).
    """
    num_trials = target_scores.size + nontarget_scores.size
    scores = np.concatenate((target_scores, nontarget_scores))
    is_target = np.concatenate(
        (np.ones(target_scores.size), np.zeros(nontarget_scores.size)))

    # Stable sort keeps tied scores in a deterministic order.
    order = np.argsort(scores, kind='mergesort')
    is_target = is_target[order]

    # Targets at or below each threshold, and nontargets above it.
    targets_below = np.cumsum(is_target)
    nontargets_above = nontarget_scores.size - \
        (np.arange(1, num_trials + 1) - targets_below)

    frr = np.concatenate(
        (np.atleast_1d(0), targets_below / target_scores.size))
    far = np.concatenate(
        (np.atleast_1d(1), nontargets_above / nontarget_scores.size))
    thresholds = np.concatenate(
        (np.atleast_1d(scores[order[0]] - 0.001), scores[order]))

    return frr, far, thresholds
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def compute_Pmiss_Pfa_Pspoof_curves(tar_scores, non_scores, spf_scores):
    """Compute miss / false-alarm curves for target, nontarget and spoof trials.

    Returns ``(Pmiss, Pfa_non, Pfa_spoof, thresholds)`` evaluated at every
    pooled score (plus one threshold just below the minimum score).
    """
    # Pool all scores; arbitrary labels 1=target, 0=nontarget, -1=spoof.
    all_scores = np.concatenate((tar_scores, non_scores, spf_scores))
    labels = np.concatenate((np.ones(tar_scores.size),
                             np.zeros(non_scores.size),
                             -1 * np.ones(spf_scores.size)))

    # Stable sort of labels by ascending score.
    order = np.argsort(all_scores, kind='mergesort')
    labels = labels[order]

    # Per-class counts at or below each threshold.
    tar_sums = np.cumsum(labels == 1)
    non_sums = np.cumsum(labels == 0)
    spoof_sums = np.cumsum(labels == -1)

    Pmiss = np.concatenate((np.atleast_1d(0), tar_sums / tar_scores.size))
    Pfa_non = np.concatenate((np.atleast_1d(1), 1 - (non_sums / non_scores.size)))
    Pfa_spoof = np.concatenate((np.atleast_1d(1), 1 - (spoof_sums / spf_scores.size)))
    # Thresholds are the sorted scores, prefixed by one just below the minimum.
    thresholds = np.concatenate((np.atleast_1d(all_scores[order[0]] - 0.001),
                                 all_scores[order]))

    return Pmiss, Pfa_non, Pfa_spoof, thresholds
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def compute_eer(target_scores, nontarget_scores):
    """Return the equal error rate (EER) plus the full DET curve.

    The EER is approximated at the DET point where the false rejection
    and false acceptance rates are closest.

    Returns ``(eer, frr, far, thresholds)``.
    """
    frr, far, thresholds = compute_det_curve(target_scores, nontarget_scores)
    eer_index = np.argmin(np.abs(frr - far))
    eer = np.mean((frr[eer_index], far[eer_index]))
    return eer, frr, far, thresholds
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def compute_mindcf(frr, far, thresholds, Pspoof, Cmiss, Cfa):
    """Minimum normalized detection cost over a DET curve.

    Parameters
    ----------
    frr, far : sequence of float
        False rejection / false acceptance rates per operating point.
    thresholds : sequence of float
        Score threshold of each operating point.
    Pspoof : float
        Prior probability of a spoof trial; target prior is ``1 - Pspoof``.
    Cmiss, Cfa : float
        Costs of a miss and of a false acceptance.

    Returns
    -------
    tuple
        ``(min_dcf, min_c_det_threshold)``.
    """
    min_c_det = float("inf")
    # BUG FIX: the original initialised the best threshold to the whole
    # `thresholds` array, so an empty frr/far returned an array instead
    # of a scalar. Default to the first threshold (or None when empty).
    min_c_det_threshold = thresholds[0] if len(thresholds) else None

    p_target = 1 - Pspoof
    for i in range(len(frr)):
        # Weighted sum of miss and false-alarm costs at operating point i.
        c_det = Cmiss * frr[i] * p_target + Cfa * far[i] * (1 - p_target)
        if c_det < min_c_det:
            min_c_det = c_det
            min_c_det_threshold = thresholds[i]
    # Normalize by the cost of the best trivial (accept-all / reject-all)
    # system; see Equations (3) and (4) of the evaluation plan.
    c_def = min(Cmiss * p_target, Cfa * (1 - p_target))
    min_dcf = min_c_det / c_def
    return min_dcf, min_c_det_threshold
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def compute_tDCF(bonafide_score_cm, spoof_score_cm, Pfa_asv, Pmiss_asv,
                 Pmiss_spoof_asv, cost_model, print_cost):
    """Compute the normalized tandem detection cost function (t-DCF) curve.

    Parameters:
        bonafide_score_cm: CM scores of bona fide trials (numpy array).
        spoof_score_cm: CM scores of spoofed trials (numpy array).
        Pfa_asv: ASV false alarm rate at its operating point.
        Pmiss_asv: ASV miss rate at its operating point.
        Pmiss_spoof_asv: ASV miss rate on spoofed trials; must not be None.
        cost_model: dict with priors 'Ptar', 'Pnon', 'Pspoof' (summing to 1)
            and costs 'Cfa_asv', 'Cmiss_asv', 'Cfa_cm', 'Cmiss_cm'
            (constants per the ASVspoof 2019 evaluation plan).
        print_cost: when True, print a human-readable summary of the model.

    Returns:
        (tDCF_norm, CM_thresholds): normalized t-DCF evaluated at every CM
        threshold, and the thresholds themselves.
    """

    # Sanity check of cost parameters
    if cost_model['Cfa_asv'] < 0 or cost_model['Cmiss_asv'] < 0 or \
            cost_model['Cfa_cm'] < 0 or cost_model['Cmiss_cm'] < 0:
        print('WARNING: Usually the cost values should be positive!')

    # Priors must be a valid probability distribution.
    if cost_model['Ptar'] < 0 or cost_model['Pnon'] < 0 or cost_model['Pspoof'] < 0 or \
            np.abs(cost_model['Ptar'] + cost_model['Pnon'] + cost_model['Pspoof'] - 1) > 1e-10:
        sys.exit(
            'ERROR: Your prior probabilities should be positive and sum up to one.'
        )

    # Unless we evaluate worst-case model, we need to have some spoof tests against asv
    if Pmiss_spoof_asv is None:
        sys.exit(
            'ERROR: you should provide miss rate of spoof tests against your ASV system.'
        )

    # Sanity check of scores
    combined_scores = np.concatenate((bonafide_score_cm, spoof_score_cm))
    if np.isnan(combined_scores).any() or np.isinf(combined_scores).any():
        sys.exit('ERROR: Your scores contain nan or inf.')

    # Sanity check that inputs are scores and not decisions
    n_uniq = np.unique(combined_scores).size
    if n_uniq < 3:
        sys.exit(
            'ERROR: You should provide soft CM scores - not binary decisions')

    # Obtain miss and false alarm rates of CM
    Pmiss_cm, Pfa_cm, CM_thresholds = compute_det_curve(
        bonafide_score_cm, spoof_score_cm)

    # Constants - see ASVspoof 2019 evaluation plan
    # C1 weights CM misses; C2 weights CM false acceptances of spoofs.
    C1 = cost_model['Ptar'] * (cost_model['Cmiss_cm'] - cost_model['Cmiss_asv'] * Pmiss_asv) - \
        cost_model['Pnon'] * cost_model['Cfa_asv'] * Pfa_asv
    C2 = cost_model['Cfa_cm'] * cost_model['Pspoof'] * (1 - Pmiss_spoof_asv)

    # Sanity check of the weights
    if C1 < 0 or C2 < 0:
        sys.exit(
            'You should never see this error but I cannot evalute tDCF with negative weights - please check whether your ASV error rates are correctly computed?'
        )

    # Obtain t-DCF curve for all thresholds
    tDCF = C1 * Pmiss_cm + C2 * Pfa_cm

    # Normalized t-DCF
    tDCF_norm = tDCF / np.minimum(C1, C2)

    # Everything should be fine if reaching here.
    if print_cost:

        print('t-DCF evaluation from [Nbona={}, Nspoof={}] trials\n'.format(
            bonafide_score_cm.size, spoof_score_cm.size))
        print('t-DCF MODEL')
        print('   Ptar         = {:8.5f} (Prior probability of target user)'.
              format(cost_model['Ptar']))
        print(
            '   Pnon         = {:8.5f} (Prior probability of nontarget user)'.
            format(cost_model['Pnon']))
        print(
            '   Pspoof       = {:8.5f} (Prior probability of spoofing attack)'.
            format(cost_model['Pspoof']))
        print(
            '   Cfa_asv      = {:8.5f} (Cost of ASV falsely accepting a nontarget)'
            .format(cost_model['Cfa_asv']))
        print(
            '   Cmiss_asv    = {:8.5f} (Cost of ASV falsely rejecting target speaker)'
            .format(cost_model['Cmiss_asv']))
        print(
            '   Cfa_cm       = {:8.5f} (Cost of CM falsely passing a spoof to ASV system)'
            .format(cost_model['Cfa_cm']))
        print(
            '   Cmiss_cm     = {:8.5f} (Cost of CM falsely blocking target utterance which never reaches ASV)'
            .format(cost_model['Cmiss_cm']))
        print(
            '\n   Implied normalized t-DCF function (depends on t-DCF parameters and ASV errors), s=CM threshold)'
        )

        if C2 == np.minimum(C1, C2):
            print(
                '   tDCF_norm(s) = {:8.5f} x Pmiss_cm(s) + Pfa_cm(s)\n'.format(
                    C1 / C2))
        else:
            print(
                '   tDCF_norm(s) = Pmiss_cm(s) + {:8.5f} x Pfa_cm(s)\n'.format(
                    C2 / C1))

    return tDCF_norm, CM_thresholds
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
def calculate_CLLR(target_llrs, nontarget_llrs):
    """
    Calculate the CLLR of the scores.

    Parameters:
    target_llrs (list or numpy array): Log-likelihood ratios for target trials.
    nontarget_llrs (list or numpy array): Log-likelihood ratios for non-target trials.

    Returns:
    float: The calculated CLLR value.
    """
    def _neg_log_sigmoid(lodds):
        # -log(sigmoid(x)) == log(1 + exp(-x)); log1p keeps it stable
        # for large positive log-odds.
        return np.log1p(np.exp(-lodds))

    target_llrs = np.array(target_llrs)
    nontarget_llrs = np.array(nontarget_llrs)

    # Average of the two per-class costs, converted to bits via log(2).
    cllr = 0.5 * (np.mean(_neg_log_sigmoid(target_llrs))
                  + np.mean(_neg_log_sigmoid(-nontarget_llrs))) / np.log(2)
    return cllr
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
# NOTE(review): this is a byte-identical duplicate of
# compute_Pmiss_Pfa_Pspoof_curves defined earlier in this module; this
# second definition silently shadows the first. One should be deleted.
def compute_Pmiss_Pfa_Pspoof_curves(tar_scores, non_scores, spf_scores):
    """Compute miss / false-alarm curves for target, nontarget and spoof trials.

    Returns ``(Pmiss, Pfa_non, Pfa_spoof, thresholds)`` evaluated at every
    pooled score (plus one threshold just below the minimum score).
    """
    # Pool all scores; arbitrary labels 1=target, 0=nontarget, -1=spoof.
    all_scores = np.concatenate((tar_scores, non_scores, spf_scores))
    labels = np.concatenate((np.ones(tar_scores.size),
                             np.zeros(non_scores.size),
                             -1 * np.ones(spf_scores.size)))

    # Stable sort of labels by ascending score.
    indices = np.argsort(all_scores, kind='mergesort')
    labels = labels[indices]

    # Per-class counts at or below each threshold.
    tar_sums = np.cumsum(labels == 1)
    non_sums = np.cumsum(labels == 0)
    spoof_sums = np.cumsum(labels == -1)

    Pmiss = np.concatenate((np.atleast_1d(0), tar_sums / tar_scores.size))
    Pfa_non = np.concatenate((np.atleast_1d(1), 1 - (non_sums / non_scores.size)))
    Pfa_spoof = np.concatenate((np.atleast_1d(1), 1 - (spoof_sums / spf_scores.size)))
    # Thresholds are the sorted scores, prefixed by one just below the minimum.
    thresholds = np.concatenate((np.atleast_1d(all_scores[indices[0]] - 0.001),
                                 all_scores[indices]))

    return Pmiss, Pfa_non, Pfa_spoof, thresholds
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
def compute_teer(Pmiss_CM, Pfa_CM, tau_CM, Pmiss_ASV, Pfa_non_ASV, Pfa_spf_ASV, tau_ASV):
    """Compute the tandem equal error rate (t-EER, in percent) of a CM + ASV system.

    Sweeps all ASV thresholds, pairs each with the CM threshold that balances
    tandem miss and false-alarm rates, and tracks the intersection point of the
    t-EER paths across spoof-prevalence priors rho in {0, 0.5, 1}.

    Parameters:
        Pmiss_CM, Pfa_CM (numpy arrays): CM miss / false-alarm curves over tau_CM.
        tau_CM (numpy array): CM thresholds.
        Pmiss_ASV, Pfa_non_ASV, Pfa_spf_ASV (numpy arrays): ASV miss, nontarget-FA
            and spoof-FA curves over tau_ASV.
        tau_ASV (numpy array): ASV thresholds.

    Returns:
        float: concurrent t-EER at the intersection point, scaled to percent.

    NOTE(review): ``xpoint_tEER`` is only assigned inside the allowed-region
    branch; if that branch never executes, the final return raises
    UnboundLocalError. Also, the returned value comes from the last rho
    iteration (rho = 1). Both match the upstream t-EER reference code —
    confirm intentional before relying on edge cases.
    """
    # Different spoofing prevalence priors (rho) parameters values
    rho_vals = [0,0.5,1]

    tEER_val = np.empty([len(rho_vals),len(tau_ASV)], dtype=float)

    for rho_idx, rho_spf in enumerate(rho_vals):

        # Table to store the CM threshold index, per each of the ASV operating points
        tEER_idx_CM = np.empty(len(tau_ASV), dtype=int)

        tEER_path = np.empty([len(rho_vals),len(tau_ASV),2], dtype=float)

        # Tables to store the t-EER, total Pfa and total miss valuees along the t-EER path
        Pmiss_total = np.empty(len(tau_ASV), dtype=float)
        Pfa_total = np.empty(len(tau_ASV), dtype=float)
        min_tEER = np.inf
        argmin_tEER = np.empty(2)

        # best intersection point
        xpoint_crit_best = np.inf
        xpoint = np.empty(2)

        # Loop over all possible ASV thresholds
        for tau_ASV_idx, tau_ASV_val in enumerate(tau_ASV):

            # Tandem miss and fa rates as defined in the manuscript
            Pmiss_tdm = Pmiss_CM + (1 - Pmiss_CM) * Pmiss_ASV[tau_ASV_idx]
            Pfa_tdm = (1 - rho_spf) * (1 - Pmiss_CM) * Pfa_non_ASV[tau_ASV_idx] + rho_spf * Pfa_CM * Pfa_spf_ASV[tau_ASV_idx]

            # Store only the INDEX of the CM threshold (for the current ASV threshold)
            # that best balances tandem miss vs. tandem false alarm
            h = Pmiss_tdm - Pfa_tdm
            tmp = np.argmin(abs(h))
            tEER_idx_CM[tau_ASV_idx] = tmp

            # Only operating points in the "allowed region" contribute to the path
            if Pmiss_ASV[tau_ASV_idx] < (1 - rho_spf) * Pfa_non_ASV[tau_ASV_idx] + rho_spf * Pfa_spf_ASV[tau_ASV_idx]:
                Pmiss_total[tau_ASV_idx] = Pmiss_tdm[tmp]
                Pfa_total[tau_ASV_idx] = Pfa_tdm[tmp]

                # t-EER at this operating point: mean of balanced miss/FA
                tEER_val[rho_idx,tau_ASV_idx] = np.mean([Pfa_total[tau_ASV_idx], Pmiss_total[tau_ASV_idx]])

                tEER_path[rho_idx,tau_ASV_idx, 0] = tau_ASV_val
                tEER_path[rho_idx,tau_ASV_idx, 1] = tau_CM[tmp]

                # Track the minimum t-EER and its (ASV, CM) threshold pair
                if tEER_val[rho_idx,tau_ASV_idx] < min_tEER:
                    min_tEER = tEER_val[rho_idx,tau_ASV_idx]
                    argmin_tEER[0] = tau_ASV_val
                    argmin_tEER[1] = tau_CM[tmp]

                # Check how close we are to the INTERSECTION POINT for different prior (rho) values:
                LHS = Pfa_non_ASV[tau_ASV_idx]/Pfa_spf_ASV[tau_ASV_idx]
                RHS = Pfa_CM[tmp]/(1 - Pmiss_CM[tmp])
                crit = abs(LHS - RHS)

                if crit < xpoint_crit_best:
                    xpoint_crit_best = crit
                    xpoint[0] = tau_ASV_val
                    xpoint[1] = tau_CM[tmp]
                    # Concurrent t-EER at the (approximate) intersection point
                    xpoint_tEER = Pfa_spf_ASV[tau_ASV_idx]*Pfa_CM[tmp]
            else:
                # Not in allowed region
                tEER_path[rho_idx,tau_ASV_idx, 0] = np.nan
                tEER_path[rho_idx,tau_ASV_idx, 1] = np.nan
                Pmiss_total[tau_ASV_idx] = np.nan
                Pfa_total[tau_ASV_idx] = np.nan
                tEER_val[rho_idx,tau_ASV_idx] = np.nan

    # Scale the intersection-point t-EER to percent
    return xpoint_tEER*100
|
docker-compose.yml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Compose stack for the deepfake-detection demo: a uvicorn-served backend
# plus an nginx container that hosts the static Web front-end.
version: '3.8'

services:
  backend:
    build: .                 # image built from the local Dockerfile
    ports:
      - "8000:8000"          # expose the API on host port 8000
    volumes:
      - .:/app               # mount the project directory into the container
    command: uvicorn main:app --host 0.0.0.0 --port 8000

  frontend:
    image: nginx:alpine
    ports:
      - "80:80"
    volumes:
      # Static assets served directly by nginx
      - ./index.html:/usr/share/nginx/html/index.html
      - ./script.js:/usr/share/nginx/html/script.js
      - ./recorder.js:/usr/share/nginx/html/recorder.js
    depends_on:
      - backend              # controls start order only, not readiness
|
model_utils.py
ADDED
|
@@ -0,0 +1,671 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
AASIST
|
| 3 |
+
Copyright (c) 2021-present NAVER Corp.
|
| 4 |
+
MIT license
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import random
|
| 8 |
+
from typing import Union
|
| 9 |
+
|
| 10 |
+
import numpy as np
|
| 11 |
+
import torch
|
| 12 |
+
import torch.nn as nn
|
| 13 |
+
import torch.nn.functional as F
|
| 14 |
+
from torch import Tensor
|
| 15 |
+
|
| 16 |
+
import json
|
| 17 |
+
import torchaudio
|
| 18 |
+
import numpy as np
|
| 19 |
+
|
| 20 |
+
# Ensure that the Model class and all related components (GraphAttentionLayer, etc.) are defined here
|
| 21 |
+
# Placeholder for dependencies
|
| 22 |
+
# class Model(nn.Module):
|
| 23 |
+
# def __init__(self, d_args):
|
| 24 |
+
# # Your model implementation
|
| 25 |
+
# pass
|
| 26 |
+
|
| 27 |
+
# Function to load configuration
|
| 28 |
+
def load_config(config_path):
    """Read a JSON configuration file and return its contents as a dict."""
    with open(config_path, 'r') as cfg_file:
        parsed = json.load(cfg_file)
    return parsed
|
| 31 |
+
|
| 32 |
+
# Function to load the model
|
| 33 |
+
def load_model(checkpoint_path, d_args):
    """Instantiate the AASIST ``Model`` and restore weights from a checkpoint.

    Args:
        checkpoint_path: path to a saved ``state_dict`` file.
        d_args: architecture config dict forwarded to ``Model``.

    Returns:
        The model, in eval mode, with the checkpoint weights loaded.

    Raises:
        Re-raises any failure from ``torch.load`` / ``load_state_dict``.
    """
    net = Model(d_args)
    try:
        # Load checkpoint on CPU so this also works on GPU-less hosts
        checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))
        net.load_state_dict(checkpoint)
        print("Model loaded successfully.")
    except Exception as e:
        print(f"Error loading model: {e}")
        raise
    net.eval()
    return net
|
| 45 |
+
|
| 46 |
+
# Preprocess audio
|
| 47 |
+
def preprocess_audio(audio_path, sample_rate=16000):
    """Load an audio file and return a mono waveform at ``sample_rate`` Hz.

    Args:
        audio_path: path to the audio file to load.
        sample_rate: target sampling rate (default 16 kHz).

    Returns:
        torch.Tensor of shape (1, num_samples): mono waveform.

    Raises:
        Re-raises any failure from loading or resampling, after logging it.
    """
    try:
        waveform, sr = torchaudio.load(audio_path)
        print(f"Loaded audio: {audio_path}, Sample Rate: {sr}")
        # Resample only when the file's rate differs from the target
        if sr != sample_rate:
            resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=sample_rate)
            waveform = resampler(waveform)
        # Convert to mono if stereo
        if waveform.size(0) > 1:
            waveform = torch.mean(waveform, dim=0, keepdim=True)
        return waveform
    except Exception as e:
        print(f"Error in audio preprocessing: {e}")
        raise
|
| 60 |
+
|
| 61 |
+
# Inference function
|
| 62 |
+
def infer(model, waveform, freq_aug=False):
    """Run the anti-spoofing model on a waveform and return the argmax class.

    Args:
        model: callable returning ``(last_hidden, output)`` given a waveform
            and a ``Freq_aug`` keyword.
        waveform: input tensor (batch of one utterance).
        freq_aug: whether to enable frequency-masking augmentation.

    Returns:
        (predicted_label, output): argmax class index and the raw model output.

    Raises:
        ValueError: if the model produced ``None`` as its output.
    """
    try:
        with torch.no_grad():
            last_hidden, output = model(waveform, Freq_aug=freq_aug)
        print("Model output:", output)
        if output is None:
            raise ValueError("Model output is None.")
        label_idx = torch.argmax(output, dim=1).item()
        return label_idx, output
    except Exception as e:
        print(f"Error during inference: {e}")
        raise
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
class GraphAttentionLayer(nn.Module):
    """Graph attention over a fully-connected graph of nodes.

    Builds a pairwise attention map between all nodes, mixes node features
    through it, adds an attention-free projection of each node, then applies
    batch norm and SELU.
    """

    def __init__(self, in_dim, out_dim, **kwargs):
        """
        Args:
            in_dim: input node feature dimension.
            out_dim: output node feature dimension.
            **kwargs: optional "temperature" (float) scaling the attention
                logits before the softmax.
        """
        super().__init__()

        # attention map
        self.att_proj = nn.Linear(in_dim, out_dim)
        self.att_weight = self._init_new_params(out_dim, 1)

        # project
        self.proj_with_att = nn.Linear(in_dim, out_dim)
        self.proj_without_att = nn.Linear(in_dim, out_dim)

        # batch norm
        self.bn = nn.BatchNorm1d(out_dim)

        # dropout for inputs
        self.input_drop = nn.Dropout(p=0.2)

        # activate
        self.act = nn.SELU(inplace=True)

        # temperature (1.0 unless overridden via kwargs)
        self.temp = 1.
        if "temperature" in kwargs:
            self.temp = kwargs["temperature"]

    def forward(self, x):
        '''
        x :(#bs, #node, #dim)
        '''
        # apply input dropout
        x = self.input_drop(x)

        # derive attention map
        att_map = self._derive_att_map(x)

        # projection
        x = self._project(x, att_map)

        # apply batch norm
        x = self._apply_BN(x)
        x = self.act(x)
        return x

    def _pairwise_mul_nodes(self, x):
        '''
        Calculates pairwise multiplication of nodes.
        - for attention map
        x :(#bs, #node, #dim)
        out_shape :(#bs, #node, #node, #dim)
        '''

        nb_nodes = x.size(1)
        x = x.unsqueeze(2).expand(-1, -1, nb_nodes, -1)
        x_mirror = x.transpose(1, 2)

        # element-wise product of every (node_i, node_j) feature pair
        return x * x_mirror

    def _derive_att_map(self, x):
        '''
        x :(#bs, #node, #dim)
        out_shape :(#bs, #node, #node, 1)
        '''
        att_map = self._pairwise_mul_nodes(x)
        # size: (#bs, #node, #node, #dim_out)
        att_map = torch.tanh(self.att_proj(att_map))
        # size: (#bs, #node, #node, 1)
        att_map = torch.matmul(att_map, self.att_weight)

        # apply temperature
        att_map = att_map / self.temp

        # normalize over the source-node axis (dim=-2)
        att_map = F.softmax(att_map, dim=-2)

        return att_map

    def _project(self, x, att_map):
        # attention-weighted mix of node features plus a plain projection
        x1 = self.proj_with_att(torch.matmul(att_map.squeeze(-1), x))
        x2 = self.proj_without_att(x)

        return x1 + x2

    def _apply_BN(self, x):
        # flatten nodes into the batch axis so BatchNorm1d normalizes per-feature
        org_size = x.size()
        x = x.view(-1, org_size[-1])
        x = self.bn(x)
        x = x.view(org_size)

        return x

    def _init_new_params(self, *size):
        # Xavier-initialized free parameter of the given shape
        out = nn.Parameter(torch.FloatTensor(*size))
        nn.init.xavier_normal_(out)
        return out
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
class HtrgGraphAttentionLayer(nn.Module):
    """Heterogeneous graph attention over two node types plus a master node.

    Projects the two node sets (e.g. spectral and temporal nodes) into a
    shared space, attends across and within the two types with separate
    attention weights, and maintains a "master" summary node updated from
    all nodes.
    """

    def __init__(self, in_dim, out_dim, **kwargs):
        """
        Args:
            in_dim: input node feature dimension (both node types).
            out_dim: output node feature dimension.
            **kwargs: optional "temperature" (float) scaling attention logits.
        """
        super().__init__()

        # per-type input projections into a shared space
        self.proj_type1 = nn.Linear(in_dim, in_dim)
        self.proj_type2 = nn.Linear(in_dim, in_dim)

        # attention map
        self.att_proj = nn.Linear(in_dim, out_dim)
        self.att_projM = nn.Linear(in_dim, out_dim)

        # separate attention weights per (type, type) block;
        # NOTE(review): att_weight12 is used for BOTH cross-type blocks
        # (1->2 and 2->1) in _derive_att_map — matches upstream AASIST; confirm.
        self.att_weight11 = self._init_new_params(out_dim, 1)
        self.att_weight22 = self._init_new_params(out_dim, 1)
        self.att_weight12 = self._init_new_params(out_dim, 1)
        self.att_weightM = self._init_new_params(out_dim, 1)

        # project
        self.proj_with_att = nn.Linear(in_dim, out_dim)
        self.proj_without_att = nn.Linear(in_dim, out_dim)

        # master-node projections
        self.proj_with_attM = nn.Linear(in_dim, out_dim)
        self.proj_without_attM = nn.Linear(in_dim, out_dim)

        # batch norm
        self.bn = nn.BatchNorm1d(out_dim)

        # dropout for inputs
        self.input_drop = nn.Dropout(p=0.2)

        # activate
        self.act = nn.SELU(inplace=True)

        # temperature (1.0 unless overridden via kwargs)
        self.temp = 1.
        if "temperature" in kwargs:
            self.temp = kwargs["temperature"]

    def forward(self, x1, x2, master=None):
        '''
        x1 :(#bs, #node, #dim)
        x2 :(#bs, #node, #dim)
        '''
        num_type1 = x1.size(1)
        num_type2 = x2.size(1)

        x1 = self.proj_type1(x1)
        x2 = self.proj_type2(x2)

        # concatenate both node types into one graph
        x = torch.cat([x1, x2], dim=1)

        # default master node: mean over all nodes
        if master is None:
            master = torch.mean(x, dim=1, keepdim=True)

        # apply input dropout
        x = self.input_drop(x)

        # derive attention map
        att_map = self._derive_att_map(x, num_type1, num_type2)

        # directional edge for master node
        master = self._update_master(x, master)

        # projection
        x = self._project(x, att_map)

        # apply batch norm
        x = self._apply_BN(x)
        x = self.act(x)

        # split the combined graph back into the two node types
        x1 = x.narrow(1, 0, num_type1)
        x2 = x.narrow(1, num_type1, num_type2)

        return x1, x2, master

    def _update_master(self, x, master):
        # attend from all nodes to the master node, then project
        att_map = self._derive_att_map_master(x, master)
        master = self._project_master(x, master, att_map)

        return master

    def _pairwise_mul_nodes(self, x):
        '''
        Calculates pairwise multiplication of nodes.
        - for attention map
        x :(#bs, #node, #dim)
        out_shape :(#bs, #node, #node, #dim)
        '''

        nb_nodes = x.size(1)
        x = x.unsqueeze(2).expand(-1, -1, nb_nodes, -1)
        x_mirror = x.transpose(1, 2)

        return x * x_mirror

    def _derive_att_map_master(self, x, master):
        '''
        x :(#bs, #node, #dim)
        out_shape :(#bs, #node, #node, 1)
        '''
        # node-vs-master interaction scores
        att_map = x * master
        att_map = torch.tanh(self.att_projM(att_map))

        att_map = torch.matmul(att_map, self.att_weightM)

        # apply temperature
        att_map = att_map / self.temp

        att_map = F.softmax(att_map, dim=-2)

        return att_map

    def _derive_att_map(self, x, num_type1, num_type2):
        '''
        x :(#bs, #node, #dim)
        out_shape :(#bs, #node, #node, 1)
        '''
        att_map = self._pairwise_mul_nodes(x)
        # size: (#bs, #node, #node, #dim_out)
        att_map = torch.tanh(self.att_proj(att_map))
        # size: (#bs, #node, #node, 1)

        att_board = torch.zeros_like(att_map[:, :, :, 0]).unsqueeze(-1)

        # score each quadrant of the attention map with its own weight vector
        att_board[:, :num_type1, :num_type1, :] = torch.matmul(
            att_map[:, :num_type1, :num_type1, :], self.att_weight11)
        att_board[:, num_type1:, num_type1:, :] = torch.matmul(
            att_map[:, num_type1:, num_type1:, :], self.att_weight22)
        att_board[:, :num_type1, num_type1:, :] = torch.matmul(
            att_map[:, :num_type1, num_type1:, :], self.att_weight12)
        att_board[:, num_type1:, :num_type1, :] = torch.matmul(
            att_map[:, num_type1:, :num_type1, :], self.att_weight12)

        att_map = att_board

        # att_map = torch.matmul(att_map, self.att_weight12)

        # apply temperature
        att_map = att_map / self.temp

        att_map = F.softmax(att_map, dim=-2)

        return att_map

    def _project(self, x, att_map):
        # attention-weighted mix of node features plus a plain projection
        x1 = self.proj_with_att(torch.matmul(att_map.squeeze(-1), x))
        x2 = self.proj_without_att(x)

        return x1 + x2

    def _project_master(self, x, master, att_map):
        # weighted sum of all nodes (attention) plus a projection of the old master
        x1 = self.proj_with_attM(torch.matmul(
            att_map.squeeze(-1).unsqueeze(1), x))
        x2 = self.proj_without_attM(master)

        return x1 + x2

    def _apply_BN(self, x):
        # flatten nodes into the batch axis so BatchNorm1d normalizes per-feature
        org_size = x.size()
        x = x.view(-1, org_size[-1])
        x = self.bn(x)
        x = x.view(org_size)

        return x

    def _init_new_params(self, *size):
        # Xavier-initialized free parameter of the given shape
        out = nn.Parameter(torch.FloatTensor(*size))
        nn.init.xavier_normal_(out)
        return out
|
| 342 |
+
|
| 343 |
+
|
| 344 |
+
class GraphPool(nn.Module):
    """Attention-based graph pooling: keep the top-k fraction of nodes.

    Each node is scored by a learned linear projection squashed through a
    sigmoid; the highest-scoring ``k`` fraction of nodes (at least one) is
    kept, with their features scaled by the scores.
    """

    def __init__(self, k: float, in_dim: int, p: Union[float, int]):
        """
        Args:
            k: fraction of nodes to keep (0 < k <= 1).
            in_dim: node feature dimension.
            p: dropout probability applied before scoring (0 disables it).
        """
        super().__init__()
        self.k = k
        self.sigmoid = nn.Sigmoid()
        self.proj = nn.Linear(in_dim, 1)
        self.drop = nn.Dropout(p=p) if p > 0 else nn.Identity()
        self.in_dim = in_dim

    def forward(self, h):
        dropped = self.drop(h)
        node_scores = self.sigmoid(self.proj(dropped))
        return self.top_k_graph(node_scores, h, self.k)

    def top_k_graph(self, scores, h, k):
        """
        args
        =====
        scores: attention-based weights (#bs, #node, 1)
        h: graph data (#bs, #node, #dim)
        k: ratio of remaining nodes, (float)

        returns
        =====
        h: graph pool applied data (#bs, #node', #dim)
        """
        _, n_nodes, n_feat = h.size()
        # keep at least one node regardless of how small k is
        keep = max(int(n_nodes * k), 1)
        _, top_idx = torch.topk(scores, keep, dim=1)
        top_idx = top_idx.expand(-1, -1, n_feat)

        weighted = h * scores
        return torch.gather(weighted, 1, top_idx)
|
| 382 |
+
|
| 383 |
+
|
| 384 |
+
class CONV(nn.Module):
    """Fixed sinc-filterbank 1-D convolution front-end (SincConv-style).

    Builds a bank of mel-spaced band-pass FIR filters (Hamming-windowed
    difference of sinc low-passes) at construction time and applies them as
    a 1-D convolution. Optionally zeroes a random contiguous band of filters
    at forward time (frequency-masking augmentation).
    """

    @staticmethod
    def to_mel(hz):
        # Hz -> mel scale
        return 2595 * np.log10(1 + hz / 700)

    @staticmethod
    def to_hz(mel):
        # mel -> Hz scale
        return 700 * (10**(mel / 2595) - 1)

    def __init__(self,
                 out_channels,
                 kernel_size,
                 sample_rate=16000,
                 in_channels=1,
                 stride=1,
                 padding=0,
                 dilation=1,
                 bias=False,
                 groups=1,
                 mask=False):
        """
        Args:
            out_channels: number of band-pass filters.
            kernel_size: FIR filter length (forced odd for symmetry).
            sample_rate: sampling rate in Hz used to place the mel bands.
            in_channels: must be 1 (single-channel audio).
            stride, padding, dilation: standard conv1d arguments.
            bias, groups: unsupported; must stay False / 1.
            mask: stored but unused here; masking is controlled per-forward.
        """
        super().__init__()
        if in_channels != 1:

            msg = "SincConv only support one input channel (here, in_channels = {%i})" % (
                in_channels)
            raise ValueError(msg)
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.sample_rate = sample_rate

        # Forcing the filters to be odd (i.e, perfectly symmetrics)
        if kernel_size % 2 == 0:
            self.kernel_size = self.kernel_size + 1
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.mask = mask
        if bias:
            raise ValueError('SincConv does not support bias.')
        if groups > 1:
            raise ValueError('SincConv does not support groups.')

        # Mel-spaced band edges: out_channels + 1 cut-off frequencies in Hz
        NFFT = 512
        f = int(self.sample_rate / 2) * np.linspace(0, 1, int(NFFT / 2) + 1)
        fmel = self.to_mel(f)
        fmelmax = np.max(fmel)
        fmelmin = np.min(fmel)
        filbandwidthsmel = np.linspace(fmelmin, fmelmax, self.out_channels + 1)
        filbandwidthsf = self.to_hz(filbandwidthsmel)

        self.mel = filbandwidthsf
        # symmetric sample support for the FIR filters
        self.hsupp = torch.arange(-(self.kernel_size - 1) / 2,
                                  (self.kernel_size - 1) / 2 + 1)
        self.band_pass = torch.zeros(self.out_channels, self.kernel_size)
        for i in range(len(self.mel) - 1):
            fmin = self.mel[i]
            fmax = self.mel[i + 1]
            # band-pass = high-cut sinc low-pass minus low-cut sinc low-pass
            hHigh = (2*fmax/self.sample_rate) * \
                np.sinc(2*fmax*self.hsupp/self.sample_rate)
            hLow = (2*fmin/self.sample_rate) * \
                np.sinc(2*fmin*self.hsupp/self.sample_rate)
            hideal = hHigh - hLow

            # Hamming window tapers the ideal filter
            self.band_pass[i, :] = Tensor(np.hamming(
                self.kernel_size)) * Tensor(hideal)

    def forward(self, x, mask=False):
        band_pass_filter = self.band_pass.clone().to(x.device)
        if mask:
            # frequency masking: zero a random band of up to 20 adjacent filters
            A = np.random.uniform(0, 20)
            A = int(A)
            A0 = random.randint(0, band_pass_filter.shape[0] - A)
            band_pass_filter[A0:A0 + A, :] = 0
        else:
            # no-op branch kept as in the original source
            band_pass_filter = band_pass_filter

        self.filters = (band_pass_filter).view(self.out_channels, 1,
                                               self.kernel_size)

        return F.conv1d(x,
                        self.filters,
                        stride=self.stride,
                        padding=self.padding,
                        dilation=self.dilation,
                        bias=None,
                        groups=1)
|
| 470 |
+
|
| 471 |
+
|
| 472 |
+
class Residual_block(nn.Module):
    """2-D residual block: (BN + SELU) -> conv -> BN + SELU -> conv, with a
    1x3 conv shortcut when channel counts differ, followed by (1,3) max-pool.
    """

    def __init__(self, nb_filts, first=False):
        """
        Args:
            nb_filts: two-element list [in_channels, out_channels].
            first: if True, skip the leading BN + SELU (used for the first
                block right after the front-end).
        """
        super().__init__()
        self.first = first

        if not self.first:
            self.bn1 = nn.BatchNorm2d(num_features=nb_filts[0])
        self.conv1 = nn.Conv2d(in_channels=nb_filts[0],
                               out_channels=nb_filts[1],
                               kernel_size=(2, 3),
                               padding=(1, 1),
                               stride=1)
        self.selu = nn.SELU(inplace=True)

        self.bn2 = nn.BatchNorm2d(num_features=nb_filts[1])
        self.conv2 = nn.Conv2d(in_channels=nb_filts[1],
                               out_channels=nb_filts[1],
                               kernel_size=(2, 3),
                               padding=(0, 1),
                               stride=1)

        # channel-matching shortcut when in/out channel counts differ
        if nb_filts[0] != nb_filts[1]:
            self.downsample = True
            self.conv_downsample = nn.Conv2d(in_channels=nb_filts[0],
                                             out_channels=nb_filts[1],
                                             padding=(0, 1),
                                             kernel_size=(1, 3),
                                             stride=1)

        else:
            self.downsample = False
        self.mp = nn.MaxPool2d((1, 3))  # self.mp = nn.MaxPool2d((1,4))

    def forward(self, x):
        identity = x
        if not self.first:
            out = self.bn1(x)
            out = self.selu(out)
        else:
            out = x
        # NOTE(review): conv1 is applied to the raw input x, so the bn1/selu
        # result above is discarded. This matches the upstream AASIST code
        # (and the pretrained weights depend on it) — confirm before "fixing".
        out = self.conv1(x)

        # print('out',out.shape)
        out = self.bn2(out)
        out = self.selu(out)
        # print('out',out.shape)
        out = self.conv2(out)
        #print('conv2 out',out.shape)
        if self.downsample:
            identity = self.conv_downsample(identity)

        # residual connection, then temporal max-pool
        out += identity
        out = self.mp(out)
        return out
|
| 526 |
+
|
| 527 |
+
|
| 528 |
+
class Model(nn.Module):
    """AASIST anti-spoofing model.

    Pipeline: sinc-filterbank front-end -> residual encoder -> spectral and
    temporal graph attention branches -> two heterogeneous GAT passes with a
    master node -> pooled readout -> linear classifier -> softmax.
    """

    def __init__(self, d_args):
        """
        Args:
            d_args: config dict with keys "filts", "gat_dims", "pool_ratios",
                "temperatures", "first_conv", and optionally "output_cls"
                (number of output classes; defaults to 2).
        """
        super().__init__()

        self.d_args = d_args
        filts = d_args["filts"]
        gat_dims = d_args["gat_dims"]
        pool_ratios = d_args["pool_ratios"]
        temperatures = d_args["temperatures"]

        # sinc-filterbank front-end over the raw waveform
        self.conv_time = CONV(out_channels=filts[0],
                              kernel_size=d_args["first_conv"],
                              in_channels=1)
        self.first_bn = nn.BatchNorm2d(num_features=1)

        self.drop = nn.Dropout(0.5, inplace=True)
        self.drop_way = nn.Dropout(0.2, inplace=True)
        self.selu = nn.SELU(inplace=True)

        # stack of residual blocks producing (#bs, #filt, #spec, #seq)
        self.encoder = nn.Sequential(
            nn.Sequential(Residual_block(nb_filts=filts[1], first=True)),
            nn.Sequential(Residual_block(nb_filts=filts[2])),
            nn.Sequential(Residual_block(nb_filts=filts[3])),
            nn.Sequential(Residual_block(nb_filts=filts[4])),
            nn.Sequential(Residual_block(nb_filts=filts[4])),
            nn.Sequential(Residual_block(nb_filts=filts[4])))

        # learned positional embedding for the spectral nodes
        # NOTE(review): hard-codes 23 spectral nodes — presumably tied to the
        # configured input length (nb_samp) after the encoder; confirm.
        self.pos_S = nn.Parameter(torch.randn(1, 23, filts[-1][-1]))
        self.master1 = nn.Parameter(torch.randn(1, 1, gat_dims[0]))
        self.master2 = nn.Parameter(torch.randn(1, 1, gat_dims[0]))

        self.GAT_layer_S = GraphAttentionLayer(filts[-1][-1],
                                               gat_dims[0],
                                               temperature=temperatures[0])
        self.GAT_layer_T = GraphAttentionLayer(filts[-1][-1],
                                               gat_dims[0],
                                               temperature=temperatures[1])

        self.HtrgGAT_layer_ST11 = HtrgGraphAttentionLayer(
            gat_dims[0], gat_dims[1], temperature=temperatures[2])
        self.HtrgGAT_layer_ST12 = HtrgGraphAttentionLayer(
            gat_dims[1], gat_dims[1], temperature=temperatures[2])

        self.HtrgGAT_layer_ST21 = HtrgGraphAttentionLayer(
            gat_dims[0], gat_dims[1], temperature=temperatures[2])

        self.HtrgGAT_layer_ST22 = HtrgGraphAttentionLayer(
            gat_dims[1], gat_dims[1], temperature=temperatures[2])

        self.pool_S = GraphPool(pool_ratios[0], gat_dims[0], 0.3)
        self.pool_T = GraphPool(pool_ratios[1], gat_dims[0], 0.3)
        self.pool_hS1 = GraphPool(pool_ratios[2], gat_dims[1], 0.3)
        self.pool_hT1 = GraphPool(pool_ratios[2], gat_dims[1], 0.3)

        self.pool_hS2 = GraphPool(pool_ratios[2], gat_dims[1], 0.3)
        self.pool_hT2 = GraphPool(pool_ratios[2], gat_dims[1], 0.3)

        # classifier over [T_max, T_avg, S_max, S_avg, master]
        if "output_cls" in d_args:
            self.out_layer = nn.Linear(5 * gat_dims[1], d_args["output_cls"])
        else:
            self.out_layer = nn.Linear(5 * gat_dims[1], 2)

    def forward(self, x, Freq_aug=False):
        """Return (last_hidden, output) for a batch of raw waveforms.

        Args:
            x: (#bs, #samples) raw waveform batch.
            Freq_aug: enable frequency masking in the sinc front-end.

        Returns:
            last_hidden: (#bs, 5 * gat_dims[1]) pooled embedding.
            output: (#bs, n_cls) softmax probabilities (not logits).
        """
        x = x.unsqueeze(1)
        x = self.conv_time(x, mask=Freq_aug)
        x = x.unsqueeze(dim=1)
        x = F.max_pool2d(torch.abs(x), (3, 3))
        x = self.first_bn(x)
        x = self.selu(x)

        # get embeddings using encoder
        # (#bs, #filt, #spec, #seq)
        e = self.encoder(x)

        # spectral GAT (GAT-S)
        e_S, _ = torch.max(torch.abs(e), dim=3)  # max along time
        e_S = e_S.transpose(1, 2) + self.pos_S

        gat_S = self.GAT_layer_S(e_S)
        out_S = self.pool_S(gat_S)  # (#bs, #node, #dim)

        # temporal GAT (GAT-T)
        e_T, _ = torch.max(torch.abs(e), dim=2)  # max along freq
        e_T = e_T.transpose(1, 2)

        gat_T = self.GAT_layer_T(e_T)
        out_T = self.pool_T(gat_T)

        # learnable master node
        # NOTE(review): these batch-expanded tensors are never passed to the
        # GAT layers below — the raw parameters self.master1/self.master2 are.
        # This matches upstream AASIST; confirm intentional.
        master1 = self.master1.expand(x.size(0), -1, -1)
        master2 = self.master2.expand(x.size(0), -1, -1)

        # inference 1
        out_T1, out_S1, master1 = self.HtrgGAT_layer_ST11(
            out_T, out_S, master=self.master1)

        out_S1 = self.pool_hS1(out_S1)
        out_T1 = self.pool_hT1(out_T1)

        # second pass with residual connections
        out_T_aug, out_S_aug, master_aug = self.HtrgGAT_layer_ST12(
            out_T1, out_S1, master=master1)
        out_T1 = out_T1 + out_T_aug
        out_S1 = out_S1 + out_S_aug
        master1 = master1 + master_aug

        # inference 2
        out_T2, out_S2, master2 = self.HtrgGAT_layer_ST21(
            out_T, out_S, master=self.master2)
        out_S2 = self.pool_hS2(out_S2)
        out_T2 = self.pool_hT2(out_T2)

        out_T_aug, out_S_aug, master_aug = self.HtrgGAT_layer_ST22(
            out_T2, out_S2, master=master2)
        out_T2 = out_T2 + out_T_aug
        out_S2 = out_S2 + out_S_aug
        master2 = master2 + master_aug

        # stochastic branch dropout, then element-wise max over the two branches
        out_T1 = self.drop_way(out_T1)
        out_T2 = self.drop_way(out_T2)
        out_S1 = self.drop_way(out_S1)
        out_S2 = self.drop_way(out_S2)
        master1 = self.drop_way(master1)
        master2 = self.drop_way(master2)

        out_T = torch.max(out_T1, out_T2)
        out_S = torch.max(out_S1, out_S2)
        master = torch.max(master1, master2)

        # readout: max and mean over nodes for each branch
        T_max, _ = torch.max(torch.abs(out_T), dim=1)
        T_avg = torch.mean(out_T, dim=1)

        S_max, _ = torch.max(torch.abs(out_S), dim=1)
        S_avg = torch.mean(out_S, dim=1)

        last_hidden = torch.cat(
            [T_max, T_avg, S_max, S_avg, master.squeeze(1)], dim=1)

        last_hidden = self.drop(last_hidden)
        output = self.out_layer(last_hidden)

        # NOTE(review): softmax applied here, so "output" holds probabilities;
        # a downstream CrossEntropyLoss would double-softmax — confirm usage.
        output=F.softmax(output,dim=1)

        return last_hidden, output
|