Add Web WASM support and Update README

#5
This view is limited to 50 files because it contains too many changes.  See the raw diff here.
Files changed (50) hide show
  1. .gitattributes +0 -6
  2. LICENSE +0 -46
  3. NOTICES +0 -68
  4. README.md +35 -181
  5. examples/.gitattributes +1 -0
  6. examples/CMakeLists.txt +0 -6
  7. examples/build-and-deploy-android.sh +0 -6
  8. examples/build-and-deploy-ios.sh +0 -6
  9. examples/build-and-deploy-linux.sh +0 -6
  10. examples/build-and-deploy-mac.sh +0 -6
  11. examples/build-and-deploy-windows.bat +0 -7
  12. examples/images/.gitattributes +2 -0
  13. examples/main.c +6 -14
  14. examples/plot_pr_curves.py +4 -5
  15. examples/sample_array.h +0 -6
  16. examples/test.py +3 -4
  17. examples/test_node.js +0 -529
  18. examples_onnx/CMakeLists.txt +0 -24
  19. examples_onnx/build-and-deploy-linux.sh +0 -37
  20. include/ten_vad.h +3 -4
  21. include/ten_vad.py +17 -55
  22. lib/Web/ten_vad.d.ts +0 -111
  23. lib/Web/ten_vad.js +0 -30
  24. lib/Windows/x64/ten_vad.lib +0 -0
  25. lib/Windows/x86/ten_vad.lib +0 -0
  26. lib/iOS/ten_vad.framework/Headers/ten_vad.h +3 -90
  27. lib/iOS/ten_vad.framework/Info.plist +0 -0
  28. lib/iOS/ten_vad.framework/Modules/module.modulemap +3 -5
  29. lib/macOS/ten_vad.framework/Headers +0 -1
  30. lib/{Web/ten_vad.wasm → macOS/ten_vad.framework/Headers/ten_vad.h} +2 -2
  31. lib/macOS/ten_vad.framework/Resources +0 -1
  32. src/onnx_model/ten-vad.onnx → lib/macOS/ten_vad.framework/Resources/Info.plist +2 -2
  33. lib/macOS/ten_vad.framework/Versions/A/Headers/ten_vad.h +3 -90
  34. lib/macOS/ten_vad.framework/Versions/A/Resources/Info.plist +3 -44
  35. lib/macOS/ten_vad.framework/Versions/Current +0 -1
  36. lib/macOS/ten_vad.framework/Versions/Current/Headers/ten_vad.h +3 -0
  37. lib/macOS/ten_vad.framework/Versions/Current/Resources/Info.plist +3 -0
  38. lib/macOS/ten_vad.framework/Versions/Current/ten_vad +3 -0
  39. lib/macOS/ten_vad.framework/ten_vad +0 -1
  40. lib/macOS/ten_vad.framework/ten_vad +3 -0
  41. setup.py +3 -26
  42. src/aed.cc +0 -993
  43. src/aed.h +0 -226
  44. src/aed_st.h +0 -132
  45. src/biquad.cc +0 -354
  46. src/biquad.h +0 -190
  47. src/biquad_st.h +0 -37
  48. src/coeff.h +0 -246
  49. src/fftw.c +0 -0
  50. src/fftw.h +0 -47
.gitattributes CHANGED
@@ -33,9 +33,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
- *.so filter=lfs diff=lfs merge=lfs -text
37
- *.dll filter=lfs diff=lfs merge=lfs -text
38
- ten_vad filter=lfs diff=lfs merge=lfs -text
39
- *.png filter=lfs diff=lfs merge=lfs -text
40
- *.scv filter=lfs diff=lfs merge=lfs -text
41
- *.wav filter=lfs diff=lfs merge=lfs -text
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
LICENSE DELETED
@@ -1,46 +0,0 @@
1
- Open Source License
2
-
3
- The ten-vad is licensed pursuant to the Apache License v2.0(provided
4
- for your convenience below), with the following additional conditions.
5
- You may reproduce, prepare Derivative Works of, publicly display, publicly perform,
6
- sublicense, distribute, or otherwise make available (together, "Deploy") the ten-vad,
7
- for commercial or non-commercial purposes, provided that you agree to abide by the terms below:
8
-
9
- 1. You may not Deploy the ten-vad in a way that competes with Agora's
10
- offerings and/or that allows others to compete with Agora's offerings,
11
- including without limitation enabling any third party to develop or
12
- deploy Applications.
13
-
14
- 2. You may Deploy the ten-vad solely to create and enable deployment
15
- of your Application(s) solely for your benefit and the benefit of your
16
- direct End Users. If you prefer, you may include the following notice in
17
- the documentation of your Application(s): "Powered by ten-vad".
18
-
19
- 3. Derivative Works of the ten-vad remain subject to this Open Source
20
- License.
21
-
22
- 4. "End Users" shall mean the end-users of your Application(s) who access
23
- the ten-vad solely to the extent necessary to access and use the
24
- Application(s) you create or deploy using ten-vad.
25
-
26
- 5. "Application(s)" shall mean your software programs designed or developed
27
- by using the ten-vad or where deployment is enabled by the ten-vad.
28
-
29
- 6. Note that the project contains derived code from other open source project
30
- with BSD-3-Clause and BSD-2-Clause license, refer to the "NOTICES"
31
- file in the root directory for detailed information.
32
-
33
- Copyright © 2025 Agora
34
-
35
- Licensed under the Apache License, Version 2.0 (the "License");
36
- you may not use this file except in compliance with the License.
37
- You may obtain a copy of the License at
38
-
39
- http://www.apache.org/licenses/LICENSE-2.0
40
-
41
- Unless required by applicable law or agreed to in writing, software
42
- distributed under the License is distributed on an "AS IS" BASIS,
43
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
44
- See the License for the specific language governing permissions and
45
- limitations under the License.
46
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
NOTICES DELETED
@@ -1,68 +0,0 @@
1
- This project includes modified code from the following third-party component:
2
-
3
- 1. File: lpcnet_enc.c
4
- - Source: LPCNet (https:github.com/xiph/LPCNet)
5
- - License: BSD-2-Clause
6
- - Copyright: 2017-2019, Mozilla
7
- - Original License Text:
8
- Copyright (c) 2017-2019 Mozilla
9
-
10
- Redistribution and use in source and binary forms, with or without modification,
11
- are permitted provided that the following conditions are met:
12
-
13
- - Redistributions of source code must retain the above copyright notice,
14
- this list of conditions and the following disclaimer.
15
-
16
- - Redistributions in binary form must reproduce the above copyright notice,
17
- this list of conditions and the following disclaimer in the documentation
18
- and/or other materials provided with the distribution.
19
-
20
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21
- ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
24
- CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28
- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
-
32
- 2. Project: LPCNet
33
- - Source: LPCNet (https:github.com/xiph/LPCNet)
34
- - License: BSD-3-Clause
35
- - Copyright: 2017-2018, Mozilla, 2007-2017, Jean-Marc Valin, 2005-2017, Xiph.Org Foundation, 2003-2004, Mark Borgerding
36
- - Original License Text of LPCNet open source project:
37
- Copyright (c) 2017-2018, Mozilla
38
- Copyright (c) 2007-2017, Jean-Marc Valin
39
- Copyright (c) 2005-2017, Xiph.Org Foundation
40
- Copyright (c) 2003-2004, Mark Borgerding
41
-
42
- Redistribution and use in source and binary forms, with or without
43
- modification, are permitted provided that the following conditions
44
- are met:
45
-
46
- - Redistributions of source code must retain the above copyright
47
- notice, this list of conditions and the following disclaimer.
48
-
49
- - Redistributions in binary form must reproduce the above copyright
50
- notice, this list of conditions and the following disclaimer in the
51
- documentation and/or other materials provided with the distribution.
52
-
53
- - Neither the name of the Xiph.Org Foundation nor the names of its
54
- contributors may be used to endorse or promote products derived from
55
- this software without specific prior written permission.
56
-
57
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
58
- ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
59
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
60
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION
61
- OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
62
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
63
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
64
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
65
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
66
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
67
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
68
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -1,6 +1,5 @@
1
  ---
2
  tags:
3
- - Voice Acticity Detection
4
  - voice activity detection
5
  - speech activity detection
6
  - real time
@@ -11,92 +10,11 @@ tags:
11
  - silero vad
12
  - conversational
13
  - automatic speech recognition
 
14
  ---
15
- ![TEN VAD banner][ten-vad-banner]
 
16
 
17
- [![Discussion posts](https://img.shields.io/github/discussions/TEN-framework/ten-vad?labelColor=gray&color=%20%23f79009)](https://github.com/TEN-framework/ten-vad/discussions/)
18
- [![Commits](https://img.shields.io/github/commit-activity/m/TEN-framework/ten-vad?labelColor=gray&color=pink)](https://github.com/TEN-framework/ten-vad/graphs/commit-activity)
19
- [![Issues closed](https://img.shields.io/github/issues-search?query=repo%3ATEN-framework%2Ften-vad%20is%3Aclosed&label=issues%20closed&labelColor=gray&color=green)](https://github.com/TEN-framework/ten-vad/issues)
20
- ![](https://img.shields.io/github/contributors/ten-framework/ten-vad?color=c4f042&labelColor=gray&style=flat-square)
21
- [![PRs Welcome](https://img.shields.io/badge/PRs-welcome!-brightgreen.svg?style=flat-square)](https://github.com/TEN-framework/ten-vad/pulls)
22
- [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/TEN-framework/TEN-vad)
23
-
24
- [![GitHub watchers](https://img.shields.io/github/watchers/TEN-framework/ten-vad?style=social&label=Watch)](https://GitHub.com/TEN-framework/ten-vad/watchers/?WT.mc_id=academic-105485-koreyst)
25
- [![GitHub forks](https://img.shields.io/github/forks/TEN-framework/ten-vad?style=social&label=Fork)](https://GitHub.com/TEN-framework/ten-vad/network/?WT.mc_id=academic-105485-koreyst)
26
- [![GitHub stars](https://img.shields.io/github/stars/TEN-framework/ten-vad?style=social&label=Star)](https://GitHub.com/TEN-framework/ten-vad/stargazers/?WT.mc_id=academic-105485-koreyst)
27
-
28
-
29
- *Latest News* 🔥
30
- - [2025/11] **WASM** build guide and browser test demo are now available in `lib/Web` and `examples`.
31
- - [2025/11] We supported **Python** inference with **ONNX model** on **Linux**, **macOS** thanks to Guy Nicholson!
32
- - [2025/11] We supported **Golang** on **Linux**, **macOS** and **Windows** with usage of the prebuilt-libs thanks to hylarucoder!
33
- - [2025/11] We supported Java on **Linux, macOS, Windows, Android** with usage of the prebuilt-libs thanks to ZhangYang!
34
- - [2025/07] 🎉 Exciting news! **TEN VAD** is now integrated into **k2-fsa/sherpa-onnx**, thanks to the fantastic work by Fangjun Kuang! You can now achieve more precise speech segment extraction and enjoy an enhanced ASR experience! Refer to the [documentation](https://k2-fsa.github.io/sherpa/onnx/vad/ten-vad.html) and give it a try!
35
- - [2025/07] We support **Python inference** on **macOS** and **Windows** with usage of the prebuilt-lib!
36
- - [2025/06] We **finally** released and **open-sourced** the **ONNX** model and the corresponding **preprocessing code**! Now you can deploy **TEN VAD** on **any platform** and **any hardware architecture**!
37
- - [2025/06] We are excited to announce the release of **WASM+JS** for Web WASM Support.
38
-
39
-
40
- ## Table of Contents
41
-
42
- - [Welcome to TEN](#welcome-to-ten)
43
- - [TEN Hugging Face Space](#ten-hugging-face-space)
44
- - [Introduction](#introduction)
45
- - [Key Features](#key-features)
46
- - [High-Performance](#1-high-performance)
47
- - [Performance Comparison](#11-performance-comparison)
48
- - [Agent-Friendly](#2-agent-friendly)
49
- - [Lightweight](#3-lightweight)
50
- - [Multiple Programming Languages and Platforms](#4-multiple-programming-languages-and-platforms)
51
- - [Supported Sampling Rate and Hop Size](#5-supproted-sampling-rate-and-hop-size)
52
- - [Developers Testimonial](#developers-testimonial)
53
- - [Installation](#installation)
54
- - [Quick Start](#quick-start)
55
- - [Python Usage](#python-usage)
56
- - [Linux](#1-linux)
57
- - [JS Usage](#js-usage)
58
- - [Web](#1-web)
59
- - [C Usage](#c-usage)
60
- - [Linux](#1-linux-1)
61
- - [Windows](#2-windows)
62
- - [macOS](#3-macos)
63
- - [Android](#4-android)
64
- - [iOS](#5-ios)
65
- - [TEN Ecosystem](#ten-ecosystem)
66
- - [Ask Questions](#ask-questions)
67
- - [Citations](#citations)
68
- - [License](#license)
69
-
70
-
71
- ## Welcome to TEN
72
-
73
- TEN is a collection of open-source projects for building real-time, multimodal conversational voice agents. It includes [ TEN Framework ](https://github.com/ten-framework/ten-framework), [ TEN VAD ](https://github.com/ten-framework/ten-vad), [ TEN Turn Detection ](https://github.com/ten-framework/ten-turn-detection), TEN Agent, TMAN Designer, and [ TEN Portal ](https://github.com/ten-framework/portal), all fully open-source.
74
-
75
-
76
- | Community Channel | Purpose |
77
- | ---------------- | ------- |
78
- | [![Follow on X](https://img.shields.io/twitter/follow/TenFramework?logo=X&color=%20%23f5f5f5)](https://twitter.com/intent/follow?screen_name=TenFramework) | Follow TEN Framework on X for updates and announcements |
79
- | [![Follow on LinkedIn](https://custom-icon-badges.demolab.com/badge/LinkedIn-TEN_Framework-0A66C2?logo=linkedin-white&logoColor=fff)](https://www.linkedin.com/company/ten-framework) | Follow TEN Framework on LinkedIn for updates and announcements |
80
- | [![Discord TEN Community](https://dcbadge.vercel.app/api/server/VnPftUzAMJ?&style=flat&theme=light&color=lightgray)](https://discord.gg/VnPftUzAMJ) | Join our Discord community to connect with developers |
81
- | [![Hugging Face Space](https://img.shields.io/badge/Hugging%20Face-TEN%20Framework-yellow?style=flat&logo=huggingface)](https://huggingface.co/TEN-framework) | Join our Hugging Face community to explore our spaces and models |
82
- | [![WeChat](https://img.shields.io/badge/TEN_Framework-WeChat_Group-%2307C160?logo=wechat&labelColor=darkgreen&color=gray)](https://github.com/TEN-framework/ten-agent/discussions/170) | Join our WeChat group for Chinese community discussions |
83
-
84
-
85
- > \[!IMPORTANT]
86
- >
87
- > **Star TEN Repositories** ⭐️
88
- >
89
- > Get instant notifications for new releases and updates. Your support helps us grow and improve TEN!
90
-
91
-
92
- ![TEN star us gif](https://github.com/user-attachments/assets/eeebe996-8c14-4bf7-82ae-f1a1f7e30705)
93
-
94
-
95
- ## TEN Hugging Face Space
96
-
97
- <https://github.com/user-attachments/assets/725a8318-d679-4b17-b9e4-e3dce999b298>
98
-
99
- You are more than welcome to [Visit TEN Hugging Face Space](https://huggingface.co/spaces/TEN-framework/ten-agent-demo) to try VAD and Turn Detection together.
100
 
101
 
102
  ## **Introduction**
@@ -115,23 +33,21 @@ The precision-recall curves comparing the performance of WebRTC VAD (pitch-based
115
  <img src="./examples/images/PR_Curves_testset.png" width="800">
116
  </div>
117
 
118
- Note that the default threshold of 0.5 is used to generate binary speech indicators (0 for non-speech signal, 1 for speech signal). This threshold needs to be tuned according to your domain-specific task.
119
-
120
- #### **1.1 Performance Comparison**
121
-
122
- Developers can reproduce the performance comparison PR curves for **TEN VAD** and **Silero VAD** on the open-source testset (as shown in the figure above) by executing the following script on Linux x64 with a simply one line of code. The output figure will be saved in the same directory as the script.
123
 
124
  ```
125
  cd ./examples
126
  python plot_pr_curves.py
127
  ```
128
 
 
129
  ### **2. Agent-Friendly:**
130
  As illustrated in the figure below, TEN VAD rapidly detects speech-to-non-speech transitions, whereas Silero VAD suffers from a delay of several hundred milliseconds, resulting in increased end-to-end latency in human-agent interaction systems. In addition, as demonstrated in the 6.5s-7.0s audio segment, Silero VAD fails to identify short silent durations between adjacent speech segments.
131
  <div style="text-align:">
132
  <img src="./examples/images/Agent-Friendly-image.png" width="800">
133
  </div>
134
 
 
135
  ### **3. Lightweight:**
136
  We evaluated the RTF (Real-Time Factor) across five distinct platforms, each equipped with varying CPUs. TEN VAD demonstrates much lower computational complexity and smaller library size than Silero VAD.
137
 
@@ -199,6 +115,7 @@ We evaluated the RTF (Real-Time Factor) across five distinct platforms, each equ
199
  <td align="center"> 0.0050 </td>
200
  </tr>
201
  </table>
 
202
  <style>
203
  th, td {
204
  border: 1px solid #ddd;
@@ -207,17 +124,12 @@ We evaluated the RTF (Real-Time Factor) across five distinct platforms, each equ
207
  </style>
208
 
209
  ### **4. Multiple programming languages and platforms:**
210
- TEN VAD provides cross-platform C compatibility across five operating systems (Linux x64, Windows, macOS, Android, iOS), with Python bindings optimized for Linux x64, with wasm for Web.
211
 
212
 
213
  ### **5. Supported sampling rate and hop size:**
214
  TEN VAD operates on 16kHz audio input with configurable hop sizes (optimized frame configurations: 160/256 samples=10/16ms). Other sampling rates must be resampled to 16kHz.
215
 
216
- ## **Developers Testimonial**
217
- > "We selected TEN VAD because it provides faster and more accurate sentence-end detection in Japanese compared to other VADs, while still being lightweight and fast enough for live use." - LiveCap,Hakase shojo.
218
-
219
- > "TEN VAD's overall performance is better than Silero VAD. Its high accuracy and low resource consumption helped us improve efficiency and significantly reduce costs." - Rustpbx.
220
-
221
 
222
  ## **Installation**
223
  ```
@@ -273,8 +185,9 @@ The project supports five major platforms with dynamic library linking.
273
  </tr>
274
  </table>
275
 
 
276
  ### **Python Usage**
277
- #### **1. Linux / macOS / Windows**
278
  #### **Requirements**
279
  - numpy (Version 1.17.4/1.26.4 verified)
280
  - scipy (Version >= 1.5.0)
@@ -288,7 +201,7 @@ Note: You could use other versions of above packages, but we didn't test other v
288
 
289
  <br>
290
 
291
- The **lib** only depend on numpy, you have to install the dependency via requirements.txt:
292
 
293
  ```pip install -r requirements.txt```
294
 
@@ -306,7 +219,6 @@ sudo apt install libc++1
306
 
307
  <br>
308
 
309
-
310
  #### **Usage**
311
  Note: For usage in python, you can either use it by **git clone** or **pip**.
312
 
@@ -314,7 +226,7 @@ Note: For usage in python, you can either use it by **git clone** or **pip**.
314
 
315
  1. Clone the repository
316
  ```
317
- git clone https://github.com/TEN-framework/ten-vad.git
318
  ```
319
 
320
  2. Enter examples directory
@@ -327,7 +239,6 @@ cd ./examples
327
  python test.py s0724-s0730.wav out.txt
328
  ```
329
 
330
-
331
  ##### **By using pip:**
332
 
333
  1. Install via pip
@@ -342,24 +253,9 @@ pip install -U --force-reinstall -v git+https://github.com/TEN-framework/ten-vad
342
  from ten_vad import TenVad
343
  ```
344
 
345
-
346
- ### **JS Usage**
347
-
348
- #### **1. Web**
349
- ##### **Requirements**
350
- - Node.js (macOS v14.18.2, Linux v16.20.2 verified)
351
- - Terminal
352
-
353
- ##### **Usage**
354
- ```
355
- 1) cd ./examples
356
- 2) node test_node.js s0724-s0730.wav out.txt
357
- ```
358
-
359
-
360
  ### **C Usage**
361
  #### **Build Scripts**
362
- Located in examples/ directory and examples_onnx (for **ONNX** usage on Linux):
363
 
364
  - Linux: build-and-deploy-linux.sh
365
  - Windows: build-and-deploy-windows.bat
@@ -379,14 +275,13 @@ Runtime library path configuration:
379
  - Configure toolchain and architecture settings
380
 
381
  #### **Overview of Usage**
382
- - Navigate to examples/ or examples_onx/ (for **ONNX** usage on Linux)
383
  - Execute platform-specific build script
384
  - Configure dynamic library path
385
  - Run demo with sample audio s0724-s0730.wav
386
  - Processed results saved to out.txt
387
 
388
 
389
-
390
  The detailed usage methods of each platform are as follows <br>
391
 
392
  #### **1. Linux**
@@ -395,30 +290,18 @@ The detailed usage methods of each platform are as follows <br>
395
  - CMake
396
  - Terminal
397
 
398
- Note that if you did not install **libc++1** (Linux), you have to run the code below to install it:
399
  ```
400
  sudo apt update
401
  sudo apt install libc++1
402
  ```
403
 
404
- ##### **Usage (prebuilt-lib)**
405
  ```
406
  1) cd ./examples
407
  2) ./build-and-deploy-linux.sh
408
  ```
409
 
410
- ##### **Usage (ONNX)**
411
- You have to download the **onnxruntime** packages from the [microsoft official onnxruntime github website](https://github.com/microsoft/onnxruntime). Note that the version of onnxruntime must be higher than or equal to 1.17.1 (e.g. onnxruntime-linux-x64-1.17.1.tgz).
412
- <br>
413
- You can check the official **ONNX Runtime releases** from [this website](https://github.com/microsoft/onnxruntime/tags). And for example, to download version 1.17.1 (Linux x64), use [this link](https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-linux-x64-1.17.1.tgz). After extracting the compressed file, you'll find two important directories:`include/` - header files, `lib/` - library files
414
- ```
415
- 1) cd examples_onnx/
416
- 2) ./build-and-deploy-linux.sh --ort-path /absolute/path/to/your/onnxruntime/root/dir
417
- ```
418
- Note 1: If executing the onnx demo from a different directory than the one used when running build-and-deploy-linux.sh, ensure to create a symbolic link to src/onnx_model/ to prevent ONNX model file loading failures.
419
- <br>
420
- Note 2: The **ONNX model** locates in `src/onnx_model` directory.
421
-
422
  #### **2. Windows**
423
  ##### **Requirements**
424
  - Visual Studio (2017, 2019, 2022 verified)
@@ -434,7 +317,6 @@ Note 2: The **ONNX model** locates in `src/onnx_model` directory.
434
  3) ./build-and-deploy-windows.bat
435
  ```
436
 
437
-
438
  #### **3. macOS**
439
  ##### **Requirements**
440
  - Xcode (15.2 verified)
@@ -449,7 +331,6 @@ Note 2: The **ONNX model** locates in `src/onnx_model` directory.
449
  3) ./build-and-deploy-mac.sh
450
  ```
451
 
452
-
453
  #### **4. Android**
454
  ##### **Requirements**
455
  - NDK (r25b, macOS verified)
@@ -466,7 +347,6 @@ Note 2: The **ONNX model** locates in `src/onnx_model` directory.
466
  4) ./build-and-deploy-android.sh
467
  ```
468
 
469
-
470
  #### **5. iOS**
471
  ##### **Requirements**
472
  Xcode (15.2, macOS verified)
@@ -517,28 +397,6 @@ cd ./examples
517
 
518
  3.5. Build in Xcode and run demo on your device.
519
 
520
-
521
-
522
- ## TEN Ecosystem
523
-
524
- | Project | Preview |
525
- | ------- | ------- |
526
- | [**🏚️ TEN Framework**][ten-framework-link]<br>TEN is an open-source framework for real-time, multimodal conversational AI.<br><br>![][ten-framework-shield] | ![][ten-framework-banner] |
527
- | [**️🔂 TEN Turn Detection**][ten-turn-detection-link]<br>TEN is for full-duplex dialogue communication.<br><br>![][ten-turn-detection-shield] | ![][ten-turn-detection-banner] |
528
- | [**🔉 TEN VAD**][ten-vad-link]<br>TEN VAD is a low-latency, lightweight and high-performance streaming voice activity detector (VAD).<br><br>![][ten-vad-shield] | ![][ten-vad-banner] |
529
- | [**🎙️ TEN Agent**][ten-agent-link]<br>TEN Agent is a showcase of TEN Framewrok.<br><br> | ![][ten-agent-banner] |
530
- | **🎨 TMAN Designer** <br>TMAN Designer is low/no code option to make a voice agent with easy to use workflow UI.<br><br> | ![][tman-designer-banner] |
531
- | [**📒 TEN Portal**][ten-portal-link]<br>The official site of TEN framework, it has documentation and blog.<br><br>![][ten-portal-shield] | ![][ten-portal-banner] |
532
-
533
- <br>
534
-
535
- ## Ask Questions
536
-
537
- [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/TEN-framework/TEN-vad)
538
-
539
- Most questions can be answered by using DeepWiki, it is fast, intutive to use and supports multiple languages.
540
-
541
-
542
  ## **Citations**
543
  ```
544
  @misc{TEN VAD,
@@ -552,32 +410,28 @@ Most questions can be answered by using DeepWiki, it is fast, intutive to use an
552
  }
553
  ```
554
 
 
555
 
556
- ## License
557
-
558
- This project is licensed pursuant to the Apache 2.0 with additional conditions. Refer to the "LICENSE" file in the root directory for detailed information. Note that `pitch_est.cc` contains modified code derived from [LPCNet](https://github.com/xiph/LPCNet), which is [BSD-2-Clause](https://spdx.org/licenses/BSD-2-Clause.html) and [BSD-3-Clause](https://spdx.org/licenses/BSD-3-Clause.html) licensed, refer to the NOTICES file in the root directory for detailed information.
559
-
560
-
561
-
562
-
563
- [back-to-top]: https://img.shields.io/badge/-Back_to_top-gray?style=flat-square
564
 
565
- [ten-framework-shield]: https://img.shields.io/github/stars/ten-framework/ten_framework?color=ffcb47&labelColor=gray&style=flat-square&logo=github
566
- [ten-framework-banner]: https://github.com/user-attachments/assets/7c8f72d7-3993-4d01-8504-b71578a22944
567
- [ten-framework-link]: https://github.com/ten-framework/ten_framework
 
568
 
569
- [ten-vad-link]: https://github.com/ten-framework/ten-vad
570
- [ten-vad-shield]: https://img.shields.io/github/stars/ten-framework/ten-vad?color=ffcb47&labelColor=gray&style=flat-square&logo=github
571
- [ten-vad-banner]: https://github.com/user-attachments/assets/d45870e4-9453-4047-8163-08737f82863f
572
 
573
- [ten-turn-detection-link]: https://github.com/ten-framework/ten-turn-detection
574
- [ten-turn-detection-shield]: https://img.shields.io/github/stars/ten-framework/ten-turn-detection?color=ffcb47&labelColor=gray&style=flat-square&logo=github
575
- [ten-turn-detection-banner]: https://github.com/user-attachments/assets/8d0ec716-5d0e-43e4-ad9a-d97b17305658
576
 
577
- [ten-agent-link]: https://github.com/TEN-framework/ten-framework/tree/main/ai_agents
578
- [ten-agent-banner]: https://github.com/user-attachments/assets/38de2207-939b-4702-a0aa-04491f5b5275
579
- [tman-designer-banner]: https://github.com/user-attachments/assets/804c3543-0a47-42b7-b40b-ef32b742fb8f
580
 
581
- [ten-portal-link]: https://github.com/ten-framework/portal
582
- [ten-portal-shield]: https://img.shields.io/github/stars/ten-framework/portal?color=ffcb47&labelColor=gray&style=flat-square&logo=github
583
- [ten-portal-banner]: https://github.com/user-attachments/assets/e17d8aaa-5928-45dd-ac71-814928e26a89
 
1
  ---
2
  tags:
 
3
  - voice activity detection
4
  - speech activity detection
5
  - real time
 
10
  - silero vad
11
  - conversational
12
  - automatic speech recognition
13
+ pipeline_tag: voice-activity-detection
14
  ---
15
+ # **TEN VAD**
16
+ ***A Low-Latency, Lightweight and High-Performance Streaming VAD***
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
 
20
  ## **Introduction**
 
33
  <img src="./examples/images/PR_Curves_testset.png" width="800">
34
  </div>
35
 
36
+ Note that the default threshold of 0.5 is used to generate binary speech indicators (0 for non-speech signal, 1 for speech signal). This threshold needs to be tuned according to your domain-specific task. The precision-recall curve can be obtained by executing the following script on Linux x64. The output figure will be saved in the same directory as the script.
 
 
 
 
37
 
38
  ```
39
  cd ./examples
40
  python plot_pr_curves.py
41
  ```
42
 
43
+
44
  ### **2. Agent-Friendly:**
45
  As illustrated in the figure below, TEN VAD rapidly detects speech-to-non-speech transitions, whereas Silero VAD suffers from a delay of several hundred milliseconds, resulting in increased end-to-end latency in human-agent interaction systems. In addition, as demonstrated in the 6.5s-7.0s audio segment, Silero VAD fails to identify short silent durations between adjacent speech segments.
46
  <div style="text-align:">
47
  <img src="./examples/images/Agent-Friendly-image.png" width="800">
48
  </div>
49
 
50
+
51
  ### **3. Lightweight:**
52
  We evaluated the RTF (Real-Time Factor) across five distinct platforms, each equipped with varying CPUs. TEN VAD demonstrates much lower computational complexity and smaller library size than Silero VAD.
53
 
 
115
  <td align="center"> 0.0050 </td>
116
  </tr>
117
  </table>
118
+
119
  <style>
120
  th, td {
121
  border: 1px solid #ddd;
 
124
  </style>
125
 
126
  ### **4. Multiple programming languages and platforms:**
127
+ TEN VAD provides cross-platform C compatibility across five operating systems (Linux x64, Windows, macOS, Android, iOS), with Python bindings optimized for Linux x64.
128
 
129
 
130
  ### **5. Supported sampling rate and hop size:**
131
  TEN VAD operates on 16kHz audio input with configurable hop sizes (optimized frame configurations: 160/256 samples=10/16ms). Other sampling rates must be resampled to 16kHz.
132
 
 
 
 
 
 
133
 
134
  ## **Installation**
135
  ```
 
185
  </tr>
186
  </table>
187
 
188
+
189
  ### **Python Usage**
190
+ #### **1. Linux**
191
  #### **Requirements**
192
  - numpy (Version 1.17.4/1.26.4 verified)
193
  - scipy (Version >= 1.5.0)
 
201
 
202
  <br>
203
 
204
+ The **lib** depends only on numpy; install the dependency via requirements.txt:
205
 
206
  ```pip install -r requirements.txt```
207
 
 
219
 
220
  <br>
221
 
 
222
  #### **Usage**
223
  Note: For usage in python, you can either use it by **git clone** or **pip**.
224
 
 
226
 
227
  1. Clone the repository
228
  ```
229
+ git clone https://huggingface.co/TEN-framework/ten-vad
230
  ```
231
 
232
  2. Enter examples directory
 
239
  python test.py s0724-s0730.wav out.txt
240
  ```
241
 
 
242
  ##### **By using pip:**
243
 
244
  1. Install via pip
 
253
  from ten_vad import TenVad
254
  ```
255
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  ### **C Usage**
257
  #### **Build Scripts**
258
+ Located in examples/ directory:
259
 
260
  - Linux: build-and-deploy-linux.sh
261
  - Windows: build-and-deploy-windows.bat
 
275
  - Configure toolchain and architecture settings
276
 
277
  #### **Overview of Usage**
278
+ - Navigate to examples/
279
  - Execute platform-specific build script
280
  - Configure dynamic library path
281
  - Run demo with sample audio s0724-s0730.wav
282
  - Processed results saved to out.txt
283
 
284
 
 
285
  The detailed usage methods of each platform are as follows <br>
286
 
287
  #### **1. Linux**
 
290
  - CMake
291
  - Terminal
292
 
293
+ Note that if **libc++1** is not installed, you have to run the commands below to install it:
294
  ```
295
  sudo apt update
296
  sudo apt install libc++1
297
  ```
298
 
299
+ ##### **Usage**
300
  ```
301
  1) cd ./examples
302
  2) ./build-and-deploy-linux.sh
303
  ```
304
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  #### **2. Windows**
306
  ##### **Requirements**
307
  - Visual Studio (2017, 2019, 2022 verified)
 
317
  3) ./build-and-deploy-windows.bat
318
  ```
319
 
 
320
  #### **3. macOS**
321
  ##### **Requirements**
322
  - Xcode (15.2 verified)
 
331
  3) ./build-and-deploy-mac.sh
332
  ```
333
 
 
334
  #### **4. Android**
335
  ##### **Requirements**
336
  - NDK (r25b, macOS verified)
 
347
  4) ./build-and-deploy-android.sh
348
  ```
349
 
 
350
  #### **5. iOS**
351
  ##### **Requirements**
352
  Xcode (15.2, macOS verified)
 
397
 
398
  3.5. Build in Xcode and run demo on your device.
399
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
  ## **Citations**
401
  ```
402
  @misc{TEN VAD,
 
410
  }
411
  ```
412
 
413
+ ## Usage Guidance
414
 
415
+ 1. You may not (i) host the TEN VAD or the Derivative Works on any End
416
+ User devices, including but not limited to any mobile terminal devices
417
+ or (ii) Deploy the TEN VAD in a way that competes with Agora's
418
+ offerings and/or that allows others to compete with Agora's offerings,
419
+ including without limitation enabling any third party to develop or
420
+ deploy Applications.
 
 
421
 
422
+ 2. You may Deploy the TEN VAD solely to create and enable deployment
423
+ of your Application(s) solely for your benefit and the benefit of your
424
+ direct End Users. If you prefer, you may include the following notice in
425
+ the documentation of your Application(s): "Powered by TEN VAD".
426
 
427
+ 3. "End Users" shall mean the end-users of your Application(s) who access
428
+ the TEN VAD solely to the extent necessary to access and use the
429
+ Application(s) you create or deploy using TEN VAD.
430
 
431
+ 4. "Application(s)" shall mean your software programs designed or developed
432
+ by using the TEN VAD or where deployment is enabled by the TEN
433
+ VAD.
434
 
435
+ ## Future Open Source Plan
 
 
436
 
437
+ TEN VAD is currently released as a binary. Based on community feedback and interest, we plan to progressively open-source the internal components of the binary.
 
 
examples/.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ *.wav filter=lfs diff=lfs merge=lfs -text
examples/CMakeLists.txt CHANGED
@@ -1,9 +1,3 @@
1
- #
2
- # Copyright © 2025 Agora
3
- # This file is part of TEN Framework, an open source project.
4
- # Licensed under the Apache License, Version 2.0, with certain conditions.
5
- # Refer to the "LICENSE" file in the root directory for more information.
6
- #
7
  cmake_minimum_required(VERSION 3.10)
8
  get_filename_component(ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../ ABSOLUTE)
9
 
 
 
 
 
 
 
 
1
  cmake_minimum_required(VERSION 3.10)
2
  get_filename_component(ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../ ABSOLUTE)
3
 
examples/build-and-deploy-android.sh CHANGED
@@ -1,10 +1,4 @@
1
  #!/bin/bash
2
- #
3
- # Copyright © 2025 Agora
4
- # This file is part of TEN Framework, an open source project.
5
- # Licensed under the Apache License, Version 2.0, with certain conditions.
6
- # Refer to the "LICENSE" file in the root directory for more information.
7
- #
8
  set -eo pipefail
9
 
10
  # Customize the arch and toolchain
 
1
  #!/bin/bash
 
 
 
 
 
 
2
  set -eo pipefail
3
 
4
  # Customize the arch and toolchain
examples/build-and-deploy-ios.sh CHANGED
@@ -1,10 +1,4 @@
1
  #!/usr/bin/env bash
2
- #
3
- # Copyright © 2025 Agora
4
- # This file is part of TEN Framework, an open source project.
5
- # Licensed under the Apache License, Version 2.0, with certain conditions.
6
- # Refer to the "LICENSE" file in the root directory for more information.
7
- #
8
  set -euo pipefail
9
 
10
  work_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 
1
  #!/usr/bin/env bash
 
 
 
 
 
 
2
  set -euo pipefail
3
 
4
  work_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
examples/build-and-deploy-linux.sh CHANGED
@@ -1,10 +1,4 @@
1
  #!/bin/bash
2
- #
3
- # Copyright © 2025 Agora
4
- # This file is part of TEN Framework, an open source project.
5
- # Licensed under the Apache License, Version 2.0, with certain conditions.
6
- # Refer to the "LICENSE" file in the root directory for more information.
7
- #
8
  set -euo pipefail
9
 
10
  arch=x64
 
1
  #!/bin/bash
 
 
 
 
 
 
2
  set -euo pipefail
3
 
4
  arch=x64
examples/build-and-deploy-mac.sh CHANGED
@@ -1,10 +1,4 @@
1
  #!/bin/bash
2
- #
3
- # Copyright © 2025 Agora
4
- # This file is part of TEN Framework, an open source project.
5
- # Licensed under the Apache License, Version 2.0, with certain conditions.
6
- # Refer to the "LICENSE" file in the root directory for more information.
7
- #
8
  set -euo pipefail
9
 
10
  # Customize the arch
 
1
  #!/bin/bash
 
 
 
 
 
 
2
  set -euo pipefail
3
 
4
  # Customize the arch
examples/build-and-deploy-windows.bat CHANGED
@@ -1,13 +1,6 @@
1
  @echo off
2
  setlocal
3
 
4
- @REM
5
- @REM Copyright © 2025 Agora
6
- @REM This file is part of TEN Framework, an open source project.
7
- @REM Licensed under the Apache License, Version 2.0, with certain conditions.
8
- @REM Refer to the "LICENSE" file in the root directory for more information.
9
- @REM
10
-
11
  @REM Customize the arch
12
  set arch=x64
13
  @REM set arch=x86
 
1
  @echo off
2
  setlocal
3
 
 
 
 
 
 
 
 
4
  @REM Customize the arch
5
  set arch=x64
6
  @REM set arch=x86
examples/images/.gitattributes ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.jpg filter=lfs diff=lfs merge=lfs -text
2
+ *.png filter=lfs diff=lfs merge=lfs -text
examples/main.c CHANGED
@@ -1,8 +1,7 @@
1
  //
2
- // Copyright © 2025 Agora
3
  // This file is part of TEN Framework, an open source project.
4
- // Licensed under the Apache License, Version 2.0, with certain conditions.
5
- // Refer to the "LICENSE" file in the root directory for more information.
6
  //
7
  #include <stdio.h>
8
  #include <stdint.h>
@@ -87,16 +86,9 @@ int vad_process(int16_t *input_buf, uint32_t frame_num,
87
  for (int i = 0; i < frame_num; ++i)
88
  {
89
  int16_t *audio_data = input_buf + i * hop_size;
90
- int res = ten_vad_process(ten_vad_handle, audio_data, hop_size,
91
- &out_probs[i], &out_flags[i]);
92
- if (res == 0)
93
- {
94
- printf("[%d] %0.6f, %d\n", i, out_probs[i], out_flags[i]);
95
- }
96
- else
97
- {
98
- printf("ten_vad_process failed res %d\n", res);
99
- }
100
  }
101
  uint64_t end = get_timestamp_ms();
102
  *use_time = (float)(end - start);
@@ -303,4 +295,4 @@ int read_wav_file(FILE *fp, wav_info_t *info)
303
  // restore original file position
304
  fseek(fp, orig_pos, SEEK_SET);
305
  return 0;
306
- }
 
1
  //
 
2
  // This file is part of TEN Framework, an open source project.
3
+ // Licensed under the Apache License, Version 2.0.
4
+ // See the LICENSE file for more information.
5
  //
6
  #include <stdio.h>
7
  #include <stdint.h>
 
86
  for (int i = 0; i < frame_num; ++i)
87
  {
88
  int16_t *audio_data = input_buf + i * hop_size;
89
+ ten_vad_process(ten_vad_handle, audio_data, hop_size,
90
+ &out_probs[i], &out_flags[i]);
91
+ printf("[%d] %0.6f, %d\n", i, out_probs[i], out_flags[i]);
 
 
 
 
 
 
 
92
  }
93
  uint64_t end = get_timestamp_ms();
94
  *use_time = (float)(end - start);
 
295
  // restore original file position
296
  fseek(fp, orig_pos, SEEK_SET);
297
  return 0;
298
+ }
examples/plot_pr_curves.py CHANGED
@@ -1,8 +1,7 @@
1
  #
2
- # Copyright © 2025 Agora
3
- # This file is part of TEN Framework, an open source project.
4
- # Licensed under the Apache License, Version 2.0, with certain conditions.
5
- # Refer to the "LICENSE" file in the root directory for more information.
6
  #
7
  import os, glob, sys, torchaudio
8
  import numpy as np
@@ -115,7 +114,7 @@ if __name__ == "__main__":
115
  # Get the directory of the script
116
  script_dir = os.path.dirname(os.path.abspath(__file__))
117
 
118
- # TEN-VAD-TestSet dir
119
  test_dir = f"{script_dir}/../testset"
120
 
121
  # Initialization
 
1
  #
2
+ # This file is part of TEN Framework, an open source project.
3
+ # Licensed under the Apache License, Version 2.0.
4
+ # See the LICENSE file for more information.
 
5
  #
6
  import os, glob, sys, torchaudio
7
  import numpy as np
 
114
  # Get the directory of the script
115
  script_dir = os.path.dirname(os.path.abspath(__file__))
116
 
117
+ # testset dir
118
  test_dir = f"{script_dir}/../testset"
119
 
120
  # Initialization
examples/sample_array.h CHANGED
@@ -1,9 +1,3 @@
1
- //
2
- // Copyright © 2025 Agora
3
- // This file is part of TEN Framework, an open source project.
4
- // Licensed under the Apache License, Version 2.0, with certain conditions.
5
- // Refer to the "LICENSE" file in the root directory for more information.
6
- //
7
  // Used for iOS APP demo
8
  unsigned char sample_array[] = {
9
  0xe3, 0xff, 0xd4, 0xff, 0xdc, 0xff, 0xe0, 0xff, 0xf6, 0xff, 0xf5, 0xff, 0xf6, 0xff, 0xfc, 0xff,
 
 
 
 
 
 
 
1
  // Used for iOS APP demo
2
  unsigned char sample_array[] = {
3
  0xe3, 0xff, 0xd4, 0xff, 0xdc, 0xff, 0xe0, 0xff, 0xf6, 0xff, 0xf5, 0xff, 0xf6, 0xff, 0xfc, 0xff,
examples/test.py CHANGED
@@ -1,8 +1,7 @@
1
  #
2
- # Copyright © 2025 Agora
3
- # This file is part of TEN Framework, an open source project.
4
- # Licensed under the Apache License, Version 2.0, with certain conditions.
5
- # Refer to the "LICENSE" file in the root directory for more information.
6
  #
7
  import sys, os
8
 
 
1
  #
2
+ # This file is part of TEN Framework, an open source project.
3
+ # Licensed under the Apache License, Version 2.0.
4
+ # See the LICENSE file for more information.
 
5
  #
6
  import sys, os
7
 
examples/test_node.js DELETED
@@ -1,529 +0,0 @@
1
- #!/usr/bin/env node
2
-
3
- //
4
- // Copyright © 2025 Agora
5
- // This file is part of TEN Framework, an open source project.
6
- // Licensed under the Apache License, Version 2.0, with certain conditions.
7
- // Refer to the "LICENSE" file in the root directory for more information.
8
- //
9
-
10
- /**
11
- * TEN VAD WebAssembly Node.js Test
12
- * Simplified and clean version based on main.c
13
- */
14
-
15
- const fs = require('fs');
16
- const path = require('path');
17
-
18
- // Configuration
19
- const HOP_SIZE = 256; // 16ms per frame
20
- const VOICE_THRESHOLD = 0.5; // Voice detection threshold
21
-
22
- // WASM module paths
23
- const WASM_DIR = './../lib/Web';
24
- const WASM_JS_FILE = path.join(WASM_DIR, 'ten_vad.js');
25
- const WASM_BINARY_FILE = path.join(WASM_DIR, 'ten_vad.wasm');
26
-
27
- // Global state
28
- let vadModule = null;
29
- let vadHandle = null;
30
- let vadHandlePtr = null;
31
-
32
- // ============================================================================
33
- // UTILITY FUNCTIONS
34
- // ============================================================================
35
-
36
- function getTimestamp() {
37
- return Date.now();
38
- }
39
-
40
- function addHelperFunctions() {
41
- if (!vadModule.getValue) {
42
- vadModule.getValue = function(ptr, type) {
43
- switch (type) {
44
- case 'i32': return vadModule.HEAP32[ptr >> 2];
45
- case 'float': return vadModule.HEAPF32[ptr >> 2];
46
- default: throw new Error(`Unsupported type: ${type}`);
47
- }
48
- };
49
- }
50
-
51
- if (!vadModule.UTF8ToString) {
52
- vadModule.UTF8ToString = function(ptr) {
53
- if (!ptr) return '';
54
- let result = '';
55
- let i = ptr;
56
- while (vadModule.HEAPU8[i]) {
57
- result += String.fromCharCode(vadModule.HEAPU8[i++]);
58
- }
59
- return result;
60
- };
61
- }
62
- }
63
-
64
- // ============================================================================
65
- // AUDIO GENERATION
66
- // ============================================================================
67
-
68
- function generateTestAudio(durationMs = 5000) {
69
- const sampleRate = 16000;
70
- const totalSamples = Math.floor(durationMs * sampleRate / 1000);
71
- const audioData = new Int16Array(totalSamples);
72
-
73
- console.log(`Generating ${totalSamples} samples for ${durationMs}ms audio...`);
74
-
75
- for (let i = 0; i < totalSamples; i++) {
76
- const t = i / sampleRate;
77
- let sample = 0;
78
-
79
- if (t < 2.0) {
80
- // Voice frequencies (440Hz + 880Hz)
81
- sample = Math.sin(2 * Math.PI * 440 * t) * 8000 +
82
- Math.sin(2 * Math.PI * 880 * t) * 4000;
83
- } else if (t < 3.0) {
84
- // Noise
85
- sample = (Math.random() - 0.5) * 3000;
86
- } else if (t < 4.0) {
87
- // Mixed voice (220Hz + 660Hz)
88
- sample = Math.sin(2 * Math.PI * 220 * t) * 6000 +
89
- Math.sin(2 * Math.PI * 660 * t) * 3000;
90
- } else {
91
- // Silence with minimal noise
92
- sample = Math.random() * 50;
93
- }
94
-
95
- audioData[i] = Math.max(-32768, Math.min(32767, Math.floor(sample)));
96
- }
97
-
98
- return audioData;
99
- }
100
-
101
- // ============================================================================
102
- // VAD OPERATIONS
103
- // ============================================================================
104
-
105
- function getVADVersion() {
106
- if (!vadModule) return "unknown";
107
- try {
108
- const versionPtr = vadModule._ten_vad_get_version();
109
- return vadModule.UTF8ToString(versionPtr);
110
- } catch (error) {
111
- return "unknown";
112
- }
113
- }
114
-
115
- function createVADInstance() {
116
- try {
117
- vadHandlePtr = vadModule._malloc(4);
118
- const result = vadModule._ten_vad_create(vadHandlePtr, HOP_SIZE, VOICE_THRESHOLD);
119
-
120
- if (result === 0) {
121
- vadHandle = vadModule.getValue(vadHandlePtr, 'i32');
122
- return true;
123
- } else {
124
- console.error(`VAD creation failed with code: ${result}`);
125
- vadModule._free(vadHandlePtr);
126
- return false;
127
- }
128
- } catch (error) {
129
- console.error(`Error creating VAD instance: ${error.message}`);
130
- return false;
131
- }
132
- }
133
-
134
- function destroyVADInstance() {
135
- if (vadHandlePtr && vadModule) {
136
- vadModule._ten_vad_destroy(vadHandlePtr);
137
- vadModule._free(vadHandlePtr);
138
- vadHandlePtr = null;
139
- vadHandle = null;
140
- }
141
- }
142
-
143
- async function processAudio(inputBuf, frameNum, outProbs, outFlags) {
144
- console.log(`VAD version: ${getVADVersion()}`);
145
-
146
- if (!createVADInstance()) {
147
- return -1;
148
- }
149
-
150
- const startTime = getTimestamp();
151
-
152
- for (let i = 0; i < frameNum; i++) {
153
- const frameStart = i * HOP_SIZE;
154
- const frameData = inputBuf.slice(frameStart, frameStart + HOP_SIZE);
155
-
156
- const audioPtr = vadModule._malloc(HOP_SIZE * 2);
157
- const probPtr = vadModule._malloc(4);
158
- const flagPtr = vadModule._malloc(4);
159
-
160
- try {
161
- vadModule.HEAP16.set(frameData, audioPtr / 2);
162
-
163
- const result = vadModule._ten_vad_process(
164
- vadHandle, audioPtr, HOP_SIZE, probPtr, flagPtr
165
- );
166
-
167
- if (result === 0) {
168
- const probability = vadModule.getValue(probPtr, 'float');
169
- const flag = vadModule.getValue(flagPtr, 'i32');
170
-
171
- outProbs[i] = probability;
172
- outFlags[i] = flag;
173
-
174
- console.log(`[${i}] ${probability.toFixed(6)}, ${flag}`);
175
- } else {
176
- console.error(`Frame ${i} processing failed with code: ${result}`);
177
- outProbs[i] = 0.0;
178
- outFlags[i] = 0;
179
- }
180
- } finally {
181
- vadModule._free(audioPtr);
182
- vadModule._free(probPtr);
183
- vadModule._free(flagPtr);
184
- }
185
- }
186
-
187
- const endTime = getTimestamp();
188
- const processingTime = endTime - startTime;
189
-
190
- destroyVADInstance();
191
- return processingTime;
192
- }
193
-
194
- // ============================================================================
195
- // RESULT HANDLING
196
- // ============================================================================
197
-
198
- function printResults(processingTime, totalAudioTime, outFlags, frameNum) {
199
- const rtf = processingTime / totalAudioTime;
200
- const voiceFrames = outFlags.filter(flag => flag === 1).length;
201
- const voicePercentage = (voiceFrames / frameNum * 100).toFixed(1);
202
-
203
- console.log(`\n=== Processing Results ===`);
204
- console.log(`Time: ${processingTime}ms, Audio: ${totalAudioTime.toFixed(2)}ms, RTF: ${rtf.toFixed(6)}`);
205
- console.log(`Voice frames: ${voiceFrames}/${frameNum} (${voicePercentage}%)`);
206
- }
207
-
208
- function saveResults(outProbs, outFlags, frameNum, filename = 'out.txt') {
209
- let output = '';
210
- for (let i = 0; i < frameNum; i++) {
211
- output += `[${i}] ${outProbs[i].toFixed(6)}, ${outFlags[i]}\n`;
212
- }
213
-
214
- try {
215
- fs.writeFileSync(filename, output);
216
- console.log(`Results saved to ${filename}`);
217
- } catch (error) {
218
- console.error(`Failed to save results: ${error.message}`);
219
- }
220
- }
221
-
222
- // ============================================================================
223
- // TEST FUNCTIONS
224
- // ============================================================================
225
-
226
- async function testWithArray() {
227
- console.log("=== Array Test ===\n");
228
-
229
- const inputBuf = generateTestAudio(5000);
230
- const byteNum = inputBuf.byteLength;
231
- const sampleNum = byteNum / 2;
232
- const totalAudioTime = sampleNum / 16.0;
233
- const frameNum = Math.floor(sampleNum / HOP_SIZE);
234
-
235
- console.log(`Audio info: ${byteNum} bytes, ${frameNum} frames, ${totalAudioTime.toFixed(2)}ms`);
236
-
237
- const outProbs = new Float32Array(frameNum);
238
- const outFlags = new Int32Array(frameNum);
239
-
240
- const processingTime = await processAudio(inputBuf, frameNum, outProbs, outFlags);
241
-
242
- if (processingTime > 0) {
243
- printResults(processingTime, totalAudioTime, outFlags, frameNum);
244
- }
245
-
246
- return 0;
247
- }
248
-
249
- // WAV File parsing utilities
250
- function parseWAVHeader(buffer) {
251
- if (buffer.length < 44) {
252
- throw new Error('Invalid WAV file: too small');
253
- }
254
-
255
- // Check RIFF header
256
- const riffHeader = buffer.toString('ascii', 0, 4);
257
- if (riffHeader !== 'RIFF') {
258
- throw new Error('Invalid WAV file: missing RIFF header');
259
- }
260
-
261
- // Check WAVE format
262
- const waveHeader = buffer.toString('ascii', 8, 12);
263
- if (waveHeader !== 'WAVE') {
264
- throw new Error('Invalid WAV file: not WAVE format');
265
- }
266
-
267
- let offset = 12;
268
- let dataOffset = -1;
269
- let dataSize = 0;
270
- let sampleRate = 0;
271
- let channels = 0;
272
- let bitsPerSample = 0;
273
-
274
- // Parse chunks
275
- while (offset < buffer.length - 8) {
276
- const chunkId = buffer.toString('ascii', offset, offset + 4);
277
- const chunkSize = buffer.readUInt32LE(offset + 4);
278
-
279
- if (chunkId === 'fmt ') {
280
- // Format chunk
281
- const audioFormat = buffer.readUInt16LE(offset + 8);
282
- channels = buffer.readUInt16LE(offset + 10);
283
- sampleRate = buffer.readUInt32LE(offset + 12);
284
- bitsPerSample = buffer.readUInt16LE(offset + 22);
285
-
286
- if (audioFormat !== 1) {
287
- throw new Error('Unsupported WAV format: only PCM is supported');
288
- }
289
-
290
- if (bitsPerSample !== 16) {
291
- throw new Error('Unsupported bit depth: only 16-bit is supported');
292
- }
293
- } else if (chunkId === 'data') {
294
- // Data chunk
295
- dataOffset = offset + 8;
296
- dataSize = chunkSize;
297
- break;
298
- }
299
-
300
- offset += 8 + chunkSize;
301
- // Align to even byte boundary
302
- if (chunkSize % 2 === 1) {
303
- offset++;
304
- }
305
- }
306
-
307
- if (dataOffset === -1) {
308
- throw new Error('Invalid WAV file: no data chunk found');
309
- }
310
-
311
- return {
312
- sampleRate,
313
- channels,
314
- bitsPerSample,
315
- dataOffset,
316
- dataSize,
317
- totalSamples: dataSize / (bitsPerSample / 8),
318
- samplesPerChannel: dataSize / (bitsPerSample / 8) / channels
319
- };
320
- }
321
-
322
- async function testWithWAV(inputFile, outputFile) {
323
- console.log("=== WAV File Test ===\n");
324
-
325
- if (!fs.existsSync(inputFile)) {
326
- console.error(`Input file not found: ${inputFile}`);
327
- return 1;
328
- }
329
-
330
- try {
331
- const buffer = fs.readFileSync(inputFile);
332
-
333
- // Parse WAV header properly
334
- const wavInfo = parseWAVHeader(buffer);
335
- console.log(`WAV Format: ${wavInfo.channels} channel(s), ${wavInfo.sampleRate}Hz, ${wavInfo.bitsPerSample}-bit`);
336
- console.log(`Total samples: ${wavInfo.totalSamples}, samples per channel: ${wavInfo.samplesPerChannel}`);
337
-
338
- // Validate format requirements
339
- if (wavInfo.sampleRate !== 16000) {
340
- console.warn(`Warning: Sample rate is ${wavInfo.sampleRate}Hz, expected 16000Hz`);
341
- }
342
-
343
- if (wavInfo.channels !== 1) {
344
- console.warn(`Warning: ${wavInfo.channels} channels detected, only first channel will be used`);
345
- }
346
-
347
- // Extract audio data
348
- const audioBuffer = buffer.slice(wavInfo.dataOffset, wavInfo.dataOffset + wavInfo.dataSize);
349
- const inputBuf = new Int16Array(audioBuffer.buffer.slice(audioBuffer.byteOffset));
350
-
351
- // Calculate correct sample number (for mono audio)
352
- const sampleNum = wavInfo.channels === 1 ?
353
- wavInfo.samplesPerChannel :
354
- Math.floor(wavInfo.samplesPerChannel); // Use only first channel if stereo
355
-
356
- const totalAudioTime = sampleNum / wavInfo.sampleRate * 1000; // in milliseconds
357
- const frameNum = Math.floor(sampleNum / HOP_SIZE);
358
-
359
- console.log(`Audio info: ${audioBuffer.length} bytes, ${sampleNum} samples, ${frameNum} frames, ${totalAudioTime.toFixed(2)}ms`);
360
-
361
- // If stereo, extract only the first channel
362
- let processedInput = inputBuf;
363
- if (wavInfo.channels > 1) {
364
- console.log(`Extracting mono from ${wavInfo.channels} channels...`);
365
- processedInput = new Int16Array(Math.floor(inputBuf.length / wavInfo.channels));
366
- for (let i = 0; i < processedInput.length; i++) {
367
- processedInput[i] = inputBuf[i * wavInfo.channels]; // Take first channel
368
- }
369
- }
370
-
371
- const outProbs = new Float32Array(frameNum);
372
- const outFlags = new Int32Array(frameNum);
373
-
374
- const processingTime = await processAudio(processedInput, frameNum, outProbs, outFlags);
375
-
376
- if (processingTime > 0) {
377
- printResults(processingTime, totalAudioTime, outFlags, frameNum);
378
- saveResults(outProbs, outFlags, frameNum, outputFile);
379
- }
380
-
381
- return 0;
382
- } catch (error) {
383
- console.error(`Error processing WAV file: ${error.message}`);
384
- return 1;
385
- }
386
- }
387
-
388
- async function runBenchmark() {
389
- console.log("=== Performance Benchmark ===\n");
390
-
391
- if (!createVADInstance()) return;
392
-
393
- const testData = new Int16Array(HOP_SIZE);
394
- for (let i = 0; i < HOP_SIZE; i++) {
395
- testData[i] = Math.sin(2 * Math.PI * 440 * i / 16000) * 8000;
396
- }
397
-
398
- const testCases = [100, 1000, 10000];
399
-
400
- for (const numFrames of testCases) {
401
- const audioPtr = vadModule._malloc(HOP_SIZE * 2);
402
- const probPtr = vadModule._malloc(4);
403
- const flagPtr = vadModule._malloc(4);
404
-
405
- vadModule.HEAP16.set(testData, audioPtr / 2);
406
-
407
- const startTime = getTimestamp();
408
-
409
- for (let i = 0; i < numFrames; i++) {
410
- vadModule._ten_vad_process(vadHandle, audioPtr, HOP_SIZE, probPtr, flagPtr);
411
- }
412
-
413
- const endTime = getTimestamp();
414
- const totalTime = endTime - startTime;
415
- const avgTime = totalTime / numFrames;
416
-
417
- // Calculate RTF (Real-time Factor)
418
- // Each frame represents 16ms of audio (HOP_SIZE=256 samples at 16kHz)
419
- const frameAudioTime = (HOP_SIZE / 16000) * 1000; // 16ms
420
- const totalAudioTime = numFrames * frameAudioTime;
421
- const rtf = totalTime / totalAudioTime;
422
-
423
- console.log(`${numFrames} frames: ${totalTime}ms total, ${avgTime.toFixed(3)}ms/frame, RTF: ${rtf.toFixed(3)}`);
424
-
425
- vadModule._free(audioPtr);
426
- vadModule._free(probPtr);
427
- vadModule._free(flagPtr);
428
- }
429
-
430
- destroyVADInstance();
431
- }
432
-
433
- // ============================================================================
434
- // MODULE INITIALIZATION
435
- // ============================================================================
436
-
437
- async function loadModule() {
438
- try {
439
- console.log("Loading WebAssembly module...");
440
-
441
- if (!fs.existsSync(WASM_JS_FILE)) {
442
- throw new Error(`ten_vad.js not found at ${WASM_JS_FILE}`);
443
- }
444
-
445
- if (!fs.existsSync(WASM_BINARY_FILE)) {
446
- throw new Error(`ten_vad.wasm not found at ${WASM_BINARY_FILE}`);
447
- }
448
-
449
- // Read and modify the module file for Node.js compatibility
450
- const wasmJsContent = fs.readFileSync(WASM_JS_FILE, 'utf8');
451
- const modifiedContent = wasmJsContent
452
- .replace(/import\.meta\.url/g, `"${path.resolve(WASM_JS_FILE)}"`)
453
- .replace(/export default createVADModule;/, 'module.exports = createVADModule;');
454
-
455
- // Write temporary file
456
- const tempPath = './ten_vad_temp.js';
457
- fs.writeFileSync(tempPath, modifiedContent);
458
-
459
- // Load WASM binary
460
- const wasmBinary = fs.readFileSync(WASM_BINARY_FILE);
461
-
462
- // Load module
463
- const createVADModule = require(path.resolve(tempPath));
464
- vadModule = await createVADModule({
465
- wasmBinary: wasmBinary,
466
- locateFile: (filePath) => filePath.endsWith('.wasm') ? WASM_BINARY_FILE : filePath,
467
- noInitialRun: false,
468
- noExitRuntime: true
469
- });
470
-
471
- // Cleanup
472
- fs.unlinkSync(tempPath);
473
-
474
- // Add missing helper functions
475
- addHelperFunctions();
476
-
477
- console.log(`Module loaded successfully. Version: ${getVADVersion()}\n`);
478
- return true;
479
-
480
- } catch (error) {
481
- console.error(`Failed to load module: ${error.message}`);
482
- return false;
483
- }
484
- }
485
-
486
- // ============================================================================
487
- // MAIN FUNCTION
488
- // ============================================================================
489
-
490
- async function main() {
491
- const args = process.argv.slice(2);
492
-
493
- // Initialize module
494
- if (!await loadModule()) {
495
- process.exit(1);
496
- }
497
-
498
- try {
499
- if (args.length >= 2) {
500
- // Test with WAV file
501
- const [inputFile, outputFile] = args;
502
- console.log(`Input: ${inputFile}, Output: ${outputFile}\n`);
503
- await testWithWAV(inputFile, outputFile);
504
- } else {
505
- // Test with generated array
506
- await testWithArray();
507
- }
508
- await runBenchmark();
509
- return 0;
510
- } catch (error) {
511
- console.error(`Test failed: ${error.message}`);
512
- return 1;
513
- }
514
- }
515
-
516
- // ============================================================================
517
- // EXECUTION
518
- // ============================================================================
519
-
520
- if (require.main === module) {
521
- main().then(exitCode => {
522
- process.exit(exitCode);
523
- }).catch(error => {
524
- console.error(`Fatal error: ${error.message}`);
525
- process.exit(1);
526
- });
527
- }
528
-
529
- module.exports = { main, testWithArray, testWithWAV, runBenchmark };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples_onnx/CMakeLists.txt DELETED
@@ -1,24 +0,0 @@
1
- #
2
- # Copyright © 2025 Agora
3
- # This file is part of TEN Framework, an open source project.
4
- # Licensed under the Apache License, Version 2.0, with certain conditions.
5
- # Refer to the "LICENSE" file in the root directory for more information.
6
- #
7
- cmake_minimum_required(VERSION 3.10)
8
- get_filename_component(ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../ ABSOLUTE)
9
-
10
- project(ten_vad)
11
-
12
- set(CMAKE_BUILD_TYPE Release)
13
- add_compile_options(-Wno-write-strings -Wno-unused-result)
14
- include_directories(${ROOT}/src)
15
- include_directories(${ROOT}/include)
16
- include_directories(${ORT_ROOT}/include)
17
- file(GLOB LIBRARY_SOURCES "${ROOT}/src/*.cc" "${ROOT}/src/*.c")
18
- add_library(ten_vad SHARED ${LIBRARY_SOURCES})
19
- link_directories(${ORT_ROOT}/lib)
20
- target_link_libraries(ten_vad "${ORT_ROOT}/lib/libonnxruntime.so")
21
-
22
- set(EXECUTABLE_SOURCES ${ROOT}/examples/main.c)
23
- add_executable(ten_vad_demo ${EXECUTABLE_SOURCES})
24
- target_link_libraries(ten_vad_demo ten_vad)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples_onnx/build-and-deploy-linux.sh DELETED
@@ -1,37 +0,0 @@
1
- #!/bin/bash
2
- #
3
- # Copyright © 2025 Agora
4
- # This file is part of TEN Framework, an open source project.
5
- # Licensed under the Apache License, Version 2.0, with certain conditions.
6
- # Refer to the "LICENSE" file in the root directory for more information.
7
- #
8
- set -euo pipefail
9
-
10
- if [[ "$#" -lt 2 || "$1" != "--ort-path" ]]; then
11
- echo "usage: $0 --ort-path <path_to_onnxruntime>" >&2
12
- exit 1
13
- fi
14
-
15
- ORT_ROOT="$2"
16
- shift 2
17
-
18
- if [[ ! -d "$ORT_ROOT" || ! -d "$ORT_ROOT/lib" || ! -d "$ORT_ROOT/include" ]]; then
19
- echo "invalid onnxruntime library path: $ORT_ROOT" >&2
20
- exit 1
21
- fi
22
-
23
- arch=x64
24
- build_dir=build-linux/$arch
25
- rm -rf $build_dir
26
- mkdir -p $build_dir
27
- cd $build_dir
28
-
29
- # Step 1: Build the demo
30
- cmake ../../ -DORT_ROOT="$ORT_ROOT"
31
- cmake --build . --config Release
32
-
33
- # Step 2: Run the demo
34
- ln -s ../../../src/onnx_model/
35
- ./ten_vad_demo ../../../examples/s0724-s0730.wav out.txt
36
-
37
- cd ../../
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
include/ten_vad.h CHANGED
@@ -1,8 +1,7 @@
1
  //
2
- // Copyright © 2025 Agora
3
  // This file is part of TEN Framework, an open source project.
4
- // Licensed under the Apache License, Version 2.0, with certain conditions.
5
- // Refer to the "LICENSE" file in the root directory for more information.
6
  //
7
  #ifndef TEN_VAD_H
8
  #define TEN_VAD_H
@@ -84,4 +83,4 @@ extern "C"
84
  }
85
  #endif
86
 
87
- #endif /* TEN_VAD_H */
 
1
  //
 
2
  // This file is part of TEN Framework, an open source project.
3
+ // Licensed under the Apache License, Version 2.0.
4
+ // See the LICENSE file for more information.
5
  //
6
  #ifndef TEN_VAD_H
7
  #define TEN_VAD_H
 
83
  }
84
  #endif
85
 
86
+ #endif /* TEN_VAD_H */
include/ten_vad.py CHANGED
@@ -1,74 +1,36 @@
1
  #
2
- # Copyright © 2025 Agora
3
- # This file is part of TEN Framework, an open source project.
4
- # Licensed under the Apache License, Version 2.0, with certain conditions.
5
- # Refer to the "LICENSE" file in the root directory for more information.
6
  #
7
  from ctypes import c_int, c_int32, c_float, c_size_t, CDLL, c_void_p, POINTER
8
  import numpy as np
9
  import os
10
- import platform
11
 
12
  class TenVad:
13
  def __init__(self, hop_size: int = 256, threshold: float = 0.5):
14
  self.hop_size = hop_size
15
  self.threshold = threshold
16
- if platform.system() == "Linux" and platform.machine() == "x86_64":
17
- git_path = os.path.join(
18
  os.path.dirname(os.path.relpath(__file__)),
19
- "../lib/Linux/x64/libten_vad.so"
20
  )
21
- if os.path.exists(git_path):
22
- self.vad_library = CDLL(git_path)
23
- else:
24
- pip_path = os.path.join(
25
- os.path.dirname(os.path.relpath(__file__)),
26
- "./ten_vad_library/libten_vad.so"
27
  )
28
- self.vad_library = CDLL(pip_path)
29
-
30
- elif platform.system() == "Darwin":
31
- git_path = os.path.join(
32
- os.path.dirname(os.path.relpath(__file__)),
33
- "../lib/macOS/ten_vad.framework/Versions/A/ten_vad"
34
  )
35
- if os.path.exists(git_path):
36
- self.vad_library = CDLL(git_path)
37
- else:
38
- pip_path = os.path.join(
39
- os.path.dirname(os.path.relpath(__file__)),
40
- "./ten_vad_library/libten_vad"
41
- )
42
- self.vad_library = CDLL(pip_path)
43
- elif platform.system().upper() == 'WINDOWS':
44
- if platform.machine().upper() in ['X64', 'X86_64', 'AMD64']:
45
- git_path = os.path.join(
46
- os.path.dirname(os.path.realpath(__file__)),
47
- "../lib/Windows/x64/ten_vad.dll"
48
- )
49
- if os.path.exists(git_path):
50
- self.vad_library = CDLL(git_path)
51
- else:
52
- pip_path = os.path.join(
53
- os.path.dirname(os.path.realpath(__file__)),
54
- "./ten_vad_library/ten_vad.dll"
55
  )
56
- self.vad_library = CDLL(pip_path)
57
- else:
58
- git_path = os.path.join(
59
- os.path.dirname(os.path.realpath(__file__)),
60
- "../lib/Windows/x86/ten_vad.dll"
61
  )
62
- if os.path.exists(git_path):
63
- self.vad_library = CDLL(git_path)
64
- else:
65
- pip_path = os.path.join(
66
- os.path.dirname(os.path.realpath(__file__)),
67
- "./ten_vad_library/ten_vad.dll"
68
- )
69
- self.vad_library = CDLL(pip_path)
70
- else:
71
- raise NotImplementedError(f"Unsupported platform: {platform.system()} {platform.machine()}")
72
  self.vad_handler = c_void_p(0)
73
  self.out_probability = c_float()
74
  self.out_flags = c_int32()
 
1
  #
2
+ # This file is part of TEN Framework, an open source project.
3
+ # Licensed under the Apache License, Version 2.0.
4
+ # See the LICENSE file for more information.
 
5
  #
6
  from ctypes import c_int, c_int32, c_float, c_size_t, CDLL, c_void_p, POINTER
7
  import numpy as np
8
  import os
 
9
 
10
  class TenVad:
11
  def __init__(self, hop_size: int = 256, threshold: float = 0.5):
12
  self.hop_size = hop_size
13
  self.threshold = threshold
14
+ if os.path.exists(
15
+ os.path.join(
16
  os.path.dirname(os.path.relpath(__file__)),
17
+ "../lib/Linux/x64/libten_vad.so",
18
  )
19
+ ):
20
+ self.vad_library = CDLL(
21
+ os.path.join(
22
+ os.path.dirname(os.path.relpath(__file__)),
23
+ "../lib/Linux/x64/libten_vad.so",
 
24
  )
 
 
 
 
 
 
25
  )
26
+ else:
27
+ self.vad_library = CDLL(
28
+ os.path.join(
29
+ os.path.dirname(
30
+ os.path.relpath(__file__)),
31
+ "./ten_vad_library/libten_vad.so",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  )
 
 
 
 
 
33
  )
 
 
 
 
 
 
 
 
 
 
34
  self.vad_handler = c_void_p(0)
35
  self.out_probability = c_float()
36
  self.out_flags = c_int32()
lib/Web/ten_vad.d.ts DELETED
@@ -1,111 +0,0 @@
1
- /**
2
- * This file is part of TEN Framework, an open source project.
3
- * Licensed under the Apache License, Version 2.0.
4
- * See the LICENSE file for more information.
5
- *
6
- * TEN VAD (Voice Activity Detection) WebAssembly Module
7
- * TypeScript type definitions
8
- */
9
-
10
- export interface TenVADModule {
11
- /**
12
- * Create and initialize a VAD instance
13
- * @param handlePtr Pointer to store the VAD handle
14
- * @param hopSize Number of samples between consecutive analysis frames (e.g., 256)
15
- * @param threshold VAD detection threshold [0.0, 1.0]
16
- * @returns 0 on success, -1 on error
17
- */
18
- _ten_vad_create(handlePtr: number, hopSize: number, threshold: number): number;
19
-
20
- /**
21
- * Process audio frame for voice activity detection
22
- * @param handle Valid VAD handle from ten_vad_create
23
- * @param audioDataPtr Pointer to int16 audio samples array
24
- * @param audioDataLength Length of audio data (should equal hopSize)
25
- * @param outProbabilityPtr Pointer to output probability [0.0, 1.0]
26
- * @param outFlagPtr Pointer to output flag (0: no voice, 1: voice detected)
27
- * @returns 0 on success, -1 on error
28
- */
29
- _ten_vad_process(
30
- handle: number,
31
- audioDataPtr: number,
32
- audioDataLength: number,
33
- outProbabilityPtr: number,
34
- outFlagPtr: number
35
- ): number;
36
-
37
- /**
38
- * Destroy VAD instance and release resources
39
- * @param handlePtr Pointer to the VAD handle
40
- * @returns 0 on success, -1 on error
41
- */
42
- _ten_vad_destroy(handlePtr: number): number;
43
-
44
- /**
45
- * Get library version string
46
- * @returns Version string pointer
47
- */
48
- _ten_vad_get_version(): number;
49
-
50
- // WebAssembly Memory Management
51
- _malloc(size: number): number;
52
- _free(ptr: number): void;
53
-
54
- // Memory access helpers
55
- HEAP16: Int16Array;
56
- HEAPF32: Float32Array;
57
- HEAP32: Int32Array;
58
- HEAPU8: Uint8Array;
59
-
60
- // Value access methods
61
- getValue(ptr: number, type: 'i8' | 'i16' | 'i32' | 'float' | 'double'): number;
62
- setValue(ptr: number, value: number, type: 'i8' | 'i16' | 'i32' | 'float' | 'double'): void;
63
-
64
- // String utilities
65
- UTF8ToString(ptr: number): string;
66
- lengthBytesUTF8(str: string): number;
67
- stringToUTF8(str: string, outPtr: number, maxBytesToWrite: number): void;
68
- }
69
-
70
- /**
71
- * High-level TypeScript wrapper for TEN VAD
72
- */
73
- export class TenVAD {
74
- private module: TenVADModule;
75
- private handle: number | null;
76
- private hopSize: number;
77
-
78
- constructor(module: TenVADModule, hopSize: number, threshold: number);
79
-
80
- /**
81
- * Process audio samples for voice activity detection
82
- * @param audioData Int16Array of audio samples (length must equal hopSize)
83
- * @returns Object with probability and voice detection flag
84
- */
85
- process(audioData: Int16Array): {
86
- probability: number;
87
- isVoice: boolean;
88
- } | null;
89
-
90
- /**
91
- * Get library version
92
- */
93
- getVersion(): string;
94
-
95
- /**
96
- * Destroy VAD instance
97
- */
98
- destroy(): void;
99
-
100
- /**
101
- * Check if VAD instance is valid
102
- */
103
- isValid(): boolean;
104
- }
105
-
106
- /**
107
- * Create TEN VAD WebAssembly module
108
- */
109
- declare function createVADModule(): Promise<TenVADModule>;
110
-
111
- export default createVADModule;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lib/Web/ten_vad.js DELETED
@@ -1,30 +0,0 @@
1
-
2
- var createVADModule = (() => {
3
- var _scriptDir = import.meta.url;
4
-
5
- return (
6
- function(createVADModule) {
7
- createVADModule = createVADModule || {};
8
-
9
-
10
- var a;a||(a=typeof createVADModule !== 'undefined' ? createVADModule : {});var k,l;a.ready=new Promise(function(b,c){k=b;l=c});var p=Object.assign({},a),r="object"==typeof window,u="function"==typeof importScripts,v="",w;
11
- if(r||u)u?v=self.location.href:"undefined"!=typeof document&&document.currentScript&&(v=document.currentScript.src),_scriptDir&&(v=_scriptDir),0!==v.indexOf("blob:")?v=v.substr(0,v.replace(/[?#].*/,"").lastIndexOf("/")+1):v="",u&&(w=b=>{var c=new XMLHttpRequest;c.open("GET",b,!1);c.responseType="arraybuffer";c.send(null);return new Uint8Array(c.response)});var aa=a.print||console.log.bind(console),x=a.printErr||console.warn.bind(console);Object.assign(a,p);p=null;var y;a.wasmBinary&&(y=a.wasmBinary);
12
- var noExitRuntime=a.noExitRuntime||!0;"object"!=typeof WebAssembly&&z("no native wasm support detected");var A,B=!1,C="undefined"!=typeof TextDecoder?new TextDecoder("utf8"):void 0,D,E,F;function J(){var b=A.buffer;D=b;a.HEAP8=new Int8Array(b);a.HEAP16=new Int16Array(b);a.HEAP32=new Int32Array(b);a.HEAPU8=E=new Uint8Array(b);a.HEAPU16=new Uint16Array(b);a.HEAPU32=F=new Uint32Array(b);a.HEAPF32=new Float32Array(b);a.HEAPF64=new Float64Array(b)}var K=[],L=[],M=[];
13
- function ba(){var b=a.preRun.shift();K.unshift(b)}var N=0,O=null,P=null;function z(b){if(a.onAbort)a.onAbort(b);b="Aborted("+b+")";x(b);B=!0;b=new WebAssembly.RuntimeError(b+". Build with -sASSERTIONS for more info.");l(b);throw b;}function Q(){return R.startsWith("data:application/octet-stream;base64,")}var R;if(a.locateFile){if(R="ten_vad.wasm",!Q()){var S=R;R=a.locateFile?a.locateFile(S,v):v+S}}else R=(new URL("ten_vad.wasm",import.meta.url)).href;
14
- function T(){var b=R;try{if(b==R&&y)return new Uint8Array(y);if(w)return w(b);throw"both async and sync fetching of the wasm failed";}catch(c){z(c)}}function ca(){return y||!r&&!u||"function"!=typeof fetch?Promise.resolve().then(function(){return T()}):fetch(R,{credentials:"same-origin"}).then(function(b){if(!b.ok)throw"failed to load wasm binary file at '"+R+"'";return b.arrayBuffer()}).catch(function(){return T()})}function U(b){for(;0<b.length;)b.shift()(a)}
15
- var da=[null,[],[]],ea={a:function(){z("")},f:function(b,c,m){E.copyWithin(b,c,c+m)},c:function(b){var c=E.length;b>>>=0;if(2147483648<b)return!1;for(var m=1;4>=m;m*=2){var h=c*(1+.2/m);h=Math.min(h,b+100663296);var d=Math;h=Math.max(b,h);d=d.min.call(d,2147483648,h+(65536-h%65536)%65536);a:{try{A.grow(d-D.byteLength+65535>>>16);J();var e=1;break a}catch(W){}e=void 0}if(e)return!0}return!1},e:function(){return 52},b:function(){return 70},d:function(b,c,m,h){for(var d=0,e=0;e<m;e++){var W=F[c>>2],
16
- X=F[c+4>>2];c+=8;for(var G=0;G<X;G++){var f=E[W+G],H=da[b];if(0===f||10===f){f=H;for(var n=0,q=n+NaN,t=n;f[t]&&!(t>=q);)++t;if(16<t-n&&f.buffer&&C)f=C.decode(f.subarray(n,t));else{for(q="";n<t;){var g=f[n++];if(g&128){var I=f[n++]&63;if(192==(g&224))q+=String.fromCharCode((g&31)<<6|I);else{var Y=f[n++]&63;g=224==(g&240)?(g&15)<<12|I<<6|Y:(g&7)<<18|I<<12|Y<<6|f[n++]&63;65536>g?q+=String.fromCharCode(g):(g-=65536,q+=String.fromCharCode(55296|g>>10,56320|g&1023))}}else q+=String.fromCharCode(g)}f=q}(1===
17
- b?aa:x)(f);H.length=0}else H.push(f)}d+=X}F[h>>2]=d;return 0}};
18
- (function(){function b(d){a.asm=d.exports;A=a.asm.g;J();L.unshift(a.asm.h);N--;a.monitorRunDependencies&&a.monitorRunDependencies(N);0==N&&(null!==O&&(clearInterval(O),O=null),P&&(d=P,P=null,d()))}function c(d){b(d.instance)}function m(d){return ca().then(function(e){return WebAssembly.instantiate(e,h)}).then(function(e){return e}).then(d,function(e){x("failed to asynchronously prepare wasm: "+e);z(e)})}var h={a:ea};N++;a.monitorRunDependencies&&a.monitorRunDependencies(N);if(a.instantiateWasm)try{return a.instantiateWasm(h,
19
- b)}catch(d){x("Module.instantiateWasm callback failed with error: "+d),l(d)}(function(){return y||"function"!=typeof WebAssembly.instantiateStreaming||Q()||"function"!=typeof fetch?m(c):fetch(R,{credentials:"same-origin"}).then(function(d){return WebAssembly.instantiateStreaming(d,h).then(c,function(e){x("wasm streaming compile failed: "+e);x("falling back to ArrayBuffer instantiation");return m(c)})})})().catch(l);return{}})();
20
- a.___wasm_call_ctors=function(){return(a.___wasm_call_ctors=a.asm.h).apply(null,arguments)};a._malloc=function(){return(a._malloc=a.asm.i).apply(null,arguments)};a._free=function(){return(a._free=a.asm.j).apply(null,arguments)};a._ten_vad_create=function(){return(a._ten_vad_create=a.asm.k).apply(null,arguments)};a._ten_vad_process=function(){return(a._ten_vad_process=a.asm.l).apply(null,arguments)};a._ten_vad_destroy=function(){return(a._ten_vad_destroy=a.asm.m).apply(null,arguments)};
21
- a._ten_vad_get_version=function(){return(a._ten_vad_get_version=a.asm.n).apply(null,arguments)};var V;P=function fa(){V||Z();V||(P=fa)};
22
- function Z(){function b(){if(!V&&(V=!0,a.calledRun=!0,!B)){U(L);k(a);if(a.onRuntimeInitialized)a.onRuntimeInitialized();if(a.postRun)for("function"==typeof a.postRun&&(a.postRun=[a.postRun]);a.postRun.length;){var c=a.postRun.shift();M.unshift(c)}U(M)}}if(!(0<N)){if(a.preRun)for("function"==typeof a.preRun&&(a.preRun=[a.preRun]);a.preRun.length;)ba();U(K);0<N||(a.setStatus?(a.setStatus("Running..."),setTimeout(function(){setTimeout(function(){a.setStatus("")},1);b()},1)):b())}}
23
- if(a.preInit)for("function"==typeof a.preInit&&(a.preInit=[a.preInit]);0<a.preInit.length;)a.preInit.pop()();Z();
24
-
25
-
26
- return createVADModule.ready
27
- }
28
- );
29
- })();
30
- export default createVADModule;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lib/Windows/x64/ten_vad.lib CHANGED
Binary files a/lib/Windows/x64/ten_vad.lib and b/lib/Windows/x64/ten_vad.lib differ
 
lib/Windows/x86/ten_vad.lib CHANGED
Binary files a/lib/Windows/x86/ten_vad.lib and b/lib/Windows/x86/ten_vad.lib differ
 
lib/iOS/ten_vad.framework/Headers/ten_vad.h CHANGED
@@ -1,90 +1,3 @@
1
- /*
2
- * @file ten_vad.h
3
- * @brief Ten Voice Activity Detection (ten_vad) C API
4
- * Version: 1.0.0
5
- *
6
- * Provides functions to create, process, and destroy a VAD instance.
7
- */
8
- #ifndef TEN_VAD_H
9
- #define TEN_VAD_H
10
-
11
- #if defined(__APPLE__) || defined(__ANDROID__) || defined(__linux__)
12
- #define TENVAD_API __attribute__((visibility("default")))
13
- #elif defined(_WIN32) || defined(__CYGWIN__)
14
- /**
15
- * @def TENVAD_API
16
- * @brief Export/import macro for ten_vad shared library symbols.
17
- */
18
- #ifdef TENVAD_EXPORTS
19
- #define TENVAD_API __declspec(dllexport)
20
- #else
21
- #define TENVAD_API __declspec(dllimport)
22
- #endif
23
- #else
24
- #define TENVAD_API
25
- #endif
26
-
27
- #include <stddef.h> /* size_t */
28
- #include <stdint.h> /* int16_t */
29
-
30
- #ifdef __cplusplus
31
- extern "C"
32
- {
33
- #endif
34
-
35
- /**
36
- * @typedef ten_vad_handle
37
- * @brief Opaque handle for ten_vad instance.
38
- */
39
- typedef void *ten_vad_handle_t;
40
-
41
- /**
42
- * @brief Create and initialize a ten_vad instance.
43
- *
44
- * @param[out] handle Pointer to receive the vad handle.
45
- * @param[in] hop_size The number of samples between the start points of
46
- * two consecutive analysis frames. (e.g., 256).
47
- * @param[in] threshold VAD detection threshold ranging from [0.0, 1.0]
48
- * (default: 0.5).
49
- * @return 0 on success, or -1 error occurs.
50
- */
51
- TENVAD_API int ten_vad_create(ten_vad_handle_t *handle, size_t hop_size,
52
- float threshold);
53
-
54
- /**
55
- * @brief Process one audio frame for voice activity detection.
56
- * Must call ten_vad_init() before calling this, and ten_vad_destroy() when done.
57
- *
58
- * @param[in] handle Valid VAD handle returned by ten_vad_create().
59
- * @param[in] audio_data Pointer to an array of int16_t samples,
60
- * buffer length must equal the hop size specified at ten_vad_create.
61
- * @param[in] audio_data_length size of audio_data buffer, here should be equal to hop_size.
62
- * @param[out] out_probability Pointer to a float (size 1) that receives the
63
- * voice activity probability in the range [0.0, 1.0].
64
- * @param[out] out_flag Pointer to an int (size 1) that receives the
65
- * detection result: 0 = no voice, 1 = voice detected.
66
- * @return 0 on success, or -1 error occurs.
67
- */
68
- TENVAD_API int ten_vad_process(ten_vad_handle_t handle, const int16_t *audio_data, size_t audio_data_length,
69
- float *out_probability, int *out_flag);
70
-
71
- /**
72
- * @brief Destroy a ten_vad instance and release its resources.
73
- *
74
- * @param[in,out] handle Pointer to the ten_vad handle; set to NULL on return.
75
- * @return 0 on success, or -1 error occurs.
76
- */
77
- TENVAD_API int ten_vad_destroy(ten_vad_handle_t *handle);
78
-
79
- /**
80
- * @brief Get the ten_vad library version string.
81
- *
82
- * @return The version string (e.g., "1.0.0").
83
- */
84
- TENVAD_API const char *ten_vad_get_version(void);
85
-
86
- #ifdef __cplusplus
87
- }
88
- #endif
89
-
90
- #endif /* TEN_VAD_H */
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bbf0ab2d2ee30d9c170556efb9a7200a53725053cfa7c66a0dff79e7c9351e8
3
+ size 2885
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lib/iOS/ten_vad.framework/Info.plist CHANGED
Binary files a/lib/iOS/ten_vad.framework/Info.plist and b/lib/iOS/ten_vad.framework/Info.plist differ
 
lib/iOS/ten_vad.framework/Modules/module.modulemap CHANGED
@@ -1,5 +1,3 @@
1
- framework module ten_vad {
2
- umbrella header "ten_vad.h"
3
- export *
4
- module * { export * }
5
- }
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac22f2ff0291876b7d5069f957825d01abc06a3da84c9f4385154a8e99964096
3
+ size 115
 
 
lib/macOS/ten_vad.framework/Headers DELETED
@@ -1 +0,0 @@
1
- Versions/Current/Headers
 
 
lib/{Web/ten_vad.wasm → macOS/ten_vad.framework/Headers/ten_vad.h} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ec0b9640683987e15a4e54e4ce5642b2447c6e5d82b1be889b5099c75434fc3
3
- size 283349
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bbf0ab2d2ee30d9c170556efb9a7200a53725053cfa7c66a0dff79e7c9351e8
3
+ size 2885
lib/macOS/ten_vad.framework/Resources DELETED
@@ -1 +0,0 @@
1
- Versions/Current/Resources
 
 
src/onnx_model/ten-vad.onnx → lib/macOS/ten_vad.framework/Resources/Info.plist RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e10b98a0cab1c98e847fbdda14cb3d45a38336d47535a3f63a0fb6c4e0f4cdf4
3
- size 315449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5aa8df4f544b3143b819d6ffd5c21574c02884bf41cb2b7a8df45c7f10f75c3a
3
+ size 1216
lib/macOS/ten_vad.framework/Versions/A/Headers/ten_vad.h CHANGED
@@ -1,90 +1,3 @@
1
- /*
2
- * @file ten_vad.h
3
- * @brief Ten Voice Activity Detection (ten_vad) C API
4
- * Version: 1.0.0
5
- *
6
- * Provides functions to create, process, and destroy a VAD instance.
7
- */
8
- #ifndef TEN_VAD_H
9
- #define TEN_VAD_H
10
-
11
- #if defined(__APPLE__) || defined(__ANDROID__) || defined(__linux__)
12
- #define TENVAD_API __attribute__((visibility("default")))
13
- #elif defined(_WIN32) || defined(__CYGWIN__)
14
- /**
15
- * @def TENVAD_API
16
- * @brief Export/import macro for ten_vad shared library symbols.
17
- */
18
- #ifdef TENVAD_EXPORTS
19
- #define TENVAD_API __declspec(dllexport)
20
- #else
21
- #define TENVAD_API __declspec(dllimport)
22
- #endif
23
- #else
24
- #define TENVAD_API
25
- #endif
26
-
27
- #include <stddef.h> /* size_t */
28
- #include <stdint.h> /* int16_t */
29
-
30
- #ifdef __cplusplus
31
- extern "C"
32
- {
33
- #endif
34
-
35
- /**
36
- * @typedef ten_vad_handle
37
- * @brief Opaque handle for ten_vad instance.
38
- */
39
- typedef void *ten_vad_handle_t;
40
-
41
- /**
42
- * @brief Create and initialize a ten_vad instance.
43
- *
44
- * @param[out] handle Pointer to receive the vad handle.
45
- * @param[in] hop_size The number of samples between the start points of
46
- * two consecutive analysis frames. (e.g., 256).
47
- * @param[in] threshold VAD detection threshold ranging from [0.0, 1.0]
48
- * (default: 0.5).
49
- * @return 0 on success, or -1 error occurs.
50
- */
51
- TENVAD_API int ten_vad_create(ten_vad_handle_t *handle, size_t hop_size,
52
- float threshold);
53
-
54
- /**
55
- * @brief Process one audio frame for voice activity detection.
56
- * Must call ten_vad_init() before calling this, and ten_vad_destroy() when done.
57
- *
58
- * @param[in] handle Valid VAD handle returned by ten_vad_create().
59
- * @param[in] audio_data Pointer to an array of int16_t samples,
60
- * buffer length must equal the hop size specified at ten_vad_create.
61
- * @param[in] audio_data_length size of audio_data buffer, here should be equal to hop_size.
62
- * @param[out] out_probability Pointer to a float (size 1) that receives the
63
- * voice activity probability in the range [0.0, 1.0].
64
- * @param[out] out_flag Pointer to an int (size 1) that receives the
65
- * detection result: 0 = no voice, 1 = voice detected.
66
- * @return 0 on success, or -1 error occurs.
67
- */
68
- TENVAD_API int ten_vad_process(ten_vad_handle_t handle, const int16_t *audio_data, size_t audio_data_length,
69
- float *out_probability, int *out_flag);
70
-
71
- /**
72
- * @brief Destroy a ten_vad instance and release its resources.
73
- *
74
- * @param[in,out] handle Pointer to the ten_vad handle; set to NULL on return.
75
- * @return 0 on success, or -1 error occurs.
76
- */
77
- TENVAD_API int ten_vad_destroy(ten_vad_handle_t *handle);
78
-
79
- /**
80
- * @brief Get the ten_vad library version string.
81
- *
82
- * @return The version string (e.g., "1.0.0").
83
- */
84
- TENVAD_API const char *ten_vad_get_version(void);
85
-
86
- #ifdef __cplusplus
87
- }
88
- #endif
89
-
90
- #endif /* TEN_VAD_H */
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bbf0ab2d2ee30d9c170556efb9a7200a53725053cfa7c66a0dff79e7c9351e8
3
+ size 2885
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lib/macOS/ten_vad.framework/Versions/A/Resources/Info.plist CHANGED
@@ -1,44 +1,3 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3
- <plist version="1.0">
4
- <dict>
5
- <key>BuildMachineOSBuild</key>
6
- <string>23D60</string>
7
- <key>CFBundleDevelopmentRegion</key>
8
- <string>English</string>
9
- <key>CFBundleExecutable</key>
10
- <string>ten_vad</string>
11
- <key>CFBundleIdentifier</key>
12
- <string>com.yourcompany.ten_vad</string>
13
- <key>CFBundleInfoDictionaryVersion</key>
14
- <string>6.0</string>
15
- <key>CFBundlePackageType</key>
16
- <string>FMWK</string>
17
- <key>CFBundleSignature</key>
18
- <string>????</string>
19
- <key>CFBundleSupportedPlatforms</key>
20
- <array>
21
- <string>MacOSX</string>
22
- </array>
23
- <key>CSResourcesFileMapped</key>
24
- <true/>
25
- <key>DTCompiler</key>
26
- <string>com.apple.compilers.llvm.clang.1_0</string>
27
- <key>DTPlatformBuild</key>
28
- <string></string>
29
- <key>DTPlatformName</key>
30
- <string>macosx</string>
31
- <key>DTPlatformVersion</key>
32
- <string>14.2</string>
33
- <key>DTSDKBuild</key>
34
- <string>23C53</string>
35
- <key>DTSDKName</key>
36
- <string>macosx14.2</string>
37
- <key>DTXcode</key>
38
- <string>1520</string>
39
- <key>DTXcodeBuild</key>
40
- <string>15C500b</string>
41
- <key>LSMinimumSystemVersion</key>
42
- <string>10.10</string>
43
- </dict>
44
- </plist>
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5aa8df4f544b3143b819d6ffd5c21574c02884bf41cb2b7a8df45c7f10f75c3a
3
+ size 1216
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lib/macOS/ten_vad.framework/Versions/Current DELETED
@@ -1 +0,0 @@
1
- A
 
 
lib/macOS/ten_vad.framework/Versions/Current/Headers/ten_vad.h ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bbf0ab2d2ee30d9c170556efb9a7200a53725053cfa7c66a0dff79e7c9351e8
3
+ size 2885
lib/macOS/ten_vad.framework/Versions/Current/Resources/Info.plist ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5aa8df4f544b3143b819d6ffd5c21574c02884bf41cb2b7a8df45c7f10f75c3a
3
+ size 1216
lib/macOS/ten_vad.framework/Versions/Current/ten_vad ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81b2de13710670bb94fef315ab50fedc903a21c04c4290c6c2ac28d8b42e715a
3
+ size 744600
lib/macOS/ten_vad.framework/ten_vad DELETED
@@ -1 +0,0 @@
1
- Versions/Current/ten_vad
 
 
lib/macOS/ten_vad.framework/ten_vad ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81b2de13710670bb94fef315ab50fedc903a21c04c4290c6c2ac28d8b42e715a
3
+ size 744600
setup.py CHANGED
@@ -1,11 +1,5 @@
1
- #
2
- # Copyright © 2025 Agora
3
- # This file is part of TEN Framework, an open source project.
4
- # Licensed under the Apache License, Version 2.0, with certain conditions.
5
- # Refer to the "LICENSE" file in the root directory for more information.
6
- #
7
  from setuptools import setup
8
- import os, shutil, platform
9
  from setuptools.command.install import install
10
 
11
  class custom_install_command(install):
@@ -13,25 +7,8 @@ class custom_install_command(install):
13
  install.run(self)
14
  target_dir = os.path.join(self.install_lib, "ten_vad_library")
15
  os.makedirs(target_dir, exist_ok=True)
16
-
17
- if platform.system() == "Linux" and platform.machine() == "x86_64":
18
- shutil.copy("lib/Linux/x64/libten_vad.so", target_dir)
19
- print(f"Linux x64 library installed to: {target_dir}")
20
- elif platform.system() == "Darwin":
21
- shutil.copy("lib/macOS/ten_vad.framework/Versions/A/ten_vad",
22
- os.path.join(target_dir, "libten_vad"))
23
- print(f"macOS library installed to: {target_dir}")
24
- elif platform.system().upper() == 'WINDOWS':
25
- if platform.machine().upper() in ['X64', 'X86_64', 'AMD64']:
26
- shutil.copy("lib/Windows/x64/ten_vad.dll",
27
- os.path.join(target_dir, "ten_vad.dll"))
28
- print(f"Windows x64 library installed to: {target_dir}")
29
- else:
30
- shutil.copy("lib/Windows/x86/ten_vad.dll",
31
- os.path.join(target_dir, "ten_vad.dll"))
32
- print(f"Windows x86 library installed to: {target_dir}")
33
- else:
34
- raise NotImplementedError(f"Unsupported platform: {platform.system()} {platform.machine()}")
35
 
36
  root_dir = os.path.dirname(os.path.abspath(__file__))
37
  shutil.copy(f"{root_dir}/include/ten_vad.py", f"{root_dir}/ten_vad.py")
 
 
 
 
 
 
 
1
  from setuptools import setup
2
+ import os, shutil
3
  from setuptools.command.install import install
4
 
5
  class custom_install_command(install):
 
7
  install.run(self)
8
  target_dir = os.path.join(self.install_lib, "ten_vad_library")
9
  os.makedirs(target_dir, exist_ok=True)
10
+ shutil.copy("lib/Linux/x64/libten_vad.so", target_dir)
11
+ print(f"Files installed to: {target_dir}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  root_dir = os.path.dirname(os.path.abspath(__file__))
14
  shutil.copy(f"{root_dir}/include/ten_vad.py", f"{root_dir}/ten_vad.py")
src/aed.cc DELETED
@@ -1,993 +0,0 @@
1
- //
2
- // Copyright © 2025 Agora
3
- // This file is part of TEN Framework, an open source project.
4
- // Licensed under the Apache License, Version 2.0, with certain conditions.
5
- // Refer to the "LICENSE" file in the root directory for more information.
6
- //
7
- #include <string.h>
8
- #include <stdlib.h>
9
- #include <algorithm>
10
- #include <math.h>
11
- #include "aed.h"
12
- #include "aed_st.h"
13
- #include "coeff.h"
14
- #include "pitch_est.h"
15
- #include "stft.h"
16
- #include <assert.h>
17
-
18
- #define AUP_AED_ALIGN8(o) (((o) + 7) & (~7))
19
- #define AUP_AED_MAX(x, y) (((x) > (y)) ? (x) : (y))
20
- #define AUP_AED_MIN(x, y) (((x) > (y)) ? (y) : (x))
21
- #define AUP_AED_EPS (1e-20f)
22
-
23
- /// ///////////////////////////////////////////////////////////////////////
24
- /// Internal Utils
25
- /// ///////////////////////////////////////////////////////////////////////
26
-
27
- AUP_MODULE_AIVAD::AUP_MODULE_AIVAD(char* onnx_path) {
28
- ort_api = OrtGetApiBase()->GetApi(ORT_API_VERSION);
29
- OrtStatus* status =
30
- ort_api->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "TEN-VAD", &ort_env);
31
- if (status) {
32
- printf("Failed to create env: %s\n", ort_api->GetErrorMessage(status));
33
- ort_api->ReleaseStatus(status);
34
- ort_api->ReleaseEnv(ort_env);
35
- ort_env = NULL;
36
- return;
37
- }
38
-
39
- OrtSessionOptions* session_options;
40
- ort_api->CreateSessionOptions(&session_options);
41
- ort_api->SetIntraOpNumThreads(session_options, 1);
42
- status =
43
- ort_api->CreateSession(ort_env, onnx_path, session_options, &ort_session);
44
- ort_api->ReleaseSessionOptions(session_options);
45
- if (status) {
46
- printf("Failed to create ort_session: %s\n",
47
- ort_api->GetErrorMessage(status));
48
- ort_api->ReleaseStatus(status);
49
- ort_api->ReleaseEnv(ort_env);
50
- ort_env = NULL;
51
- return;
52
- }
53
-
54
- ort_api->GetAllocatorWithDefaultOptions(&ort_allocator);
55
- size_t num_inputs;
56
- ort_api->SessionGetInputCount(ort_session, &num_inputs);
57
- assert(num_inputs == AUP_AED_MODEL_IO_NUM);
58
- for (size_t i = 0; i < num_inputs; i++) {
59
- char* input_name;
60
- ort_api->SessionGetInputName(ort_session, i, ort_allocator, &input_name);
61
- strncpy(input_names_buf[i], input_name, sizeof(input_names_buf[i]));
62
- input_names[i] = input_names_buf[i];
63
- ort_api->AllocatorFree(ort_allocator, input_name);
64
- }
65
-
66
- size_t num_outputs;
67
- ort_api->SessionGetOutputCount(ort_session, &num_outputs);
68
- assert(num_outputs == AUP_AED_MODEL_IO_NUM);
69
- for (size_t i = 0; i < num_outputs; i++) {
70
- char* output_name;
71
- ort_api->SessionGetOutputName(ort_session, i, ort_allocator, &output_name);
72
- strncpy(output_names_buf[i], output_name, sizeof(output_names_buf[i]));
73
- output_names[i] = output_names_buf[i];
74
- ort_api->AllocatorFree(ort_allocator, output_name);
75
- }
76
-
77
- OrtMemoryInfo* memory_info;
78
- status = ort_api->CreateCpuMemoryInfo(OrtDeviceAllocator, OrtMemTypeDefault,
79
- &memory_info);
80
- if (status != NULL) {
81
- printf("Failed to create memory info: %s\n",
82
- ort_api->GetErrorMessage(status));
83
- ort_api->ReleaseStatus(status);
84
- ort_api->ReleaseSession(ort_session);
85
- ort_api->ReleaseEnv(ort_env);
86
- ort_session = NULL;
87
- ort_env = NULL;
88
- return;
89
- }
90
- int64_t input_shapes0[] = {1, AUP_AED_CONTEXT_WINDOW_LEN, AUP_AED_FEA_LEN};
91
- int64_t input_shapes1234[] = {1, AUP_AED_MODEL_HIDDEN_DIM};
92
- for (int i = 0; i < num_inputs; i++) {
93
- status = ort_api->CreateTensorWithDataAsOrtValue(
94
- memory_info, i == 0 ? input_data_buf_0 : input_data_buf_1234[i - 1],
95
- i == 0 ? sizeof(input_data_buf_0) : sizeof(input_data_buf_1234[i - 1]),
96
- i == 0 ? input_shapes0 : input_shapes1234,
97
- i == 0 ? sizeof(input_shapes0) / sizeof(input_shapes0[0])
98
- : sizeof(input_shapes1234) / sizeof(input_shapes1234[0]),
99
- ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &ort_input_tensors[i]);
100
- if (status != NULL) {
101
- printf("Failed to create input tensor %d: %s\n", i,
102
- ort_api->GetErrorMessage(status));
103
- ort_api->ReleaseStatus(status);
104
- ort_api->ReleaseSession(ort_session);
105
- ort_api->ReleaseEnv(ort_env);
106
- ort_session = NULL;
107
- ort_env = NULL;
108
- return;
109
- }
110
- }
111
-
112
- int64_t output_shapes0[] = {1, 1, 1};
113
- int64_t output_shapes1234[] = {1, AUP_AED_MODEL_HIDDEN_DIM};
114
- for (int i = 0; i < num_outputs; i++) {
115
- status = ort_api->CreateTensorAsOrtValue(
116
- ort_allocator, i == 0 ? output_shapes0 : output_shapes1234,
117
- i == 0 ? sizeof(output_shapes0) / sizeof(output_shapes0[0])
118
- : sizeof(output_shapes1234) / sizeof(output_shapes1234[0]),
119
- ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &ort_output_tensors[i]);
120
- if (status != NULL) {
121
- printf("Failed to create output tensor %d: %s\n", i,
122
- ort_api->GetErrorMessage(status));
123
- ort_api->ReleaseStatus(status);
124
- ort_api->ReleaseSession(ort_session);
125
- ort_api->ReleaseEnv(ort_env);
126
- ort_session = NULL;
127
- ort_env = NULL;
128
- return;
129
- }
130
- }
131
- inited = 1;
132
- }
133
-
134
- AUP_MODULE_AIVAD::~AUP_MODULE_AIVAD() {
135
- for (int i = 0; i < AUP_AED_MODEL_IO_NUM; i++) {
136
- if (ort_output_tensors[i]) {
137
- ort_api->ReleaseValue(ort_output_tensors[i]);
138
- }
139
- }
140
- if (ort_session) {
141
- ort_api->ReleaseSession(ort_session);
142
- }
143
- if (ort_env) {
144
- ort_api->ReleaseEnv(ort_env);
145
- }
146
- }
147
-
148
- int AUP_MODULE_AIVAD::Process(float* input, float* output) {
149
- if (!inited) {
150
- printf("not inited!\n");
151
- return -1;
152
- }
153
-
154
- memcpy(input_data_buf_0, input, sizeof(input_data_buf_0));
155
- if (clear_hidden) {
156
- memset(input_data_buf_1234, 0, sizeof(input_data_buf_1234));
157
- clear_hidden = 0;
158
- }
159
- OrtStatus* status = ort_api->Run(
160
- ort_session, NULL, input_names, ort_input_tensors, AUP_AED_MODEL_IO_NUM,
161
- output_names, AUP_AED_MODEL_IO_NUM, ort_output_tensors);
162
- float* output_data;
163
- ort_api->GetTensorMutableData(ort_output_tensors[0], (void**)&output_data);
164
- *output = output_data[0];
165
- for (int i = 1; i < AUP_AED_MODEL_IO_NUM; i++) {
166
- ort_api->GetTensorMutableData(ort_output_tensors[i], (void**)&output_data);
167
- memcpy(input_data_buf_1234[i - 1], output_data,
168
- sizeof(input_data_buf_1234[i - 1]));
169
- }
170
-
171
- return 0;
172
- }
173
-
174
- int AUP_MODULE_AIVAD::Reset() {
175
- if (!inited) {
176
- return -1;
177
- }
178
-
179
- clear_hidden = 1;
180
- return 0;
181
- }
182
-
183
- static int AUP_Aed_checkStatCfg(Aed_StaticCfg* pCfg) {
184
- if (pCfg == NULL) {
185
- return -1;
186
- }
187
-
188
- #if AUP_AED_FEA_LEN < AUP_AED_MEL_FILTER_BANK_NUM
189
- return -1;
190
- #endif
191
-
192
- if (pCfg->hopSz < 32) {
193
- return -1;
194
- }
195
-
196
- if (pCfg->frqInputAvailableFlag == 1) {
197
- if (pCfg->fftSz < 128 || pCfg->fftSz < pCfg->hopSz) {
198
- return -1;
199
- }
200
- if (pCfg->anaWindowSz > pCfg->fftSz || pCfg->anaWindowSz < pCfg->hopSz) {
201
- return -1;
202
- }
203
- }
204
-
205
- return 0;
206
- }
207
-
208
- static int AUP_Aed_publishStaticCfg(Aed_St* stHdl) {
209
- const Aed_StaticCfg* pStatCfg;
210
-
211
- if (stHdl == NULL) {
212
- return -1;
213
- }
214
- pStatCfg = (const Aed_StaticCfg*)(&(stHdl->stCfg));
215
-
216
- stHdl->extFftSz = 0;
217
- stHdl->extNBins = 0;
218
- stHdl->extWinSz = 0;
219
- if (pStatCfg->frqInputAvailableFlag == 1) {
220
- stHdl->extFftSz = pStatCfg->fftSz;
221
- stHdl->extNBins = (stHdl->extFftSz >> 1) + 1;
222
- stHdl->extWinSz = pStatCfg->anaWindowSz;
223
- }
224
- stHdl->extHopSz = pStatCfg->hopSz;
225
-
226
- stHdl->intFftSz = AUP_AED_ASSUMED_FFTSZ;
227
- stHdl->intHopSz = AUP_AED_ASSUMED_HOPSZ;
228
- stHdl->intWinSz = AUP_AED_ASSUMED_WINDOWSZ;
229
- stHdl->intNBins = (stHdl->intFftSz >> 1) + 1;
230
- stHdl->intAnalyWindowPtr = AUP_AED_STFTWindow_Hann768;
231
-
232
- if (pStatCfg->frqInputAvailableFlag == 0 ||
233
- stHdl->extHopSz != stHdl->intHopSz) {
234
- // external STFT analysis framework is not supported at all
235
- stHdl->intAnalyFlag =
236
- 2; // internally redo analysis based on input time signal
237
- } else if (stHdl->extFftSz == stHdl->intFftSz) {
238
- // external STFT analysis framework completely match with internal
239
- // requirement
240
- stHdl->intAnalyFlag = 0; // directly use external spectrum
241
- } else { // external spectrum need to be interpolated or extrapolated before
242
- // AIVAD
243
- stHdl->intAnalyFlag =
244
- 1; // use external spectrum with interpolation / exterpolation
245
- }
246
- stHdl->inputTimeFIFOLen = stHdl->extHopSz + stHdl->intHopSz;
247
-
248
- // for aiaed release2.0.0, pre-emphasis for input time-signal is needed,
249
- // therefore, we need redo analysis based on input time signal preprocessed by
250
- // pre-emphasis.
251
- stHdl->intAnalyFlag =
252
- 2; // internally redo analysis based on input time signal
253
-
254
- stHdl->feaSz = (size_t)AUP_AED_FEA_LEN;
255
- stHdl->melFbSz = (size_t)AUP_AED_MEL_FILTER_BANK_NUM;
256
- stHdl->algDelay = (size_t)AUP_AED_LOOKAHEAD_NFRM;
257
- stHdl->algCtxtSz = (size_t)AUP_AED_CONTEXT_WINDOW_LEN;
258
- stHdl->frmRmsBufLen = AUP_AED_MAX(1, stHdl->algDelay);
259
-
260
- return 0;
261
- }
262
-
263
- static int AUP_Aed_publishDynamCfg(Aed_St* stHdl) {
264
- const Aed_DynamCfg* pDynmCfg;
265
- PE_DynamCfg peDynmCfg;
266
- if (stHdl == NULL) {
267
- return -1;
268
- }
269
-
270
- pDynmCfg = (const Aed_DynamCfg*)(&(stHdl->dynamCfg));
271
- stHdl->aivadResetFrmNum = pDynmCfg->resetFrameNum;
272
- stHdl->voiceDecideThresh = pDynmCfg->extVoiceThr;
273
-
274
- if (stHdl->pitchEstStPtr != NULL) {
275
- peDynmCfg.voicedThr = pDynmCfg->pitchEstVoicedThr;
276
- AUP_PE_setDynamCfg(stHdl->pitchEstStPtr, &peDynmCfg);
277
- }
278
-
279
- return 0;
280
- }
281
-
282
- static int AUP_Aed_resetVariables(Aed_St* stHdl) {
283
- if (stHdl == NULL) {
284
- return -1;
285
- }
286
-
287
- // first clear all the dynamic memory, all the dynamic variables which are
288
- // not listed bellow are cleared to 0 by this step
289
- memset(stHdl->dynamMemPtr, 0, stHdl->dynamMemSize);
290
-
291
- float* melFbCoef = stHdl->melFilterBankCoef;
292
- size_t* melBinBuff = stHdl->melFilterBinBuff;
293
- size_t i, j;
294
- size_t nBins = stHdl->intNBins;
295
- size_t melFbSz = stHdl->melFbSz;
296
-
297
- stHdl->aedProcFrmCnt = 0;
298
- stHdl->inputTimeFIFOIdx = 0;
299
- stHdl->aivadResetCnt = 0;
300
- stHdl->timeSignalPre = 0.0f;
301
- stHdl->aivadScore =
302
- -1.0f; // as default value, labeling as aed is not working yet
303
- stHdl->aivadScorePre = -1.0f;
304
-
305
- stHdl->pitchFreq = 0.0f;
306
-
307
- // generate mel filter-bank coefficients
308
- float low_mel = 2595.0f * log10f(1.0f + 0.0f / 700.0f);
309
- float high_mel = 2595.0f * log10f(1.0f + 8000.0f / 700.0f);
310
- float mel_points = 0.0f;
311
- float hz_points = 0.0f;
312
- size_t idx = 0;
313
-
314
- for (i = 0; i < melFbSz + 2; i++) {
315
- mel_points = i * (high_mel - low_mel) / ((float)melFbSz + 1.0f) + low_mel;
316
- hz_points = 700.0f * (powf(10.0f, mel_points / 2595.0f) - 1.0f);
317
- melBinBuff[i] =
318
- (size_t)((stHdl->intFftSz + 1.0f) * hz_points / (float)AUP_AED_FS);
319
- if (i > 0 && melBinBuff[i] == melBinBuff[i - 1]) {
320
- return -1;
321
- }
322
- }
323
-
324
- for (j = 0; j < melFbSz; j++) {
325
- for (i = melBinBuff[j]; i < melBinBuff[j + 1]; i++) {
326
- idx = j * nBins + i;
327
- melFbCoef[idx] = (float)(i - melBinBuff[j]) /
328
- (float)(melBinBuff[j + 1] - melBinBuff[j]);
329
- }
330
- for (i = melBinBuff[j + 1]; i < melBinBuff[j + 2]; i++) {
331
- idx = j * nBins + i;
332
- melFbCoef[idx] = (float)(melBinBuff[j + 2] - i) /
333
- (float)(melBinBuff[j + 2] - melBinBuff[j + 1]);
334
- }
335
- }
336
-
337
- if (stHdl->pitchEstStPtr != NULL) {
338
- if (AUP_PE_init(stHdl->pitchEstStPtr) < 0) {
339
- return -1;
340
- }
341
- }
342
-
343
- if (stHdl->aivadInf != NULL) {
344
- stHdl->aivadInf->Reset();
345
- }
346
-
347
- if (stHdl->timeInAnalysis != NULL) {
348
- if (AUP_Analyzer_init(stHdl->timeInAnalysis) < 0) {
349
- return -1;
350
- }
351
- }
352
-
353
- return 0;
354
- }
355
-
356
- static int AUP_Aed_addOneCnter(int cnter) {
357
- cnter++;
358
- if (cnter >= 1000000000) {
359
- cnter = 0; // reset every half year
360
- }
361
- return (cnter);
362
- }
363
-
364
- static void AUP_Aed_binPowerConvert(const float* src, float* tgt, int srcNBins,
365
- int tgtNBins) {
366
- float rate;
367
- int srcIdx, tgtIdx;
368
- if (srcNBins == tgtNBins) {
369
- memcpy(tgt, src, sizeof(float) * tgtNBins);
370
- return;
371
- }
372
-
373
- memset(tgt, 0, sizeof(float) * tgtNBins);
374
-
375
- rate = (float)(srcNBins - 1) / (float)(tgtNBins - 1);
376
- for (tgtIdx = 0; tgtIdx < tgtNBins; tgtIdx++) {
377
- srcIdx = (int)(tgtIdx * rate);
378
- srcIdx = AUP_AED_MIN(srcNBins - 1, AUP_AED_MAX(srcIdx, 0));
379
- tgt[tgtIdx] = src[srcIdx];
380
- }
381
-
382
- return;
383
- }
384
-
385
- static void AUP_Aed_CalcBinPow(int nBins, const float* cmplxSpctr,
386
- float* binPow) {
387
- int idx, realIdx, imagIdx;
388
-
389
- // bin-0
390
- binPow[0] = cmplxSpctr[0] * cmplxSpctr[0];
391
-
392
- // bin-(NBins-1)
393
- binPow[nBins - 1] = cmplxSpctr[1] * cmplxSpctr[1];
394
-
395
- for (idx = 1; idx < (nBins - 1); idx++) {
396
- realIdx = idx << 1;
397
- imagIdx = realIdx + 1;
398
-
399
- binPow[idx] = cmplxSpctr[realIdx] * cmplxSpctr[realIdx] +
400
- cmplxSpctr[imagIdx] * cmplxSpctr[imagIdx];
401
- }
402
- return;
403
- }
404
-
405
- static int AUP_Aed_pitch_proc(void* pitchModule, const float* timeSignal,
406
- size_t timeLen, const float* binPow, size_t nBins,
407
- PE_OutputData* pOut) {
408
- PE_InputData peInData;
409
-
410
- peInData.timeSignal = timeSignal;
411
- peInData.hopSz = (int)timeLen;
412
- peInData.inBinPow = binPow;
413
- peInData.nBins = (int)nBins;
414
- pOut->pitchFreq = 0;
415
- pOut->voiced = -1;
416
- return AUP_PE_proc(pitchModule, &peInData, pOut);
417
- }
418
-
419
- static int AUP_Aed_aivad_proc(Aed_St* stHdl, const float* inBinPow,
420
- float* aivadScore) {
421
- if (stHdl == NULL || inBinPow == NULL || aivadScore == NULL) {
422
- return -1;
423
- }
424
-
425
- size_t i, j;
426
- size_t nBins = stHdl->intNBins;
427
- size_t melFbSz = stHdl->melFbSz;
428
- size_t srcOffset;
429
- size_t srcLen;
430
-
431
- float* aivadInputFeatStack = stHdl->aivadInputFeatStack;
432
- float* melFbCoef = stHdl->melFilterBankCoef;
433
- const float* aivadFeatMean = AUP_AED_FEATURE_MEANS;
434
- const float* aivadFeatStd = AUP_AED_FEATURE_STDS;
435
- float* curMelFbCoefPtr = NULL;
436
- float* curInputFeatPtr = NULL;
437
- float perBandValue = 0.0f;
438
- float powerNormal = 32768.0f * 32768.0f;
439
-
440
- // update aivad feature buff.
441
- srcOffset = stHdl->feaSz;
442
- srcLen = (stHdl->algCtxtSz - 1) * stHdl->feaSz;
443
- memmove(aivadInputFeatStack, aivadInputFeatStack + srcOffset,
444
- sizeof(float) * srcLen);
445
- curInputFeatPtr = aivadInputFeatStack + srcLen;
446
-
447
- // cal. mel-filter-bank feature
448
- for (i = 0; i < melFbSz; i++) {
449
- perBandValue = 0.0f;
450
- curMelFbCoefPtr = melFbCoef + i * nBins;
451
- for (j = 0; j < nBins; j++) {
452
- perBandValue += (inBinPow[j] * curMelFbCoefPtr[j]);
453
- }
454
- perBandValue = perBandValue / powerNormal;
455
- perBandValue = logf(perBandValue + AUP_AED_EPS);
456
- curInputFeatPtr[i] =
457
- (perBandValue - aivadFeatMean[i]) / (aivadFeatStd[i] + AUP_AED_EPS);
458
- }
459
-
460
- // extra feat.
461
- for (i = melFbSz; i < stHdl->feaSz; i++) {
462
- curInputFeatPtr[i] =
463
- (stHdl->pitchFreq - aivadFeatMean[i]) / (aivadFeatStd[i] + AUP_AED_EPS);
464
- }
465
-
466
- // exe. aivad
467
- // exe. aivad
468
- float aivadOutput;
469
- if (stHdl->aivadInf != NULL &&
470
- stHdl->aivadInf->Process(stHdl->aivadInputFeatStack, &aivadOutput) != 0) {
471
- return -1;
472
- }
473
-
474
- (*aivadScore) = aivadOutput;
475
-
476
- stHdl->aivadResetCnt += 1;
477
- if (stHdl->aivadResetCnt >= stHdl->aivadResetFrmNum) {
478
- if (stHdl->aivadInf != NULL && stHdl->aivadInf->Reset() != 0) {
479
- }
480
- stHdl->aivadResetCnt = 0;
481
- }
482
-
483
- return 0;
484
- }
485
-
486
- static int AUP_Aed_dynamMemPrepare(Aed_St* stHdl, void* memPtrExt,
487
- size_t memSize) {
488
- if (stHdl == NULL) {
489
- return -1;
490
- }
491
- size_t pitchInNBins = stHdl->intNBins;
492
- size_t totalMemSize = 0;
493
- size_t inputTimeFIFOMemSize = 0;
494
- size_t inputEmphTimeFIFOMemSize = 0;
495
- size_t aivadInputCmplxSptrmMemSize = 0;
496
- size_t aivadInputBinPowMemSize = 0;
497
- size_t frameRmsBuffMemSize = 0;
498
- size_t aivadInputFeatStackMemSize = 0;
499
- size_t aimdInputFeatStackMemSize = 0;
500
- size_t melFilterBankCoefMemSize = 0;
501
- size_t melFilterBinBuffMemSize = 0;
502
- size_t inputFloatBuffMemSize = 0;
503
-
504
- // size_t vadScoreOutputBuffDelaySample = 384; // buff. delay for output
505
- char* memPtr = NULL;
506
-
507
- // size_t nBinsBufferMemSize = AUP_AED_ALIGN8(sizeof(float) * nBins);
508
- // size_t spctrmMemSize = AUP_AED_ALIGN8(sizeof(float) * (nBins - 1) * 2);
509
-
510
- inputTimeFIFOMemSize =
511
- AUP_AED_ALIGN8(sizeof(float) * stHdl->inputTimeFIFOLen);
512
- totalMemSize += inputTimeFIFOMemSize;
513
-
514
- inputEmphTimeFIFOMemSize =
515
- AUP_AED_ALIGN8(sizeof(float) * stHdl->inputTimeFIFOLen);
516
- totalMemSize += inputEmphTimeFIFOMemSize;
517
-
518
- aivadInputCmplxSptrmMemSize = AUP_AED_ALIGN8(sizeof(float) * stHdl->intFftSz);
519
- totalMemSize += aivadInputCmplxSptrmMemSize;
520
-
521
- aivadInputBinPowMemSize = AUP_AED_ALIGN8(sizeof(float) * stHdl->intNBins);
522
- totalMemSize += aivadInputBinPowMemSize;
523
-
524
- aivadInputFeatStackMemSize =
525
- AUP_AED_ALIGN8(sizeof(float) * stHdl->algCtxtSz * stHdl->feaSz);
526
- totalMemSize += aivadInputFeatStackMemSize;
527
-
528
- aimdInputFeatStackMemSize =
529
- AUP_AED_ALIGN8(sizeof(float) * stHdl->algCtxtSz * stHdl->feaSz);
530
- totalMemSize += aimdInputFeatStackMemSize;
531
-
532
- melFilterBankCoefMemSize =
533
- AUP_AED_ALIGN8(sizeof(float) * pitchInNBins * stHdl->feaSz);
534
- totalMemSize += melFilterBankCoefMemSize;
535
-
536
- melFilterBinBuffMemSize = AUP_AED_ALIGN8(sizeof(size_t) * (stHdl->feaSz + 2));
537
- totalMemSize += melFilterBinBuffMemSize;
538
-
539
- frameRmsBuffMemSize = AUP_AED_ALIGN8(stHdl->frmRmsBufLen * sizeof(float));
540
- totalMemSize += frameRmsBuffMemSize;
541
-
542
- inputFloatBuffMemSize = AUP_AED_ALIGN8(stHdl->extHopSz * sizeof(float));
543
- totalMemSize += inputFloatBuffMemSize;
544
-
545
- if (memPtrExt == NULL) {
546
- return ((int)totalMemSize);
547
- }
548
-
549
- if (totalMemSize > memSize) {
550
- return -1;
551
- }
552
-
553
- memPtr = (char*)memPtrExt;
554
-
555
- stHdl->inputTimeFIFO = (float*)memPtr;
556
- memPtr += inputTimeFIFOMemSize;
557
-
558
- stHdl->inputEmphTimeFIFO = (float*)memPtr;
559
- memPtr += inputEmphTimeFIFOMemSize;
560
-
561
- stHdl->aivadInputCmplxSptrm = (float*)memPtr;
562
- memPtr += aivadInputCmplxSptrmMemSize;
563
-
564
- stHdl->aivadInputBinPow = (float*)memPtr;
565
- memPtr += aivadInputBinPowMemSize;
566
-
567
- stHdl->aivadInputFeatStack = (float*)memPtr;
568
- memPtr += aivadInputFeatStackMemSize;
569
-
570
- stHdl->melFilterBankCoef = (float*)memPtr;
571
- memPtr += melFilterBankCoefMemSize;
572
-
573
- stHdl->melFilterBinBuff = (size_t*)memPtr;
574
- memPtr += melFilterBinBuffMemSize;
575
-
576
- stHdl->frameRmsBuff = (float*)memPtr;
577
- memPtr += frameRmsBuffMemSize;
578
-
579
- stHdl->inputFloatBuff = (float*)memPtr;
580
- memPtr += inputFloatBuffMemSize;
581
-
582
- if (((size_t)(memPtr - (char*)memPtrExt)) > totalMemSize) {
583
- return -1;
584
- }
585
-
586
- return ((int)totalMemSize);
587
- }
588
-
589
- static int AUP_Aed_runOneFrm(Aed_St* stHdl, const float* tSignal, int hopSz,
590
- const float* binPowPtr, int nBins) {
591
- PE_OutputData peOutData = {0, 0};
592
- float aivadScore = -1.0f;
593
- float mediaFilterout = 0;
594
- int mediaIdx = (int)(AUP_AED_OUTPUT_SMOOTH_FILTER_LEN) / 2;
595
- int i;
596
-
597
- if (AUP_Aed_pitch_proc(stHdl->pitchEstStPtr, tSignal, hopSz, binPowPtr, nBins,
598
- &peOutData) < 0) {
599
- return -1;
600
- }
601
- stHdl->pitchFreq = peOutData.pitchFreq;
602
- if (AUP_Aed_aivad_proc(stHdl, binPowPtr, &aivadScore) < 0) {
603
- return -1;
604
- }
605
- stHdl->aivadScore = aivadScore;
606
-
607
- return 0;
608
- }
609
-
610
- /// ///////////////////////////////////////////////////////////////////////
611
- /// Public API
612
- /// ///////////////////////////////////////////////////////////////////////
613
-
614
- int AUP_Aed_create(void** stPtr) {
615
- if (stPtr == NULL) {
616
- return -1;
617
- }
618
- Aed_St* tmpPtr = (Aed_St*)malloc(sizeof(Aed_St));
619
- if (tmpPtr == NULL) {
620
- return -1;
621
- }
622
- memset(tmpPtr, 0, sizeof(Aed_St));
623
-
624
- if (AUP_PE_create(&(tmpPtr->pitchEstStPtr)) < 0) {
625
- return -1;
626
- }
627
- if (AUP_Analyzer_create(&(tmpPtr->timeInAnalysis)) < 0) {
628
- return -1;
629
- }
630
-
631
- tmpPtr->stCfg.enableFlag = 1; // as default, module enabled
632
- tmpPtr->stCfg.fftSz = 1024;
633
- tmpPtr->stCfg.hopSz = 256;
634
- tmpPtr->stCfg.anaWindowSz = 768;
635
- tmpPtr->stCfg.frqInputAvailableFlag = 0;
636
-
637
- tmpPtr->dynamCfg.extVoiceThr = 0.5f;
638
- tmpPtr->dynamCfg.extMusicThr = 0.5f;
639
- tmpPtr->dynamCfg.extEnergyThr = 10.0f;
640
- tmpPtr->dynamCfg.resetFrameNum = 1875; // TODO
641
- tmpPtr->dynamCfg.pitchEstVoicedThr = AUP_AED_PITCH_EST_DEFAULT_VOICEDTHR;
642
-
643
- (*stPtr) = (void*)tmpPtr;
644
-
645
- return 0;
646
- }
647
-
648
- int AUP_Aed_destroy(void** stPtr) {
649
- if (stPtr == NULL || (*stPtr) == NULL) {
650
- return -1;
651
- }
652
- Aed_St* stHdl = (Aed_St*)(*stPtr);
653
-
654
- if (stHdl->aivadInf != NULL) {
655
- delete stHdl->aivadInf;
656
- }
657
- stHdl->aivadInf = NULL;
658
-
659
- if (AUP_PE_destroy(&(stHdl->pitchEstStPtr)) < 0) {
660
- return -1;
661
- }
662
- if (AUP_Analyzer_destroy(&(stHdl->timeInAnalysis)) < 0) {
663
- return -1;
664
- }
665
-
666
- if (stHdl->dynamMemPtr != NULL) {
667
- free(stHdl->dynamMemPtr);
668
- }
669
- stHdl->dynamMemPtr = NULL;
670
-
671
- if (stHdl != NULL) {
672
- free(stHdl);
673
- }
674
- (*stPtr) = NULL;
675
-
676
- return 0;
677
- }
678
-
679
- int AUP_Aed_memAllocate(void* stPtr, const Aed_StaticCfg* pCfg) {
680
- Aed_St* stHdl = (Aed_St*)(stPtr);
681
- Aed_StaticCfg aedStatCfg;
682
- PE_StaticCfg pitchStatCfg;
683
- Analyzer_StaticCfg analyzerStatCfg;
684
- int totalMemSize = 0;
685
-
686
- if (stPtr == NULL || pCfg == NULL) {
687
- return -1;
688
- }
689
-
690
- // 1th: check static cfg.
691
- memcpy(&aedStatCfg, pCfg, sizeof(Aed_StaticCfg));
692
- if (AUP_Aed_checkStatCfg(&aedStatCfg) < 0) {
693
- return -1;
694
- }
695
-
696
- memcpy(&(stHdl->stCfg), &aedStatCfg, sizeof(Aed_StaticCfg));
697
-
698
- // 2th: publish static configuration to internal statical configuration
699
- // registers
700
- if (AUP_Aed_publishStaticCfg(stHdl) < 0) {
701
- return -1;
702
- }
703
-
704
- // 3th: create aivad instance
705
- if (stHdl->aivadInf == NULL) {
706
- stHdl->aivadInf = new AUP_MODULE_AIVAD("onnx_model/ten-vad.onnx");
707
- if (stHdl->aivadInf == NULL) {
708
- return -1;
709
- }
710
- }
711
- stHdl->aivadInf->Reset();
712
-
713
- // 4th: memAllocate operation for Pitch-Estimator ............
714
- if (AUP_PE_getStaticCfg(stHdl->pitchEstStPtr, &pitchStatCfg) < 0) {
715
- return -1;
716
- }
717
- pitchStatCfg.fftSz = stHdl->intFftSz;
718
- pitchStatCfg.anaWindowSz = stHdl->intWinSz;
719
- pitchStatCfg.hopSz = stHdl->intHopSz;
720
- pitchStatCfg.useLPCPreFiltering = AUP_AED_PITCH_EST_USE_LPC;
721
- pitchStatCfg.procFs = AUP_AED_PITCH_EST_PROCFS;
722
- if (AUP_PE_memAllocate(stHdl->pitchEstStPtr, &pitchStatCfg) < 0) {
723
- return -1;
724
- }
725
-
726
- // creation and initialization with time-analysis module ......
727
- AUP_Analyzer_getStaticCfg(stHdl->timeInAnalysis, &analyzerStatCfg);
728
- analyzerStatCfg.win_len = (int)stHdl->intWinSz;
729
- analyzerStatCfg.hop_size = (int)stHdl->intHopSz;
730
- analyzerStatCfg.fft_size = (int)stHdl->intFftSz;
731
- analyzerStatCfg.ana_win_coeff = stHdl->intAnalyWindowPtr;
732
- if (AUP_Analyzer_memAllocate(stHdl->timeInAnalysis, &analyzerStatCfg) < 0) {
733
- return -1;
734
- }
735
-
736
- // 5th: check memory requirement ..............................
737
- totalMemSize = AUP_Aed_dynamMemPrepare(stHdl, NULL, 0);
738
- if (totalMemSize < 0) {
739
- return -1;
740
- }
741
-
742
- // 6th: allocate dynamic memory
743
- if (totalMemSize > (int)stHdl->dynamMemSize) {
744
- if (stHdl->dynamMemPtr != NULL) {
745
- free(stHdl->dynamMemPtr);
746
- stHdl->dynamMemPtr = NULL;
747
- stHdl->dynamMemSize = 0;
748
- }
749
- stHdl->dynamMemPtr = malloc(totalMemSize);
750
- if (stHdl->dynamMemPtr == NULL) {
751
- return -1;
752
- }
753
- stHdl->dynamMemSize = totalMemSize;
754
- }
755
- memset(stHdl->dynamMemPtr, 0, stHdl->dynamMemSize);
756
-
757
- // 7th: setup the pointers/variable
758
- if (AUP_Aed_dynamMemPrepare(stHdl, stHdl->dynamMemPtr, stHdl->dynamMemSize) <
759
- 0) {
760
- return -1;
761
- }
762
-
763
- // 8th: publish internal dynamic config registers
764
- if (AUP_Aed_publishDynamCfg(stHdl) < 0) {
765
- return -1;
766
- }
767
-
768
- return 0;
769
- }
770
-
771
- int AUP_Aed_init(void* stPtr) {
772
- Aed_St* stHdl = (Aed_St*)(stPtr);
773
- if (stPtr == NULL) {
774
- return -1;
775
- }
776
-
777
- // publish internal dynamic config registers
778
- if (AUP_Aed_publishDynamCfg(stHdl) < 0) {
779
- return -1;
780
- }
781
-
782
- // clear/reset run-time variables
783
- if (AUP_Aed_resetVariables(stHdl) < 0) {
784
- return -1;
785
- }
786
-
787
- return 0;
788
- }
789
-
790
- int AUP_Aed_setDynamCfg(void* stPtr, const Aed_DynamCfg* pCfg) {
791
- Aed_St* stHdl = (Aed_St*)(stPtr);
792
-
793
- if (stPtr == NULL || pCfg == NULL) {
794
- return -1;
795
- }
796
-
797
- memcpy(&(stHdl->dynamCfg), pCfg, sizeof(Aed_DynamCfg));
798
-
799
- // publish internal dynamic configuration registers
800
- if (AUP_Aed_publishDynamCfg(stHdl) < 0) {
801
- return -1;
802
- }
803
-
804
- return 0;
805
- }
806
-
807
- int AUP_Aed_getStaticCfg(const void* stPtr, Aed_StaticCfg* pCfg) {
808
- const Aed_St* stHdl = (const Aed_St*)(stPtr);
809
-
810
- if (stPtr == NULL || pCfg == NULL) {
811
- return -1;
812
- }
813
-
814
- memcpy(pCfg, &(stHdl->stCfg), sizeof(Aed_StaticCfg));
815
-
816
- return 0;
817
- }
818
-
819
- int AUP_Aed_getDynamCfg(const void* stPtr, Aed_DynamCfg* pCfg) {
820
- const Aed_St* stHdl = (const Aed_St*)(stPtr);
821
-
822
- if (stPtr == NULL || pCfg == NULL) {
823
- return -1;
824
- }
825
-
826
- memcpy(pCfg, &(stHdl->dynamCfg), sizeof(Aed_DynamCfg));
827
-
828
- return 0;
829
- }
830
-
831
- int AUP_Aed_getAlgDelay(const void* stPtr, int* delayInFrms) {
832
- const Aed_St* stHdl = (const Aed_St*)(stPtr);
833
-
834
- if (stPtr == NULL || delayInFrms == NULL) {
835
- return -1;
836
- }
837
-
838
- (*delayInFrms) = (int)stHdl->algDelay;
839
-
840
- return 0;
841
- }
842
-
843
- int AUP_Aed_proc(void* stPtr, const Aed_InputData* pIn, Aed_OutputData* pOut) {
844
- Analyzer_InputData analyzerInput;
845
- Analyzer_OutputData analyzerOutput;
846
- Aed_St* stHdl = (Aed_St*)(stPtr);
847
-
848
- const float* binPowPtr = NULL;
849
- float frameRms = 0.0f;
850
- float frameEnergy = 0.0f;
851
- float powerNormal = 32768.0f * 32768.0f;
852
- int idx;
853
-
854
- if (stPtr == NULL) {
855
- return -1;
856
- }
857
- if (stHdl->stCfg.enableFlag == 0) { // this module is disabled
858
- return 0;
859
- }
860
- if (pIn == NULL || pIn->timeSignal == NULL || pOut == NULL) {
861
- return -1;
862
- }
863
-
864
- if (stHdl->intAnalyFlag != 2) { // the external spectra is going to be used
865
- if (pIn->binPower == NULL) {
866
- return -1;
867
- }
868
- if (pIn->nBins != (int)((stHdl->stCfg.fftSz >> 1) + 1) ||
869
- pIn->hopSz != (int)(stHdl->stCfg.hopSz)) {
870
- return -1;
871
- }
872
- }
873
-
874
- // cal. input frame energy ....
875
- for (idx = 0; idx < pIn->hopSz; idx++) {
876
- frameRms += (pIn->timeSignal[idx] * pIn->timeSignal[idx]);
877
- }
878
- frameEnergy = frameRms;
879
- frameRms = sqrtf(frameRms / (float)pIn->hopSz);
880
- memmove(stHdl->frameRmsBuff, stHdl->frameRmsBuff + 1,
881
- sizeof(float) * (stHdl->frmRmsBufLen - 1));
882
- stHdl->frameRmsBuff[stHdl->frmRmsBufLen - 1] = frameRms;
883
-
884
- // input signal conversion .........
885
- if ((stHdl->inputTimeFIFOIdx + pIn->hopSz) > (int)stHdl->inputTimeFIFOLen) {
886
- return -1;
887
- }
888
-
889
- // update pre-emphasis time signal FIFO
890
- float* timeSigEphaPtr = stHdl->inputEmphTimeFIFO + stHdl->inputTimeFIFOIdx;
891
- for (idx = 0; idx < pIn->hopSz; idx++) {
892
- timeSigEphaPtr[idx] = pIn->timeSignal[idx] - 0.97f * stHdl->timeSignalPre;
893
- stHdl->timeSignalPre = pIn->timeSignal[idx];
894
- }
895
-
896
- memcpy(stHdl->inputTimeFIFO + stHdl->inputTimeFIFOIdx, pIn->timeSignal,
897
- sizeof(float) * (pIn->hopSz));
898
- stHdl->inputTimeFIFOIdx += pIn->hopSz;
899
-
900
- if (stHdl->intAnalyFlag == 0) { // directly use external spectra
901
- if (stHdl->inputTimeFIFOIdx != (int)(stHdl->intHopSz) ||
902
- (int)(stHdl->intNBins) != pIn->nBins) {
903
- return -1;
904
- }
905
-
906
- // one-time processing ...
907
- stHdl->aedProcFrmCnt = AUP_Aed_addOneCnter(stHdl->aedProcFrmCnt);
908
- binPowPtr = pIn->binPower;
909
-
910
- // update: stHdl->pitchFreq, stHdl->aivadScore
911
- if (AUP_Aed_runOneFrm(stHdl, stHdl->inputTimeFIFO, (int)stHdl->intHopSz,
912
- binPowPtr, (int)stHdl->intNBins) < 0) {
913
- return -1;
914
- }
915
-
916
- // update the inputTimeFIFO
917
- stHdl->inputTimeFIFOIdx = 0;
918
- } else if (stHdl->intAnalyFlag ==
919
- 1) { // do interpolation or extrapolation with external spectra
920
- if (stHdl->inputTimeFIFOIdx != (int)(stHdl->intHopSz) ||
921
- (int)(stHdl->extNBins) != pIn->nBins) {
922
- return -1;
923
- }
924
-
925
- // one-time processing ....
926
- stHdl->aedProcFrmCnt = AUP_Aed_addOneCnter(stHdl->aedProcFrmCnt);
927
- AUP_Aed_binPowerConvert(pIn->binPower, stHdl->aivadInputBinPow,
928
- (int)stHdl->extNBins, (int)stHdl->intNBins);
929
- binPowPtr = stHdl->aivadInputBinPow;
930
-
931
- // update: stHdl->pitchFreq, stHdl->aivadScore
932
- if (AUP_Aed_runOneFrm(stHdl, stHdl->inputTimeFIFO, (int)stHdl->intHopSz,
933
- binPowPtr, (int)stHdl->intNBins) < 0) {
934
- return -1;
935
- }
936
-
937
- // update the inputTimeFIFO
938
- stHdl->inputTimeFIFOIdx = 0;
939
- } else { // we need to do STFT on the input time-signal
940
- if (stHdl->timeInAnalysis == NULL) {
941
- return -1;
942
- }
943
-
944
- // loop processing .....
945
- while (stHdl->inputTimeFIFOIdx >= (int)stHdl->intHopSz) {
946
- stHdl->aedProcFrmCnt = AUP_Aed_addOneCnter(stHdl->aedProcFrmCnt);
947
-
948
- analyzerInput.input = stHdl->inputEmphTimeFIFO;
949
- analyzerInput.iLength = (int)stHdl->intHopSz;
950
- analyzerOutput.output = stHdl->aivadInputCmplxSptrm;
951
- analyzerOutput.oLength = (int)stHdl->intFftSz;
952
- if (AUP_Analyzer_proc(stHdl->timeInAnalysis, &analyzerInput,
953
- &analyzerOutput) < 0) {
954
- return -1;
955
- }
956
-
957
- AUP_Aed_CalcBinPow((int)stHdl->intNBins, stHdl->aivadInputCmplxSptrm,
958
- stHdl->aivadInputBinPow);
959
- binPowPtr = stHdl->aivadInputBinPow;
960
-
961
- // update: stHdl->pitchFreq, stHdl->aivadScore
962
- if (AUP_Aed_runOneFrm(stHdl, stHdl->inputTimeFIFO, (int)stHdl->intHopSz,
963
- binPowPtr, (int)stHdl->intNBins) < 0) {
964
- return -1;
965
- }
966
-
967
- // update the inputTimeFIFO & inputEmphTimeFIFO.....
968
- if (stHdl->inputTimeFIFOIdx > (int)stHdl->intHopSz) {
969
- memcpy(stHdl->inputTimeFIFO, stHdl->inputTimeFIFO + stHdl->intHopSz,
970
- sizeof(float) * (stHdl->inputTimeFIFOIdx - stHdl->intHopSz));
971
- memcpy(stHdl->inputEmphTimeFIFO,
972
- stHdl->inputEmphTimeFIFO + stHdl->intHopSz,
973
- sizeof(float) * (stHdl->inputTimeFIFOIdx - stHdl->intHopSz));
974
- }
975
- stHdl->inputTimeFIFOIdx -= (int)stHdl->intHopSz;
976
- }
977
- }
978
-
979
- // write to output res.
980
- pOut->frameEnergy = frameEnergy / powerNormal;
981
- pOut->frameRms = stHdl->frameRmsBuff[0];
982
- pOut->pitchFreq = stHdl->pitchFreq;
983
- pOut->voiceProb = stHdl->aivadScore;
984
- if (pOut->voiceProb < 0.0f) {
985
- pOut->vadRes = -1;
986
- } else if (pOut->voiceProb <= stHdl->voiceDecideThresh) {
987
- pOut->vadRes = 0;
988
- } else {
989
- pOut->vadRes = 1;
990
- }
991
-
992
- return 0;
993
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/aed.h DELETED
@@ -1,226 +0,0 @@
1
- //
2
- // Copyright © 2025 Agora
3
- // This file is part of TEN Framework, an open source project.
4
- // Licensed under the Apache License, Version 2.0, with certain conditions.
5
- // Refer to the "LICENSE" file in the root directory for more information.
6
- //
7
- #ifndef __AED_H__
8
- #define __AED_H__
9
-
10
- #include <stdint.h>
11
- #include <stdlib.h>
12
-
13
- #define AUP_AED_MAX_FFT_SZ (1024) // the max. fft-size supported by VAD module
14
- #define AUP_AED_MAX_NBINS ((AUP_AED_MAX_FFT_SZ >> 1) + 1)
15
-
16
- #define AUP_AED_FS (16000) // assumed input freq.
17
-
18
- // Configuration Parameters, which impacts dynamic memory occupation, can only
19
- // be set during allocation
20
- typedef struct Aed_StaticCfg_ {
21
- int enableFlag; // flag to enable or disable this module
22
- // 0: disable, o.w.: enable
23
- size_t fftSz; // fft-size, only support: 128, 256, 512, 1024
24
- size_t hopSz; // fft-Hop Size, will be used to check
25
- size_t anaWindowSz; // fft-window Size, will be used to calc rms
26
- int frqInputAvailableFlag; // whether Aed_InputData will contain external
27
- // freq. power-sepctra
28
- } Aed_StaticCfg;
29
-
30
- // Configuraiton parameters which can be modified/set every frames
31
- typedef struct Aed_DynamCfg_ {
32
- float extVoiceThr; // threshold for ai based voice decision [0,1]
33
- float extMusicThr; // threshold for ai based music decision [0,1]
34
- float extEnergyThr; // threshold for energy based vad decision [0, ---]
35
- size_t resetFrameNum; // frame number for aivad reset [1875, 75000]
36
- float pitchEstVoicedThr; // threshold for pitch-estimator to output estimated
37
- // pitch
38
- } Aed_DynamCfg;
39
-
40
- // Spectrum are assumed to be generated with time-domain samples in [-32768,
41
- // 32767] with or without pre-emphasis operation
42
- typedef struct Aed_InputData_ {
43
- const float* binPower; // [NBins], power spectrum of 16KHz samples
44
- int nBins;
45
- const float*
46
- timeSignal; // [hopSz] // this frame's input signal, in [-32768, 32767]
47
- int hopSz; // should be equal to StaticCfg->hopSz
48
- } Aed_InputData;
49
-
50
- // return data from statistical ns module
51
- typedef struct Aed_OutputData_ {
52
- float frameEnergy; // frame energy for input normalized data
53
- float frameRms; // rms for input int16 data
54
- int energyVadRes; // vad res 0/1 with extEnergyThreshold based on input frame
55
- // energy
56
- float voiceProb; // vad score [0,1]
57
- int vadRes; // vad res 0/1 with extVoiceThr based on ai method, t + 16ms res
58
- // correspond to the t input
59
- float pitchFreq; // estimated pitch freq.
60
- } Aed_OutputData;
61
-
62
- #ifdef __cplusplus
63
- extern "C" {
64
- #endif
65
-
66
- /****************************************************************************
67
- * AUP_Aed_Create(...)
68
- *
69
- * This function creats a state handler from nothing, which is NOT ready for
70
- * processing
71
- *
72
- * Input:
73
- *
74
- * Output:
75
- * - stPtr : buffer to store the returned state handler
76
- *
77
- * Return value : 0 - Ok
78
- * -1 - Error
79
- */
80
- int AUP_Aed_create(void** stPtr);
81
-
82
- /****************************************************************************
83
- * AUP_Aed_Destroy(...)
84
- *
85
- * destroy VAD instance, and releasing all the dynamically allocated memory
86
- * this interface will also release ainsFactory, which was
87
- * created externally and passed to VAD module through memAllocate interface
88
- *
89
- * Input:
90
- * - stPtr : buffer of State Handler, after this method, this
91
- * handler won't be usable anymore
92
- *
93
- * Output:
94
- *
95
- * Return value : 0 - Ok
96
- * -1 - Error
97
- */
98
- int AUP_Aed_destroy(void** stPtr);
99
-
100
- /****************************************************************************
101
- * AUP_Aed_MemAllocate(...)
102
- *
103
- * This function sets Static Config params and does memory allocation
104
- * operation, will lose the dynamCfg values
105
- *
106
- * Input:
107
- * - stPtr : State Handler which was returned by _create
108
- * - pCfg : static configuration parameters
109
- *
110
- * Output:
111
- *
112
- * Return value : 0 - Ok
113
- * -1 - Error
114
- */
115
- int AUP_Aed_memAllocate(void* stPtr, const Aed_StaticCfg* pCfg);
116
-
117
- /****************************************************************************
118
- * AUP_Aed_init(...)
119
- *
120
- * This function resets (initialize) the VAD module and gets it prepared for
121
- * processing
122
- *
123
- * Input:
124
- * - stPtr : State Handler which has gone through create and
125
- * memAllocate
126
- *
127
- * Output:
128
- *
129
- * Return value : 0 - Ok
130
- * -1 - Error
131
- */
132
- int AUP_Aed_init(void* stPtr);
133
-
134
- /****************************************************************************
135
- * AUP_Aed_setDynamCfg(...)
136
- *
137
- * This function set dynamic (per-frame variable) configuration
138
- *
139
- * Input:
140
- * - stPtr : State Handler which has gone through create and
141
- * memAllocate
142
- * - pCfg : configuration content
143
- *
144
- * Output:
145
- *
146
- * Return value : 0 - Ok
147
- * -1 - Error
148
- */
149
- int AUP_Aed_setDynamCfg(void* stPtr, const Aed_DynamCfg* pCfg);
150
-
151
- /****************************************************************************
152
- * AUP_Aed_getStaticCfg(...)
153
- *
154
- * This function get static configuration status from VAD module
155
- *
156
- * Input:
157
- * - stPtr : State Handler which has gone through create and
158
- * memAllocate
159
- *
160
- * Output:
161
- * - pCfg : configuration content
162
- *
163
- * Return value : 0 - Ok
164
- * -1 - Error
165
- */
166
- int AUP_Aed_getStaticCfg(const void* stPtr, Aed_StaticCfg* pCfg);
167
-
168
- /****************************************************************************
169
- * AUP_Aed_getDynamCfg(...)
170
- *
171
- * This function get dynamic (per-frame variable) configuration status from
172
- * VAD module
173
- *
174
- * Input:
175
- * - stPtr : State Handler which has gone through create and
176
- * memAllocate
177
- *
178
- * Output:
179
- * - pCfg : configuration content
180
- *
181
- * Return value : 0 - Ok
182
- * -1 - Error
183
- */
184
- int AUP_Aed_getDynamCfg(const void* stPtr, Aed_DynamCfg* pCfg);
185
-
186
- /****************************************************************************
187
- * AUP_Aed_getAlgDelay(...)
188
- *
189
- * This function get algorithm delay from VAD module
190
- *
191
- * Input:
192
- * - stPtr : State Handler which has gone through create and
193
- * memAllocate
194
- *
195
- * Output:
196
- * - delayInFrms : algorithm delay in terms of frames
197
- *
198
- * Return value : 0 - Ok
199
- * -1 - Error
200
- */
201
- int AUP_Aed_getAlgDelay(const void* stPtr, int* delayInFrms);
202
-
203
- /****************************************************************************
204
- * AUP_Aed_proc(...)
205
- *
206
- * process a single frame
207
- *
208
- * Input:
209
- * - stPtr : State Handler which has gone through create and
210
- * memAllocate and reset
211
- * - pCtrl : per-frame variable control parameters
212
- * - pIn : input data stream
213
- *
214
- * Output:
215
- * - pOut : output data (mask, highband time-domain gain etc.)
216
- *
217
- * Return value : 0 - Ok
218
- * -1 - Error
219
- */
220
- int AUP_Aed_proc(void* stPtr, const Aed_InputData* pIn, Aed_OutputData* pOut);
221
-
222
- #ifdef __cplusplus
223
- }
224
- #endif
225
-
226
- #endif
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/aed_st.h DELETED
@@ -1,132 +0,0 @@
1
- //
2
- // Copyright © 2025 Agora
3
- // This file is part of TEN Framework, an open source project.
4
- // Licensed under the Apache License, Version 2.0, with certain conditions.
5
- // Refer to the "LICENSE" file in the root directory for more information.
6
- //
7
- #ifndef __AED_ST_H__
8
- #define __AED_ST_H__
9
-
10
- #include <stdio.h>
11
- #include <onnxruntime_c_api.h>
12
-
13
- #include "aed.h"
14
-
15
- #define AUP_AED_FS (16000)
16
- #define AUP_AED_MAX_IN_BUFF_SIZE (256)
17
- #define AUP_AED_POWER_SPCTR_NORMALIZER (9.3132e-10f) // = 1/(32768^2)
18
- #define AUP_AED_OUTPUT_SMOOTH_FILTER_LEN (10) // 160ms
19
-
20
- #define AUP_AED_MEL_FILTER_BANK_NUM (40)
21
- #define AUP_AED_LOOKAHEAD_NFRM (1)
22
- #define AUP_AED_CONTEXT_WINDOW_LEN (3) // context window length of AIVAD
23
- #define AUP_AED_FEA_LEN \
24
- (AUP_AED_MEL_FILTER_BANK_NUM + 1) // feature length of AIVAD
25
-
26
- #define AUP_AED_PITCH_EST_USE_LPC (1)
27
- #define AUP_AED_PITCH_EST_PROCFS (4000)
28
- #if AUP_AED_PITCH_EST_PROCFS == 2000
29
- #define AUP_AED_PITCH_EST_DEFAULT_VOICEDTHR (0.45f)
30
- #else
31
- #define AUP_AED_PITCH_EST_DEFAULT_VOICEDTHR (0.4f)
32
- #endif
33
-
34
- #define AUP_AED_MODEL_IO_NUM (5)
35
- #define AUP_AED_MODEL_NAME_LENGTH (32)
36
- #define AUP_AED_MODEL_HIDDEN_DIM (64)
37
-
38
- class AUP_MODULE_AIVAD {
39
- public:
40
- AUP_MODULE_AIVAD(char* onnx_path);
41
- ~AUP_MODULE_AIVAD();
42
- int Process(float* input, float* output);
43
- int Reset();
44
-
45
- private:
46
- const OrtApi* ort_api = NULL;
47
- OrtAllocator* ort_allocator = NULL;
48
- OrtEnv* ort_env = NULL;
49
- OrtSession* ort_session = NULL;
50
- int inited = 0;
51
- int clear_hidden = 0;
52
-
53
- char input_names_buf[AUP_AED_MODEL_IO_NUM][AUP_AED_MODEL_NAME_LENGTH] = {0};
54
- const char* input_names[AUP_AED_MODEL_IO_NUM] = {NULL};
55
- float input_data_buf_0[AUP_AED_CONTEXT_WINDOW_LEN * AUP_AED_FEA_LEN] = {0};
56
- float input_data_buf_1234[AUP_AED_MODEL_IO_NUM - 1]
57
- [AUP_AED_MODEL_HIDDEN_DIM] = {0};
58
- OrtValue* ort_input_tensors[AUP_AED_MODEL_IO_NUM] = {NULL};
59
-
60
- char output_names_buf[AUP_AED_MODEL_IO_NUM][AUP_AED_MODEL_NAME_LENGTH] = {0};
61
- const char* output_names[AUP_AED_MODEL_IO_NUM] = {NULL};
62
- OrtValue* ort_output_tensors[AUP_AED_MODEL_IO_NUM] = {NULL};
63
- };
64
-
65
- typedef struct Aed_St_ {
66
- void* dynamMemPtr; // memory pointer holding the dynamic memory
67
- size_t dynamMemSize; // size of the buffer *dynamMemPtr
68
-
69
- Aed_StaticCfg stCfg;
70
-
71
- Aed_DynamCfg dynamCfg;
72
-
73
- // Internal Static Config Registers, which are generated from stCfg
74
- size_t extFftSz; // externally decided FFT-Sz
75
- size_t extHopSz; // externally decided FFT-Hop-Sz
76
- size_t extNBins; // (FFTSz/2) + 1
77
- size_t extWinSz; // externally decided FFT-Window-Sz
78
-
79
- size_t intFftSz; // internal FFT Sz
80
- size_t intHopSz; // internal Hop Sz
81
- size_t intWinSz; // internal Window Sz
82
- size_t intNBins; // internal NBins
83
- const float* intAnalyWindowPtr; // internal analysis pointer
84
- int intAnalyFlag; // whether to do internal analysis
85
- // 0: directly use external spectrum
86
- // 1: use external spectrum with interpolation / exterpolation
87
- // 2: need to redo analysis based on input time-domain signal
88
- size_t inputTimeFIFOLen; // length of input FIFO buffer
89
- // if = 0: no need for input time-domain FIFO Queue
90
-
91
- // Internal static config registers for pitch-est module
92
- size_t feaSz;
93
- size_t melFbSz;
94
- size_t algDelay; // in terms of processing frames
95
- size_t algCtxtSz;
96
- size_t frmRmsBufLen; // frameRmsBuff: buffer-length of frameRmsBuff (FIFO)
97
-
98
- // Internal dynamic Config Registers, which are generated from dynamCfg
99
- size_t aivadResetFrmNum;
100
- float voiceDecideThresh;
101
-
102
- // SubModules
103
- AUP_MODULE_AIVAD* aivadInf;
104
-
105
- void* pitchEstStPtr; // pitch-estimation module handler
106
- void* timeInAnalysis;
107
- // state handler of STFT analysis module
108
-
109
- // Variables
110
- int aedProcFrmCnt; // counter of consecutive AI-VAD processed frames
111
- int inputTimeFIFOIdx;
112
- float* inputTimeFIFO; // [inputTimeFIFOLen]
113
- // input fifo buffer of time-signal to adjust between extHopSz and intHopSz
114
- float* inputEmphTimeFIFO; // [inputTimeFIFOLen]
115
- float* aivadInputCmplxSptrm; // [intFftSz]
116
- float* aivadInputBinPow; // [intNBins] // AIVAD input power spectrum
117
- size_t aivadResetCnt;
118
- float timeSignalPre;
119
- float aivadScore;
120
- float aivadScorePre;
121
-
122
- float pitchFreq; // input audio pitch in Hz
123
- float* frameRmsBuff; // [frmRmsBufLen], FIFO, to delay frmRms result so that
124
- // it aligns with AIVAD result
125
- float* aivadInputFeatStack; // [...] = [AUP_AED_CONTEXT_WINDOW_LEN *
126
- // AUP_AED_FEA_LEN]
127
- float* melFilterBankCoef; // [melFbSz][nBins]
128
- size_t* melFilterBinBuff; // [melFbSz + 2]
129
- float* inputFloatBuff; // [hopSz]
130
- } Aed_St;
131
-
132
- #endif
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/biquad.cc DELETED
@@ -1,354 +0,0 @@
1
- //
2
- // Copyright © 2025 Agora
3
- // This file is part of TEN Framework, an open source project.
4
- // Licensed under the Apache License, Version 2.0, with certain conditions.
5
- // Refer to the "LICENSE" file in the root directory for more information.
6
- //
7
- #include "biquad.h"
8
-
9
- #include <math.h>
10
- #include <stdlib.h>
11
- #include <string.h>
12
- #include <stdio.h>
13
-
14
- #include "biquad_st.h"
15
-
16
- #define AUP_BIQUAD_NUM_DUMP_FILES (20)
17
- #define AUP_BIQUAD_DUMP_FILENAMES (200)
18
-
19
- // ==========================================================================================
20
- // internal tools
21
- // ==========================================================================================
22
-
23
- static int AUP_Biquad_checkStatCfg(const Biquad_StaticCfg* pCfg) {
24
- int secIdx;
25
- if (pCfg == NULL) {
26
- return -1;
27
- }
28
-
29
- if (pCfg->maxNSample == 0 ||
30
- pCfg->maxNSample > AGORA_UAP_BIQUAD_MAX_INPUT_LEN) {
31
- return -1;
32
- }
33
- if (pCfg->nsect > AGORA_UAP_BIQUAD_MAX_SECTION) {
34
- return -1;
35
- }
36
-
37
- // if external filter coefficients are required, we need to check the
38
- // external filter coeff pointers' validness
39
- if (pCfg->nsect > 0) {
40
- for (secIdx = 0; secIdx < pCfg->nsect; secIdx++) {
41
- if (pCfg->B[secIdx] == NULL || pCfg->A[secIdx] == NULL) {
42
- return -1;
43
- }
44
- }
45
- if (pCfg->G == NULL) {
46
- return -1;
47
- }
48
- }
49
-
50
- return 0;
51
- }
52
-
53
- static int AUP_Biquad_publishStaticCfg(Biquad_St* stHdl) {
54
- const Biquad_StaticCfg* pStatCfg;
55
- int idx;
56
-
57
- if (stHdl == NULL) {
58
- return -1;
59
- }
60
- pStatCfg = (const Biquad_StaticCfg*)(&(stHdl->stCfg));
61
-
62
- stHdl->maxNSample = (int)pStatCfg->maxNSample;
63
-
64
- // first, give default (all-pass-filter) values to filter coeffs
65
- for (idx = 0; idx < AGORA_UAP_BIQUAD_MAX_SECTION; idx++) {
66
- stHdl->BCoeff[idx][0] = 1.0f;
67
- stHdl->BCoeff[idx][1] = 0;
68
- stHdl->BCoeff[idx][2] = 0;
69
- stHdl->ACoeff[idx][0] = 1.0f;
70
- stHdl->ACoeff[idx][1] = 0;
71
- stHdl->ACoeff[idx][2] = 0;
72
- stHdl->GCoeff[idx] = 1.0f;
73
- }
74
-
75
- if (pStatCfg->nsect <= 0) {
76
- stHdl->nsect = _BIQUAD_DC_REMOVAL_NSECT;
77
- for (idx = 0; idx < stHdl->nsect; idx++) {
78
- stHdl->BCoeff[idx][0] = _BIQUAD_DC_REMOVAL_B[idx][0];
79
- stHdl->BCoeff[idx][1] = _BIQUAD_DC_REMOVAL_B[idx][1];
80
- stHdl->BCoeff[idx][2] = _BIQUAD_DC_REMOVAL_B[idx][2];
81
- stHdl->ACoeff[idx][0] = _BIQUAD_DC_REMOVAL_A[idx][0];
82
- stHdl->ACoeff[idx][1] = _BIQUAD_DC_REMOVAL_A[idx][1];
83
- stHdl->ACoeff[idx][2] = _BIQUAD_DC_REMOVAL_A[idx][2];
84
- stHdl->GCoeff[idx] = _BIQUAD_DC_REMOVAL_G[idx];
85
- }
86
- } else {
87
- stHdl->nsect = pStatCfg->nsect;
88
- for (idx = 0; idx < stHdl->nsect; idx++) {
89
- stHdl->BCoeff[idx][0] = pStatCfg->B[idx][0];
90
- stHdl->BCoeff[idx][1] = pStatCfg->B[idx][1];
91
- stHdl->BCoeff[idx][2] = pStatCfg->B[idx][2];
92
-
93
- stHdl->ACoeff[idx][0] = pStatCfg->A[idx][0];
94
- stHdl->ACoeff[idx][1] = pStatCfg->A[idx][1];
95
- stHdl->ACoeff[idx][2] = pStatCfg->A[idx][2];
96
-
97
- stHdl->GCoeff[idx] = pStatCfg->G[idx];
98
- }
99
- }
100
-
101
- return 0;
102
- }
103
-
104
- static int AUP_Biquad_resetVariables(Biquad_St* stHdl) {
105
- memset(stHdl->dynamMemPtr, 0, stHdl->dynamMemSize);
106
- memset(stHdl->sectW, 0, sizeof(stHdl->sectW));
107
-
108
- return 0;
109
- }
110
-
111
- // ==========================================================================================
112
- // public APIS
113
- // ==========================================================================================
114
-
115
- int AUP_Biquad_create(void** stPtr) {
116
- Biquad_St* tmpPtr;
117
-
118
- if (stPtr == NULL) {
119
- return -1;
120
- }
121
- *stPtr = (void*)malloc(sizeof(Biquad_St));
122
- if (*stPtr == NULL) {
123
- return -1;
124
- }
125
- memset(*stPtr, 0, sizeof(Biquad_St));
126
-
127
- tmpPtr = (Biquad_St*)(*stPtr);
128
-
129
- tmpPtr->dynamMemPtr = NULL;
130
- tmpPtr->dynamMemSize = 0;
131
-
132
- tmpPtr->stCfg.maxNSample = 768;
133
- tmpPtr->stCfg.nsect = 0;
134
- for (int idx = 0; idx < AGORA_UAP_BIQUAD_MAX_SECTION; idx++) {
135
- tmpPtr->stCfg.A[idx] = NULL;
136
- tmpPtr->stCfg.B[idx] = NULL;
137
- }
138
- tmpPtr->stCfg.G = NULL;
139
-
140
- return 0;
141
- }
142
-
143
- int AUP_Biquad_destroy(void** stPtr) {
144
- Biquad_St* stHdl;
145
-
146
- if (stPtr == NULL) {
147
- return 0;
148
- }
149
-
150
- stHdl = (Biquad_St*)(*stPtr);
151
- if (stHdl == NULL) {
152
- return 0;
153
- }
154
-
155
- if (stHdl->dynamMemPtr != NULL) {
156
- free(stHdl->dynamMemPtr);
157
- }
158
- stHdl->dynamMemPtr = NULL;
159
-
160
- free(stHdl);
161
-
162
- (*stPtr) = NULL;
163
-
164
- return 0;
165
- }
166
-
167
- int AUP_Biquad_memAllocate(void* stPtr, const Biquad_StaticCfg* pCfg) {
168
- Biquad_St* stHdl = NULL;
169
- char* memPtr = NULL;
170
- int maxNSample, nsect, idx;
171
-
172
- int inputTempBufMemSize = 0;
173
- int sectOutputBufMemSize_EACH = 0;
174
- int totalMemSize = 0;
175
-
176
- if (stPtr == NULL || pCfg == NULL) {
177
- return -1;
178
- }
179
- stHdl = (Biquad_St*)(stPtr);
180
-
181
- if (AUP_Biquad_checkStatCfg(pCfg) < 0) {
182
- return -1;
183
- }
184
- memcpy(&(stHdl->stCfg), pCfg, sizeof(Biquad_StaticCfg));
185
-
186
- if (AUP_Biquad_publishStaticCfg(stHdl) < 0) {
187
- return -1;
188
- }
189
- maxNSample = stHdl->maxNSample;
190
- nsect = stHdl->nsect;
191
-
192
- // check memory requirement
193
- inputTempBufMemSize = AGORA_UAP_BIQUAD_ALIGN8(sizeof(float) * maxNSample);
194
- totalMemSize += inputTempBufMemSize;
195
-
196
- sectOutputBufMemSize_EACH =
197
- AGORA_UAP_BIQUAD_ALIGN8(sizeof(float) * maxNSample);
198
- totalMemSize += sectOutputBufMemSize_EACH * nsect;
199
-
200
- // allocate dynamic memory
201
- if ((size_t)totalMemSize > stHdl->dynamMemSize) {
202
- if (stHdl->dynamMemPtr != NULL) {
203
- free(stHdl->dynamMemPtr);
204
- stHdl->dynamMemSize = 0;
205
- }
206
- stHdl->dynamMemPtr = malloc(totalMemSize);
207
- if (stHdl->dynamMemPtr == NULL) {
208
- return -1;
209
- }
210
- stHdl->dynamMemSize = totalMemSize;
211
- }
212
- memset(stHdl->dynamMemPtr, 0, stHdl->dynamMemSize);
213
-
214
- // setup the pointers/variable
215
- memPtr = (char*)(stHdl->dynamMemPtr);
216
-
217
- stHdl->inputTempBuf = (float*)memPtr;
218
- memPtr += inputTempBufMemSize;
219
-
220
- for (idx = 0; idx < nsect; idx++) {
221
- stHdl->sectOutputBuf[idx] = (float*)memPtr;
222
- memPtr += sectOutputBufMemSize_EACH;
223
- }
224
- for (; idx < AGORA_UAP_BIQUAD_MAX_SECTION; idx++) {
225
- stHdl->sectOutputBuf[idx] = NULL;
226
- }
227
-
228
- if (((int)(memPtr - (char*)(stHdl->dynamMemPtr))) > totalMemSize) {
229
- return -1;
230
- }
231
-
232
- return 0;
233
- }
234
-
235
- int AUP_Biquad_init(void* stPtr) {
236
- Biquad_St* stHdl;
237
-
238
- if (stPtr == NULL) {
239
- return -1;
240
- }
241
- stHdl = (Biquad_St*)(stPtr);
242
-
243
- if (AUP_Biquad_resetVariables(stHdl) < 0) {
244
- return -1;
245
- }
246
-
247
- return 0;
248
- }
249
-
250
- int AUP_Biquad_getStaticCfg(const void* stPtr, Biquad_StaticCfg* pCfg) {
251
- const Biquad_St* stHdl;
252
-
253
- if (stPtr == NULL || pCfg == NULL) {
254
- return -1;
255
- }
256
- stHdl = (const Biquad_St*)(stPtr);
257
-
258
- memcpy(pCfg, &(stHdl->stCfg), sizeof(Biquad_StaticCfg));
259
-
260
- return 0;
261
- }
262
-
263
- int AUP_Biquad_getAlgDelay(const void* stPtr, int* delayInSamples) {
264
- const Biquad_St* stHdl;
265
-
266
- if (stPtr == NULL || delayInSamples == NULL) {
267
- return -1;
268
- }
269
- stHdl = (const Biquad_St*)(stPtr);
270
-
271
- *delayInSamples = stHdl->nsect;
272
-
273
- return 0;
274
- }
275
-
276
- int AUP_Biquad_proc(void* stPtr, const Biquad_InputData* pIn,
277
- Biquad_OutputData* pOut) {
278
- Biquad_St* stHdl = NULL;
279
- int isFloatIO = 0;
280
- int inputNSamples, nSect;
281
- int sectIdx, smplIdx;
282
- float tmp1;
283
- const short* pShortTemp;
284
- float* src;
285
- float* tgt;
286
-
287
- if (stPtr == NULL || pIn == NULL || pOut == NULL) { // pCtrl == NULL
288
- return -1;
289
- }
290
- if (pIn->samplesPtr == NULL || pOut->outputBuff == NULL) {
291
- return -1;
292
- }
293
-
294
- stHdl = (Biquad_St*)(stPtr);
295
-
296
- if (((int)pIn->nsamples) > stHdl->maxNSample) {
297
- return -1;
298
- }
299
-
300
- isFloatIO = 0;
301
- if (pIn->sampleType != 0) {
302
- isFloatIO = 1;
303
- }
304
-
305
- inputNSamples = (int)pIn->nsamples;
306
- nSect = stHdl->nsect;
307
-
308
- // special handle for input
309
- if (isFloatIO == 0) {
310
- pShortTemp = (const short*)pIn->samplesPtr;
311
- for (smplIdx = 0; smplIdx < inputNSamples; smplIdx++) {
312
- stHdl->inputTempBuf[smplIdx] = (float)pShortTemp[smplIdx];
313
- }
314
- } else {
315
- memcpy(stHdl->inputTempBuf, (const float*)pIn->samplesPtr,
316
- sizeof(float) * inputNSamples);
317
- }
318
-
319
- for (sectIdx = 0; sectIdx < nSect; sectIdx++) {
320
- if (sectIdx == 0) {
321
- src = stHdl->inputTempBuf;
322
- } else {
323
- src = stHdl->sectOutputBuf[sectIdx - 1];
324
- }
325
- tgt = stHdl->sectOutputBuf[sectIdx];
326
-
327
- for (smplIdx = 0; smplIdx < inputNSamples; smplIdx++) {
328
- tmp1 = src[smplIdx] -
329
- stHdl->ACoeff[sectIdx][1] * stHdl->sectW[sectIdx][0] -
330
- stHdl->ACoeff[sectIdx][2] * stHdl->sectW[sectIdx][1];
331
-
332
- tgt[smplIdx] = stHdl->GCoeff[sectIdx] *
333
- (stHdl->BCoeff[sectIdx][0] * tmp1 +
334
- stHdl->BCoeff[sectIdx][1] * stHdl->sectW[sectIdx][0] +
335
- stHdl->BCoeff[sectIdx][2] * stHdl->sectW[sectIdx][1]);
336
-
337
- stHdl->sectW[sectIdx][1] = stHdl->sectW[sectIdx][0];
338
- stHdl->sectW[sectIdx][0] = tmp1;
339
- }
340
- }
341
-
342
- // prepare output buffer
343
- if (isFloatIO == 0) {
344
- for (smplIdx = 0; smplIdx < inputNSamples; smplIdx++) {
345
- ((short*)pOut->outputBuff)[smplIdx] =
346
- (short)_BIQUAD_FLOAT2SHORT(stHdl->sectOutputBuf[nSect - 1][smplIdx]);
347
- }
348
- } else {
349
- memcpy(pOut->outputBuff, stHdl->sectOutputBuf[nSect - 1],
350
- sizeof(float) * inputNSamples);
351
- }
352
-
353
- return 0;
354
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/biquad.h DELETED
@@ -1,190 +0,0 @@
1
- //
2
- // Copyright © 2025 Agora
3
- // This file is part of TEN Framework, an open source project.
4
- // Licensed under the Apache License, Version 2.0, with certain conditions.
5
- // Refer to the "LICENSE" file in the root directory for more information.
6
- //
7
- #ifndef __BIQUAD_H__
8
- #define __BIQUAD_H__
9
-
10
- #include <stdio.h>
11
-
12
- #define AGORA_UAP_BIQUAD_MAX_SECTION (20)
13
- // the max. number of sections supported by this Biquad module
14
-
15
- #define AGORA_UAP_BIQUAD_MAX_INPUT_LEN (3840)
16
- // max. number of samples each time can be fed in
17
-
18
- #define AGORA_UAP_BIQUAD_ALIGN8(o) (((o) + 7) & (~7))
19
- #define _BIQUAD_FLOAT2SHORT(x) \
20
- ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : (short)floor(.5 + (x))))
21
-
22
- #define _BIQUAD_DC_REMOVAL_NSECT (2)
23
- const float _BIQUAD_DC_REMOVAL_B[_BIQUAD_DC_REMOVAL_NSECT][3] = {
24
- {1.0f, -2.0f, 1.0f}, {1.0f, -1.0f, 0.0f}};
25
- const float _BIQUAD_DC_REMOVAL_A[_BIQUAD_DC_REMOVAL_NSECT][3] = {
26
- {1.0f, -1.93944294f, 0.94281253f}, {1.0f, -0.94276431f, 0.0f}};
27
- // const float _BIQUAD_DC_REMOVAL_G[_BIQUAD_DC_REMOVAL_NSECT] = {0.97056387f,
28
- // 0.97138215f};
29
- const float _BIQUAD_DC_REMOVAL_G[_BIQUAD_DC_REMOVAL_NSECT] = {0.97056387f,
30
- 0.8655014957f};
31
-
32
- // Configuration Parameters, which impacts dynamic memory occupation, can only
33
- // be set during allocation
34
- typedef struct Biquad_StaticCfg_ {
35
- size_t maxNSample; // max. number of samples each time can be fed in
36
- // (0, AGORA_UAP_BIQUAD_MAX_INPUT_LEN]
37
-
38
- int nsect; // the number of sections to be processed by this Biquad module
39
- // (-inf, AGORA_UAP_BIQUAD_MAX_SECTION]
40
- // if <= 0, use internal default filter coefficients
41
-
42
- const float* B[AGORA_UAP_BIQUAD_MAX_SECTION];
43
- const float* A[AGORA_UAP_BIQUAD_MAX_SECTION];
44
- // always assume A[...][0] = 1.0f
45
- const float* G;
46
- } Biquad_StaticCfg;
47
-
48
- typedef struct Biquad_InputData_ {
49
- const void*
50
- samplesPtr; // externally provided buffer containing input time samples
51
- // either in short or float type
52
- short sampleType; // = 0: samplesPtr = short*; o.w. samplesPtr = float*
53
- size_t nsamples; // number of samples fed in this time
54
- } Biquad_InputData;
55
-
56
- typedef struct Biquad_OutputData_ {
57
- void* outputBuff; // externally provided output buffer,
58
- // assumed to be of enough size nsamples *
59
- // sizeof(short)/sizeof(short) output data type is the same
60
- // as input
61
- } Biquad_OutputData;
62
-
63
- #ifdef __cplusplus
64
- extern "C" {
65
- #endif
66
-
67
- /****************************************************************************
68
- * AUP_Biquad_create(...)
69
- *
70
- * This function creats a state handler from nothing, which is NOT ready for
71
- * processing
72
- *
73
- * Input:
74
- *
75
- * Output:
76
- * - stPtr : buffer to store the returned state handler
77
- *
78
- * Return value : 0 - Ok
79
- * -1 - Error
80
- */
81
- int AUP_Biquad_create(void** stPtr);
82
-
83
- /****************************************************************************
84
- * AUP_Biquad_destroy(...)
85
- *
86
- * destroy biquad instance, and releasing all the dynamically allocated memory
87
- *
88
- * Input:
89
- * - stPtr : buffer of State Handler, after this method, this
90
- * handler won't be usable anymore
91
- *
92
- * Output:
93
- *
94
- * Return value : 0 - Ok
95
- * -1 - Error
96
- */
97
- int AUP_Biquad_destroy(void** stPtr);
98
-
99
- /****************************************************************************
100
- * AUP_Biquad_memAllocate(...)
101
- *
102
- * This function sets Static Config params and does memory allocation
103
- * operation
104
- *
105
- * Input:
106
- * - stPtr : State Handler which was returned by _create
107
- * - pCfg : static configuration parameters
108
- *
109
- * Output:
110
- *
111
- * Return value : 0 - Ok
112
- * -1 - Error
113
- */
114
- int AUP_Biquad_memAllocate(void* stPtr, const Biquad_StaticCfg* pCfg);
115
-
116
- /****************************************************************************
117
- * AUP_Biquad_init(...)
118
- *
119
- * This function resets (initialize) the biquad module and gets it prepared for
120
- * processing
121
- *
122
- * Input:
123
- * - stPtr : State Handler which has gone through create and
124
- * memAllocate
125
- *
126
- * Output:
127
- *
128
- * Return value : 0 - Ok
129
- * -1 - Error
130
- */
131
- int AUP_Biquad_init(void* stPtr);
132
-
133
- /****************************************************************************
134
- * AUP_Biquad_getStaticCfg(...)
135
- *
136
- * This function get static configuration status from Biquad module
137
- *
138
- * Input:
139
- * - stPtr : State Handler which has gone through create and
140
- * memAllocate
141
- *
142
- * Output:
143
- * - pCfg : configuration content
144
- *
145
- * Return value : 0 - Ok
146
- * -1 - Error
147
- */
148
- int AUP_Biquad_getStaticCfg(const void* stPtr, Biquad_StaticCfg* pCfg);
149
-
150
- /****************************************************************************
151
- * AUP_Biquad_getAlgDelay(...)
152
- *
153
- * This function get algorithm delay from biquad module
154
- *
155
- * Input:
156
- * - stPtr : State Handler which has gone through create and
157
- * memAllocate
158
- *
159
- * Output:
160
- * - delayInSamples : algorithm delay in terms of samples
161
- *
162
- * Return value : 0 - Ok
163
- * -1 - Error
164
- */
165
- int AUP_Biquad_getAlgDelay(const void* stPtr, int* delayInSamples);
166
-
167
- /****************************************************************************
168
- * AUP_Biquad_proc(...)
169
- *
170
- * process a single frame
171
- *
172
- * Input:
173
- * - stPtr : State Handler which has gone through create and
174
- * memAllocate
175
- * - pCtrl : per-frame variable control parameters
176
- * - pIn : input data stream
177
- *
178
- * Output:
179
- * - pOut : output data (mask, highband time-domain gain etc.)
180
- *
181
- * Return value : 0 - Ok
182
- * -1 - Error
183
- */
184
- int AUP_Biquad_proc(void* stPtr, const Biquad_InputData* pIn,
185
- Biquad_OutputData* pOut);
186
-
187
- #ifdef __cplusplus
188
- }
189
- #endif
190
- #endif // __BIQUAD_H__
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/biquad_st.h DELETED
@@ -1,37 +0,0 @@
1
- //
2
- // Copyright © 2025 Agora
3
- // This file is part of TEN Framework, an open source project.
4
- // Licensed under the Apache License, Version 2.0, with certain conditions.
5
- // Refer to the "LICENSE" file in the root directory for more information.
6
- //
7
- #ifndef __BIQUAD_ST_H__
8
- #define __BIQUAD_ST_H__
9
-
10
- #include <stdio.h>
11
- #include "biquad.h"
12
-
13
- typedef struct Biquad_St_ {
14
- void* dynamMemPtr; // memory pointer holding the dynamic memory
15
- size_t dynamMemSize; // size of the buffer *dynamMemPtr
16
-
17
- // Static Configuration
18
- Biquad_StaticCfg stCfg;
19
-
20
- // ---------------------------------------------------------------
21
- // Internal Static Config Registers, which are generated from stCfg
22
- int maxNSample;
23
- int nsect;
24
- float BCoeff[AGORA_UAP_BIQUAD_MAX_SECTION][3];
25
- float ACoeff[AGORA_UAP_BIQUAD_MAX_SECTION][3];
26
- float GCoeff[AGORA_UAP_BIQUAD_MAX_SECTION]; // gain for each section
27
-
28
- // Variables
29
- float* inputTempBuf; // [maxNSample]
30
- float sectW[AGORA_UAP_BIQUAD_MAX_SECTION][2];
31
- // each section's register
32
- float* sectOutputBuf
33
- [AGORA_UAP_BIQUAD_MAX_SECTION]; //[AGORA_UAP_BIQUAD_MAX_SECTION][maxNSample]
34
- // each section's output buffer
35
- } Biquad_St;
36
-
37
- #endif // __BIQUAD_ST_H__
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/coeff.h DELETED
@@ -1,246 +0,0 @@
1
- //
2
- // Copyright © 2025 Agora
3
- // This file is part of TEN Framework, an open source project.
4
- // Licensed under the Apache License, Version 2.0, with certain conditions.
5
- // Refer to the "LICENSE" file in the root directory for more information.
6
- //
7
- #ifndef __COEFF_H__
8
- #define __COEFF_H__
9
-
10
- #include "aed_st.h"
11
-
12
- #define AUP_AED_MEAN_STD_NBINS AUP_AED_FEA_LEN
13
-
14
- #define AUP_AED_ASSUMED_HOPSZ (256)
15
- #define AUP_AED_ASSUMED_WINDOWSZ (768)
16
- #define AUP_AED_ASSUMED_FFTSZ (1024)
17
-
18
- // means of input-mel-filterbank
19
- const float AUP_AED_FEATURE_MEANS[AUP_AED_MEAN_STD_NBINS] = {
20
- -8.198236465454e+00f, -6.265716552734e+00f, -5.483818531036e+00f,
21
- -4.758691310883e+00f, -4.417088985443e+00f, -4.142892837524e+00f,
22
- -3.912850379944e+00f, -3.845927953720e+00f, -3.657090425491e+00f,
23
- -3.723418712616e+00f, -3.876134157181e+00f, -3.843890905380e+00f,
24
- -3.690405130386e+00f, -3.756065845490e+00f, -3.698696136475e+00f,
25
- -3.650463104248e+00f, -3.700468778610e+00f, -3.567321300507e+00f,
26
- -3.498900175095e+00f, -3.477807044983e+00f, -3.458816051483e+00f,
27
- -3.444923877716e+00f, -3.401328563690e+00f, -3.306261301041e+00f,
28
- -3.278556823730e+00f, -3.233250856400e+00f, -3.198616027832e+00f,
29
- -3.204526424408e+00f, -3.208798646927e+00f, -3.257838010788e+00f,
30
- -3.381376743317e+00f, -3.534021377563e+00f, -3.640867948532e+00f,
31
- -3.726858854294e+00f, -3.773730993271e+00f, -3.804667234421e+00f,
32
- -3.832901000977e+00f, -3.871120452881e+00f, -3.990592956543e+00f,
33
- -4.480289459229e+00f, 9.235690307617e+01f};
34
-
35
- // stds of input-mel-filterbank
36
- const float AUP_AED_FEATURE_STDS[AUP_AED_MEAN_STD_NBINS] = {
37
- 5.166063785553e+00f, 4.977209568024e+00f, 4.698895931244e+00f,
38
- 4.630621433258e+00f, 4.634347915649e+00f, 4.641156196594e+00f,
39
- 4.640676498413e+00f, 4.666367053986e+00f, 4.650534629822e+00f,
40
- 4.640020847321e+00f, 4.637400150299e+00f, 4.620099067688e+00f,
41
- 4.596316337585e+00f, 4.562654972076e+00f, 4.554360389709e+00f,
42
- 4.566910743713e+00f, 4.562489986420e+00f, 4.562412738800e+00f,
43
- 4.585299491882e+00f, 4.600179672241e+00f, 4.592845916748e+00f,
44
- 4.585922718048e+00f, 4.583496570587e+00f, 4.626092910767e+00f,
45
- 4.626957893372e+00f, 4.626289367676e+00f, 4.637005805969e+00f,
46
- 4.683015823364e+00f, 4.726813793182e+00f, 4.734289646149e+00f,
47
- 4.753227233887e+00f, 4.849722862244e+00f, 4.869434833527e+00f,
48
- 4.884482860565e+00f, 4.921327114105e+00f, 4.959212303162e+00f,
49
- 4.996619224548e+00f, 5.044823646545e+00f, 5.072216987610e+00f,
50
- 5.096439361572e+00f, 1.152136917114e+02f};
51
-
52
- const float AUP_AED_STFTWindow_Hann768[768] = {
53
- 0.0000000e+00f, 1.6733041e-05f, 6.6931045e-05f, 1.5059065e-04f,
54
- 2.6770626e-04f, 4.1827004e-04f, 6.0227190e-04f, 8.1969953e-04f,
55
- 1.0705384e-03f, 1.3547717e-03f, 1.6723803e-03f, 2.0233432e-03f,
56
- 2.4076367e-03f, 2.8252351e-03f, 3.2761105e-03f, 3.7602327e-03f,
57
- 4.2775693e-03f, 4.8280857e-03f, 5.4117450e-03f, 6.0285082e-03f,
58
- 6.6783340e-03f, 7.3611788e-03f, 8.0769970e-03f, 8.8257407e-03f,
59
- 9.6073598e-03f, 1.0421802e-02f, 1.1269013e-02f, 1.2148935e-02f,
60
- 1.3061510e-02f, 1.4006678e-02f, 1.4984373e-02f, 1.5994532e-02f,
61
- 1.7037087e-02f, 1.8111967e-02f, 1.9219101e-02f, 2.0358415e-02f,
62
- 2.1529832e-02f, 2.2733274e-02f, 2.3968661e-02f, 2.5235910e-02f,
63
- 2.6534935e-02f, 2.7865651e-02f, 2.9227967e-02f, 3.0621794e-02f,
64
- 3.2047037e-02f, 3.3503601e-02f, 3.4991388e-02f, 3.6510300e-02f,
65
- 3.8060234e-02f, 3.9641086e-02f, 4.1252752e-02f, 4.2895122e-02f,
66
- 4.4568088e-02f, 4.6271536e-02f, 4.8005353e-02f, 4.9769424e-02f,
67
- 5.1563629e-02f, 5.3387849e-02f, 5.5241962e-02f, 5.7125844e-02f,
68
- 5.9039368e-02f, 6.0982406e-02f, 6.2954829e-02f, 6.4956504e-02f,
69
- 6.6987298e-02f, 6.9047074e-02f, 7.1135695e-02f, 7.3253021e-02f,
70
- 7.5398909e-02f, 7.7573217e-02f, 7.9775799e-02f, 8.2006508e-02f,
71
- 8.4265194e-02f, 8.6551706e-02f, 8.8865891e-02f, 9.1207593e-02f,
72
- 9.3576658e-02f, 9.5972925e-02f, 9.8396234e-02f, 1.0084642e-01f,
73
- 1.0332333e-01f, 1.0582679e-01f, 1.0835663e-01f, 1.1091268e-01f,
74
- 1.1349477e-01f, 1.1610274e-01f, 1.1873640e-01f, 1.2139558e-01f,
75
- 1.2408010e-01f, 1.2678978e-01f, 1.2952444e-01f, 1.3228389e-01f,
76
- 1.3506796e-01f, 1.3787646e-01f, 1.4070919e-01f, 1.4356597e-01f,
77
- 1.4644661e-01f, 1.4935091e-01f, 1.5227868e-01f, 1.5522973e-01f,
78
- 1.5820385e-01f, 1.6120085e-01f, 1.6422052e-01f, 1.6726267e-01f,
79
- 1.7032709e-01f, 1.7341358e-01f, 1.7652192e-01f, 1.7965192e-01f,
80
- 1.8280336e-01f, 1.8597603e-01f, 1.8916971e-01f, 1.9238420e-01f,
81
- 1.9561929e-01f, 1.9887474e-01f, 2.0215035e-01f, 2.0544589e-01f,
82
- 2.0876115e-01f, 2.1209590e-01f, 2.1544993e-01f, 2.1882300e-01f,
83
- 2.2221488e-01f, 2.2562536e-01f, 2.2905421e-01f, 2.3250119e-01f,
84
- 2.3596607e-01f, 2.3944863e-01f, 2.4294863e-01f, 2.4646583e-01f,
85
- 2.5000000e-01f, 2.5355090e-01f, 2.5711830e-01f, 2.6070196e-01f,
86
- 2.6430163e-01f, 2.6791708e-01f, 2.7154806e-01f, 2.7519434e-01f,
87
- 2.7885565e-01f, 2.8253178e-01f, 2.8622245e-01f, 2.8992744e-01f,
88
- 2.9364649e-01f, 2.9737934e-01f, 3.0112576e-01f, 3.0488549e-01f,
89
- 3.0865828e-01f, 3.1244388e-01f, 3.1624203e-01f, 3.2005248e-01f,
90
- 3.2387498e-01f, 3.2770926e-01f, 3.3155507e-01f, 3.3541216e-01f,
91
- 3.3928027e-01f, 3.4315913e-01f, 3.4704849e-01f, 3.5094809e-01f,
92
- 3.5485766e-01f, 3.5877695e-01f, 3.6270569e-01f, 3.6664362e-01f,
93
- 3.7059048e-01f, 3.7454600e-01f, 3.7850991e-01f, 3.8248196e-01f,
94
- 3.8646187e-01f, 3.9044938e-01f, 3.9444422e-01f, 3.9844613e-01f,
95
- 4.0245484e-01f, 4.0647007e-01f, 4.1049157e-01f, 4.1451906e-01f,
96
- 4.1855226e-01f, 4.2259092e-01f, 4.2663476e-01f, 4.3068351e-01f,
97
- 4.3473690e-01f, 4.3879466e-01f, 4.4285652e-01f, 4.4692220e-01f,
98
- 4.5099143e-01f, 4.5506394e-01f, 4.5913946e-01f, 4.6321772e-01f,
99
- 4.6729844e-01f, 4.7138134e-01f, 4.7546616e-01f, 4.7955263e-01f,
100
- 4.8364046e-01f, 4.8772939e-01f, 4.9181913e-01f, 4.9590943e-01f,
101
- 5.0000000e-01f, 5.0409057e-01f, 5.0818087e-01f, 5.1227061e-01f,
102
- 5.1635954e-01f, 5.2044737e-01f, 5.2453384e-01f, 5.2861866e-01f,
103
- 5.3270156e-01f, 5.3678228e-01f, 5.4086054e-01f, 5.4493606e-01f,
104
- 5.4900857e-01f, 5.5307780e-01f, 5.5714348e-01f, 5.6120534e-01f,
105
- 5.6526310e-01f, 5.6931649e-01f, 5.7336524e-01f, 5.7740908e-01f,
106
- 5.8144774e-01f, 5.8548094e-01f, 5.8950843e-01f, 5.9352993e-01f,
107
- 5.9754516e-01f, 6.0155387e-01f, 6.0555578e-01f, 6.0955062e-01f,
108
- 6.1353813e-01f, 6.1751804e-01f, 6.2149009e-01f, 6.2545400e-01f,
109
- 6.2940952e-01f, 6.3335638e-01f, 6.3729431e-01f, 6.4122305e-01f,
110
- 6.4514234e-01f, 6.4905191e-01f, 6.5295151e-01f, 6.5684087e-01f,
111
- 6.6071973e-01f, 6.6458784e-01f, 6.6844493e-01f, 6.7229074e-01f,
112
- 6.7612502e-01f, 6.7994752e-01f, 6.8375797e-01f, 6.8755612e-01f,
113
- 6.9134172e-01f, 6.9511451e-01f, 6.9887424e-01f, 7.0262066e-01f,
114
- 7.0635351e-01f, 7.1007256e-01f, 7.1377755e-01f, 7.1746822e-01f,
115
- 7.2114435e-01f, 7.2480566e-01f, 7.2845194e-01f, 7.3208292e-01f,
116
- 7.3569837e-01f, 7.3929804e-01f, 7.4288170e-01f, 7.4644910e-01f,
117
- 7.5000000e-01f, 7.5353417e-01f, 7.5705137e-01f, 7.6055137e-01f,
118
- 7.6403393e-01f, 7.6749881e-01f, 7.7094579e-01f, 7.7437464e-01f,
119
- 7.7778512e-01f, 7.8117700e-01f, 7.8455007e-01f, 7.8790410e-01f,
120
- 7.9123885e-01f, 7.9455411e-01f, 7.9784965e-01f, 8.0112526e-01f,
121
- 8.0438071e-01f, 8.0761580e-01f, 8.1083029e-01f, 8.1402397e-01f,
122
- 8.1719664e-01f, 8.2034808e-01f, 8.2347808e-01f, 8.2658642e-01f,
123
- 8.2967291e-01f, 8.3273733e-01f, 8.3577948e-01f, 8.3879915e-01f,
124
- 8.4179615e-01f, 8.4477027e-01f, 8.4772132e-01f, 8.5064909e-01f,
125
- 8.5355339e-01f, 8.5643403e-01f, 8.5929081e-01f, 8.6212354e-01f,
126
- 8.6493204e-01f, 8.6771611e-01f, 8.7047556e-01f, 8.7321022e-01f,
127
- 8.7591990e-01f, 8.7860442e-01f, 8.8126360e-01f, 8.8389726e-01f,
128
- 8.8650523e-01f, 8.8908732e-01f, 8.9164337e-01f, 8.9417321e-01f,
129
- 8.9667667e-01f, 8.9915358e-01f, 9.0160377e-01f, 9.0402708e-01f,
130
- 9.0642334e-01f, 9.0879241e-01f, 9.1113411e-01f, 9.1344829e-01f,
131
- 9.1573481e-01f, 9.1799349e-01f, 9.2022420e-01f, 9.2242678e-01f,
132
- 9.2460109e-01f, 9.2674698e-01f, 9.2886431e-01f, 9.3095293e-01f,
133
- 9.3301270e-01f, 9.3504350e-01f, 9.3704517e-01f, 9.3901759e-01f,
134
- 9.4096063e-01f, 9.4287416e-01f, 9.4475804e-01f, 9.4661215e-01f,
135
- 9.4843637e-01f, 9.5023058e-01f, 9.5199465e-01f, 9.5372846e-01f,
136
- 9.5543191e-01f, 9.5710488e-01f, 9.5874725e-01f, 9.6035891e-01f,
137
- 9.6193977e-01f, 9.6348970e-01f, 9.6500861e-01f, 9.6649640e-01f,
138
- 9.6795296e-01f, 9.6937821e-01f, 9.7077203e-01f, 9.7213435e-01f,
139
- 9.7346506e-01f, 9.7476409e-01f, 9.7603134e-01f, 9.7726673e-01f,
140
- 9.7847017e-01f, 9.7964159e-01f, 9.8078090e-01f, 9.8188803e-01f,
141
- 9.8296291e-01f, 9.8400547e-01f, 9.8501563e-01f, 9.8599332e-01f,
142
- 9.8693849e-01f, 9.8785107e-01f, 9.8873099e-01f, 9.8957820e-01f,
143
- 9.9039264e-01f, 9.9117426e-01f, 9.9192300e-01f, 9.9263882e-01f,
144
- 9.9332167e-01f, 9.9397149e-01f, 9.9458825e-01f, 9.9517191e-01f,
145
- 9.9572243e-01f, 9.9623977e-01f, 9.9672389e-01f, 9.9717476e-01f,
146
- 9.9759236e-01f, 9.9797666e-01f, 9.9832762e-01f, 9.9864523e-01f,
147
- 9.9892946e-01f, 9.9918030e-01f, 9.9939773e-01f, 9.9958173e-01f,
148
- 9.9973229e-01f, 9.9984941e-01f, 9.9993307e-01f, 9.9998327e-01f,
149
- 1.0000000e+00f, 9.9998327e-01f, 9.9993307e-01f, 9.9984941e-01f,
150
- 9.9973229e-01f, 9.9958173e-01f, 9.9939773e-01f, 9.9918030e-01f,
151
- 9.9892946e-01f, 9.9864523e-01f, 9.9832762e-01f, 9.9797666e-01f,
152
- 9.9759236e-01f, 9.9717476e-01f, 9.9672389e-01f, 9.9623977e-01f,
153
- 9.9572243e-01f, 9.9517191e-01f, 9.9458825e-01f, 9.9397149e-01f,
154
- 9.9332167e-01f, 9.9263882e-01f, 9.9192300e-01f, 9.9117426e-01f,
155
- 9.9039264e-01f, 9.8957820e-01f, 9.8873099e-01f, 9.8785107e-01f,
156
- 9.8693849e-01f, 9.8599332e-01f, 9.8501563e-01f, 9.8400547e-01f,
157
- 9.8296291e-01f, 9.8188803e-01f, 9.8078090e-01f, 9.7964159e-01f,
158
- 9.7847017e-01f, 9.7726673e-01f, 9.7603134e-01f, 9.7476409e-01f,
159
- 9.7346506e-01f, 9.7213435e-01f, 9.7077203e-01f, 9.6937821e-01f,
160
- 9.6795296e-01f, 9.6649640e-01f, 9.6500861e-01f, 9.6348970e-01f,
161
- 9.6193977e-01f, 9.6035891e-01f, 9.5874725e-01f, 9.5710488e-01f,
162
- 9.5543191e-01f, 9.5372846e-01f, 9.5199465e-01f, 9.5023058e-01f,
163
- 9.4843637e-01f, 9.4661215e-01f, 9.4475804e-01f, 9.4287416e-01f,
164
- 9.4096063e-01f, 9.3901759e-01f, 9.3704517e-01f, 9.3504350e-01f,
165
- 9.3301270e-01f, 9.3095293e-01f, 9.2886431e-01f, 9.2674698e-01f,
166
- 9.2460109e-01f, 9.2242678e-01f, 9.2022420e-01f, 9.1799349e-01f,
167
- 9.1573481e-01f, 9.1344829e-01f, 9.1113411e-01f, 9.0879241e-01f,
168
- 9.0642334e-01f, 9.0402708e-01f, 9.0160377e-01f, 8.9915358e-01f,
169
- 8.9667667e-01f, 8.9417321e-01f, 8.9164337e-01f, 8.8908732e-01f,
170
- 8.8650523e-01f, 8.8389726e-01f, 8.8126360e-01f, 8.7860442e-01f,
171
- 8.7591990e-01f, 8.7321022e-01f, 8.7047556e-01f, 8.6771611e-01f,
172
- 8.6493204e-01f, 8.6212354e-01f, 8.5929081e-01f, 8.5643403e-01f,
173
- 8.5355339e-01f, 8.5064909e-01f, 8.4772132e-01f, 8.4477027e-01f,
174
- 8.4179615e-01f, 8.3879915e-01f, 8.3577948e-01f, 8.3273733e-01f,
175
- 8.2967291e-01f, 8.2658642e-01f, 8.2347808e-01f, 8.2034808e-01f,
176
- 8.1719664e-01f, 8.1402397e-01f, 8.1083029e-01f, 8.0761580e-01f,
177
- 8.0438071e-01f, 8.0112526e-01f, 7.9784965e-01f, 7.9455411e-01f,
178
- 7.9123885e-01f, 7.8790410e-01f, 7.8455007e-01f, 7.8117700e-01f,
179
- 7.7778512e-01f, 7.7437464e-01f, 7.7094579e-01f, 7.6749881e-01f,
180
- 7.6403393e-01f, 7.6055137e-01f, 7.5705137e-01f, 7.5353417e-01f,
181
- 7.5000000e-01f, 7.4644910e-01f, 7.4288170e-01f, 7.3929804e-01f,
182
- 7.3569837e-01f, 7.3208292e-01f, 7.2845194e-01f, 7.2480566e-01f,
183
- 7.2114435e-01f, 7.1746822e-01f, 7.1377755e-01f, 7.1007256e-01f,
184
- 7.0635351e-01f, 7.0262066e-01f, 6.9887424e-01f, 6.9511451e-01f,
185
- 6.9134172e-01f, 6.8755612e-01f, 6.8375797e-01f, 6.7994752e-01f,
186
- 6.7612502e-01f, 6.7229074e-01f, 6.6844493e-01f, 6.6458784e-01f,
187
- 6.6071973e-01f, 6.5684087e-01f, 6.5295151e-01f, 6.4905191e-01f,
188
- 6.4514234e-01f, 6.4122305e-01f, 6.3729431e-01f, 6.3335638e-01f,
189
- 6.2940952e-01f, 6.2545400e-01f, 6.2149009e-01f, 6.1751804e-01f,
190
- 6.1353813e-01f, 6.0955062e-01f, 6.0555578e-01f, 6.0155387e-01f,
191
- 5.9754516e-01f, 5.9352993e-01f, 5.8950843e-01f, 5.8548094e-01f,
192
- 5.8144774e-01f, 5.7740908e-01f, 5.7336524e-01f, 5.6931649e-01f,
193
- 5.6526310e-01f, 5.6120534e-01f, 5.5714348e-01f, 5.5307780e-01f,
194
- 5.4900857e-01f, 5.4493606e-01f, 5.4086054e-01f, 5.3678228e-01f,
195
- 5.3270156e-01f, 5.2861866e-01f, 5.2453384e-01f, 5.2044737e-01f,
196
- 5.1635954e-01f, 5.1227061e-01f, 5.0818087e-01f, 5.0409057e-01f,
197
- 5.0000000e-01f, 4.9590943e-01f, 4.9181913e-01f, 4.8772939e-01f,
198
- 4.8364046e-01f, 4.7955263e-01f, 4.7546616e-01f, 4.7138134e-01f,
199
- 4.6729844e-01f, 4.6321772e-01f, 4.5913946e-01f, 4.5506394e-01f,
200
- 4.5099143e-01f, 4.4692220e-01f, 4.4285652e-01f, 4.3879466e-01f,
201
- 4.3473690e-01f, 4.3068351e-01f, 4.2663476e-01f, 4.2259092e-01f,
202
- 4.1855226e-01f, 4.1451906e-01f, 4.1049157e-01f, 4.0647007e-01f,
203
- 4.0245484e-01f, 3.9844613e-01f, 3.9444422e-01f, 3.9044938e-01f,
204
- 3.8646187e-01f, 3.8248196e-01f, 3.7850991e-01f, 3.7454600e-01f,
205
- 3.7059048e-01f, 3.6664362e-01f, 3.6270569e-01f, 3.5877695e-01f,
206
- 3.5485766e-01f, 3.5094809e-01f, 3.4704849e-01f, 3.4315913e-01f,
207
- 3.3928027e-01f, 3.3541216e-01f, 3.3155507e-01f, 3.2770926e-01f,
208
- 3.2387498e-01f, 3.2005248e-01f, 3.1624203e-01f, 3.1244388e-01f,
209
- 3.0865828e-01f, 3.0488549e-01f, 3.0112576e-01f, 2.9737934e-01f,
210
- 2.9364649e-01f, 2.8992744e-01f, 2.8622245e-01f, 2.8253178e-01f,
211
- 2.7885565e-01f, 2.7519434e-01f, 2.7154806e-01f, 2.6791708e-01f,
212
- 2.6430163e-01f, 2.6070196e-01f, 2.5711830e-01f, 2.5355090e-01f,
213
- 2.5000000e-01f, 2.4646583e-01f, 2.4294863e-01f, 2.3944863e-01f,
214
- 2.3596607e-01f, 2.3250119e-01f, 2.2905421e-01f, 2.2562536e-01f,
215
- 2.2221488e-01f, 2.1882300e-01f, 2.1544993e-01f, 2.1209590e-01f,
216
- 2.0876115e-01f, 2.0544589e-01f, 2.0215035e-01f, 1.9887474e-01f,
217
- 1.9561929e-01f, 1.9238420e-01f, 1.8916971e-01f, 1.8597603e-01f,
218
- 1.8280336e-01f, 1.7965192e-01f, 1.7652192e-01f, 1.7341358e-01f,
219
- 1.7032709e-01f, 1.6726267e-01f, 1.6422052e-01f, 1.6120085e-01f,
220
- 1.5820385e-01f, 1.5522973e-01f, 1.5227868e-01f, 1.4935091e-01f,
221
- 1.4644661e-01f, 1.4356597e-01f, 1.4070919e-01f, 1.3787646e-01f,
222
- 1.3506796e-01f, 1.3228389e-01f, 1.2952444e-01f, 1.2678978e-01f,
223
- 1.2408010e-01f, 1.2139558e-01f, 1.1873640e-01f, 1.1610274e-01f,
224
- 1.1349477e-01f, 1.1091268e-01f, 1.0835663e-01f, 1.0582679e-01f,
225
- 1.0332333e-01f, 1.0084642e-01f, 9.8396234e-02f, 9.5972925e-02f,
226
- 9.3576658e-02f, 9.1207593e-02f, 8.8865891e-02f, 8.6551706e-02f,
227
- 8.4265194e-02f, 8.2006508e-02f, 7.9775799e-02f, 7.7573217e-02f,
228
- 7.5398909e-02f, 7.3253021e-02f, 7.1135695e-02f, 6.9047074e-02f,
229
- 6.6987298e-02f, 6.4956504e-02f, 6.2954829e-02f, 6.0982406e-02f,
230
- 5.9039368e-02f, 5.7125844e-02f, 5.5241962e-02f, 5.3387849e-02f,
231
- 5.1563629e-02f, 4.9769424e-02f, 4.8005353e-02f, 4.6271536e-02f,
232
- 4.4568088e-02f, 4.2895122e-02f, 4.1252752e-02f, 3.9641086e-02f,
233
- 3.8060234e-02f, 3.6510300e-02f, 3.4991388e-02f, 3.3503601e-02f,
234
- 3.2047037e-02f, 3.0621794e-02f, 2.9227967e-02f, 2.7865651e-02f,
235
- 2.6534935e-02f, 2.5235910e-02f, 2.3968661e-02f, 2.2733274e-02f,
236
- 2.1529832e-02f, 2.0358415e-02f, 1.9219101e-02f, 1.8111967e-02f,
237
- 1.7037087e-02f, 1.5994532e-02f, 1.4984373e-02f, 1.4006678e-02f,
238
- 1.3061510e-02f, 1.2148935e-02f, 1.1269013e-02f, 1.0421802e-02f,
239
- 9.6073598e-03f, 8.8257407e-03f, 8.0769970e-03f, 7.3611788e-03f,
240
- 6.6783340e-03f, 6.0285082e-03f, 5.4117450e-03f, 4.8280857e-03f,
241
- 4.2775693e-03f, 3.7602327e-03f, 3.2761105e-03f, 2.8252351e-03f,
242
- 2.4076367e-03f, 2.0233432e-03f, 1.6723803e-03f, 1.3547717e-03f,
243
- 1.0705384e-03f, 8.1969953e-04f, 6.0227190e-04f, 4.1827004e-04f,
244
- 2.6770626e-04f, 1.5059065e-04f, 6.6931045e-05f, 1.6733041e-05f};
245
-
246
- #endif
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/fftw.c DELETED
The diff for this file is too large to render. See raw diff
 
src/fftw.h DELETED
@@ -1,47 +0,0 @@
1
- //
2
- // Copyright © 2025 Agora
3
- // This file is part of TEN Framework, an open source project.
4
- // Licensed under the Apache License, Version 2.0, with certain conditions.
5
- // Refer to the "LICENSE" file in the root directory for more information.
6
- //
7
- #ifndef __FFTW_H__
8
- #define __FFTW_H__
9
-
10
- #include <stdio.h>
11
-
12
- #ifdef __cplusplus
13
- extern "C" {
14
- #endif /* __cplusplus */
15
- // Spectrum Storage Format definition:
16
- // format1: [Real-0, Real-Nyq, Real-1, Imag-1, Real-2, Imag-2, ...]
17
- // format2: [Real-0, Real-1, (-1)*Imag-1, Real-2, (-1)*Imag-2, ..., Real-Nyq]
18
-
19
- // the following functions assume input and output spectrum to be stored in
20
- // format2
21
- void AUP_FFTW_r2c_256(float* in, float* out);
22
- void AUP_FFTW_c2r_256(float* in, float* out);
23
-
24
- void AUP_FFTW_c2r_512(float* in, float* out);
25
- void AUP_FFTW_r2c_512(float* in, float* out);
26
-
27
- void AUP_FFTW_r2c_1024(float* in, float* out);
28
- void AUP_FFTW_c2r_1024(float* in, float* out);
29
-
30
- void AUP_FFTW_r2c_2048(float* in, float* out);
31
- void AUP_FFTW_c2r_2048(float* in, float* out);
32
-
33
- void AUP_FFTW_r2c_4096(float* in, float* out);
34
- void AUP_FFTW_c2r_4096(float* in, float* out);
35
-
36
- // if direction == 0: format1->format2
37
- // if direction == 1: format2->format1
38
- void AUP_FFTW_InplaceTransf(int direction, int fftSz, float* inplaceTranfBuf);
39
-
40
- void AUP_FFTW_RescaleFFTOut(int fftSz, float* inplaceBuf);
41
- void AUP_FFTW_RescaleIFFTOut(int fftSz, float* inplaceBuf);
42
-
43
- #ifdef __cplusplus
44
- }
45
- #endif /* __cplusplus */
46
-
47
- #endif // __FFTW_H__