File size: 3,157 Bytes
b743710
b50f2a2
b743710
b50f2a2
 
b743710
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77be746
 
b743710
 
 
 
 
 
 
 
 
 
 
 
 
 
77be746
b743710
77be746
 
b743710
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b50f2a2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
//
// Copyright © 2025 Agora
// This file is part of TEN Framework, an open source project.
// Licensed under the Apache License, Version 2.0, with certain conditions.
// Refer to the "LICENSE" file in the root directory for more information.
//
#ifndef TEN_VAD_H
#define TEN_VAD_H

/*
 * TENVAD_API decorates every public declaration in this header.
 *
 *  - Apple / Android / Linux: expands to the GCC/Clang attribute
 *    visibility("default").
 *  - Windows / Cygwin: expands to __declspec(dllexport) when TENVAD_EXPORTS
 *    is defined, and to __declspec(dllimport) otherwise.
 *  - Any other platform: expands to nothing.
 *
 * NOTE(review): TENVAD_EXPORTS is presumably defined only while building the
 * library itself - confirm against the build scripts. There is no opt-out for
 * static linking on Windows; consumers always get dllimport.
 */
#if defined(__APPLE__) || defined(__ANDROID__) || defined(__linux__)
#define TENVAD_API __attribute__((visibility("default")))
#elif defined(_WIN32) || defined(__CYGWIN__)
#ifdef TENVAD_EXPORTS
#define TENVAD_API __declspec(dllexport)
#else
#define TENVAD_API __declspec(dllimport)
#endif
#else
#define TENVAD_API
#endif

#include <stddef.h> /* size_t */
#include <stdint.h> /* int16_t */

#ifdef __cplusplus
extern "C"
{
#endif

  /**
   * @typedef ten_vad_handle_t
   * @brief Opaque handle to a ten_vad instance.
   *
   * A handle is obtained from ten_vad_create(), passed by value to
   * ten_vad_process(), and released through ten_vad_destroy().
   */
  typedef void *ten_vad_handle_t;

  /**
   * @brief Create and initialize a ten_vad instance.
   *
   * @param[out] handle       Pointer that receives the new VAD handle.
   * @param[in]  hop_size     Number of samples between the start points of
   * two consecutive analysis frames (e.g., 256).
   * @param[in]  threshold    VAD detection threshold in the range [0.0, 1.0].
   * The output probability is compared against this threshold to decide voice
   * activity: voice is detected when probability >= threshold.
   * @return 0 on success, or -1 if an error occurs.
   */
  TENVAD_API int ten_vad_create(ten_vad_handle_t *handle, size_t hop_size,
                                float threshold);

  /**
   * @brief Process one audio frame for voice activity detection.
   * ten_vad_create() must be called before this function, and
   * ten_vad_destroy() when processing is finished.
   *
   * @param[in]  handle           Valid VAD handle returned by ten_vad_create().
   * @param[in]  audio_data       Pointer to an array of int16_t samples; the
   * buffer length must equal the hop size given to ten_vad_create().
   * @param[in]  audio_data_length  Size of the audio_data buffer; it should be
   * equal to hop_size.
   * @param[out] out_probability  Pointer to a single float that receives the
   * voice activity probability in the range [0.0, 1.0]; higher values indicate
   * higher confidence that voice is present.
   * @param[out] out_flag         Pointer to a single int that receives the
   * binary voice activity decision: 0 = no voice, 1 = voice detected.
   * The flag is set to 1 when out_probability >= threshold, and 0 otherwise.
   * @return 0 on success, or -1 if an error occurs.
   */
  TENVAD_API int ten_vad_process(ten_vad_handle_t handle, const int16_t *audio_data, size_t audio_data_length,
                                 float *out_probability, int *out_flag);

  /**
   * @brief Destroy a ten_vad instance and release its resources.
   *
   * @param[in,out] handle Address of the ten_vad handle to destroy; the handle
   * it points to is set to NULL on return.
   * @return 0 on success, or -1 if an error occurs.
   */
  TENVAD_API int ten_vad_destroy(ten_vad_handle_t *handle);

  /**
   * @brief Get the ten_vad library version string.
   *
   * Takes no arguments and does not require a VAD handle.
   *
   * NOTE(review): ownership of the returned string is not stated here;
   * presumably it is library-owned and must not be freed or modified by the
   * caller - confirm against the implementation.
   *
   * @return The version string (e.g., "1.0.0").
   */
  TENVAD_API const char *ten_vad_get_version(void);

#ifdef __cplusplus
}
#endif

#endif /* TEN_VAD_H */