File size: 2,036 Bytes
b50f2a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
//
// Copyright © 2025 Agora
// This file is part of TEN Framework, an open source project.
// Licensed under the Apache License, Version 2.0, with certain conditions.
// Refer to the "LICENSE" file in the root directory for more information.
//
#include <cassert>
#include "ten_vad.h"
#include "aed_st.h"
#include "aed.h"

// Widens `inputLen` 16-bit PCM samples from `inputs` into floats in `output`.
// The sample values are copied as-is (no rescaling); nothing is written when
// `inputLen` is zero or negative.
static void int16_to_float(const int16_t* inputs, int inputLen, float* output) {
  int idx = 0;
  while (idx < inputLen) {
    output[idx] = static_cast<float>(inputs[idx]);
    ++idx;
  }
}

// Creates a VAD instance, configures it, allocates its memory and runs its
// initialization.
//
// handle:    out-parameter that receives the new instance; must not be null.
// hop_size:  stored as the static configuration's hop size (the frame length
//            that ten_vad_process later checks against).
// threshold: stored in the instance's dynamic configuration as `extVoiceThr`.
//
// Returns 0 on success and -1 on any failure. When allocation or
// initialization fails, the partially built instance is destroyed and
// `*handle` is reset to null so the caller is not left with a dangling handle.
int ten_vad_create(ten_vad_handle_t* handle, size_t hop_size, float threshold) {
  if (handle == nullptr) {
    return -1;
  }
  if (AUP_Aed_create(handle) < 0) {
    return -1;
  }

  Aed_StaticCfg aedStCfg;
  aedStCfg.enableFlag = 1;
  aedStCfg.fftSz = 0;
  aedStCfg.hopSz = hop_size;
  aedStCfg.anaWindowSz = 0;
  aedStCfg.frqInputAvailableFlag = 0;

  Aed_St* stHdl = (Aed_St*)(*handle);
  stHdl->dynamCfg.extVoiceThr = threshold;

  if (AUP_Aed_memAllocate(*handle, &aedStCfg) < 0 ||
      AUP_Aed_init(*handle) < 0) {
    // Do not leak the instance created above.
    // NOTE(review): assumes AUP_Aed_destroy accepts an instance whose
    // allocation/initialization did not complete -- confirm against aed.h.
    AUP_Aed_destroy(handle);
    *handle = nullptr;
    return -1;
  }
  return 0;
}

// Runs voice-activity detection on one frame of 16-bit PCM audio.
//
// handle:            instance obtained from ten_vad_create.
// audio_data:        input samples for this frame.
// audio_data_length: number of samples in `audio_data`; must equal the hop
//                    size the instance was configured with.
// out_probability:   receives the voice probability reported by the detector.
// out_flag:          receives the detector's VAD decision.
//
// Returns -1 when any pointer argument is null or when the frame length does
// not match the configured hop size; otherwise returns the result of
// AUP_Aed_proc. The outputs are written only when that result is 0.
int ten_vad_process(ten_vad_handle_t handle, const int16_t* audio_data,
                    size_t audio_data_length, float* out_probability,
                    int* out_flag) {
  if (handle == nullptr || audio_data == nullptr ||
      out_probability == nullptr || out_flag == nullptr) {
    return -1;
  }
  Aed_St* ptr = (Aed_St*)handle;
  // Checked at run time rather than with assert(): assert is compiled out
  // under NDEBUG, which would let a wrong-sized frame reach the conversion
  // into the instance's float buffer below.
  if (audio_data_length != ptr->stCfg.hopSz) {
    return -1;
  }
  int16_to_float(audio_data, audio_data_length, ptr->inputFloatBuff);

  Aed_InputData aedInputData;
  Aed_OutputData aedOutputData;
  // Only the time-domain signal is supplied; no spectral input is passed.
  aedInputData.binPower = NULL;
  aedInputData.hopSz = ptr->stCfg.hopSz;
  aedInputData.nBins = -1;
  aedInputData.timeSignal = ptr->inputFloatBuff;

  int ret = AUP_Aed_proc(handle, &aedInputData, &aedOutputData);
  if (ret == 0) {
    *out_probability = aedOutputData.voiceProb;
    *out_flag = aedOutputData.vadRes;
  }
  return ret;
}

// Destroys a VAD instance by forwarding `handle` to AUP_Aed_destroy and
// returning that call's result unchanged.
int ten_vad_destroy(ten_vad_handle_t* handle) {
  const int status = AUP_Aed_destroy(handle);
  return status;
}

// Returns the library version as a NUL-terminated string with static storage
// duration; the caller must not free or modify it.
const char* ten_vad_get_version(void) {
  static const char kVersion[] = "1.0";
  return kVersion;
}