| | using System; |
| | using System.Runtime.InteropServices; |
| | using UnityEngine; |
| |
|
/// <summary>
/// Wraps a voice-activity detector and exposes a single <see cref="Process"/> call.
/// On WebGL player builds it talks to a JavaScript bridge (<c>WebGLTenVad_*</c>); on every
/// other target it P/Invokes the native <c>ten_vad</c> library. If the backend cannot be
/// created or fails while processing, the runner permanently switches to a simple
/// RMS-energy detector implemented in managed code.
/// </summary>
/// <remarks>
/// The class owns a native/JS-side instance and must be disposed. No finalizer is defined,
/// so an instance that is never disposed is not released by this class.
/// </remarks>
public class TenVADRunner : IDisposable
{
#if UNITY_WEBGL && !UNITY_EDITOR
    [DllImport("__Internal")]
    private static extern int WebGLTenVad_Create(int hopSize, float threshold);

    [DllImport("__Internal")]
    private static extern int WebGLTenVad_Process(int instanceId, short[] audioData, int audioDataLength, out float outProbability, out int outFlag);

    [DllImport("__Internal")]
    private static extern int WebGLTenVad_Destroy(int instanceId);

    [DllImport("__Internal")]
    private static extern int WebGLTenVad_GetState();

    // Codes exchanged with the JS bridge. This class treats "loading" and "pending" as
    // "not ready yet, try again later" and any other non-positive create result as an error.
    // NOTE(review): exact meanings are defined by the .jslib plugin - confirm there.
    private const int WebGlStateLoading = 0;
    private const int WebGlStateError = -1;
    private const int WebGlPending = -2;

    // Bridge-side instance id; values <= 0 mean "no instance created yet".
    private int webGlInstanceId;
    private readonly int webGlHopSize;
    private readonly float webGlThreshold;
#else
    private const string DllName = "ten_vad";

    [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
    private static extern int ten_vad_create(out IntPtr handle, UIntPtr hop_size, float threshold);

    [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
    private static extern int ten_vad_process(IntPtr handle, short[] audio_data, UIntPtr audio_data_length, out float out_probability, out int out_flag);

    [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
    private static extern int ten_vad_destroy(ref IntPtr handle);

    // Native detector handle; IntPtr.Zero means "no native instance owned".
    private IntPtr vadHandle = IntPtr.Zero;
#endif

    private bool isDisposed = false;

    // Once true, every Process call is served by the managed RMS detector.
    private bool useFallbackVad = false;
    private readonly float fallbackThreshold;
    private float fallbackSmoothedProbability;

    // Gain applied to the frame RMS before it is clamped to [0, 1].
    // NOTE(review): the WebGL value is much larger, presumably because the captured
    // signal is quieter there - confirm against the microphone capture path.
#if UNITY_WEBGL && !UNITY_EDITOR
    private const float FallbackRmsScale = 140f;
#else
    private const float FallbackRmsScale = 20f;
#endif

    // Interpolation weight of the newest frame when smoothing the fallback probability.
    private const float FallbackSmoothing = 0.35f;

    /// <summary>
    /// Creates the detector. Never throws for a missing or failing backend: any creation
    /// failure is logged as a warning and the instance starts in RMS fallback mode.
    /// </summary>
    /// <param name="hopSize">Frame size in samples passed to the backend.</param>
    /// <param name="threshold">
    /// Decision threshold passed to the backend; the fallback detector uses the same value
    /// clamped to [0, 1].
    /// </param>
    public TenVADRunner(UIntPtr hopSize, float threshold)
    {
        fallbackThreshold = Mathf.Clamp01(threshold);
        fallbackSmoothedProbability = 0f;

#if UNITY_WEBGL && !UNITY_EDITOR
        // Clamp before narrowing so a value above int.MaxValue cannot wrap around.
        ulong requestedHopSize = hopSize.ToUInt64();
        webGlHopSize = (int)Math.Min(Math.Max(requestedHopSize, 1UL), (ulong)int.MaxValue);
        webGlThreshold = threshold;

        // The bridge may still be loading; Process retries creation on later calls.
        TryInitializeWebGlVad();
#else
        try
        {
            int result = ten_vad_create(out vadHandle, hopSize, threshold);
            if (result != 0 || vadHandle == IntPtr.Zero)
            {
                // A handle returned together with an error code is not trusted, so it is
                // dropped rather than passed to ten_vad_destroy.
                vadHandle = IntPtr.Zero;
                EnableFallback($"Failed to create VAD Handle. (Error Code: {result})");
            }
        }
        catch (DllNotFoundException ex)
        {
            EnableFallback($"Native VAD library '{DllName}' was not found. {ex.Message}");
        }
        catch (EntryPointNotFoundException ex)
        {
            EnableFallback($"Native VAD entry point was not found. {ex.Message}");
        }
        catch (Exception ex)
        {
            EnableFallback($"Native VAD initialization failed. {ex.Message}");
        }
#endif
    }

    /// <summary>
    /// Runs voice-activity detection on one frame of 16-bit PCM samples.
    /// </summary>
    /// <param name="audioData">Samples of the frame to analyse.</param>
    /// <param name="probability">Receives the speech probability reported by the active detector.</param>
    /// <param name="flag">Receives the detector's binary decision (the fallback writes 1 or 0).</param>
    /// <returns>
    /// <c>-1</c> when <paramref name="audioData"/> is null or empty (outputs are zeroed);
    /// <c>0</c> when the frame was handled by the RMS fallback or the backend reported success;
    /// otherwise, on native targets, the non-zero code returned by <c>ten_vad_process</c>.
    /// </returns>
    /// <exception cref="ObjectDisposedException">The runner has already been disposed.</exception>
    public int Process(short[] audioData, out float probability, out int flag)
    {
        if (isDisposed)
        {
            throw new ObjectDisposedException(nameof(TenVADRunner), "The VAD instance has already been disposed.");
        }

        if (audioData == null || audioData.Length == 0)
        {
            probability = 0;
            flag = 0;
            return -1;
        }

        if (!useFallbackVad)
        {
#if UNITY_WEBGL && !UNITY_EDITOR
            // Lazily (re)try creation: the bridge may not have been ready in the constructor.
            if (webGlInstanceId <= 0 && !TryInitializeWebGlVad())
            {
                return ProcessWithFallback(audioData, out probability, out flag);
            }

            try
            {
                int result = WebGLTenVad_Process(webGlInstanceId, audioData, audioData.Length, out probability, out flag);
                if (result == 0)
                {
                    return result;
                }

                if (result == WebGlPending)
                {
                    // Not ready for this frame: answer with the RMS detector but stay on the
                    // bridge for subsequent frames.
                    return ProcessWithFallback(audioData, out probability, out flag);
                }

                int state = SafeGetWebGlState();
                EnableFallback($"WebGL ten_vad processing failed. result={result}, state={state}");
            }
            catch (Exception ex)
            {
                EnableFallback($"WebGL ten_vad processing bridge failed. {ex.Message}");
            }
#else
            if (vadHandle != IntPtr.Zero)
            {
                try
                {
                    // The native status code is handed back unchanged, including non-zero codes.
                    return ten_vad_process(vadHandle, audioData, (UIntPtr)audioData.Length, out probability, out flag);
                }
                catch (DllNotFoundException ex)
                {
                    EnableFallback($"Native VAD library '{DllName}' disappeared at runtime. {ex.Message}");
                }
                catch (EntryPointNotFoundException ex)
                {
                    EnableFallback($"Native VAD entry point missing at runtime. {ex.Message}");
                }
                catch (Exception ex)
                {
                    EnableFallback($"Native VAD processing failed. {ex.Message}");
                }
            }
#endif
        }

        return ProcessWithFallback(audioData, out probability, out flag);
    }

    /// <summary>
    /// Releases the backend instance, if one is still owned. Safe to call more than once and
    /// never throws; a failure to destroy the instance is logged as a warning.
    /// </summary>
    public void Dispose()
    {
        if (isDisposed)
        {
            return;
        }

        ReleaseNativeInstance();
        isDisposed = true;
    }

    /// <summary>
    /// Switches permanently to the RMS fallback detector, releasing any backend instance
    /// that is still alive, and logs <paramref name="reason"/>. Only the first call has an effect.
    /// </summary>
    private void EnableFallback(string reason)
    {
        if (useFallbackVad)
        {
            return;
        }

        useFallbackVad = true;

        // Destroy the instance before forgetting it: once the fallback is active nothing else
        // would release it, because the handle/id is cleared here.
        ReleaseNativeInstance();

#if UNITY_WEBGL && !UNITY_EDITOR
        Debug.LogWarning($"[TenVADRunner] Falling back to RMS VAD on WebGL. Reason: {reason}");
#else
        Debug.LogWarning($"[TenVADRunner] Falling back to simple RMS VAD. Reason: {reason}");
#endif
    }

    /// <summary>
    /// Destroys the backend instance if one is owned and clears the stored handle/id.
    /// Exceptions from the destroy call are logged and suppressed so this is safe to call
    /// from <see cref="Dispose"/> and from error paths.
    /// </summary>
    private void ReleaseNativeInstance()
    {
#if UNITY_WEBGL && !UNITY_EDITOR
        if (webGlInstanceId > 0)
        {
            try
            {
                WebGLTenVad_Destroy(webGlInstanceId);
            }
            catch (Exception ex)
            {
                Debug.LogWarning($"[TenVADRunner] Failed to destroy WebGL VAD instance. {ex.Message}");
            }
        }

        webGlInstanceId = 0;
#else
        if (vadHandle != IntPtr.Zero)
        {
            try
            {
                ten_vad_destroy(ref vadHandle);
            }
            catch (Exception ex)
            {
                Debug.LogWarning($"[TenVADRunner] Failed to destroy native VAD handle. {ex.Message}");
            }
        }

        vadHandle = IntPtr.Zero;
#endif
    }

#if UNITY_WEBGL && !UNITY_EDITOR
    /// <summary>
    /// Ensures a bridge-side instance exists.
    /// </summary>
    /// <returns>
    /// <c>true</c> when an instance id is available; <c>false</c> when the bridge is not ready
    /// yet (the caller may retry later) or when creation failed and the fallback was enabled.
    /// </returns>
    private bool TryInitializeWebGlVad()
    {
        if (webGlInstanceId > 0)
        {
            return true;
        }

        int createResult;
        try
        {
            createResult = WebGLTenVad_Create(webGlHopSize, webGlThreshold);
        }
        catch (Exception ex)
        {
            EnableFallback($"WebGL ten_vad bridge is unavailable. {ex.Message}");
            return false;
        }

        if (createResult > 0)
        {
            webGlInstanceId = createResult;
            return true;
        }

        if (createResult == WebGlPending || createResult == WebGlStateLoading)
        {
            // Still loading: stay on the bridge and let the next Process call retry.
            return false;
        }

        // Positive ids, 0 and -2 were handled above, so createResult is a negative error
        // code here. The bridge state is queried only to enrich the log message.
        int state = SafeGetWebGlState();
        EnableFallback($"WebGL ten_vad initialization failed. createResult={createResult}, state={state}");
        return false;
    }

    /// <summary>
    /// Reads the bridge state, mapping any exception to <see cref="WebGlStateError"/>.
    /// </summary>
    private int SafeGetWebGlState()
    {
        try
        {
            return WebGLTenVad_GetState();
        }
        catch
        {
            return WebGlStateError;
        }
    }
#endif

    /// <summary>
    /// Managed fallback detector: computes the RMS of the frame (samples scaled by 1/32768),
    /// multiplies it by <see cref="FallbackRmsScale"/>, clamps to [0, 1] and blends it into
    /// the running probability with weight <see cref="FallbackSmoothing"/>.
    /// </summary>
    /// <param name="audioData">Non-empty frame of samples; callers validate this beforehand.</param>
    /// <param name="probability">Receives the smoothed probability.</param>
    /// <param name="flag">Receives 1 when the probability reaches the threshold, otherwise 0.</param>
    /// <returns>Always <c>0</c>.</returns>
    private int ProcessWithFallback(short[] audioData, out float probability, out int flag)
    {
        double sumSquares = 0d;
        for (int i = 0; i < audioData.Length; i++)
        {
            float sample = audioData[i] / 32768f;
            sumSquares += sample * sample;
        }

        float rms = (float)Math.Sqrt(sumSquares / audioData.Length);
        float rawProbability = Mathf.Clamp01(rms * FallbackRmsScale);
        fallbackSmoothedProbability = Mathf.Lerp(fallbackSmoothedProbability, rawProbability, FallbackSmoothing);

        probability = fallbackSmoothedProbability;
        flag = probability >= fallbackThreshold ? 1 : 0;
        return 0;
    }
}
| |
|