/**************************************************************************************************
*
* Copyright (c) 2019-2026 Axera Semiconductor (Ningbo) Co., Ltd. All Rights Reserved.
*
* This source file is the property of Axera Semiconductor (Ningbo) Co., Ltd. and
* may not be copied or distributed in any isomorphic form without the prior
* written consent of Axera Semiconductor (Ningbo) Co., Ltd.
*
**************************************************************************************************/
#ifndef _AX_ASR_API_H_
#define _AX_ASR_API_H_
#ifdef __cplusplus
extern "C" {
#endif
/* Export marker applied to every public function declared in this header:
 * gives the symbol default (externally visible) ELF visibility.
 * Uses the GCC/Clang __attribute__ syntax. */
#define AX_ASR_API __attribute__((visibility("default")))
/**
 * @brief ASR model variants that can be requested from AX_ASR_Init()
 *
 * Declared through a typedef so that the bare name AX_ASR_TYPE_E is a valid
 * type in C as well as in C++ (in C an enum tag alone does not name a type).
 */
typedef enum AX_ASR_TYPE_E {
    AX_WHISPER_TINY = 0,
    AX_WHISPER_BASE,
    AX_WHISPER_SMALL,
    AX_WHISPER_TURBO,
    AX_SENSEVOICE
} AX_ASR_TYPE_E;
/**
 * @brief Opaque handle to an ASR context
 *
 * All internal state of the ASR system lives behind this handle.
 * The concrete implementation is deliberately hidden from C callers so that
 * the ABI stays stable.
 */
typedef void *AX_ASR_HANDLE;
/**
 * @brief Create and initialize an ASR context
 *
 * Creates a new ASR context for the requested model type, loads the
 * corresponding models from model_path, configures the recognizer, and
 * prepares it for speech recognition. The recognition language is not
 * selected here; it is supplied per call to AX_ASR_RunFile() / AX_ASR_RunPCM().
 *
 * @param asr_type   Type of ASR model to use (one of AX_ASR_TYPE_E)
 * @param model_path Directory path where model files are stored.
 *                   Model files are expected to be in the format: *.axmodel
 *
 * @return AX_ASR_HANDLE Opaque handle to the initialized ASR context,
 *                       or NULL if initialization fails
 *
 * @note The caller is responsible for calling AX_ASR_Uninit() to free
 *       resources when the handle is no longer needed.
 * @example
 *   // Initialize recognition with the whisper tiny model
 *   AX_ASR_HANDLE handle = AX_ASR_Init(AX_WHISPER_TINY, "./models-ax650/");
 *
 */
/* The parameter type is spelled with the `enum` keyword so the prototype is
 * valid C as well as C++ (same type in both languages). */
AX_ASR_API AX_ASR_HANDLE AX_ASR_Init(enum AX_ASR_TYPE_E asr_type, const char* model_path);
/**
 * @brief Deinitialize an ASR context and release its resources
 *
 * Cleans up all resources associated with the ASR context, including
 * unloading models, freeing memory, and releasing hardware resources.
 *
 * @param handle ASR context handle obtained from AX_ASR_Init()
 *
 * @warning After calling this function, the handle becomes invalid and
 *          must not be used in any subsequent API calls.
 */
AX_ASR_API void AX_ASR_Uninit(AX_ASR_HANDLE handle);
/**
 * @brief Recognize speech from a WAV file and return the text as a newly
 *        allocated string
 *
 * @param handle   ASR context handle
 * @param wav_file Path to the input 16k pcmf32 WAV audio file
 * @param language Preferred language.
 *                 For whisper, check https://whisper-api.com/docs/languages/
 *                 For sensevoice, supported values: auto, zh, en, yue, ja, ko
 * @param result   Output: receives a pointer to the allocated result string
 *
 * @return int Status code (0 = success, <0 = error)
 *
 * @note The string stored in *result is allocated with malloc(); the caller
 *       owns it and must release it with free() when no longer needed.
 */
AX_ASR_API int AX_ASR_RunFile(AX_ASR_HANDLE handle, const char *wav_file,
                              const char *language, char **result);
/**
 * @brief Recognize speech from an in-memory PCM buffer and return the text
 *        as a newly allocated string
 *
 * @param handle      ASR context handle
 * @param pcm_data    Mono PCM f32 samples, range from -1.0 to 1.0.
 *                    16k input is expected; audio at another rate will be
 *                    resampled.
 *                    NOTE(review): not const-qualified; presumably only read
 *                    by the implementation - confirm before relying on it.
 * @param num_samples Number of samples in pcm_data
 * @param sample_rate Sample rate of the input audio
 * @param language    Preferred language.
 *                    For whisper, check https://whisper-api.com/docs/languages/
 *                    For sensevoice, supported values: auto, zh, en, yue, ja, ko
 * @param result      Output: receives a pointer to the allocated result string
 *
 * @return int Status code (0 = success, <0 = error)
 *
 * @note The string stored in *result is allocated with malloc(); the caller
 *       owns it and must release it with free() when no longer needed.
 */
AX_ASR_API int AX_ASR_RunPCM(AX_ASR_HANDLE handle, float *pcm_data, int num_samples,
                             int sample_rate, const char *language, char **result);
#ifdef __cplusplus
}
#endif
#endif // _AX_ASR_API_H_