/**************************************************************************************************
 *
 * Copyright (c) 2019-2026 Axera Semiconductor (Ningbo) Co., Ltd. All Rights Reserved.
 *
 * This source file is the property of Axera Semiconductor (Ningbo) Co., Ltd. and
 * may not be copied or distributed in any isomorphic form without the prior
 * written consent of Axera Semiconductor (Ningbo) Co., Ltd.
 *
 **************************************************************************************************/
#ifdef __cplusplus
extern "C" {
#endif

/*
 * Fallback for builds that do not supply an export/visibility macro:
 * expand AX_ASR_API to nothing so the declarations below still parse.
 */
#ifndef AX_ASR_API
#define AX_ASR_API
#endif

/**
 * @brief Supported ASR model types
 *
 * Declared through a typedef so that the bare name AX_ASR_TYPE_E can be
 * used from C as well as from C++.
 */
typedef enum AX_ASR_TYPE_E {
    AX_WHISPER_TINY = 0,
    AX_WHISPER_BASE,
    AX_WHISPER_SMALL,
    AX_WHISPER_TURBO,
    AX_SENSEVOICE
} AX_ASR_TYPE_E;

/**
 * @brief Opaque handle type for an ASR context
 *
 * This handle encapsulates all internal state of the ASR system.
 * The actual implementation is hidden from C callers to maintain ABI stability.
 */
typedef void* AX_ASR_HANDLE;

/**
 * @brief Initialize the ASR system with a specific configuration
 *
 * Creates and initializes a new ASR context with the specified model type
 * and model path. This function loads the appropriate models, configures
 * the recognizer, and prepares it for speech recognition.
 *
 * @param asr_type   Type of ASR model to use (see AX_ASR_TYPE_E)
 * @param model_path Directory path where model files are stored.
 *                   Model files are expected to be in the format: *.axmodel
 *
 * @return AX_ASR_HANDLE Opaque handle to the initialized ASR context,
 *         or NULL if initialization fails
 *
 * @note The caller is responsible for calling AX_ASR_Uninit() to free
 *       resources when the handle is no longer needed.
 *
 * @code
 * // Initialize recognition with the whisper tiny model
 * AX_ASR_HANDLE handle = AX_ASR_Init(AX_WHISPER_TINY, "./models-ax650/");
 * @endcode
 */
AX_ASR_API AX_ASR_HANDLE AX_ASR_Init(AX_ASR_TYPE_E asr_type, const char* model_path);

/**
 * @brief Deinitialize and release ASR resources
 *
 * Cleans up all resources associated with the ASR context, including
 * unloading models, freeing memory, and releasing hardware resources.
 *
 * @param handle ASR context handle obtained from AX_ASR_Init()
 *
 * @warning After calling this function, the handle becomes invalid and
 *          should not be used in any subsequent API calls.
 */
AX_ASR_API void AX_ASR_Uninit(AX_ASR_HANDLE handle);

/**
 * @brief Perform speech recognition on a WAV file and return a dynamically
 *        allocated string
 *
 * @param handle   ASR context handle
 * @param wav_file Path to the input WAV audio file (16 kHz, 32-bit float PCM)
 * @param language Preferred language.
 *                 For whisper, check https://whisper-api.com/docs/languages/
 *                 For sensevoice, supported values are auto, zh, en, yue, ja, ko
 * @param result   Pointer to receive the allocated result string
 *
 * @return int Status code (0 = success, <0 = error)
 *
 * @note The returned string is allocated with malloc() and must be freed
 *       by the caller using free() when no longer needed.
 */
AX_ASR_API int AX_ASR_RunFile(AX_ASR_HANDLE handle,
                              const char* wav_file,
                              const char* language,
                              char** result);

/**
 * @brief Perform speech recognition on in-memory PCM samples and return a
 *        dynamically allocated string
 *
 * @param handle      ASR context handle
 * @param pcm_data    Mono 32-bit float PCM samples, range from -1.0 to 1.0.
 *                    The audio is resampled when sample_rate is not 16 kHz.
 * @param num_samples Number of samples in pcm_data
 * @param sample_rate Sample rate of the input audio
 * @param language    Preferred language.
 *                    For whisper, check https://whisper-api.com/docs/languages/
 *                    For sensevoice, supported values are auto, zh, en, yue, ja, ko
 * @param result      Pointer to receive the allocated result string
 *
 * @return int Status code (0 = success, <0 = error)
 *
 * @note The returned string is allocated with malloc() and must be freed
 *       by the caller using free() when no longer needed.
 */
AX_ASR_API int AX_ASR_RunPCM(AX_ASR_HANDLE handle,
                             float* pcm_data,
                             int num_samples,
                             int sample_rate,
                             const char* language,
                             char** result);

#ifdef __cplusplus
}
#endif