File size: 4,161 Bytes
798e40d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 | /**
* @file ax_whisper_api.h
* @brief AX Whisper API header - C-compatible interface for Whisper ASR system
* @note This header provides a C interface to the Whisper speech recognition system
*/
#ifndef _AX_WHISPER_API_H_
#define _AX_WHISPER_API_H_
#ifdef __cplusplus
extern "C" {
#endif
/**
 * @brief Export marker placed on every public function declared in this header
 *
 * On GCC-compatible compilers (GCC, Clang) the marker gives the symbol default
 * visibility, so the function remains exported from a shared library even when
 * the library is compiled with -fvisibility=hidden.
 *
 * Compilers that do not implement the GNU __attribute__ syntax would fail to
 * parse the declarations, so the marker expands to nothing for them.
 */
#if defined(__GNUC__)
#define AX_WHISPER_API __attribute__((visibility("default")))
#else
#define AX_WHISPER_API
#endif
/**
 * @brief Opaque handle type for a Whisper ASR context
 *
 * This handle encapsulates all internal state of the Whisper ASR system.
 * The actual implementation is hidden from C callers to maintain ABI stability.
 *
 * A handle is produced by AX_WHISPER_Init() (NULL signals failure) and is
 * released with AX_WHISPER_Uninit().
 *
 * @note Because the handle is a plain void pointer, the compiler cannot reject
 *       an unrelated pointer passed in its place; only pass values that were
 *       returned by AX_WHISPER_Init().
 */
typedef void* AX_WHISPER_HANDLE;
/**
 * @brief Create a Whisper ASR context for a given model and language
 *
 * Builds a new Whisper ASR context from the model type, model directory and
 * language supplied by the caller. The matching models are loaded and the
 * recognizer is configured so that the context is ready for speech recognition.
 *
 * @param model_type Whisper model to use (e.g., "tiny", "base", "small",
 *                   "medium", "large"), or a custom model identifier
 * @param model_path Directory in which the model files are stored.
 *                   The files are expected at:
 *                   - {model_path}/{model_type}/{model_type}-encoder.axmodel
 *                   - {model_path}/{model_type}/{model_type}-decoder.axmodel
 *                   - {model_path}/{model_type}/{model_type}-tokens.txt
 *                   - {model_path}/{model_type}/{model_type}_config.json
 * @param language   Language code for recognition (e.g., "en", "zh", "ja", "ko").
 *                   Use "auto" for automatic language detection if supported.
 *
 * @return Opaque handle to the initialized Whisper context, or NULL if
 *         initialization fails
 *
 * @note The caller owns the returned handle and must pass it to
 *       AX_WHISPER_Uninit() once it is no longer needed.
 * @note When the model does not support the requested language, the function
 *       may fall back to a default language or return NULL.
 *
 * Example - English recognition with the base model:
 * @code
 * AX_WHISPER_HANDLE handle = AX_WHISPER_Init("base", "../models-ax650", "en");
 * @endcode
 */
AX_WHISPER_API AX_WHISPER_HANDLE AX_WHISPER_Init(const char* model_type,
                                                 const char* model_path,
                                                 const char* language);
/**
 * @brief Deinitialize a Whisper ASR context and release its resources
 *
 * Cleans up all resources associated with the Whisper context, including
 * unloading models, freeing memory, and releasing hardware resources.
 *
 * @param handle Whisper context handle obtained from AX_WHISPER_Init()
 *
 * @warning After calling this function, the handle becomes invalid and
 *          must not be used in any subsequent API call.
 *
 * @note NOTE(review): the behavior for a NULL handle is not specified in this
 *       header - confirm against the implementation before relying on it.
 */
AX_WHISPER_API void AX_WHISPER_Uninit(AX_WHISPER_HANDLE handle);
/**
 * @brief Run speech recognition on a WAV file and return the result as a
 *        dynamically allocated string
 *
 * @param handle   Whisper context handle
 * @param wav_file Path to the input WAV audio file (16k, PCM f32)
 * @param result   Output location that receives the allocated result string
 *
 * @return Status code: 0 on success, a negative value on error
 *
 * @note The string stored in *result is allocated with malloc(); the caller
 *       owns it and must release it with free() when it is no longer needed.
 * @note NOTE(review): the value left in *result when an error is returned is
 *       not specified in this header - confirm before freeing it on failure.
 */
AX_WHISPER_API int AX_WHISPER_RunFile(AX_WHISPER_HANDLE handle,
                                      const char* wav_file,
                                      char** result);
/**
 * @brief Run speech recognition on in-memory PCM samples and return the result
 *        as a dynamically allocated string
 *
 * @param handle      Whisper context handle
 * @param pcm_data    16k mono PCM f32 samples, each in the range -1.0 to 1.0
 * @param num_samples Number of samples in pcm_data
 * @param result      Output location that receives the allocated result string
 *
 * @return Status code: 0 on success, a negative value on error
 *
 * @note The string stored in *result is allocated with malloc(); the caller
 *       owns it and must release it with free() when it is no longer needed.
 * @note NOTE(review): pcm_data is declared non-const; presumably the samples
 *       are only read - confirm against the implementation.
 */
AX_WHISPER_API int AX_WHISPER_RunPCM(AX_WHISPER_HANDLE handle,
                                     float* pcm_data,
                                     int num_samples,
                                     char** result);
#ifdef __cplusplus
}
#endif
#endif // _AX_WHISPER_API_H_ |