File size: 4,161 Bytes
798e40d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
/**
 * @file ax_whisper_api.h
 * @brief AX Whisper API header - C-compatible interface for Whisper ASR system
 * @note This header provides a C interface to the Whisper speech recognition system
 */

#ifndef _AX_WHISPER_API_H_
#define _AX_WHISPER_API_H_

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @brief Export marker placed in front of every public AX Whisper declaration
 *
 * On GCC-compatible compilers this expands to the "default" symbol visibility
 * attribute, so the marked functions stay exported even when the library is
 * built with hidden visibility by default.
 *
 * On compilers that do not understand the GNU attribute syntax the marker
 * expands to nothing, so the header still parses. A build system may also
 * pre-define AX_WHISPER_API (for example to a platform-specific export
 * keyword) before including this header; that definition is then kept as is.
 */
#ifndef AX_WHISPER_API
#  if defined(__GNUC__) || defined(__clang__)
#    define AX_WHISPER_API __attribute__((visibility("default")))
#  else
#    define AX_WHISPER_API
#  endif
#endif

/**
 * @brief Opaque handle identifying one Whisper ASR context
 *
 * All internal state of the Whisper ASR system lives behind this handle.
 * Callers never see the underlying implementation type, which keeps the
 * ABI stable when the internals change.
 *
 * @note Because the handle is an untyped pointer, the compiler cannot detect
 *       an unrelated pointer being passed where a handle is expected.
 */
typedef void *AX_WHISPER_HANDLE;

/**
 * @brief Create and initialize a Whisper ASR context
 *
 * Builds a new Whisper ASR context for the given model type, model directory
 * and language: the matching models are loaded, the recognizer is configured,
 * and the context is made ready for speech recognition.
 *
 * @param model_type Whisper model to use (e.g., "tiny", "base", "small",
 *                   "medium", "large") or a custom model identifier
 * @param model_path Directory that holds the model files. The files are
 *                   expected at:
 *                   - {model_path}/{model_type}/{model_type}-encoder.axmodel
 *                   - {model_path}/{model_type}/{model_type}-decoder.axmodel
 *                   - {model_path}/{model_type}/{model_type}-tokens.txt
 *                   - {model_path}/{model_type}/{model_type}_config.json
 * @param language   Language code for recognition (e.g., "en", "zh", "ja",
 *                   "ko"). Use "auto" for automatic language detection if
 *                   supported.
 *
 * @return Opaque handle to the initialized Whisper context, or NULL if
 *         initialization fails
 *
 * @note The caller owns the returned handle and must release it with
 *       AX_WHISPER_Uninit() once it is no longer needed.
 * @note If the model does not support the requested language, the function
 *       may fall back to a default language or return NULL.
 *
 * Example: initialize English recognition with the base model.
 * @code
 *   AX_WHISPER_HANDLE handle = AX_WHISPER_Init("base", "../models-ax650", "en");
 * @endcode
 */
AX_WHISPER_API AX_WHISPER_HANDLE AX_WHISPER_Init(
    const char* model_type,
    const char* model_path,
    const char* language);

/**
 * @brief Tear down a Whisper ASR context and release its resources
 *
 * Releases everything tied to the Whisper context: models are unloaded,
 * memory is freed, and hardware resources are given back.
 *
 * @param handle Whisper context handle obtained from AX_WHISPER_Init()
 *
 * @warning The handle is invalid once this function has been called and
 *          must not be passed to any further API call.
 */
AX_WHISPER_API void AX_WHISPER_Uninit(
    AX_WHISPER_HANDLE handle);

/**
 * @brief Recognize speech from a WAV file and return the text as a newly
 *        allocated string
 *
 * @param handle   Whisper context handle
 * @param wav_file Path to the input WAV audio file (16 kHz, 32-bit float PCM)
 * @param result   Pointer that receives the allocated result string
 *
 * @return Status code (0 = success, <0 = error)
 *
 * @note The result string is allocated with malloc(); the caller owns it and
 *       must release it with free() when it is no longer needed.
 * @note NOTE(review): the value left in *result when an error is returned is
 *       not specified here - confirm against the implementation.
 */
AX_WHISPER_API int AX_WHISPER_RunFile(
    AX_WHISPER_HANDLE handle,
    const char* wav_file,
    char** result);

/**
 * @brief Recognize speech from an in-memory PCM buffer and return the text
 *        as a newly allocated string
 *
 * @param handle      Whisper context handle
 * @param pcm_data    Mono 16 kHz PCM samples as 32-bit floats, each in the
 *                    range -1.0 to 1.0
 * @param num_samples Number of samples in pcm_data
 * @param result      Pointer that receives the allocated result string
 *
 * @return Status code (0 = success, <0 = error)
 *
 * @note The result string is allocated with malloc(); the caller owns it and
 *       must release it with free() when it is no longer needed.
 * @note NOTE(review): pcm_data is declared non-const; presumably the samples
 *       are only read - confirm against the implementation.
 */
AX_WHISPER_API int AX_WHISPER_RunPCM(
    AX_WHISPER_HANDLE handle,
    float* pcm_data,
    int num_samples,
    char** result);

#ifdef __cplusplus
}
#endif

#endif // _AX_WHISPER_API_H_