// // SPDX-FileCopyrightText: Hadad // SPDX-License-Identifier: Apache-2.0 // #ifndef POCKET_TTS_AUDIO_PROCESSOR_HPP #define POCKET_TTS_AUDIO_PROCESSOR_HPP #include "memory_pool.hpp" #include #include #include #include namespace pocket_tts_accelerator { struct WavFileHeader { char riff_marker[4]; std::uint32_t file_size; char wave_marker[4]; char format_marker[4]; std::uint32_t format_chunk_size; std::uint16_t audio_format; std::uint16_t number_of_channels; std::uint32_t sample_rate; std::uint32_t byte_rate; std::uint16_t block_align; std::uint16_t bits_per_sample; char data_marker[4]; std::uint32_t data_size; }; struct AudioData { std::vector samples; std::uint32_t sample_rate; std::uint16_t number_of_channels; std::uint16_t bits_per_sample; bool is_valid; std::string error_message; }; struct AudioProcessingResult { std::vector processed_samples; std::uint32_t output_sample_rate; bool success; std::string error_message; }; class AudioProcessor { public: explicit AudioProcessor(MemoryPool& shared_memory_pool); ~AudioProcessor(); AudioProcessor(const AudioProcessor&) = delete; AudioProcessor& operator=(const AudioProcessor&) = delete; AudioData read_wav_file(const std::string& file_path); bool write_wav_file(const std::string& file_path, const AudioData& audio_data); AudioProcessingResult convert_to_mono(const AudioData& input_audio); AudioProcessingResult convert_to_pcm_int16(const AudioData& input_audio); AudioProcessingResult resample_audio(const AudioData& input_audio, std::uint32_t target_sample_rate); AudioProcessingResult normalize_audio(const AudioData& input_audio, float target_peak_level); AudioProcessingResult process_audio_for_voice_cloning( const std::string& input_file_path, const std::string& output_file_path ); static bool validate_wav_header(const WavFileHeader& header); static std::size_t calculate_audio_duration_milliseconds(const AudioData& audio_data); private: void convert_float32_to_int16(const float* input, std::int16_t* output, std::size_t sample_count); void convert_int32_to_int16(const std::int32_t* input, std::int16_t* output, std::size_t sample_count); void convert_uint8_to_int16(const std::uint8_t* input, std::int16_t* output, std::size_t sample_count); void mix_channels_to_mono(const std::int16_t* input, std::int16_t* output, std::size_t frame_count, std::uint16_t channel_count); MemoryPool& memory_pool; }; } #endif