|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef POCKET_TTS_AUDIO_PROCESSOR_HPP |
|
|
#define POCKET_TTS_AUDIO_PROCESSOR_HPP |
|
|
|
|
|
#include "memory_pool.hpp" |
|
|
#include <cstddef> |
|
|
#include <cstdint> |
|
|
#include <string> |
|
|
#include <vector> |
|
|
|
|
|
namespace pocket_tts_accelerator { |
|
|
|
|
|
/// Fixed-size WAV header record: the RIFF descriptor, the format-chunk fields
/// and the data-chunk descriptor, declared in the field order of the canonical
/// 44-byte RIFF/WAVE PCM header.
///
/// NOTE(review): presumably read from / written to files as a raw byte image —
/// confirm in the implementation. The static_assert after the struct guards the
/// padding-free layout such use would rely on.
///
/// Members deliberately carry no default initializers so the type stays
/// trivial; value-initialize (`WavFileHeader header{};`) before inspecting a
/// header that may not have been completely filled in.
struct WavFileHeader {
    char riff_marker[4];              // RIFF chunk tag ("RIFF" per the WAV format)
    std::uint32_t file_size;          // RIFF chunk size field
    char wave_marker[4];              // form type tag ("WAVE" per the WAV format)
    char format_marker[4];            // format chunk tag ("fmt " per the WAV format)
    std::uint32_t format_chunk_size;  // size field of the format chunk
    std::uint16_t audio_format;       // sample encoding tag (1 denotes PCM in the WAV format)
    std::uint16_t number_of_channels; // channel count
    std::uint32_t sample_rate;        // sample rate field (Hz per the WAV format)
    std::uint32_t byte_rate;          // bytes-per-second field
    std::uint16_t block_align;        // bytes per sample frame (all channels)
    std::uint16_t bits_per_sample;    // bit depth of one sample
    char data_marker[4];              // data chunk tag ("data" per the WAV format)
    std::uint32_t data_size;          // size field of the data chunk
};

// Every member sits at its natural alignment, so no padding is expected; fail
// the build loudly if a toolchain ever lays the record out differently.
static_assert(sizeof(WavFileHeader) == 44,
              "WavFileHeader must match the 44-byte canonical WAV header layout");
|
|
|
|
|
/// In-memory audio clip: 16-bit samples plus the format fields describing
/// them, together with a validity flag and an error description.
///
/// All scalar members are default-initialized to zero / false so that a
/// default-constructed AudioData is deterministic and never reports itself as
/// valid by accident. The struct remains an aggregate under C++14 and later,
/// so brace initialization in member order keeps working.
struct AudioData {
    // Sample values. NOTE(review): multi-channel data is presumably
    // interleaved frame by frame — confirm against the implementation.
    std::vector<std::int16_t> samples;

    std::uint32_t sample_rate{0};         // samples per second; 0 until set
    std::uint16_t number_of_channels{0};  // channel count; 0 until set
    std::uint16_t bits_per_sample{0};     // bit depth of the source data; 0 until set

    // Presumably set by producers (e.g. a WAV reader) to signal success, with
    // error_message describing the failure otherwise — confirm at call sites.
    bool is_valid{false};
    std::string error_message;
};
|
|
|
|
|
/// Outcome of an audio processing step: the produced samples, their sample
/// rate, and a success flag with an accompanying error description.
///
/// Scalar members are default-initialized so a default-constructed result is
/// deterministic and reads as "not successful". The struct remains an
/// aggregate under C++14 and later.
struct AudioProcessingResult {
    std::vector<std::int16_t> processed_samples;  // output samples (16-bit)
    std::uint32_t output_sample_rate{0};          // sample rate of processed_samples; 0 until set

    // NOTE(review): error_message is presumably only meaningful when success
    // is false — confirm against the implementation.
    bool success{false};
    std::string error_message;
};
|
|
|
|
|
class AudioProcessor { |
|
|
public: |
|
|
explicit AudioProcessor(MemoryPool& shared_memory_pool); |
|
|
~AudioProcessor(); |
|
|
|
|
|
AudioProcessor(const AudioProcessor&) = delete; |
|
|
AudioProcessor& operator=(const AudioProcessor&) = delete; |
|
|
|
|
|
AudioData read_wav_file(const std::string& file_path); |
|
|
bool write_wav_file(const std::string& file_path, const AudioData& audio_data); |
|
|
|
|
|
AudioProcessingResult convert_to_mono(const AudioData& input_audio); |
|
|
AudioProcessingResult convert_to_pcm_int16(const AudioData& input_audio); |
|
|
AudioProcessingResult resample_audio(const AudioData& input_audio, std::uint32_t target_sample_rate); |
|
|
AudioProcessingResult normalize_audio(const AudioData& input_audio, float target_peak_level); |
|
|
|
|
|
AudioProcessingResult process_audio_for_voice_cloning( |
|
|
const std::string& input_file_path, |
|
|
const std::string& output_file_path |
|
|
); |
|
|
|
|
|
static bool validate_wav_header(const WavFileHeader& header); |
|
|
static std::size_t calculate_audio_duration_milliseconds(const AudioData& audio_data); |
|
|
|
|
|
private: |
|
|
void convert_float32_to_int16(const float* input, std::int16_t* output, std::size_t sample_count); |
|
|
void convert_int32_to_int16(const std::int32_t* input, std::int16_t* output, std::size_t sample_count); |
|
|
void convert_uint8_to_int16(const std::uint8_t* input, std::int16_t* output, std::size_t sample_count); |
|
|
void mix_channels_to_mono(const std::int16_t* input, std::int16_t* output, std::size_t frame_count, std::uint16_t channel_count); |
|
|
|
|
|
MemoryPool& memory_pool; |
|
|
}; |
|
|
|
|
|
} |
|
|
|
|
|
#endif |