Spaces:
Runtime error
Runtime error
File size: 2,675 Bytes
a57f260 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
//
// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
// SPDX-License-Identifier: Apache-2.0
//
#ifndef POCKET_TTS_AUDIO_PROCESSOR_HPP
#define POCKET_TTS_AUDIO_PROCESSOR_HPP
#include "memory_pool.hpp"
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>
namespace pocket_tts_accelerator {
/**
 * @brief Fixed-size header record describing a WAV file.
 *
 * The field order and widths follow the canonical 44-byte RIFF/WAVE header
 * (RIFF chunk descriptor, "fmt " sub-chunk, "data" sub-chunk header).
 *
 * NOTE(review): presumably this struct is read from / written to disk as a raw
 * byte image -- confirm against the implementation. If so, the multi-byte
 * fields are little-endian on disk and a big-endian host would need swaps.
 * The static_assert after the struct rejects any ABI that would insert
 * padding and silently shift the fields.
 */
struct WavFileHeader {
    char riff_marker[4];              ///< Chunk id; "RIFF" in the WAV specification.
    std::uint32_t file_size;          ///< RIFF chunk size field (per spec: file size minus 8).
    char wave_marker[4];              ///< Form type; "WAVE" in the WAV specification.
    char format_marker[4];            ///< Format sub-chunk id; "fmt " in the WAV specification.
    std::uint32_t format_chunk_size;  ///< Size of the format sub-chunk payload (16 for plain PCM per spec).
    std::uint16_t audio_format;       ///< Format tag (per spec: 1 = integer PCM, 3 = IEEE float).
    std::uint16_t number_of_channels; ///< Channel count.
    std::uint32_t sample_rate;        ///< Sampling rate; presumably in Hz.
    std::uint32_t byte_rate;          ///< Per spec: sample_rate * block_align.
    std::uint16_t block_align;        ///< Per spec: bytes per frame (channels * bytes per sample).
    std::uint16_t bits_per_sample;    ///< Bit depth of one sample.
    char data_marker[4];              ///< Data sub-chunk id; "data" in the WAV specification.
    std::uint32_t data_size;          ///< Size of the sample payload in bytes.
};

// 4+4+4+4+4+2+2+4+4+2+2+4+4 = 44: any other size means the compiler padded the
// record and it no longer lines up with the on-disk header.
static_assert(sizeof(WavFileHeader) == 44,
              "WavFileHeader must be exactly 44 bytes with no padding");
/**
 * @brief Audio samples together with their format description and a status.
 *
 * Every scalar member carries a default initializer so that a
 * default-constructed AudioData is a well-defined "empty, not valid" value
 * rather than holding indeterminate data. The struct remains an aggregate
 * (C++14 and later), so existing brace initialization keeps working.
 */
struct AudioData {
    /// Signed 16-bit samples. NOTE(review): presumably interleaved by channel
    /// when number_of_channels > 1 -- confirm against the implementation.
    std::vector<std::int16_t> samples;
    std::uint32_t sample_rate = 0;        ///< Sampling rate; presumably in Hz.
    std::uint16_t number_of_channels = 0; ///< Channel count.
    std::uint16_t bits_per_sample = 0;    ///< Bit depth associated with the audio.
    bool is_valid = false;                ///< Status flag; false until explicitly set.
    std::string error_message;            ///< Diagnostic text; empty by default.
};
/**
 * @brief Outcome of an audio processing step: samples, rate and status.
 *
 * The scalar members carry default initializers so that a default-constructed
 * result is a well-defined "no samples, not successful" value rather than
 * holding indeterminate data. The struct remains an aggregate (C++14 and
 * later), so existing brace initialization keeps working.
 */
struct AudioProcessingResult {
    std::vector<std::int16_t> processed_samples; ///< Signed 16-bit output samples.
    std::uint32_t output_sample_rate = 0;        ///< Rate of processed_samples; presumably in Hz.
    bool success = false;                        ///< Status flag; false until explicitly set.
    std::string error_message;                   ///< Diagnostic text; empty by default.
};
/**
 * @brief WAV file I/O and sample-format conversion routines.
 *
 * Only declarations are visible in this header; the notes below describe the
 * signatures, and anything about runtime behaviour is marked as an assumption
 * to be confirmed against the implementation file.
 *
 * The class stores a reference to a MemoryPool and is non-copyable.
 */
class AudioProcessor {
public:
    /**
     * @brief Creates a processor associated with @p shared_memory_pool.
     * @param shared_memory_pool Pool taken by reference. NOTE(review): presumably
     *        bound to the memory_pool member, in which case it must outlive
     *        this object -- confirm.
     */
    explicit AudioProcessor(MemoryPool& shared_memory_pool);

    ~AudioProcessor();

    // Copying is disabled.
    AudioProcessor(const AudioProcessor&) = delete;
    AudioProcessor& operator=(const AudioProcessor&) = delete;

    /**
     * @brief Loads audio from the WAV file at @p file_path.
     * @return The loaded audio. NOTE(review): presumably failures are reported
     *         through AudioData::is_valid / error_message -- confirm.
     */
    AudioData read_wav_file(const std::string& file_path);

    /**
     * @brief Writes @p audio_data as a WAV file at @p file_path.
     * @return Status flag; presumably true on success -- confirm.
     */
    bool write_wav_file(const std::string& file_path, const AudioData& audio_data);

    /// @brief Produces a mono version of @p input_audio.
    AudioProcessingResult convert_to_mono(const AudioData& input_audio);

    /// @brief Produces a 16-bit PCM version of @p input_audio.
    AudioProcessingResult convert_to_pcm_int16(const AudioData& input_audio);

    /// @brief Produces @p input_audio resampled to @p target_sample_rate.
    AudioProcessingResult resample_audio(
        const AudioData& input_audio,
        std::uint32_t target_sample_rate
    );

    /**
     * @brief Produces @p input_audio normalized towards @p target_peak_level.
     * @param target_peak_level NOTE(review): scale (e.g. 0..1 vs dBFS) is not
     *        visible from this header -- confirm.
     */
    AudioProcessingResult normalize_audio(
        const AudioData& input_audio,
        float target_peak_level
    );

    /**
     * @brief File-to-file processing entry point for voice cloning input.
     *
     * NOTE(review): presumably chains the read / convert / resample /
     * normalize / write steps declared above -- the exact pipeline is defined
     * in the implementation file.
     */
    AudioProcessingResult process_audio_for_voice_cloning(
        const std::string& input_file_path,
        const std::string& output_file_path
    );

    /// @brief Checks @p header; presumably true when it is an acceptable WAV header.
    static bool validate_wav_header(const WavFileHeader& header);

    /// @brief Duration of @p audio_data in milliseconds.
    static std::size_t calculate_audio_duration_milliseconds(const AudioData& audio_data);

private:
    // Sample-format helpers: each reads from `input` and writes int16 samples
    // to `output`. NOTE(review): both buffers presumably hold at least
    // `sample_count` elements -- confirm.
    void convert_float32_to_int16(
        const float* input,
        std::int16_t* output,
        std::size_t sample_count
    );
    void convert_int32_to_int16(
        const std::int32_t* input,
        std::int16_t* output,
        std::size_t sample_count
    );
    void convert_uint8_to_int16(
        const std::uint8_t* input,
        std::int16_t* output,
        std::size_t sample_count
    );

    // Mixes multi-channel int16 audio to mono. NOTE(review): `input` is
    // presumably interleaved with frame_count * channel_count samples and
    // `output` holds frame_count samples -- confirm.
    void mix_channels_to_mono(
        const std::int16_t* input,
        std::int16_t* output,
        std::size_t frame_count,
        std::uint16_t channel_count
    );

    // Non-owning reference to a MemoryPool.
    MemoryPool& memory_pool;
};
}
#endif // POCKET_TTS_AUDIO_PROCESSOR_HPP