Pocket TTS: Switch to simple demo.
Browse files- .dockerignore +0 -3
- Dockerfile +1 -16
- README.md +3 -0
- accelerator/CMakeLists.txt +0 -36
- accelerator/include/accelerator_core.hpp +0 -69
- accelerator/include/audio_processor.hpp +0 -84
- accelerator/include/ipc_handler.hpp +0 -107
- accelerator/include/memory_pool.hpp +0 -79
- accelerator/include/thread_pool.hpp +0 -83
- accelerator/src/accelerator_core.cpp +0 -558
- accelerator/src/audio_processor.cpp +0 -352
- accelerator/src/ipc_handler.cpp +0 -226
- accelerator/src/main.cpp +0 -83
- accelerator/src/memory_pool.cpp +0 -216
- accelerator/src/thread_pool.cpp +0 -84
- app.py +0 -372
- assets/css/styles.py +0 -161
- assets/static/footer.py +0 -32
- assets/static/header.py +0 -18
- assets/static/sidebar.py +0 -44
- assets/static/title.py +0 -15
- config.py +0 -126
- src/accelerator/client.py +0 -583
- src/audio/converter.py +0 -344
- src/audio/validator.py +0 -268
- src/core/authentication.py +0 -23
- src/core/memory.py +0 -394
- src/core/state.py +0 -147
- src/generation/handler.py +0 -309
- src/tts/manager.py +0 -341
- src/ui/handlers.py +0 -58
- src/ui/state.py +0 -43
- src/validation/text.py +0 -20
.dockerignore
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
Dockerfile
|
| 2 |
-
LICENSE
|
| 3 |
-
README.md
|
|
|
|
|
|
|
|
|
|
|
|
Dockerfile
CHANGED
|
@@ -3,19 +3,4 @@
|
|
| 3 |
# SPDX-License-Identifier: Apache-2.0
|
| 4 |
#
|
| 5 |
|
| 6 |
-
FROM hadadrjt/pocket-tts:hf-
|
| 7 |
-
|
| 8 |
-
WORKDIR /app
|
| 9 |
-
|
| 10 |
-
COPY . .
|
| 11 |
-
|
| 12 |
-
RUN mkdir build \
|
| 13 |
-
&& cd build \
|
| 14 |
-
&& cmake \
|
| 15 |
-
-DCMAKE_BUILD_TYPE=Release \
|
| 16 |
-
-DCMAKE_INSTALL_PREFIX=/app \
|
| 17 |
-
../accelerator \
|
| 18 |
-
&& make -j$(nproc) \
|
| 19 |
-
&& make install \
|
| 20 |
-
&& cd .. \
|
| 21 |
-
&& rm -rf accelerator build
|
|
|
|
| 3 |
# SPDX-License-Identifier: Apache-2.0
|
| 4 |
#
|
| 5 |
|
| 6 |
+
FROM hadadrjt/pocket-tts:hf-simple-demo
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
|
@@ -8,4 +8,7 @@ colorTo: yellow
|
|
| 8 |
sdk: docker
|
| 9 |
app_port: 7860
|
| 10 |
pinned: false
|
|
|
|
|
|
|
|
|
|
| 11 |
---
|
|
|
|
| 8 |
sdk: docker
|
| 9 |
app_port: 7860
|
| 10 |
pinned: false
|
| 11 |
+
models:
|
| 12 |
+
- kyutai/pocket-tts
|
| 13 |
+
- kyutai/tts-voices
|
| 14 |
---
|
accelerator/CMakeLists.txt
DELETED
|
@@ -1,36 +0,0 @@
|
|
| 1 |
-
#
|
| 2 |
-
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
# SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
#
|
| 5 |
-
|
| 6 |
-
# Nothing in this file needs a newer CMake than 3.16 (the comma-separated
# $<CXX_COMPILER_ID:...> form used below requires 3.15). The upper bound opts in
# to the policy behaviour that has been tested.
cmake_minimum_required(VERSION 3.16...3.31)

project(pocket_tts_accelerator VERSION 0.0.0 LANGUAGES CXX)

# Strict ISO C++17 for every target in this project (-std=c++17, not gnu++17).
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# -march=native ties the binary to the CPU of the build machine; a binary built
# in one place (for example a container image) and run on a different CPU can
# die with SIGILL. Keep it opt-in.
option(POCKET_TTS_ACCELERATOR_NATIVE_ARCH
  "Tune the accelerator for the build host CPU (-march=native)"
  OFF
)

# Let Threads::Threads carry -pthread for both compiling and linking instead of
# appending it to CMAKE_CXX_FLAGS by hand.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)

add_executable(pocket_tts_accelerator
  src/main.cpp
  src/accelerator_core.cpp
  src/audio_processor.cpp
  src/ipc_handler.cpp
  src/memory_pool.cpp
  src/thread_pool.cpp
)

target_include_directories(pocket_tts_accelerator
  PRIVATE
    "${CMAKE_CURRENT_SOURCE_DIR}/include"
)

# Optimisation and warning flags are attached to the target (not to the global
# CMAKE_CXX_FLAGS string) and only for compilers that understand GCC-style flags.
target_compile_options(pocket_tts_accelerator
  PRIVATE
    "$<$<CXX_COMPILER_ID:GNU,Clang,AppleClang>:-O3;-ffast-math;-funroll-loops>"
    "$<$<CXX_COMPILER_ID:GNU,Clang,AppleClang>:-Wall;-Wextra;-Wpedantic>"
    "$<$<AND:$<BOOL:${POCKET_TTS_ACCELERATOR_NATIVE_ARCH}>,$<CXX_COMPILER_ID:GNU,Clang,AppleClang>>:-march=native>"
)

target_link_libraries(pocket_tts_accelerator PRIVATE Threads::Threads)

# Installs to <CMAKE_INSTALL_PREFIX>/bin.
install(TARGETS pocket_tts_accelerator DESTINATION bin)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
accelerator/include/accelerator_core.hpp
DELETED
|
@@ -1,69 +0,0 @@
|
|
| 1 |
-
//
|
| 2 |
-
// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
// SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
//
|
| 5 |
-
|
| 6 |
-
#ifndef POCKET_TTS_ACCELERATOR_CORE_HPP
#define POCKET_TTS_ACCELERATOR_CORE_HPP

#include "audio_processor.hpp"
#include "ipc_handler.hpp"
#include "memory_pool.hpp"
#include "thread_pool.hpp"
#include <atomic>
#include <cstddef>   // std::size_t (used directly below)
#include <cstdint>   // std::uint8_t (used directly below)
#include <memory>
#include <string>
#include <vector>    // std::vector (used directly below)

namespace pocket_tts_accelerator {

// Start-up parameters for AcceleratorCore.
struct AcceleratorConfiguration {
    std::size_t number_of_worker_threads;  // passed to the ThreadPool constructor
    std::size_t memory_pool_size_bytes;    // passed to the MemoryPool constructor
    std::string ipc_socket_path;           // passed to the IpcHandler constructor
    bool enable_verbose_logging;
};

// Owns the memory pool, thread pool, audio processor and IPC handler, and
// exposes one handler per IPC command type.
class AcceleratorCore {
public:
    explicit AcceleratorCore(const AcceleratorConfiguration& configuration);
    ~AcceleratorCore();

    // Non-copyable: the instance owns its components through unique_ptr.
    AcceleratorCore(const AcceleratorCore&) = delete;
    AcceleratorCore& operator=(const AcceleratorCore&) = delete;

    // Builds all components and starts the IPC server.
    // Returns false if the server could not be started.
    bool initialize();

    // Blocks until a shutdown has been requested.
    void run();

    // Stops the IPC server and the thread pool and resets the memory pool.
    // Calling it more than once is a no-op.
    void shutdown();

    bool is_running() const;
    std::string get_status_string() const;

    static AcceleratorConfiguration get_default_configuration();

private:
    void register_all_command_handlers();
    void setup_signal_handlers();

    // One handler per CommandType. Each receives the raw request payload and
    // returns the raw response payload.
    std::vector<std::uint8_t> handle_ping_command(const std::vector<std::uint8_t>& payload);
    std::vector<std::uint8_t> handle_process_audio_command(const std::vector<std::uint8_t>& payload);
    std::vector<std::uint8_t> handle_convert_to_mono_command(const std::vector<std::uint8_t>& payload);
    std::vector<std::uint8_t> handle_convert_to_pcm_command(const std::vector<std::uint8_t>& payload);
    std::vector<std::uint8_t> handle_resample_audio_command(const std::vector<std::uint8_t>& payload);
    std::vector<std::uint8_t> handle_get_memory_stats_command(const std::vector<std::uint8_t>& payload);
    std::vector<std::uint8_t> handle_clear_memory_pool_command(const std::vector<std::uint8_t>& payload);
    std::vector<std::uint8_t> handle_shutdown_command(const std::vector<std::uint8_t>& payload);

    void log_message(const std::string& message) const;

    // Declaration order is kept as-is: it fixes construction and destruction order.
    AcceleratorConfiguration config;
    std::unique_ptr<MemoryPool> memory_pool;
    std::unique_ptr<ThreadPool> thread_pool;
    std::unique_ptr<AudioProcessor> audio_processor;
    std::unique_ptr<IpcHandler> ipc_handler;
    std::atomic<bool> is_initialized;
    std::atomic<bool> should_shutdown;
};

}

#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
accelerator/include/audio_processor.hpp
DELETED
|
@@ -1,84 +0,0 @@
|
|
| 1 |
-
//
|
| 2 |
-
// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
// SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
//
|
| 5 |
-
|
| 6 |
-
#ifndef POCKET_TTS_AUDIO_PROCESSOR_HPP
#define POCKET_TTS_AUDIO_PROCESSOR_HPP

#include "memory_pool.hpp"
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

namespace pocket_tts_accelerator {

// Field-for-field image of a RIFF/WAVE header with a single "fmt " chunk
// followed directly by the "data" chunk.
struct WavFileHeader {
    char riff_marker[4];
    std::uint32_t file_size;
    char wave_marker[4];
    char format_marker[4];
    std::uint32_t format_chunk_size;
    std::uint16_t audio_format;
    std::uint16_t number_of_channels;
    std::uint32_t sample_rate;
    std::uint32_t byte_rate;
    std::uint16_t block_align;
    std::uint16_t bits_per_sample;
    char data_marker[4];
    std::uint32_t data_size;
};

// NOTE(review): presumably this struct is filled by copying raw bytes from the
// file -- confirm in audio_processor.cpp. If so, any compiler-inserted padding
// would silently misalign every field, so pin the size at compile time.
static_assert(sizeof(WavFileHeader) == 44,
              "WavFileHeader must be exactly 44 bytes with no padding");

// Decoded audio plus the outcome of the read that produced it.
struct AudioData {
    std::vector<std::int16_t> samples;
    std::uint32_t sample_rate;
    std::uint16_t number_of_channels;
    std::uint16_t bits_per_sample;
    bool is_valid;              // false when the read failed
    std::string error_message;  // reason for the failure when !is_valid
};

// Outcome of a single processing step.
struct AudioProcessingResult {
    std::vector<std::int16_t> processed_samples;
    std::uint32_t output_sample_rate;
    bool success;
    std::string error_message;  // reason for the failure when !success
};

// WAV reading/writing and sample-format conversions. Holds a reference to a
// MemoryPool owned elsewhere, which must therefore outlive this object.
class AudioProcessor {
public:
    explicit AudioProcessor(MemoryPool& shared_memory_pool);
    ~AudioProcessor();

    // Non-copyable: holds a reference member.
    AudioProcessor(const AudioProcessor&) = delete;
    AudioProcessor& operator=(const AudioProcessor&) = delete;

    AudioData read_wav_file(const std::string& file_path);
    bool write_wav_file(const std::string& file_path, const AudioData& audio_data);

    AudioProcessingResult convert_to_mono(const AudioData& input_audio);
    AudioProcessingResult convert_to_pcm_int16(const AudioData& input_audio);
    AudioProcessingResult resample_audio(const AudioData& input_audio, std::uint32_t target_sample_rate);
    AudioProcessingResult normalize_audio(const AudioData& input_audio, float target_peak_level);

    // File-to-file pipeline used by the PROCESS_AUDIO command.
    AudioProcessingResult process_audio_for_voice_cloning(
        const std::string& input_file_path,
        const std::string& output_file_path
    );

    static bool validate_wav_header(const WavFileHeader& header);
    static std::size_t calculate_audio_duration_milliseconds(const AudioData& audio_data);

private:
    // Sample-format helpers; each writes sample_count (or frame_count) values to output.
    void convert_float32_to_int16(const float* input, std::int16_t* output, std::size_t sample_count);
    void convert_int32_to_int16(const std::int32_t* input, std::int16_t* output, std::size_t sample_count);
    void convert_uint8_to_int16(const std::uint8_t* input, std::int16_t* output, std::size_t sample_count);
    void mix_channels_to_mono(const std::int16_t* input, std::int16_t* output, std::size_t frame_count, std::uint16_t channel_count);

    MemoryPool& memory_pool;
};

}

#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
accelerator/include/ipc_handler.hpp
DELETED
|
@@ -1,107 +0,0 @@
|
|
| 1 |
-
//
|
| 2 |
-
// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
// SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
//
|
| 5 |
-
|
| 6 |
-
#ifndef POCKET_TTS_IPC_HANDLER_HPP
#define POCKET_TTS_IPC_HANDLER_HPP

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <mutex>
#include <string>
#include <thread>
#include <unordered_map>  // IpcHandler::command_handlers
#include <vector>

namespace pocket_tts_accelerator {

// Command identifiers carried in RequestHeader::command_type.
enum class CommandType : std::uint32_t {
    PING = 0,
    PROCESS_AUDIO = 1,
    CONVERT_TO_MONO = 2,
    CONVERT_TO_PCM = 3,
    RESAMPLE_AUDIO = 4,
    GET_MEMORY_STATS = 5,
    CLEAR_MEMORY_POOL = 6,
    SHUTDOWN = 7,
    UNKNOWN = 255
};

// Status identifiers carried in ResponseHeader::status_code.
enum class ResponseStatus : std::uint32_t {
    SUCCESS = 0,
    ERROR_INVALID_COMMAND = 1,
    ERROR_FILE_NOT_FOUND = 2,
    ERROR_PROCESSING_FAILED = 3,
    ERROR_MEMORY_ALLOCATION = 4,
    ERROR_INTERNAL = 5
};

// Fixed-size header that precedes every request payload.
struct RequestHeader {
    std::uint32_t magic_number;
    std::uint32_t command_type;  // a CommandType value
    std::uint32_t payload_size;  // number of payload bytes that follow
    std::uint32_t request_id;
};

// Fixed-size header that precedes every response payload.
struct ResponseHeader {
    std::uint32_t magic_number;
    std::uint32_t status_code;   // a ResponseStatus value
    std::uint32_t payload_size;  // number of payload bytes that follow
    std::uint32_t request_id;
};

// Payload of the audio commands. The path fields are fixed-size buffers; a
// sender that fills all 512 bytes leaves no terminating NUL, so readers must
// bound their reads by the array size.
struct ProcessAudioRequest {
    char input_file_path[512];
    char output_file_path[512];
    std::uint32_t target_sample_rate;
    std::uint32_t options_flags;
};

// Payload returned by GET_MEMORY_STATS.
struct MemoryStatsResponse {
    std::uint64_t total_allocated_bytes;
    std::uint64_t total_used_bytes;
    std::uint64_t block_count;
};

// Socket server that dispatches each request to the handler registered for
// its command type.
class IpcHandler {
public:
    // A handler receives the request payload and returns the response payload.
    using CommandHandlerFunction = std::function<std::vector<std::uint8_t>(const std::vector<std::uint8_t>&)>;

    explicit IpcHandler(const std::string& socket_path);
    ~IpcHandler();

    // Non-copyable: owns a socket descriptor and a thread.
    IpcHandler(const IpcHandler&) = delete;
    IpcHandler& operator=(const IpcHandler&) = delete;

    bool start_server();
    void stop_server();
    bool is_running() const;

    void register_command_handler(CommandType command_type, CommandHandlerFunction handler);
    void set_shutdown_callback(std::function<void()> callback);

    // 0x50 0x54 0x54 0x53 are the ASCII codes of "PTTS".
    static constexpr std::uint32_t PROTOCOL_MAGIC_NUMBER = 0x50545453;
    // 16 MiB.
    static constexpr std::size_t MAXIMUM_PAYLOAD_SIZE = 16 * 1024 * 1024;
    static constexpr int CONNECTION_BACKLOG = 5;

private:
    void accept_connections_loop();
    void handle_client_connection(int client_socket_fd);
    bool send_response(int socket_fd, const ResponseHeader& header, const std::vector<std::uint8_t>& payload);
    bool receive_request(int socket_fd, RequestHeader& header, std::vector<std::uint8_t>& payload);

    std::string socket_file_path;
    int server_socket_fd;
    std::atomic<bool> is_server_running;
    std::thread accept_thread;
    std::mutex handlers_mutex;
    std::unordered_map<CommandType, CommandHandlerFunction> command_handlers;
    std::function<void()> shutdown_callback;
};

}

#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
accelerator/include/memory_pool.hpp
DELETED
|
@@ -1,79 +0,0 @@
|
|
| 1 |
-
//
|
| 2 |
-
// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
// SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
//
|
| 5 |
-
|
| 6 |
-
#ifndef POCKET_TTS_MEMORY_POOL_HPP
#define POCKET_TTS_MEMORY_POOL_HPP

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <mutex>
#include <unordered_map>
#include <vector>

namespace pocket_tts_accelerator {

// One buffer owned by the pool, together with its bookkeeping.
struct MemoryBlock {
    std::unique_ptr<std::uint8_t[]> data;
    std::size_t block_size;
    bool is_in_use;
    std::uint64_t last_access_timestamp;
};

// Pool of byte buffers. Callers receive raw pointers from allocate() and hand
// them back through deallocate().
class MemoryPool {
public:
    // Default size: 64 MiB.
    explicit MemoryPool(std::size_t initial_pool_size_bytes = 64 * 1024 * 1024);
    ~MemoryPool();

    // Neither copyable nor movable.
    MemoryPool(const MemoryPool&) = delete;
    MemoryPool& operator=(const MemoryPool&) = delete;
    MemoryPool(MemoryPool&&) = delete;
    MemoryPool& operator=(MemoryPool&&) = delete;

    std::uint8_t* allocate(std::size_t requested_size_bytes);
    void deallocate(std::uint8_t* pointer);
    void clear_unused_blocks();
    void reset_pool();

    // Usage counters.
    std::size_t get_total_allocated_bytes() const;
    std::size_t get_total_used_bytes() const;
    std::size_t get_block_count() const;

private:
    std::size_t find_suitable_block_index(std::size_t requested_size) const;
    void create_new_block(std::size_t block_size);
    std::uint64_t get_current_timestamp() const;

    std::vector<MemoryBlock> memory_blocks;
    // Maps a pointer handed out by the pool to an index into memory_blocks.
    std::unordered_map<std::uint8_t*, std::size_t> pointer_to_block_index;
    // mutable so that the const getters can lock it.
    mutable std::mutex pool_mutex;
    std::size_t total_allocated_bytes;
    std::size_t total_used_bytes;
    std::size_t maximum_pool_size_bytes;
};

// Move-only handle for a single allocation from a MemoryPool.
class ScopedMemoryAllocation {
public:
    ScopedMemoryAllocation(MemoryPool& pool, std::size_t size);
    ~ScopedMemoryAllocation();

    ScopedMemoryAllocation(const ScopedMemoryAllocation&) = delete;
    ScopedMemoryAllocation& operator=(const ScopedMemoryAllocation&) = delete;
    ScopedMemoryAllocation(ScopedMemoryAllocation&& other) noexcept;
    ScopedMemoryAllocation& operator=(ScopedMemoryAllocation&& other) noexcept;

    std::uint8_t* get() const;
    std::size_t size() const;

private:
    MemoryPool* memory_pool_pointer;
    std::uint8_t* allocated_pointer;
    std::size_t allocation_size;
};

}

#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
accelerator/include/thread_pool.hpp
DELETED
|
@@ -1,83 +0,0 @@
|
|
| 1 |
-
//
|
| 2 |
-
// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
// SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
//
|
| 5 |
-
|
| 6 |
-
#ifndef POCKET_TTS_THREAD_POOL_HPP
#define POCKET_TTS_THREAD_POOL_HPP

#include <atomic>
#include <condition_variable>
#include <cstddef>      // std::size_t
#include <functional>
#include <future>
#include <memory>
#include <mutex>
#include <queue>
#include <stdexcept>    // std::runtime_error (thrown by submit_task)
#include <thread>
#include <type_traits>  // std::invoke_result
#include <utility>      // std::forward
#include <vector>

namespace pocket_tts_accelerator {

// Pool of worker threads fed from a single task queue.
class ThreadPool {
public:
    explicit ThreadPool(std::size_t number_of_threads);
    ~ThreadPool();

    // Neither copyable nor movable.
    ThreadPool(const ThreadPool&) = delete;
    ThreadPool& operator=(const ThreadPool&) = delete;
    ThreadPool(ThreadPool&&) = delete;
    ThreadPool& operator=(ThreadPool&&) = delete;

    // Queues function(arguments...) and returns a future for its result.
    // Throws std::runtime_error if the pool has been asked to stop.
    template<typename FunctionType, typename... ArgumentTypes>
    auto submit_task(FunctionType&& function, ArgumentTypes&&... arguments)
        -> std::future<typename std::invoke_result<FunctionType, ArgumentTypes...>::type>;

    void shutdown();
    bool is_running() const;
    std::size_t get_pending_task_count() const;
    std::size_t get_thread_count() const;

private:
    void worker_thread_function();

    std::vector<std::thread> worker_threads;
    std::queue<std::function<void()>> task_queue;
    // mutable so that the const getters can lock it.
    mutable std::mutex queue_mutex;
    std::condition_variable task_available_condition;
    std::atomic<bool> should_stop;
    std::atomic<bool> is_stopped;
    std::size_t thread_count;
};

template<typename FunctionType, typename... ArgumentTypes>
auto ThreadPool::submit_task(FunctionType&& function, ArgumentTypes&&... arguments)
    -> std::future<typename std::invoke_result<FunctionType, ArgumentTypes...>::type> {

    using ReturnType = typename std::invoke_result<FunctionType, ArgumentTypes...>::type;

    // The queue stores std::function, which must be copyable, while
    // std::packaged_task is move-only; a shared_ptr bridges the two.
    auto packaged_task = std::make_shared<std::packaged_task<ReturnType()>>(
        std::bind(std::forward<FunctionType>(function), std::forward<ArgumentTypes>(arguments)...)
    );

    // Take the future before the task can run on a worker.
    std::future<ReturnType> result_future = packaged_task->get_future();

    {
        std::unique_lock<std::mutex> lock(queue_mutex);

        if (should_stop.load()) {
            throw std::runtime_error("Cannot submit task to stopped thread pool");
        }

        task_queue.emplace([packaged_task]() {
            (*packaged_task)();
        });
    }

    // Notify after releasing the lock so the woken worker can take it at once.
    task_available_condition.notify_one();

    return result_future;
}

}

#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
accelerator/src/accelerator_core.cpp
DELETED
|
@@ -1,558 +0,0 @@
|
|
| 1 |
-
//
|
| 2 |
-
// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
// SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
//
|
| 5 |
-
|
| 6 |
-
#include "accelerator_core.hpp"
|
| 7 |
-
#include <chrono>
|
| 8 |
-
#include <cstring>
|
| 9 |
-
#include <ctime>
|
| 10 |
-
#include <iomanip>
|
| 11 |
-
#include <iostream>
|
| 12 |
-
#include <sstream>
|
| 13 |
-
#include <signal.h>
|
| 14 |
-
|
| 15 |
-
namespace pocket_tts_accelerator {
|
| 16 |
-
|
| 17 |
-
static AcceleratorCore* global_accelerator_instance = nullptr;
|
| 18 |
-
static volatile sig_atomic_t last_received_signal = 0;
|
| 19 |
-
|
| 20 |
-
static void signal_handler_function(int signal_number) {
|
| 21 |
-
last_received_signal = signal_number;
|
| 22 |
-
if (global_accelerator_instance != nullptr) {
|
| 23 |
-
global_accelerator_instance->shutdown();
|
| 24 |
-
}
|
| 25 |
-
}
|
| 26 |
-
|
| 27 |
-
AcceleratorCore::AcceleratorCore(const AcceleratorConfiguration& configuration)
|
| 28 |
-
: config(configuration)
|
| 29 |
-
, is_initialized(false)
|
| 30 |
-
, should_shutdown(false) {
|
| 31 |
-
}
|
| 32 |
-
|
| 33 |
-
AcceleratorCore::~AcceleratorCore() {
|
| 34 |
-
shutdown();
|
| 35 |
-
}
|
| 36 |
-
|
| 37 |
-
bool AcceleratorCore::initialize() {
|
| 38 |
-
if (is_initialized.load()) {
|
| 39 |
-
return true;
|
| 40 |
-
}
|
| 41 |
-
|
| 42 |
-
log_message("Initializing Pocket TTS Accelerator...");
|
| 43 |
-
|
| 44 |
-
memory_pool = std::make_unique<MemoryPool>(config.memory_pool_size_bytes);
|
| 45 |
-
log_message("Memory pool initialized with " + std::to_string(config.memory_pool_size_bytes / (1024 * 1024)) + " MB");
|
| 46 |
-
|
| 47 |
-
thread_pool = std::make_unique<ThreadPool>(config.number_of_worker_threads);
|
| 48 |
-
log_message("Thread pool initialized with " + std::to_string(config.number_of_worker_threads) + " worker threads");
|
| 49 |
-
|
| 50 |
-
audio_processor = std::make_unique<AudioProcessor>(*memory_pool);
|
| 51 |
-
log_message("Audio processor initialized");
|
| 52 |
-
|
| 53 |
-
ipc_handler = std::make_unique<IpcHandler>(config.ipc_socket_path);
|
| 54 |
-
log_message("IPC handler created for socket: " + config.ipc_socket_path);
|
| 55 |
-
|
| 56 |
-
register_all_command_handlers();
|
| 57 |
-
|
| 58 |
-
ipc_handler->set_shutdown_callback([this]() {
|
| 59 |
-
this->shutdown();
|
| 60 |
-
});
|
| 61 |
-
|
| 62 |
-
if (!ipc_handler->start_server()) {
|
| 63 |
-
log_message("ERROR: Failed to start IPC server");
|
| 64 |
-
return false;
|
| 65 |
-
}
|
| 66 |
-
|
| 67 |
-
log_message("IPC server started successfully");
|
| 68 |
-
|
| 69 |
-
global_accelerator_instance = this;
|
| 70 |
-
setup_signal_handlers();
|
| 71 |
-
|
| 72 |
-
is_initialized.store(true);
|
| 73 |
-
log_message("Pocket TTS Accelerator initialized successfully");
|
| 74 |
-
|
| 75 |
-
return true;
|
| 76 |
-
}
|
| 77 |
-
|
| 78 |
-
void AcceleratorCore::run() {
|
| 79 |
-
if (!is_initialized.load()) {
|
| 80 |
-
log_message("ERROR: Accelerator not initialized");
|
| 81 |
-
return;
|
| 82 |
-
}
|
| 83 |
-
|
| 84 |
-
log_message("Accelerator running and waiting for commands...");
|
| 85 |
-
|
| 86 |
-
while (!should_shutdown.load()) {
|
| 87 |
-
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
| 88 |
-
|
| 89 |
-
if (last_received_signal != 0) {
|
| 90 |
-
log_message("Received signal: " + std::to_string(last_received_signal));
|
| 91 |
-
last_received_signal = 0;
|
| 92 |
-
}
|
| 93 |
-
}
|
| 94 |
-
|
| 95 |
-
log_message("Accelerator main loop exited");
|
| 96 |
-
}
|
| 97 |
-
|
| 98 |
-
void AcceleratorCore::shutdown() {
|
| 99 |
-
if (should_shutdown.exchange(true)) {
|
| 100 |
-
return;
|
| 101 |
-
}
|
| 102 |
-
|
| 103 |
-
log_message("Shutting down Pocket TTS Accelerator...");
|
| 104 |
-
|
| 105 |
-
if (ipc_handler) {
|
| 106 |
-
ipc_handler->stop_server();
|
| 107 |
-
log_message("IPC server stopped");
|
| 108 |
-
}
|
| 109 |
-
|
| 110 |
-
if (thread_pool) {
|
| 111 |
-
thread_pool->shutdown();
|
| 112 |
-
log_message("Thread pool shut down");
|
| 113 |
-
}
|
| 114 |
-
|
| 115 |
-
if (memory_pool) {
|
| 116 |
-
memory_pool->reset_pool();
|
| 117 |
-
log_message("Memory pool reset");
|
| 118 |
-
}
|
| 119 |
-
|
| 120 |
-
is_initialized.store(false);
|
| 121 |
-
log_message("Pocket TTS Accelerator shut down complete");
|
| 122 |
-
}
|
| 123 |
-
|
| 124 |
-
bool AcceleratorCore::is_running() const {
|
| 125 |
-
return is_initialized.load() && !should_shutdown.load();
|
| 126 |
-
}
|
| 127 |
-
|
| 128 |
-
std::string AcceleratorCore::get_status_string() const {
|
| 129 |
-
if (!is_initialized.load()) {
|
| 130 |
-
return "Not initialized";
|
| 131 |
-
}
|
| 132 |
-
|
| 133 |
-
if (should_shutdown.load()) {
|
| 134 |
-
return "Shutting down";
|
| 135 |
-
}
|
| 136 |
-
|
| 137 |
-
return "Running";
|
| 138 |
-
}
|
| 139 |
-
|
| 140 |
-
// Defaults: 2 worker threads, a 64 MiB memory pool, the socket at
// /tmp/pocket_tts_accelerator.sock and verbose logging switched on.
AcceleratorConfiguration AcceleratorCore::get_default_configuration() {
    constexpr std::size_t default_worker_threads = 2;
    constexpr std::size_t default_pool_mebibytes = 64;

    AcceleratorConfiguration defaults;
    defaults.enable_verbose_logging = true;
    defaults.ipc_socket_path = "/tmp/pocket_tts_accelerator.sock";
    defaults.memory_pool_size_bytes = default_pool_mebibytes * 1024 * 1024;
    defaults.number_of_worker_threads = default_worker_threads;
    return defaults;
}
|
| 148 |
-
|
| 149 |
-
void AcceleratorCore::register_all_command_handlers() {
|
| 150 |
-
ipc_handler->register_command_handler(
|
| 151 |
-
CommandType::PING,
|
| 152 |
-
[this](const std::vector<std::uint8_t>& payload) {
|
| 153 |
-
return this->handle_ping_command(payload);
|
| 154 |
-
}
|
| 155 |
-
);
|
| 156 |
-
|
| 157 |
-
ipc_handler->register_command_handler(
|
| 158 |
-
CommandType::PROCESS_AUDIO,
|
| 159 |
-
[this](const std::vector<std::uint8_t>& payload) {
|
| 160 |
-
return this->handle_process_audio_command(payload);
|
| 161 |
-
}
|
| 162 |
-
);
|
| 163 |
-
|
| 164 |
-
ipc_handler->register_command_handler(
|
| 165 |
-
CommandType::CONVERT_TO_MONO,
|
| 166 |
-
[this](const std::vector<std::uint8_t>& payload) {
|
| 167 |
-
return this->handle_convert_to_mono_command(payload);
|
| 168 |
-
}
|
| 169 |
-
);
|
| 170 |
-
|
| 171 |
-
ipc_handler->register_command_handler(
|
| 172 |
-
CommandType::CONVERT_TO_PCM,
|
| 173 |
-
[this](const std::vector<std::uint8_t>& payload) {
|
| 174 |
-
return this->handle_convert_to_pcm_command(payload);
|
| 175 |
-
}
|
| 176 |
-
);
|
| 177 |
-
|
| 178 |
-
ipc_handler->register_command_handler(
|
| 179 |
-
CommandType::RESAMPLE_AUDIO,
|
| 180 |
-
[this](const std::vector<std::uint8_t>& payload) {
|
| 181 |
-
return this->handle_resample_audio_command(payload);
|
| 182 |
-
}
|
| 183 |
-
);
|
| 184 |
-
|
| 185 |
-
ipc_handler->register_command_handler(
|
| 186 |
-
CommandType::GET_MEMORY_STATS,
|
| 187 |
-
[this](const std::vector<std::uint8_t>& payload) {
|
| 188 |
-
return this->handle_get_memory_stats_command(payload);
|
| 189 |
-
}
|
| 190 |
-
);
|
| 191 |
-
|
| 192 |
-
ipc_handler->register_command_handler(
|
| 193 |
-
CommandType::CLEAR_MEMORY_POOL,
|
| 194 |
-
[this](const std::vector<std::uint8_t>& payload) {
|
| 195 |
-
return this->handle_clear_memory_pool_command(payload);
|
| 196 |
-
}
|
| 197 |
-
);
|
| 198 |
-
|
| 199 |
-
ipc_handler->register_command_handler(
|
| 200 |
-
CommandType::SHUTDOWN,
|
| 201 |
-
[this](const std::vector<std::uint8_t>& payload) {
|
| 202 |
-
return this->handle_shutdown_command(payload);
|
| 203 |
-
}
|
| 204 |
-
);
|
| 205 |
-
|
| 206 |
-
log_message("All command handlers registered");
|
| 207 |
-
}
|
| 208 |
-
|
| 209 |
-
// Installs signal_handler_function for SIGINT and SIGTERM.
void AcceleratorCore::setup_signal_handlers() {
    const int handled_signals[] = {SIGINT, SIGTERM};

    for (const int signal_number : handled_signals) {
        signal(signal_number, signal_handler_function);
    }
}
|
| 213 |
-
|
| 214 |
-
// PING handler: replies "PONG" to an empty payload, otherwise "PONG:" followed
// by the payload bytes.
std::vector<std::uint8_t> AcceleratorCore::handle_ping_command(const std::vector<std::uint8_t>& payload) {
    const std::string echoed_text(payload.begin(), payload.end());

    std::string reply = "PONG";
    if (echoed_text.empty()) {
        log_message("Received PING command");
    } else {
        log_message("Received PING command with payload: " + echoed_text);
        reply += ":" + echoed_text;
    }

    return std::vector<std::uint8_t>(reply.begin(), reply.end());
}
|
| 230 |
-
|
| 231 |
-
std::vector<std::uint8_t> AcceleratorCore::handle_process_audio_command(const std::vector<std::uint8_t>& payload) {
|
| 232 |
-
log_message("Received PROCESS_AUDIO command with payload size: " + std::to_string(payload.size()) + " bytes");
|
| 233 |
-
|
| 234 |
-
if (payload.size() < sizeof(ProcessAudioRequest)) {
|
| 235 |
-
std::string error_message = "ERROR:Invalid payload size, expected " + std::to_string(sizeof(ProcessAudioRequest)) + " bytes";
|
| 236 |
-
log_message(error_message);
|
| 237 |
-
return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
|
| 238 |
-
}
|
| 239 |
-
|
| 240 |
-
ProcessAudioRequest request;
|
| 241 |
-
std::memcpy(&request, payload.data(), sizeof(ProcessAudioRequest));
|
| 242 |
-
|
| 243 |
-
std::string input_path(request.input_file_path);
|
| 244 |
-
std::string output_path(request.output_file_path);
|
| 245 |
-
|
| 246 |
-
log_message("Processing audio from: " + input_path + " to: " + output_path);
|
| 247 |
-
|
| 248 |
-
auto future_result = thread_pool->submit_task([this, input_path, output_path]() {
|
| 249 |
-
return this->audio_processor->process_audio_for_voice_cloning(input_path, output_path);
|
| 250 |
-
});
|
| 251 |
-
|
| 252 |
-
AudioProcessingResult result = future_result.get();
|
| 253 |
-
|
| 254 |
-
if (result.success) {
|
| 255 |
-
log_message("Audio processing completed successfully");
|
| 256 |
-
std::string success_message = "SUCCESS:" + output_path;
|
| 257 |
-
return std::vector<std::uint8_t>(success_message.begin(), success_message.end());
|
| 258 |
-
} else {
|
| 259 |
-
log_message("Audio processing failed: " + result.error_message);
|
| 260 |
-
std::string error_message = "ERROR:" + result.error_message;
|
| 261 |
-
return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
|
| 262 |
-
}
|
| 263 |
-
}
|
| 264 |
-
|
| 265 |
-
std::vector<std::uint8_t> AcceleratorCore::handle_convert_to_mono_command(const std::vector<std::uint8_t>& payload) {
|
| 266 |
-
log_message("Received CONVERT_TO_MONO command with payload size: " + std::to_string(payload.size()) + " bytes");
|
| 267 |
-
|
| 268 |
-
if (payload.size() < sizeof(ProcessAudioRequest)) {
|
| 269 |
-
std::string error_message = "ERROR:Invalid payload size, expected " + std::to_string(sizeof(ProcessAudioRequest)) + " bytes";
|
| 270 |
-
log_message(error_message);
|
| 271 |
-
return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
|
| 272 |
-
}
|
| 273 |
-
|
| 274 |
-
ProcessAudioRequest request;
|
| 275 |
-
std::memcpy(&request, payload.data(), sizeof(ProcessAudioRequest));
|
| 276 |
-
|
| 277 |
-
std::string input_path(request.input_file_path);
|
| 278 |
-
std::string output_path(request.output_file_path);
|
| 279 |
-
|
| 280 |
-
log_message("Converting to mono from: " + input_path + " to: " + output_path);
|
| 281 |
-
|
| 282 |
-
AudioData audio_data = audio_processor->read_wav_file(input_path);
|
| 283 |
-
|
| 284 |
-
if (!audio_data.is_valid) {
|
| 285 |
-
log_message("Failed to read input file: " + audio_data.error_message);
|
| 286 |
-
std::string error_message = "ERROR:" + audio_data.error_message;
|
| 287 |
-
return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
|
| 288 |
-
}
|
| 289 |
-
|
| 290 |
-
AudioProcessingResult result = audio_processor->convert_to_mono(audio_data);
|
| 291 |
-
|
| 292 |
-
if (!result.success) {
|
| 293 |
-
log_message("Mono conversion failed: " + result.error_message);
|
| 294 |
-
std::string error_message = "ERROR:" + result.error_message;
|
| 295 |
-
return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
|
| 296 |
-
}
|
| 297 |
-
|
| 298 |
-
AudioData output_audio;
|
| 299 |
-
output_audio.samples = std::move(result.processed_samples);
|
| 300 |
-
output_audio.sample_rate = result.output_sample_rate;
|
| 301 |
-
output_audio.number_of_channels = 1;
|
| 302 |
-
output_audio.bits_per_sample = 16;
|
| 303 |
-
output_audio.is_valid = true;
|
| 304 |
-
|
| 305 |
-
if (!audio_processor->write_wav_file(output_path, output_audio)) {
|
| 306 |
-
log_message("Failed to write output file: " + output_path);
|
| 307 |
-
std::string error_message = "ERROR:Failed to write output file";
|
| 308 |
-
return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
|
| 309 |
-
}
|
| 310 |
-
|
| 311 |
-
log_message("Mono conversion completed successfully: " + output_path);
|
| 312 |
-
std::string success_message = "SUCCESS:" + output_path;
|
| 313 |
-
return std::vector<std::uint8_t>(success_message.begin(), success_message.end());
|
| 314 |
-
}
|
| 315 |
-
|
| 316 |
-
std::vector<std::uint8_t> AcceleratorCore::handle_convert_to_pcm_command(const std::vector<std::uint8_t>& payload) {
|
| 317 |
-
log_message("Received CONVERT_TO_PCM command with payload size: " + std::to_string(payload.size()) + " bytes");
|
| 318 |
-
|
| 319 |
-
if (payload.size() < sizeof(ProcessAudioRequest)) {
|
| 320 |
-
std::string error_message = "ERROR:Invalid payload size, expected " + std::to_string(sizeof(ProcessAudioRequest)) + " bytes";
|
| 321 |
-
log_message(error_message);
|
| 322 |
-
return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
|
| 323 |
-
}
|
| 324 |
-
|
| 325 |
-
ProcessAudioRequest request;
|
| 326 |
-
std::memcpy(&request, payload.data(), sizeof(ProcessAudioRequest));
|
| 327 |
-
|
| 328 |
-
std::string input_path(request.input_file_path);
|
| 329 |
-
std::string output_path(request.output_file_path);
|
| 330 |
-
|
| 331 |
-
log_message("Converting to PCM from: " + input_path + " to: " + output_path);
|
| 332 |
-
|
| 333 |
-
AudioData audio_data = audio_processor->read_wav_file(input_path);
|
| 334 |
-
|
| 335 |
-
if (!audio_data.is_valid) {
|
| 336 |
-
log_message("Failed to read input file: " + audio_data.error_message);
|
| 337 |
-
std::string error_message = "ERROR:" + audio_data.error_message;
|
| 338 |
-
return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
|
| 339 |
-
}
|
| 340 |
-
|
| 341 |
-
AudioData mono_audio;
|
| 342 |
-
|
| 343 |
-
if (audio_data.number_of_channels > 1) {
|
| 344 |
-
log_message("Input has " + std::to_string(audio_data.number_of_channels) + " channels, converting to mono");
|
| 345 |
-
AudioProcessingResult mono_result = audio_processor->convert_to_mono(audio_data);
|
| 346 |
-
|
| 347 |
-
if (!mono_result.success) {
|
| 348 |
-
log_message("Mono conversion failed: " + mono_result.error_message);
|
| 349 |
-
std::string error_message = "ERROR:" + mono_result.error_message;
|
| 350 |
-
return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
|
| 351 |
-
}
|
| 352 |
-
|
| 353 |
-
mono_audio.samples = std::move(mono_result.processed_samples);
|
| 354 |
-
mono_audio.sample_rate = mono_result.output_sample_rate;
|
| 355 |
-
} else {
|
| 356 |
-
mono_audio.samples = std::move(audio_data.samples);
|
| 357 |
-
mono_audio.sample_rate = audio_data.sample_rate;
|
| 358 |
-
}
|
| 359 |
-
|
| 360 |
-
mono_audio.number_of_channels = 1;
|
| 361 |
-
mono_audio.bits_per_sample = 16;
|
| 362 |
-
mono_audio.is_valid = true;
|
| 363 |
-
|
| 364 |
-
if (!audio_processor->write_wav_file(output_path, mono_audio)) {
|
| 365 |
-
log_message("Failed to write output file: " + output_path);
|
| 366 |
-
std::string error_message = "ERROR:Failed to write output file";
|
| 367 |
-
return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
|
| 368 |
-
}
|
| 369 |
-
|
| 370 |
-
log_message("PCM conversion completed successfully: " + output_path);
|
| 371 |
-
std::string success_message = "SUCCESS:" + output_path;
|
| 372 |
-
return std::vector<std::uint8_t>(success_message.begin(), success_message.end());
|
| 373 |
-
}
|
| 374 |
-
|
| 375 |
-
std::vector<std::uint8_t> AcceleratorCore::handle_resample_audio_command(const std::vector<std::uint8_t>& payload) {
|
| 376 |
-
log_message("Received RESAMPLE_AUDIO command with payload size: " + std::to_string(payload.size()) + " bytes");
|
| 377 |
-
|
| 378 |
-
if (payload.size() < sizeof(ProcessAudioRequest)) {
|
| 379 |
-
std::string error_message = "ERROR:Invalid payload size, expected " + std::to_string(sizeof(ProcessAudioRequest)) + " bytes";
|
| 380 |
-
log_message(error_message);
|
| 381 |
-
return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
|
| 382 |
-
}
|
| 383 |
-
|
| 384 |
-
ProcessAudioRequest request;
|
| 385 |
-
std::memcpy(&request, payload.data(), sizeof(ProcessAudioRequest));
|
| 386 |
-
|
| 387 |
-
std::string input_path(request.input_file_path);
|
| 388 |
-
std::string output_path(request.output_file_path);
|
| 389 |
-
std::uint32_t target_sample_rate = request.target_sample_rate;
|
| 390 |
-
|
| 391 |
-
log_message("Resampling audio from: " + input_path + " to: " + output_path + " at " + std::to_string(target_sample_rate) + " Hz");
|
| 392 |
-
|
| 393 |
-
AudioData audio_data = audio_processor->read_wav_file(input_path);
|
| 394 |
-
|
| 395 |
-
if (!audio_data.is_valid) {
|
| 396 |
-
log_message("Failed to read input file: " + audio_data.error_message);
|
| 397 |
-
std::string error_message = "ERROR:" + audio_data.error_message;
|
| 398 |
-
return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
|
| 399 |
-
}
|
| 400 |
-
|
| 401 |
-
AudioProcessingResult result = audio_processor->resample_audio(audio_data, target_sample_rate);
|
| 402 |
-
|
| 403 |
-
if (!result.success) {
|
| 404 |
-
log_message("Resampling failed: " + result.error_message);
|
| 405 |
-
std::string error_message = "ERROR:" + result.error_message;
|
| 406 |
-
return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
|
| 407 |
-
}
|
| 408 |
-
|
| 409 |
-
AudioData output_audio;
|
| 410 |
-
output_audio.samples = std::move(result.processed_samples);
|
| 411 |
-
output_audio.sample_rate = result.output_sample_rate;
|
| 412 |
-
output_audio.number_of_channels = audio_data.number_of_channels;
|
| 413 |
-
output_audio.bits_per_sample = 16;
|
| 414 |
-
output_audio.is_valid = true;
|
| 415 |
-
|
| 416 |
-
if (!audio_processor->write_wav_file(output_path, output_audio)) {
|
| 417 |
-
log_message("Failed to write output file: " + output_path);
|
| 418 |
-
std::string error_message = "ERROR:Failed to write output file";
|
| 419 |
-
return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
|
| 420 |
-
}
|
| 421 |
-
|
| 422 |
-
log_message("Resampling completed successfully: " + output_path);
|
| 423 |
-
std::string success_message = "SUCCESS:" + output_path;
|
| 424 |
-
return std::vector<std::uint8_t>(success_message.begin(), success_message.end());
|
| 425 |
-
}
|
| 426 |
-
|
| 427 |
-
// Handles GET_MEMORY_STATS: snapshots the memory pool counters, logs them, and
// returns them as the raw bytes of a MemoryStatsResponse.
//
// payload: optional; when at least 4 bytes long, the first 4 bytes are read as
//          a flags word. Bit 0 selects the detailed log line.
std::vector<std::uint8_t> AcceleratorCore::handle_get_memory_stats_command(const std::vector<std::uint8_t>& payload) {
    std::uint32_t request_flags = 0;

    if (payload.size() >= sizeof(std::uint32_t)) {
        std::memcpy(&request_flags, payload.data(), sizeof(std::uint32_t));
        log_message("Received GET_MEMORY_STATS command with flags: " + std::to_string(request_flags));
    } else {
        log_message("Received GET_MEMORY_STATS command with payload size: " + std::to_string(payload.size()) + " bytes");
    }

    // Zero the whole struct first: it is copied byte-for-byte into the reply,
    // so padding and any field not assigned below must not carry stack data.
    MemoryStatsResponse stats;
    std::memset(&stats, 0, sizeof(MemoryStatsResponse));
    stats.total_allocated_bytes = memory_pool->get_total_allocated_bytes();
    stats.total_used_bytes = memory_pool->get_total_used_bytes();
    stats.block_count = memory_pool->get_block_count();

    bool include_detailed_log = (request_flags & 0x01) != 0;

    if (include_detailed_log) {
        log_message("Memory stats (detailed) - Allocated: " + std::to_string(stats.total_allocated_bytes) +
            " bytes (" + std::to_string(stats.total_allocated_bytes / (1024 * 1024)) + " MB)" +
            ", Used: " + std::to_string(stats.total_used_bytes) +
            " bytes (" + std::to_string(stats.total_used_bytes / (1024 * 1024)) + " MB)" +
            ", Blocks: " + std::to_string(stats.block_count) +
            ", Utilization: " + std::to_string(stats.total_allocated_bytes > 0 ?
                (stats.total_used_bytes * 100 / stats.total_allocated_bytes) : 0) + "%");
    } else {
        log_message("Memory stats - Allocated: " + std::to_string(stats.total_allocated_bytes) +
            " bytes, Used: " + std::to_string(stats.total_used_bytes) +
            " bytes, Blocks: " + std::to_string(stats.block_count));
    }

    std::vector<std::uint8_t> response(sizeof(MemoryStatsResponse));
    std::memcpy(response.data(), &stats, sizeof(MemoryStatsResponse));

    return response;
}
|
| 463 |
-
|
| 464 |
-
// Handles CLEAR_MEMORY_POOL: releases pool memory and reports how much was
// freed as "SUCCESS:Freed <n> blocks (<m> bytes)", with " [full reset]"
// appended when the force flag was set.
//
// payload: optional; when at least 4 bytes long, the first 4 bytes are read as
//          a flags word. Bit 0 requests reset_pool() instead of
//          clear_unused_blocks().
std::vector<std::uint8_t> AcceleratorCore::handle_clear_memory_pool_command(const std::vector<std::uint8_t>& payload) {
    std::uint32_t clear_flags = 0;

    if (payload.size() >= sizeof(std::uint32_t)) {
        std::memcpy(&clear_flags, payload.data(), sizeof(std::uint32_t));
        log_message("Received CLEAR_MEMORY_POOL command with flags: " + std::to_string(clear_flags));
    } else {
        log_message("Received CLEAR_MEMORY_POOL command with payload size: " + std::to_string(payload.size()) + " bytes");
    }

    std::size_t blocks_before = memory_pool->get_block_count();
    std::size_t allocated_before = memory_pool->get_total_allocated_bytes();
    std::size_t used_before = memory_pool->get_total_used_bytes();

    bool force_full_reset = (clear_flags & 0x01) != 0;

    if (force_full_reset) {
        log_message("Performing full memory pool reset (force flag set)");
        memory_pool->reset_pool();
    } else {
        log_message("Clearing unused memory blocks");
        memory_pool->clear_unused_blocks();
    }

    std::size_t blocks_after = memory_pool->get_block_count();
    std::size_t allocated_after = memory_pool->get_total_allocated_bytes();
    std::size_t used_after = memory_pool->get_total_used_bytes();

    // Saturate at zero: if the pool grew between the two snapshots, a plain
    // unsigned subtraction would wrap and report an enormous "freed" figure.
    std::size_t blocks_freed = blocks_before > blocks_after ? blocks_before - blocks_after : 0;
    std::size_t bytes_freed = allocated_before > allocated_after ? allocated_before - allocated_after : 0;

    log_message("Memory pool cleared - Before: " + std::to_string(blocks_before) + " blocks (" +
        std::to_string(allocated_before) + " bytes allocated, " +
        std::to_string(used_before) + " bytes used) -> After: " +
        std::to_string(blocks_after) + " blocks (" +
        std::to_string(allocated_after) + " bytes allocated, " +
        std::to_string(used_after) + " bytes used) -> Freed: " +
        std::to_string(blocks_freed) + " blocks (" +
        std::to_string(bytes_freed) + " bytes)");

    std::string success_message = "SUCCESS:Freed " + std::to_string(blocks_freed) +
        " blocks (" + std::to_string(bytes_freed) + " bytes)";

    if (force_full_reset) {
        success_message += " [full reset]";
    }

    return std::vector<std::uint8_t>(success_message.begin(), success_message.end());
}
|
| 513 |
-
|
| 514 |
-
std::vector<std::uint8_t> AcceleratorCore::handle_shutdown_command(const std::vector<std::uint8_t>& payload) {
|
| 515 |
-
std::string shutdown_reason;
|
| 516 |
-
if (!payload.empty()) {
|
| 517 |
-
shutdown_reason = std::string(payload.begin(), payload.end());
|
| 518 |
-
log_message("Received SHUTDOWN command with reason: " + shutdown_reason);
|
| 519 |
-
} else {
|
| 520 |
-
log_message("Received SHUTDOWN command");
|
| 521 |
-
}
|
| 522 |
-
|
| 523 |
-
std::string success_message = "SUCCESS:Shutting down";
|
| 524 |
-
if (!shutdown_reason.empty()) {
|
| 525 |
-
success_message += " (reason: " + shutdown_reason + ")";
|
| 526 |
-
}
|
| 527 |
-
|
| 528 |
-
std::thread shutdown_thread([this]() {
|
| 529 |
-
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
| 530 |
-
this->shutdown();
|
| 531 |
-
});
|
| 532 |
-
shutdown_thread.detach();
|
| 533 |
-
|
| 534 |
-
return std::vector<std::uint8_t>(success_message.begin(), success_message.end());
|
| 535 |
-
}
|
| 536 |
-
|
| 537 |
-
void AcceleratorCore::log_message(const std::string& message) const {
|
| 538 |
-
if (config.enable_verbose_logging) {
|
| 539 |
-
auto now = std::chrono::system_clock::now();
|
| 540 |
-
std::time_t time_t_now = std::chrono::system_clock::to_time_t(now);
|
| 541 |
-
|
| 542 |
-
auto milliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(
|
| 543 |
-
now.time_since_epoch()
|
| 544 |
-
) % 1000;
|
| 545 |
-
|
| 546 |
-
std::tm time_info;
|
| 547 |
-
localtime_r(&time_t_now, &time_info);
|
| 548 |
-
|
| 549 |
-
std::ostringstream timestamp_stream;
|
| 550 |
-
timestamp_stream << std::put_time(&time_info, "%Y-%m-%d %H:%M:%S");
|
| 551 |
-
timestamp_stream << '.' << std::setfill('0') << std::setw(3) << milliseconds.count();
|
| 552 |
-
|
| 553 |
-
std::cout << "[" << timestamp_stream.str() << "] " << message << std::endl;
|
| 554 |
-
std::cout.flush();
|
| 555 |
-
}
|
| 556 |
-
}
|
| 557 |
-
|
| 558 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
accelerator/src/audio_processor.cpp
DELETED
|
@@ -1,352 +0,0 @@
|
|
| 1 |
-
//
|
| 2 |
-
// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
// SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
//
|
| 5 |
-
|
| 6 |
-
#include "audio_processor.hpp"
|
| 7 |
-
#include <algorithm>
|
| 8 |
-
#include <cmath>
|
| 9 |
-
#include <cstring>
|
| 10 |
-
#include <fstream>
|
| 11 |
-
|
| 12 |
-
namespace pocket_tts_accelerator {
|
| 13 |
-
|
| 14 |
-
// Binds the processor to the given memory pool; performs no other setup.
AudioProcessor::AudioProcessor(MemoryPool& shared_memory_pool) : memory_pool(shared_memory_pool) {}
|
| 17 |
-
|
| 18 |
-
// Nothing to release explicitly; members clean up after themselves.
AudioProcessor::~AudioProcessor() = default;
|
| 20 |
-
|
| 21 |
-
// Reads a WAV file whose header matches WavFileHeader and decodes its samples
// to 16-bit integers.
//
// Supported encodings: 8-bit unsigned, 16-bit, 32-bit integer, and 32-bit
// float (audio_format == 3). On failure the returned AudioData has
// is_valid == false and error_message describes the problem.
//
// The number of samples read is bounded by both the header's data_size and the
// bytes actually present after the header, rounded down to whole samples, so a
// corrupt or truncated file can neither overrun the buffers nor trigger a huge
// allocation.
AudioData AudioProcessor::read_wav_file(const std::string& file_path) {
    AudioData result;
    result.is_valid = false;

    std::ifstream file_stream(file_path, std::ios::binary);

    if (!file_stream.is_open()) {
        result.error_message = "Failed to open file: " + file_path;
        return result;
    }

    WavFileHeader header;
    file_stream.read(reinterpret_cast<char*>(&header), sizeof(WavFileHeader));

    if (file_stream.gcount() < static_cast<std::streamsize>(sizeof(WavFileHeader))) {
        result.error_message = "File is too small to be a valid WAV file";
        return result;
    }

    if (!validate_wav_header(header)) {
        result.error_message = "Invalid WAV file header";
        return result;
    }

    result.sample_rate = header.sample_rate;
    result.number_of_channels = header.number_of_channels;
    result.bits_per_sample = header.bits_per_sample;

    // Measure how many bytes really follow the header.
    const std::streampos data_start = file_stream.tellg();
    file_stream.seekg(0, std::ios::end);
    const std::streampos file_end = file_stream.tellg();
    file_stream.seekg(data_start);

    if (!file_stream.good() || file_end < data_start) {
        result.error_message = "Failed to determine audio data size";
        return result;
    }

    // validate_wav_header only accepts 8, 16 or 32 bits, so this is non-zero.
    const std::size_t bytes_per_sample = header.bits_per_sample / 8;
    const std::size_t available_bytes = static_cast<std::size_t>(file_end - data_start);
    const std::size_t declared_bytes = header.data_size;
    const std::size_t sample_count = std::min(declared_bytes, available_bytes) / bytes_per_sample;
    const std::streamsize byte_count = static_cast<std::streamsize>(sample_count * bytes_per_sample);

    result.samples.resize(sample_count);

    // Reads exactly byte_count bytes into destination; false on a short read.
    const auto read_payload = [&file_stream, byte_count](void* destination) {
        file_stream.read(static_cast<char*>(destination), byte_count);
        return file_stream.gcount() == byte_count;
    };

    bool payload_read = false;

    if (header.bits_per_sample == 16) {
        payload_read = read_payload(result.samples.data());
    } else if (header.bits_per_sample == 8) {
        std::vector<std::uint8_t> raw_data(sample_count);
        payload_read = read_payload(raw_data.data());
        if (payload_read) {
            convert_uint8_to_int16(raw_data.data(), result.samples.data(), sample_count);
        }
    } else if (header.bits_per_sample == 32) {
        if (header.audio_format == 3) {
            std::vector<float> raw_data(sample_count);
            payload_read = read_payload(raw_data.data());
            if (payload_read) {
                convert_float32_to_int16(raw_data.data(), result.samples.data(), sample_count);
            }
        } else {
            std::vector<std::int32_t> raw_data(sample_count);
            payload_read = read_payload(raw_data.data());
            if (payload_read) {
                convert_int32_to_int16(raw_data.data(), result.samples.data(), sample_count);
            }
        }
    }

    if (!payload_read) {
        result.samples.clear();
        result.error_message = "Failed to read audio data";
        return result;
    }

    result.is_valid = true;
    return result;
}
|
| 73 |
-
|
| 74 |
-
// Writes audio_data as a 16-bit PCM WAV file: a WavFileHeader followed by the
// raw samples.
//
// Returns true when the stream is still good after both writes. Returns false
// when the file cannot be opened, a write fails, or the payload is too large
// for the 32-bit RIFF size fields. The size check runs before the file is
// opened, so an oversized request does not truncate an existing file.
bool AudioProcessor::write_wav_file(const std::string& file_path, const AudioData& audio_data) {
    // The RIFF size field stores (payload + 36) in 32 bits.
    constexpr std::uint32_t header_bytes_after_size_field = 36;
    constexpr std::uint64_t maximum_data_bytes = 0xFFFFFFFFull - header_bytes_after_size_field;

    const std::uint64_t data_bytes =
        static_cast<std::uint64_t>(audio_data.samples.size()) * sizeof(std::int16_t);

    if (data_bytes > maximum_data_bytes) {
        return false;
    }

    std::ofstream file_stream(file_path, std::ios::binary);

    if (!file_stream.is_open()) {
        return false;
    }

    const std::uint32_t data_size = static_cast<std::uint32_t>(data_bytes);
    const std::uint32_t file_size = data_size + header_bytes_after_size_field;

    // Value-initialise so no indeterminate bytes reach the file.
    WavFileHeader header{};
    std::memcpy(header.riff_marker, "RIFF", 4);
    header.file_size = file_size;
    std::memcpy(header.wave_marker, "WAVE", 4);
    std::memcpy(header.format_marker, "fmt ", 4);
    header.format_chunk_size = 16;
    header.audio_format = 1;
    header.number_of_channels = audio_data.number_of_channels;
    header.sample_rate = audio_data.sample_rate;
    header.bits_per_sample = 16;
    header.byte_rate = audio_data.sample_rate * audio_data.number_of_channels * 2;
    header.block_align = audio_data.number_of_channels * 2;
    std::memcpy(header.data_marker, "data", 4);
    header.data_size = data_size;

    file_stream.write(reinterpret_cast<const char*>(&header), sizeof(WavFileHeader));
    file_stream.write(reinterpret_cast<const char*>(audio_data.samples.data()), data_size);

    return file_stream.good();
}
|
| 104 |
-
|
| 105 |
-
// Downmixes interleaved multi-channel audio to one channel by averaging the
// samples of each frame (see mix_channels_to_mono).
//
// Single-channel input is returned as a copy. The sample rate is unchanged.
// Fails with an error_message when the input is flagged invalid or reports
// zero channels; the latter would otherwise divide by zero.
AudioProcessingResult AudioProcessor::convert_to_mono(const AudioData& input_audio) {
    AudioProcessingResult result;
    result.success = false;

    if (!input_audio.is_valid) {
        result.error_message = "Invalid input audio";
        return result;
    }

    if (input_audio.number_of_channels == 0) {
        result.error_message = "Invalid channel count";
        return result;
    }

    if (input_audio.number_of_channels == 1) {
        result.processed_samples = input_audio.samples;
        result.output_sample_rate = input_audio.sample_rate;
        result.success = true;
        return result;
    }

    // Integer division drops any trailing partial frame.
    std::size_t frame_count = input_audio.samples.size() / input_audio.number_of_channels;
    result.processed_samples.resize(frame_count);

    mix_channels_to_mono(
        input_audio.samples.data(),
        result.processed_samples.data(),
        frame_count,
        input_audio.number_of_channels
    );

    result.output_sample_rate = input_audio.sample_rate;
    result.success = true;
    return result;
}
|
| 135 |
-
|
| 136 |
-
// Returns a copy of the input samples and sample rate. Fails with
// "Invalid input audio" when the input is flagged invalid.
AudioProcessingResult AudioProcessor::convert_to_pcm_int16(const AudioData& input_audio) {
    AudioProcessingResult outcome;
    outcome.success = input_audio.is_valid;

    if (!outcome.success) {
        outcome.error_message = "Invalid input audio";
        return outcome;
    }

    outcome.output_sample_rate = input_audio.sample_rate;
    outcome.processed_samples = input_audio.samples;
    return outcome;
}
|
| 150 |
-
|
| 151 |
-
// Resamples audio to target_sample_rate using linear interpolation.
//
// Samples are treated as interleaved frames of number_of_channels values and
// each channel is interpolated independently, so channels are never blended
// together. A channel count of 0 is treated as 1. For single-channel input the
// output is identical to interpolating the flat sample array.
//
// Fails with an error_message when the input is flagged invalid or either
// sample rate is zero. When the rates already match, the samples are copied.
AudioProcessingResult AudioProcessor::resample_audio(const AudioData& input_audio, std::uint32_t target_sample_rate) {
    AudioProcessingResult result;
    result.success = false;

    if (!input_audio.is_valid) {
        result.error_message = "Invalid input audio";
        return result;
    }

    // A zero rate would make the ratio zero or infinite.
    if (input_audio.sample_rate == 0 || target_sample_rate == 0) {
        result.error_message = "Invalid sample rate";
        return result;
    }

    if (input_audio.sample_rate == target_sample_rate) {
        result.processed_samples = input_audio.samples;
        result.output_sample_rate = target_sample_rate;
        result.success = true;
        return result;
    }

    const std::size_t channel_count = std::max<std::size_t>(input_audio.number_of_channels, 1);
    const std::size_t input_frame_count = input_audio.samples.size() / channel_count;

    const double ratio = static_cast<double>(target_sample_rate) / static_cast<double>(input_audio.sample_rate);
    const std::size_t output_frame_count = static_cast<std::size_t>(input_frame_count * ratio);

    result.processed_samples.resize(output_frame_count * channel_count);

    for (std::size_t output_frame = 0; output_frame < output_frame_count; ++output_frame) {
        const double source_position = output_frame / ratio;

        // Clamp both neighbours to the last input frame so rounding at the
        // tail can never index past the buffer.
        const std::size_t last_frame = input_frame_count - 1;
        const std::size_t lower_frame = std::min(static_cast<std::size_t>(source_position), last_frame);
        const std::size_t upper_frame = std::min(lower_frame + 1, last_frame);
        const double fractional_part = source_position - lower_frame;

        for (std::size_t channel = 0; channel < channel_count; ++channel) {
            const double interpolated_value =
                input_audio.samples[lower_frame * channel_count + channel] * (1.0 - fractional_part) +
                input_audio.samples[upper_frame * channel_count + channel] * fractional_part;

            result.processed_samples[output_frame * channel_count + channel] = static_cast<std::int16_t>(
                std::clamp(interpolated_value, -32768.0, 32767.0)
            );
        }
    }

    result.output_sample_rate = target_sample_rate;
    result.success = true;
    return result;
}
|
| 195 |
-
|
| 196 |
-
// Scales the samples so the loudest one reaches target_peak_level * 32767,
// clamping every result to the 16-bit range. The sample rate is unchanged.
//
// All-zero (or empty) input is returned as a copy. Fails with
// "Invalid input audio" when the input is flagged invalid.
AudioProcessingResult AudioProcessor::normalize_audio(const AudioData& input_audio, float target_peak_level) {
    AudioProcessingResult result;
    result.success = false;

    if (!input_audio.is_valid) {
        result.error_message = "Invalid input audio";
        return result;
    }

    // Track the peak in 32 bits: |-32768| does not fit in int16_t and would
    // wrap back to a negative value, hiding the true peak.
    std::int32_t max_absolute_value = 0;
    for (const std::int16_t sample : input_audio.samples) {
        const std::int32_t widened_sample = sample;
        const std::int32_t absolute_value = widened_sample < 0 ? -widened_sample : widened_sample;
        if (absolute_value > max_absolute_value) {
            max_absolute_value = absolute_value;
        }
    }

    if (max_absolute_value == 0) {
        result.processed_samples = input_audio.samples;
        result.output_sample_rate = input_audio.sample_rate;
        result.success = true;
        return result;
    }

    float normalization_factor = (target_peak_level * 32767.0f) / static_cast<float>(max_absolute_value);

    result.processed_samples.resize(input_audio.samples.size());

    for (std::size_t index = 0; index < input_audio.samples.size(); ++index) {
        float normalized_sample = static_cast<float>(input_audio.samples[index]) * normalization_factor;
        result.processed_samples[index] = static_cast<std::int16_t>(
            std::clamp(normalized_sample, -32768.0f, 32767.0f)
        );
    }

    result.output_sample_rate = input_audio.sample_rate;
    result.success = true;
    return result;
}
|
| 235 |
-
|
| 236 |
-
// Reads input_file_path, downmixes it to mono, and writes the result to
// output_file_path as a 16-bit single-channel WAV.
//
// On success the returned result also carries the mono samples and their
// sample rate. On failure error_message names the step that failed.
AudioProcessingResult AudioProcessor::process_audio_for_voice_cloning(
    const std::string& input_file_path,
    const std::string& output_file_path
) {
    AudioProcessingResult outcome;
    outcome.success = false;

    const AudioData source_audio = read_wav_file(input_file_path);
    if (!source_audio.is_valid) {
        outcome.error_message = "Failed to read input file: " + source_audio.error_message;
        return outcome;
    }

    AudioProcessingResult downmix = convert_to_mono(source_audio);
    if (!downmix.success) {
        outcome.error_message = "Failed to convert to mono: " + downmix.error_message;
        return outcome;
    }

    // Wrap the downmixed samples in an AudioData so they can be written out.
    AudioData mono_audio;
    mono_audio.is_valid = true;
    mono_audio.bits_per_sample = 16;
    mono_audio.number_of_channels = 1;
    mono_audio.sample_rate = downmix.output_sample_rate;
    mono_audio.samples = std::move(downmix.processed_samples);

    const bool written = write_wav_file(output_file_path, mono_audio);
    if (!written) {
        outcome.error_message = "Failed to write output file";
        return outcome;
    }

    outcome.output_sample_rate = mono_audio.sample_rate;
    outcome.processed_samples = std::move(mono_audio.samples);
    outcome.success = true;
    return outcome;
}
|
| 274 |
-
|
| 275 |
-
bool AudioProcessor::validate_wav_header(const WavFileHeader& header) {
|
| 276 |
-
if (std::memcmp(header.riff_marker, "RIFF", 4) != 0) {
|
| 277 |
-
return false;
|
| 278 |
-
}
|
| 279 |
-
|
| 280 |
-
if (std::memcmp(header.wave_marker, "WAVE", 4) != 0) {
|
| 281 |
-
return false;
|
| 282 |
-
}
|
| 283 |
-
|
| 284 |
-
if (std::memcmp(header.format_marker, "fmt ", 4) != 0) {
|
| 285 |
-
return false;
|
| 286 |
-
}
|
| 287 |
-
|
| 288 |
-
if (header.audio_format != 1 && header.audio_format != 3) {
|
| 289 |
-
return false;
|
| 290 |
-
}
|
| 291 |
-
|
| 292 |
-
if (header.number_of_channels < 1 || header.number_of_channels > 16) {
|
| 293 |
-
return false;
|
| 294 |
-
}
|
| 295 |
-
|
| 296 |
-
if (header.sample_rate < 100 || header.sample_rate > 384000) {
|
| 297 |
-
return false;
|
| 298 |
-
}
|
| 299 |
-
|
| 300 |
-
if (header.bits_per_sample != 8 && header.bits_per_sample != 16 && header.bits_per_sample != 32) {
|
| 301 |
-
return false;
|
| 302 |
-
}
|
| 303 |
-
|
| 304 |
-
return true;
|
| 305 |
-
}
|
| 306 |
-
|
| 307 |
-
std::size_t AudioProcessor::calculate_audio_duration_milliseconds(const AudioData& audio_data) {
|
| 308 |
-
if (!audio_data.is_valid || audio_data.sample_rate == 0) {
|
| 309 |
-
return 0;
|
| 310 |
-
}
|
| 311 |
-
|
| 312 |
-
std::size_t frame_count = audio_data.samples.size() / audio_data.number_of_channels;
|
| 313 |
-
return (frame_count * 1000) / audio_data.sample_rate;
|
| 314 |
-
}
|
| 315 |
-
|
| 316 |
-
// Converts `sample_count` float samples to signed 16-bit samples.
//
// Each input value is clamped to [-1.0, 1.0] and scaled by 32767. NaN inputs
// are written as 0.
//
// `input` and `output` must each address at least `sample_count` elements.
void AudioProcessor::convert_float32_to_int16(const float* input, std::int16_t* output, std::size_t sample_count) {
    for (std::size_t index = 0; index < sample_count; ++index) {
        const float sample = input[index];

        // NaN is the only value that compares unequal to itself. std::clamp
        // would hand it back unchanged, and converting NaN to an integer is
        // undefined behaviour, so it is mapped to silence instead.
        if (sample != sample) {
            output[index] = 0;
            continue;
        }

        const float clamped_value = std::clamp(sample, -1.0f, 1.0f);
        output[index] = static_cast<std::int16_t>(clamped_value * 32767.0f);
    }
}
|
| 322 |
-
|
| 323 |
-
// Narrows `sample_count` 32-bit samples to 16 bits by shifting each value
// right by 16, i.e. keeping its upper half.
//
// `input` and `output` must each address at least `sample_count` elements.
void AudioProcessor::convert_int32_to_int16(const std::int32_t* input, std::int16_t* output, std::size_t sample_count) {
    const std::int32_t* const input_end = input + sample_count;

    while (input != input_end) {
        const std::int32_t wide_sample = *input++;
        *output++ = static_cast<std::int16_t>(wide_sample >> 16);
    }
}
|
| 328 |
-
|
| 329 |
-
// Widens `sample_count` unsigned 8-bit samples to signed 16-bit samples.
//
// Each value is re-centred by subtracting 128 and then multiplied by 256, so
// 0 becomes -32768, 128 becomes 0 and 255 becomes 32512.
//
// `input` and `output` must each address at least `sample_count` elements.
void AudioProcessor::convert_uint8_to_int16(const std::uint8_t* input, std::int16_t* output, std::size_t sample_count) {
    for (std::size_t remaining = sample_count; remaining > 0; --remaining, ++input, ++output) {
        const int centred = static_cast<std::int16_t>(*input) - 128;
        *output = static_cast<std::int16_t>(centred * 256);
    }
}
|
| 334 |
-
|
| 335 |
-
// Down-mixes multi-channel 16-bit audio to mono by averaging the channels of
// every frame.
//
// `input` is read as `frame_count * channel_count` samples, indexed as
// frame * channel_count + channel; `output` receives `frame_count` samples.
// Nothing is written when `channel_count` is 0, because there is nothing to
// average.
void AudioProcessor::mix_channels_to_mono(
    const std::int16_t* input,
    std::int16_t* output,
    std::size_t frame_count,
    std::uint16_t channel_count
) {
    // The average below divides by channel_count; zero would be a division
    // by zero.
    if (channel_count == 0) {
        return;
    }

    for (std::size_t frame_index = 0; frame_index < frame_count; ++frame_index) {
        const std::int16_t* frame = input + frame_index * channel_count;

        // Summed in 32 bits: at most 65535 channels of 16-bit samples
        // cannot overflow it.
        std::int32_t sum = 0;

        for (std::uint16_t channel_index = 0; channel_index < channel_count; ++channel_index) {
            sum += frame[channel_index];
        }

        output[frame_index] = static_cast<std::int16_t>(sum / channel_count);
    }
}
|
| 351 |
-
|
| 352 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
accelerator/src/ipc_handler.cpp
DELETED
|
@@ -1,226 +0,0 @@
|
|
| 1 |
-
//
|
| 2 |
-
// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
// SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
//
|
| 5 |
-
|
| 6 |
-
#include "ipc_handler.hpp"
|
| 7 |
-
#include <cstring>
|
| 8 |
-
#include <iostream>
|
| 9 |
-
#include <sys/socket.h>
|
| 10 |
-
#include <sys/un.h>
|
| 11 |
-
#include <unistd.h>
|
| 12 |
-
|
| 13 |
-
namespace pocket_tts_accelerator {
|
| 14 |
-
|
| 15 |
-
// Records the socket path and marks the handler as not running. No socket is
// created here; the descriptor stays at -1 until start_server() opens one.
IpcHandler::IpcHandler(const std::string& socket_path)
    : socket_file_path(socket_path),
      server_socket_fd(-1),
      is_server_running(false) {}
|
| 20 |
-
|
| 21 |
-
// Stops the server on destruction; stop_server() returns immediately when
// the server is not running.
IpcHandler::~IpcHandler() { stop_server(); }
|
| 24 |
-
|
| 25 |
-
bool IpcHandler::start_server() {
|
| 26 |
-
if (is_server_running.load()) {
|
| 27 |
-
return true;
|
| 28 |
-
}
|
| 29 |
-
|
| 30 |
-
unlink(socket_file_path.c_str());
|
| 31 |
-
|
| 32 |
-
server_socket_fd = socket(AF_UNIX, SOCK_STREAM, 0);
|
| 33 |
-
|
| 34 |
-
if (server_socket_fd < 0) {
|
| 35 |
-
std::cerr << "Failed to create socket: " << strerror(errno) << std::endl;
|
| 36 |
-
return false;
|
| 37 |
-
}
|
| 38 |
-
|
| 39 |
-
struct sockaddr_un server_address;
|
| 40 |
-
std::memset(&server_address, 0, sizeof(server_address));
|
| 41 |
-
server_address.sun_family = AF_UNIX;
|
| 42 |
-
std::strncpy(server_address.sun_path, socket_file_path.c_str(), sizeof(server_address.sun_path) - 1);
|
| 43 |
-
|
| 44 |
-
if (bind(server_socket_fd, reinterpret_cast<struct sockaddr*>(&server_address), sizeof(server_address)) < 0) {
|
| 45 |
-
std::cerr << "Failed to bind socket: " << strerror(errno) << std::endl;
|
| 46 |
-
close(server_socket_fd);
|
| 47 |
-
server_socket_fd = -1;
|
| 48 |
-
return false;
|
| 49 |
-
}
|
| 50 |
-
|
| 51 |
-
if (listen(server_socket_fd, CONNECTION_BACKLOG) < 0) {
|
| 52 |
-
std::cerr << "Failed to listen on socket: " << strerror(errno) << std::endl;
|
| 53 |
-
close(server_socket_fd);
|
| 54 |
-
server_socket_fd = -1;
|
| 55 |
-
return false;
|
| 56 |
-
}
|
| 57 |
-
|
| 58 |
-
is_server_running.store(true);
|
| 59 |
-
accept_thread = std::thread(&IpcHandler::accept_connections_loop, this);
|
| 60 |
-
|
| 61 |
-
return true;
|
| 62 |
-
}
|
| 63 |
-
|
| 64 |
-
void IpcHandler::stop_server() {
|
| 65 |
-
if (!is_server_running.load()) {
|
| 66 |
-
return;
|
| 67 |
-
}
|
| 68 |
-
|
| 69 |
-
is_server_running.store(false);
|
| 70 |
-
|
| 71 |
-
if (server_socket_fd >= 0) {
|
| 72 |
-
shutdown(server_socket_fd, SHUT_RDWR);
|
| 73 |
-
close(server_socket_fd);
|
| 74 |
-
server_socket_fd = -1;
|
| 75 |
-
}
|
| 76 |
-
|
| 77 |
-
if (accept_thread.joinable()) {
|
| 78 |
-
accept_thread.join();
|
| 79 |
-
}
|
| 80 |
-
|
| 81 |
-
unlink(socket_file_path.c_str());
|
| 82 |
-
}
|
| 83 |
-
|
| 84 |
-
bool IpcHandler::is_running() const {
|
| 85 |
-
return is_server_running.load();
|
| 86 |
-
}
|
| 87 |
-
|
| 88 |
-
// Installs `handler` for `command_type`, replacing any handler previously
// registered for that command. The handler table is updated while holding
// `handlers_mutex`.
void IpcHandler::register_command_handler(CommandType command_type, CommandHandlerFunction handler) {
    std::lock_guard<std::mutex> guard(handlers_mutex);
    command_handlers[command_type] = std::move(handler);
}
|
| 92 |
-
|
| 93 |
-
// Stores `callback` as the shutdown callback, replacing any earlier one.
// The assignment is not guarded by a mutex here.
void IpcHandler::set_shutdown_callback(std::function<void()> callback) {
    shutdown_callback = std::move(callback);
}
|
| 96 |
-
|
| 97 |
-
void IpcHandler::accept_connections_loop() {
|
| 98 |
-
while (is_server_running.load()) {
|
| 99 |
-
struct sockaddr_un client_address;
|
| 100 |
-
socklen_t client_address_length = sizeof(client_address);
|
| 101 |
-
|
| 102 |
-
int client_socket_fd = accept(
|
| 103 |
-
server_socket_fd,
|
| 104 |
-
reinterpret_cast<struct sockaddr*>(&client_address),
|
| 105 |
-
&client_address_length
|
| 106 |
-
);
|
| 107 |
-
|
| 108 |
-
if (client_socket_fd < 0) {
|
| 109 |
-
if (!is_server_running.load()) {
|
| 110 |
-
break;
|
| 111 |
-
}
|
| 112 |
-
continue;
|
| 113 |
-
}
|
| 114 |
-
|
| 115 |
-
handle_client_connection(client_socket_fd);
|
| 116 |
-
close(client_socket_fd);
|
| 117 |
-
}
|
| 118 |
-
}
|
| 119 |
-
|
| 120 |
-
void IpcHandler::handle_client_connection(int client_socket_fd) {
|
| 121 |
-
RequestHeader request_header;
|
| 122 |
-
std::vector<std::uint8_t> request_payload;
|
| 123 |
-
|
| 124 |
-
if (!receive_request(client_socket_fd, request_header, request_payload)) {
|
| 125 |
-
return;
|
| 126 |
-
}
|
| 127 |
-
|
| 128 |
-
if (request_header.magic_number != PROTOCOL_MAGIC_NUMBER) {
|
| 129 |
-
ResponseHeader error_response;
|
| 130 |
-
error_response.magic_number = PROTOCOL_MAGIC_NUMBER;
|
| 131 |
-
error_response.status_code = static_cast<std::uint32_t>(ResponseStatus::ERROR_INVALID_COMMAND);
|
| 132 |
-
error_response.payload_size = 0;
|
| 133 |
-
error_response.request_id = request_header.request_id;
|
| 134 |
-
send_response(client_socket_fd, error_response, {});
|
| 135 |
-
return;
|
| 136 |
-
}
|
| 137 |
-
|
| 138 |
-
CommandType command_type = static_cast<CommandType>(request_header.command_type);
|
| 139 |
-
|
| 140 |
-
std::vector<std::uint8_t> response_payload;
|
| 141 |
-
ResponseStatus status = ResponseStatus::SUCCESS;
|
| 142 |
-
|
| 143 |
-
{
|
| 144 |
-
std::unique_lock<std::mutex> lock(handlers_mutex);
|
| 145 |
-
auto handler_iterator = command_handlers.find(command_type);
|
| 146 |
-
|
| 147 |
-
if (handler_iterator != command_handlers.end()) {
|
| 148 |
-
try {
|
| 149 |
-
response_payload = handler_iterator->second(request_payload);
|
| 150 |
-
} catch (const std::exception& exception) {
|
| 151 |
-
std::cerr << "Handler exception: " << exception.what() << std::endl;
|
| 152 |
-
status = ResponseStatus::ERROR_INTERNAL;
|
| 153 |
-
} catch (...) {
|
| 154 |
-
std::cerr << "Handler unknown exception" << std::endl;
|
| 155 |
-
status = ResponseStatus::ERROR_INTERNAL;
|
| 156 |
-
}
|
| 157 |
-
} else {
|
| 158 |
-
status = ResponseStatus::ERROR_INVALID_COMMAND;
|
| 159 |
-
}
|
| 160 |
-
}
|
| 161 |
-
|
| 162 |
-
ResponseHeader response_header;
|
| 163 |
-
response_header.magic_number = PROTOCOL_MAGIC_NUMBER;
|
| 164 |
-
response_header.status_code = static_cast<std::uint32_t>(status);
|
| 165 |
-
response_header.payload_size = static_cast<std::uint32_t>(response_payload.size());
|
| 166 |
-
response_header.request_id = request_header.request_id;
|
| 167 |
-
|
| 168 |
-
send_response(client_socket_fd, response_header, response_payload);
|
| 169 |
-
}
|
| 170 |
-
|
| 171 |
-
bool IpcHandler::send_response(
|
| 172 |
-
int socket_fd,
|
| 173 |
-
const ResponseHeader& header,
|
| 174 |
-
const std::vector<std::uint8_t>& payload
|
| 175 |
-
) {
|
| 176 |
-
ssize_t bytes_written = write(socket_fd, &header, sizeof(ResponseHeader));
|
| 177 |
-
|
| 178 |
-
if (bytes_written != sizeof(ResponseHeader)) {
|
| 179 |
-
return false;
|
| 180 |
-
}
|
| 181 |
-
|
| 182 |
-
if (!payload.empty()) {
|
| 183 |
-
bytes_written = write(socket_fd, payload.data(), payload.size());
|
| 184 |
-
|
| 185 |
-
if (bytes_written != static_cast<ssize_t>(payload.size())) {
|
| 186 |
-
return false;
|
| 187 |
-
}
|
| 188 |
-
}
|
| 189 |
-
|
| 190 |
-
return true;
|
| 191 |
-
}
|
| 192 |
-
|
| 193 |
-
bool IpcHandler::receive_request(
|
| 194 |
-
int socket_fd,
|
| 195 |
-
RequestHeader& header,
|
| 196 |
-
std::vector<std::uint8_t>& payload
|
| 197 |
-
) {
|
| 198 |
-
ssize_t bytes_read = read(socket_fd, &header, sizeof(RequestHeader));
|
| 199 |
-
|
| 200 |
-
if (bytes_read != sizeof(RequestHeader)) {
|
| 201 |
-
return false;
|
| 202 |
-
}
|
| 203 |
-
|
| 204 |
-
if (header.payload_size > MAXIMUM_PAYLOAD_SIZE) {
|
| 205 |
-
return false;
|
| 206 |
-
}
|
| 207 |
-
|
| 208 |
-
if (header.payload_size > 0) {
|
| 209 |
-
payload.resize(header.payload_size);
|
| 210 |
-
std::size_t total_read = 0;
|
| 211 |
-
|
| 212 |
-
while (total_read < header.payload_size) {
|
| 213 |
-
bytes_read = read(socket_fd, payload.data() + total_read, header.payload_size - total_read);
|
| 214 |
-
|
| 215 |
-
if (bytes_read <= 0) {
|
| 216 |
-
return false;
|
| 217 |
-
}
|
| 218 |
-
|
| 219 |
-
total_read += static_cast<std::size_t>(bytes_read);
|
| 220 |
-
}
|
| 221 |
-
}
|
| 222 |
-
|
| 223 |
-
return true;
|
| 224 |
-
}
|
| 225 |
-
|
| 226 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
accelerator/src/main.cpp
DELETED
|
@@ -1,83 +0,0 @@
|
|
| 1 |
-
//
|
| 2 |
-
// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
// SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
//
|
| 5 |
-
|
| 6 |
-
#include "accelerator_core.hpp"
|
| 7 |
-
#include <cstdlib>
|
| 8 |
-
#include <cstring>
|
| 9 |
-
#include <iostream>
|
| 10 |
-
#include <string>
|
| 11 |
-
|
| 12 |
-
// Prints the command-line help text for `program_name` to standard output
// and flushes it.
void print_usage(const char* program_name) {
    std::cout
        << "Usage: " << program_name << " [options]" << '\n'
        << '\n'
        << "Options:" << '\n'
        << " --socket PATH IPC socket path (default: /tmp/pocket_tts_accelerator.sock)" << '\n'
        << " --threads N Number of worker threads (default: 2)" << '\n'
        << " --memory MB Memory pool size in megabytes (default: 64)" << '\n'
        << " --quiet Disable verbose logging" << '\n'
        << " --help Show this help message" << '\n';
    std::cout.flush();
}
|
| 23 |
-
|
| 24 |
-
int main(int argc, char* argv[]) {
|
| 25 |
-
std::cout.setf(std::ios::unitbuf);
|
| 26 |
-
std::cerr.setf(std::ios::unitbuf);
|
| 27 |
-
|
| 28 |
-
pocket_tts_accelerator::AcceleratorConfiguration configuration =
|
| 29 |
-
pocket_tts_accelerator::AcceleratorCore::get_default_configuration();
|
| 30 |
-
|
| 31 |
-
for (int argument_index = 1; argument_index < argc; ++argument_index) {
|
| 32 |
-
std::string argument(argv[argument_index]);
|
| 33 |
-
|
| 34 |
-
if (argument == "--help" || argument == "-h") {
|
| 35 |
-
print_usage(argv[0]);
|
| 36 |
-
return 0;
|
| 37 |
-
}
|
| 38 |
-
|
| 39 |
-
if (argument == "--socket" && argument_index + 1 < argc) {
|
| 40 |
-
configuration.ipc_socket_path = argv[++argument_index];
|
| 41 |
-
continue;
|
| 42 |
-
}
|
| 43 |
-
|
| 44 |
-
if (argument == "--threads" && argument_index + 1 < argc) {
|
| 45 |
-
configuration.number_of_worker_threads = std::stoul(argv[++argument_index]);
|
| 46 |
-
continue;
|
| 47 |
-
}
|
| 48 |
-
|
| 49 |
-
if (argument == "--memory" && argument_index + 1 < argc) {
|
| 50 |
-
std::size_t memory_mb = std::stoul(argv[++argument_index]);
|
| 51 |
-
configuration.memory_pool_size_bytes = memory_mb * 1024 * 1024;
|
| 52 |
-
continue;
|
| 53 |
-
}
|
| 54 |
-
|
| 55 |
-
if (argument == "--quiet" || argument == "-q") {
|
| 56 |
-
configuration.enable_verbose_logging = false;
|
| 57 |
-
continue;
|
| 58 |
-
}
|
| 59 |
-
|
| 60 |
-
std::cerr << "Unknown argument: " << argument << std::endl;
|
| 61 |
-
print_usage(argv[0]);
|
| 62 |
-
return 1;
|
| 63 |
-
}
|
| 64 |
-
|
| 65 |
-
if (configuration.number_of_worker_threads < 1) {
|
| 66 |
-
configuration.number_of_worker_threads = 1;
|
| 67 |
-
}
|
| 68 |
-
|
| 69 |
-
if (configuration.number_of_worker_threads > 2) {
|
| 70 |
-
configuration.number_of_worker_threads = 2;
|
| 71 |
-
}
|
| 72 |
-
|
| 73 |
-
pocket_tts_accelerator::AcceleratorCore accelerator(configuration);
|
| 74 |
-
|
| 75 |
-
if (!accelerator.initialize()) {
|
| 76 |
-
std::cerr << "Failed to initialize accelerator" << std::endl;
|
| 77 |
-
return 1;
|
| 78 |
-
}
|
| 79 |
-
|
| 80 |
-
accelerator.run();
|
| 81 |
-
|
| 82 |
-
return 0;
|
| 83 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
accelerator/src/memory_pool.cpp
DELETED
|
@@ -1,216 +0,0 @@
|
|
| 1 |
-
//
|
| 2 |
-
// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
// SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
//
|
| 5 |
-
|
| 6 |
-
#include "memory_pool.hpp"
|
| 7 |
-
#include <algorithm>
|
| 8 |
-
#include <chrono>
|
| 9 |
-
#include <cstring>
|
| 10 |
-
|
| 11 |
-
namespace pocket_tts_accelerator {
|
| 12 |
-
|
| 13 |
-
// Creates an empty pool. Nothing is allocated up front: both byte counters
// start at zero and `initial_pool_size_bytes` is stored as the pool's
// maximum size.
MemoryPool::MemoryPool(std::size_t initial_pool_size_bytes)
    : total_allocated_bytes(0),
      total_used_bytes(0),
      maximum_pool_size_bytes(initial_pool_size_bytes) {}
|
| 18 |
-
|
| 19 |
-
// Releases every block by resetting the pool.
MemoryPool::~MemoryPool() { reset_pool(); }
|
| 22 |
-
|
| 23 |
-
std::uint8_t* MemoryPool::allocate(std::size_t requested_size_bytes) {
|
| 24 |
-
std::unique_lock<std::mutex> lock(pool_mutex);
|
| 25 |
-
|
| 26 |
-
std::size_t block_index = find_suitable_block_index(requested_size_bytes);
|
| 27 |
-
|
| 28 |
-
if (block_index != static_cast<std::size_t>(-1)) {
|
| 29 |
-
MemoryBlock& existing_block = memory_blocks[block_index];
|
| 30 |
-
existing_block.is_in_use = true;
|
| 31 |
-
existing_block.last_access_timestamp = get_current_timestamp();
|
| 32 |
-
total_used_bytes += existing_block.block_size;
|
| 33 |
-
return existing_block.data.get();
|
| 34 |
-
}
|
| 35 |
-
|
| 36 |
-
if (total_allocated_bytes + requested_size_bytes > maximum_pool_size_bytes) {
|
| 37 |
-
clear_unused_blocks();
|
| 38 |
-
}
|
| 39 |
-
|
| 40 |
-
std::size_t aligned_size = ((requested_size_bytes + 63) / 64) * 64;
|
| 41 |
-
|
| 42 |
-
memory_blocks.push_back(MemoryBlock{
|
| 43 |
-
std::make_unique<std::uint8_t[]>(aligned_size),
|
| 44 |
-
aligned_size,
|
| 45 |
-
true,
|
| 46 |
-
get_current_timestamp()
|
| 47 |
-
});
|
| 48 |
-
|
| 49 |
-
std::uint8_t* allocated_pointer = memory_blocks.back().data.get();
|
| 50 |
-
pointer_to_block_index[allocated_pointer] = memory_blocks.size() - 1;
|
| 51 |
-
|
| 52 |
-
total_allocated_bytes += aligned_size;
|
| 53 |
-
total_used_bytes += aligned_size;
|
| 54 |
-
|
| 55 |
-
return allocated_pointer;
|
| 56 |
-
}
|
| 57 |
-
|
| 58 |
-
void MemoryPool::deallocate(std::uint8_t* pointer) {
|
| 59 |
-
if (pointer == nullptr) {
|
| 60 |
-
return;
|
| 61 |
-
}
|
| 62 |
-
|
| 63 |
-
std::unique_lock<std::mutex> lock(pool_mutex);
|
| 64 |
-
|
| 65 |
-
auto iterator = pointer_to_block_index.find(pointer);
|
| 66 |
-
|
| 67 |
-
if (iterator != pointer_to_block_index.end()) {
|
| 68 |
-
std::size_t block_index = iterator->second;
|
| 69 |
-
|
| 70 |
-
if (block_index < memory_blocks.size()) {
|
| 71 |
-
MemoryBlock& block = memory_blocks[block_index];
|
| 72 |
-
|
| 73 |
-
if (block.is_in_use) {
|
| 74 |
-
block.is_in_use = false;
|
| 75 |
-
block.last_access_timestamp = get_current_timestamp();
|
| 76 |
-
total_used_bytes -= block.block_size;
|
| 77 |
-
}
|
| 78 |
-
}
|
| 79 |
-
}
|
| 80 |
-
}
|
| 81 |
-
|
| 82 |
-
void MemoryPool::clear_unused_blocks() {
|
| 83 |
-
std::vector<std::size_t> indices_to_remove;
|
| 84 |
-
|
| 85 |
-
for (std::size_t index = 0; index < memory_blocks.size(); ++index) {
|
| 86 |
-
if (!memory_blocks[index].is_in_use) {
|
| 87 |
-
indices_to_remove.push_back(index);
|
| 88 |
-
}
|
| 89 |
-
}
|
| 90 |
-
|
| 91 |
-
std::sort(indices_to_remove.rbegin(), indices_to_remove.rend());
|
| 92 |
-
|
| 93 |
-
for (std::size_t index : indices_to_remove) {
|
| 94 |
-
std::uint8_t* pointer = memory_blocks[index].data.get();
|
| 95 |
-
total_allocated_bytes -= memory_blocks[index].block_size;
|
| 96 |
-
|
| 97 |
-
pointer_to_block_index.erase(pointer);
|
| 98 |
-
memory_blocks.erase(memory_blocks.begin() + static_cast<std::ptrdiff_t>(index));
|
| 99 |
-
}
|
| 100 |
-
|
| 101 |
-
for (std::size_t index = 0; index < memory_blocks.size(); ++index) {
|
| 102 |
-
pointer_to_block_index[memory_blocks[index].data.get()] = index;
|
| 103 |
-
}
|
| 104 |
-
}
|
| 105 |
-
|
| 106 |
-
void MemoryPool::reset_pool() {
|
| 107 |
-
std::unique_lock<std::mutex> lock(pool_mutex);
|
| 108 |
-
|
| 109 |
-
memory_blocks.clear();
|
| 110 |
-
pointer_to_block_index.clear();
|
| 111 |
-
total_allocated_bytes = 0;
|
| 112 |
-
total_used_bytes = 0;
|
| 113 |
-
}
|
| 114 |
-
|
| 115 |
-
std::size_t MemoryPool::get_total_allocated_bytes() const {
|
| 116 |
-
std::unique_lock<std::mutex> lock(pool_mutex);
|
| 117 |
-
return total_allocated_bytes;
|
| 118 |
-
}
|
| 119 |
-
|
| 120 |
-
std::size_t MemoryPool::get_total_used_bytes() const {
|
| 121 |
-
std::unique_lock<std::mutex> lock(pool_mutex);
|
| 122 |
-
return total_used_bytes;
|
| 123 |
-
}
|
| 124 |
-
|
| 125 |
-
std::size_t MemoryPool::get_block_count() const {
|
| 126 |
-
std::unique_lock<std::mutex> lock(pool_mutex);
|
| 127 |
-
return memory_blocks.size();
|
| 128 |
-
}
|
| 129 |
-
|
| 130 |
-
std::size_t MemoryPool::find_suitable_block_index(std::size_t requested_size) const {
|
| 131 |
-
std::size_t best_fit_index = static_cast<std::size_t>(-1);
|
| 132 |
-
std::size_t best_fit_size = static_cast<std::size_t>(-1);
|
| 133 |
-
|
| 134 |
-
for (std::size_t index = 0; index < memory_blocks.size(); ++index) {
|
| 135 |
-
const MemoryBlock& block = memory_blocks[index];
|
| 136 |
-
|
| 137 |
-
if (!block.is_in_use && block.block_size >= requested_size) {
|
| 138 |
-
if (block.block_size < best_fit_size) {
|
| 139 |
-
best_fit_size = block.block_size;
|
| 140 |
-
best_fit_index = index;
|
| 141 |
-
}
|
| 142 |
-
}
|
| 143 |
-
}
|
| 144 |
-
|
| 145 |
-
return best_fit_index;
|
| 146 |
-
}
|
| 147 |
-
|
| 148 |
-
void MemoryPool::create_new_block(std::size_t block_size) {
|
| 149 |
-
std::size_t aligned_size = ((block_size + 63) / 64) * 64;
|
| 150 |
-
|
| 151 |
-
memory_blocks.push_back(MemoryBlock{
|
| 152 |
-
std::make_unique<std::uint8_t[]>(aligned_size),
|
| 153 |
-
aligned_size,
|
| 154 |
-
false,
|
| 155 |
-
get_current_timestamp()
|
| 156 |
-
});
|
| 157 |
-
|
| 158 |
-
pointer_to_block_index[memory_blocks.back().data.get()] = memory_blocks.size() - 1;
|
| 159 |
-
total_allocated_bytes += aligned_size;
|
| 160 |
-
}
|
| 161 |
-
|
| 162 |
-
// Returns the steady clock's current time as milliseconds since that
// clock's epoch.
std::uint64_t MemoryPool::get_current_timestamp() const {
    using std::chrono::milliseconds;
    using std::chrono::steady_clock;

    const auto since_epoch = steady_clock::now().time_since_epoch();
    return std::chrono::duration_cast<milliseconds>(since_epoch).count();
}
|
| 167 |
-
|
| 168 |
-
// Allocates `size` bytes from `pool` and remembers the pool so the buffer
// can be returned to it when this object is destroyed.
ScopedMemoryAllocation::ScopedMemoryAllocation(MemoryPool& pool, std::size_t size)
    : memory_pool_pointer(&pool),
      allocated_pointer(pool.allocate(size)),
      allocation_size(size) {}
|
| 173 |
-
|
| 174 |
-
// Returns the buffer to its pool. An object that has been moved from holds
// null pointers and releases nothing.
ScopedMemoryAllocation::~ScopedMemoryAllocation() {
    const bool owns_buffer = memory_pool_pointer != nullptr && allocated_pointer != nullptr;

    if (!owns_buffer) {
        return;
    }

    memory_pool_pointer->deallocate(allocated_pointer);
}
|
| 179 |
-
|
| 180 |
-
// Move constructor: takes over `other`'s buffer and leaves `other` empty
// (null pointers, size 0) so its destructor releases nothing.
ScopedMemoryAllocation::ScopedMemoryAllocation(ScopedMemoryAllocation&& other) noexcept
    : memory_pool_pointer(other.memory_pool_pointer),
      allocated_pointer(other.allocated_pointer),
      allocation_size(other.allocation_size) {
    other.allocation_size = 0;
    other.allocated_pointer = nullptr;
    other.memory_pool_pointer = nullptr;
}
|
| 189 |
-
|
| 190 |
-
// Move assignment: returns the buffer currently held (if any) to its pool,
// then takes over `other`'s buffer and leaves `other` empty.
// Self-assignment is a no-op.
ScopedMemoryAllocation& ScopedMemoryAllocation::operator=(ScopedMemoryAllocation&& other) noexcept {
    if (this == &other) {
        return *this;
    }

    const bool owns_buffer = memory_pool_pointer != nullptr && allocated_pointer != nullptr;

    if (owns_buffer) {
        memory_pool_pointer->deallocate(allocated_pointer);
    }

    memory_pool_pointer = other.memory_pool_pointer;
    allocated_pointer = other.allocated_pointer;
    allocation_size = other.allocation_size;

    other.allocation_size = 0;
    other.allocated_pointer = nullptr;
    other.memory_pool_pointer = nullptr;

    return *this;
}
|
| 207 |
-
|
| 208 |
-
std::uint8_t* ScopedMemoryAllocation::get() const {
|
| 209 |
-
return allocated_pointer;
|
| 210 |
-
}
|
| 211 |
-
|
| 212 |
-
std::size_t ScopedMemoryAllocation::size() const {
|
| 213 |
-
return allocation_size;
|
| 214 |
-
}
|
| 215 |
-
|
| 216 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
accelerator/src/thread_pool.cpp
DELETED
|
@@ -1,84 +0,0 @@
|
|
| 1 |
-
//
|
| 2 |
-
// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
// SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
//
|
| 5 |
-
|
| 6 |
-
#include "thread_pool.hpp"
|
| 7 |
-
|
| 8 |
-
namespace pocket_tts_accelerator {
|
| 9 |
-
|
| 10 |
-
// Starts `number_of_threads` worker threads, each running
// worker_thread_function() until the pool is shut down.
ThreadPool::ThreadPool(std::size_t number_of_threads)
    : should_stop(false),
      is_stopped(false),
      thread_count(number_of_threads) {
    worker_threads.reserve(number_of_threads);

    std::size_t threads_started = 0;

    while (threads_started < number_of_threads) {
        worker_threads.emplace_back([this] { worker_thread_function(); });
        ++threads_started;
    }
}
|
| 21 |
-
|
| 22 |
-
// Shuts the pool down on destruction; shutdown() returns early when the
// pool has already been stopped.
ThreadPool::~ThreadPool() { shutdown(); }
|
| 25 |
-
|
| 26 |
-
void ThreadPool::shutdown() {
|
| 27 |
-
{
|
| 28 |
-
std::unique_lock<std::mutex> lock(queue_mutex);
|
| 29 |
-
|
| 30 |
-
if (is_stopped.load()) {
|
| 31 |
-
return;
|
| 32 |
-
}
|
| 33 |
-
|
| 34 |
-
should_stop.store(true);
|
| 35 |
-
}
|
| 36 |
-
|
| 37 |
-
task_available_condition.notify_all();
|
| 38 |
-
|
| 39 |
-
for (std::thread& worker_thread : worker_threads) {
|
| 40 |
-
if (worker_thread.joinable()) {
|
| 41 |
-
worker_thread.join();
|
| 42 |
-
}
|
| 43 |
-
}
|
| 44 |
-
|
| 45 |
-
is_stopped.store(true);
|
| 46 |
-
}
|
| 47 |
-
|
| 48 |
-
bool ThreadPool::is_running() const {
|
| 49 |
-
return !should_stop.load() && !is_stopped.load();
|
| 50 |
-
}
|
| 51 |
-
|
| 52 |
-
std::size_t ThreadPool::get_pending_task_count() const {
|
| 53 |
-
std::unique_lock<std::mutex> lock(queue_mutex);
|
| 54 |
-
return task_queue.size();
|
| 55 |
-
}
|
| 56 |
-
|
| 57 |
-
std::size_t ThreadPool::get_thread_count() const {
|
| 58 |
-
return thread_count;
|
| 59 |
-
}
|
| 60 |
-
|
| 61 |
-
void ThreadPool::worker_thread_function() {
|
| 62 |
-
while (true) {
|
| 63 |
-
std::function<void()> task_to_execute;
|
| 64 |
-
|
| 65 |
-
{
|
| 66 |
-
std::unique_lock<std::mutex> lock(queue_mutex);
|
| 67 |
-
|
| 68 |
-
task_available_condition.wait(lock, [this] {
|
| 69 |
-
return should_stop.load() || !task_queue.empty();
|
| 70 |
-
});
|
| 71 |
-
|
| 72 |
-
if (should_stop.load() && task_queue.empty()) {
|
| 73 |
-
return;
|
| 74 |
-
}
|
| 75 |
-
|
| 76 |
-
task_to_execute = std::move(task_queue.front());
|
| 77 |
-
task_queue.pop();
|
| 78 |
-
}
|
| 79 |
-
|
| 80 |
-
task_to_execute();
|
| 81 |
-
}
|
| 82 |
-
}
|
| 83 |
-
|
| 84 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
DELETED
|
@@ -1,372 +0,0 @@
|
|
| 1 |
-
#
|
| 2 |
-
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
# SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
#
|
| 5 |
-
|
| 6 |
-
import atexit
|
| 7 |
-
import math
|
| 8 |
-
import torch
|
| 9 |
-
import gradio as gr
|
| 10 |
-
from config import (
|
| 11 |
-
AVAILABLE_VOICES,
|
| 12 |
-
DEFAULT_VOICE,
|
| 13 |
-
DEFAULT_MODEL_VARIANT,
|
| 14 |
-
DEFAULT_TEMPERATURE,
|
| 15 |
-
DEFAULT_LSD_DECODE_STEPS,
|
| 16 |
-
DEFAULT_EOS_THRESHOLD,
|
| 17 |
-
DEFAULT_NOISE_CLAMP,
|
| 18 |
-
DEFAULT_FRAMES_AFTER_EOS,
|
| 19 |
-
MAXIMUM_INPUT_LENGTH,
|
| 20 |
-
VOICE_MODE_PRESET,
|
| 21 |
-
VOICE_MODE_CLONE,
|
| 22 |
-
EXAMPLE_PROMPTS,
|
| 23 |
-
ACCELERATOR_ENABLED,
|
| 24 |
-
ACCELERATOR_WORKER_THREADS
|
| 25 |
-
)
|
| 26 |
-
from src.core.authentication import authenticate_huggingface
|
| 27 |
-
authenticate_huggingface()
|
| 28 |
-
# Optionally launch the accelerator daemon. When it cannot be started the
# application reports that it is using the Python fallback.
# NOTE(review): indentation was lost in this view; the nesting below is
# reconstructed from the statement order and should be confirmed.
if ACCELERATOR_ENABLED:
    from src.accelerator.client import start_accelerator_daemon, stop_accelerator_daemon

    accelerator_started = start_accelerator_daemon()
    print(
        "Accelerator daemon started successfully"
        if accelerator_started
        else "Accelerator daemon not available, using Python fallback",
        flush=True,
    )

    # Registered whether or not the daemon started.
    atexit.register(stop_accelerator_daemon)
|
| 36 |
-
from src.core.memory import start_background_cleanup_thread
|
| 37 |
-
start_background_cleanup_thread()
|
| 38 |
-
from src.generation.handler import (
|
| 39 |
-
perform_speech_generation,
|
| 40 |
-
request_generation_stop
|
| 41 |
-
)
|
| 42 |
-
from src.ui.state import (
|
| 43 |
-
check_generate_button_state,
|
| 44 |
-
calculate_character_count_display,
|
| 45 |
-
determine_clear_button_visibility,
|
| 46 |
-
update_voice_mode_visibility
|
| 47 |
-
)
|
| 48 |
-
from src.ui.handlers import (
|
| 49 |
-
switch_to_generating_state,
|
| 50 |
-
switch_to_idle_state,
|
| 51 |
-
perform_clear_action,
|
| 52 |
-
create_example_handler,
|
| 53 |
-
format_example_button_label
|
| 54 |
-
)
|
| 55 |
-
from assets.css.styles import CSS
|
| 56 |
-
from assets.static.title import TITLE
|
| 57 |
-
from assets.static.header import HEADER
|
| 58 |
-
from assets.static.footer import FOOTER
|
| 59 |
-
from assets.static.sidebar import SIDEBAR
|
| 60 |
-
|
| 61 |
-
with gr.Blocks(css=CSS, fill_height=False, fill_width=True) as app:
|
| 62 |
-
torch.set_num_threads(ACCELERATOR_WORKER_THREADS)
|
| 63 |
-
torch.set_num_interop_threads(ACCELERATOR_WORKER_THREADS)
|
| 64 |
-
|
| 65 |
-
ui_state = gr.State({"generating": False})
|
| 66 |
-
|
| 67 |
-
with gr.Sidebar():
|
| 68 |
-
gr.HTML(SIDEBAR())
|
| 69 |
-
|
| 70 |
-
with gr.Column(elem_classes="header-section"):
|
| 71 |
-
gr.HTML(TITLE())
|
| 72 |
-
gr.HTML(HEADER())
|
| 73 |
-
|
| 74 |
-
with gr.Row():
|
| 75 |
-
with gr.Column():
|
| 76 |
-
audio_output_component = gr.Audio(
|
| 77 |
-
label="Generated Speech Output",
|
| 78 |
-
type="filepath",
|
| 79 |
-
interactive=False
|
| 80 |
-
)
|
| 81 |
-
|
| 82 |
-
with gr.Accordion("Voice Selection", open=True):
|
| 83 |
-
voice_mode_radio = gr.Radio(
|
| 84 |
-
label="Voice Mode",
|
| 85 |
-
choices=[
|
| 86 |
-
VOICE_MODE_PRESET,
|
| 87 |
-
VOICE_MODE_CLONE
|
| 88 |
-
],
|
| 89 |
-
value=VOICE_MODE_PRESET,
|
| 90 |
-
info="Choose between preset voices or clone a voice from uploaded audio",
|
| 91 |
-
elem_id="voice-mode"
|
| 92 |
-
)
|
| 93 |
-
|
| 94 |
-
with gr.Column(visible=True) as preset_voice_container:
|
| 95 |
-
voice_preset_dropdown = gr.Dropdown(
|
| 96 |
-
label="Select Preset Voice",
|
| 97 |
-
choices=AVAILABLE_VOICES,
|
| 98 |
-
value=DEFAULT_VOICE
|
| 99 |
-
)
|
| 100 |
-
|
| 101 |
-
with gr.Column(visible=False) as clone_voice_container:
|
| 102 |
-
voice_clone_audio_input = gr.Audio(
|
| 103 |
-
label="Upload Audio for Voice Cloning",
|
| 104 |
-
type="filepath"
|
| 105 |
-
)
|
| 106 |
-
|
| 107 |
-
with gr.Accordion("Model Parameters", open=False):
|
| 108 |
-
with gr.Row():
|
| 109 |
-
temperature_slider = gr.Slider(
|
| 110 |
-
label="Temperature",
|
| 111 |
-
minimum=0.1,
|
| 112 |
-
maximum=2.0,
|
| 113 |
-
step=0.05,
|
| 114 |
-
value=DEFAULT_TEMPERATURE,
|
| 115 |
-
info="Higher values produce more expressive speech"
|
| 116 |
-
)
|
| 117 |
-
|
| 118 |
-
lsd_decode_steps_slider = gr.Slider(
|
| 119 |
-
label="LSD Decode Steps",
|
| 120 |
-
minimum=1,
|
| 121 |
-
maximum=20,
|
| 122 |
-
step=1,
|
| 123 |
-
value=DEFAULT_LSD_DECODE_STEPS,
|
| 124 |
-
info="More steps may improve quality but slower"
|
| 125 |
-
)
|
| 126 |
-
|
| 127 |
-
with gr.Row():
|
| 128 |
-
noise_clamp_slider = gr.Slider(
|
| 129 |
-
label="Noise Clamp",
|
| 130 |
-
minimum=0.0,
|
| 131 |
-
maximum=2.0,
|
| 132 |
-
step=0.05,
|
| 133 |
-
value=DEFAULT_NOISE_CLAMP,
|
| 134 |
-
info="Maximum noise sampling value (0 = disabled)"
|
| 135 |
-
)
|
| 136 |
-
|
| 137 |
-
eos_threshold_slider = gr.Slider(
|
| 138 |
-
label="End of Sequence Threshold",
|
| 139 |
-
minimum=-10.0,
|
| 140 |
-
maximum=0.0,
|
| 141 |
-
step=0.25,
|
| 142 |
-
value=DEFAULT_EOS_THRESHOLD,
|
| 143 |
-
info="Smaller values cause earlier completion"
|
| 144 |
-
)
|
| 145 |
-
|
| 146 |
-
with gr.Accordion("Advanced Settings", open=False):
|
| 147 |
-
model_variant_textbox = gr.Textbox(
|
| 148 |
-
label="Model Variant Identifier",
|
| 149 |
-
value=DEFAULT_MODEL_VARIANT,
|
| 150 |
-
info="Model signature for generation"
|
| 151 |
-
)
|
| 152 |
-
|
| 153 |
-
with gr.Row():
|
| 154 |
-
enable_custom_frames_checkbox = gr.Checkbox(
|
| 155 |
-
label="Enable Custom Frames After EOS",
|
| 156 |
-
value=False,
|
| 157 |
-
info="Manually control post-EOS frame generation"
|
| 158 |
-
)
|
| 159 |
-
|
| 160 |
-
frames_after_eos_slider = gr.Slider(
|
| 161 |
-
label="Frames After EOS",
|
| 162 |
-
minimum=0,
|
| 163 |
-
maximum=100,
|
| 164 |
-
step=1,
|
| 165 |
-
value=DEFAULT_FRAMES_AFTER_EOS,
|
| 166 |
-
info="Additional frames after end-of-sequence (80ms per frame)"
|
| 167 |
-
)
|
| 168 |
-
|
| 169 |
-
with gr.Column(scale=1):
|
| 170 |
-
text_input_component = gr.Textbox(
|
| 171 |
-
label="Prompt",
|
| 172 |
-
placeholder="Enter the text you want to convert to speech...",
|
| 173 |
-
lines=2,
|
| 174 |
-
max_lines=20,
|
| 175 |
-
max_length=MAXIMUM_INPUT_LENGTH,
|
| 176 |
-
autoscroll=True
|
| 177 |
-
)
|
| 178 |
-
|
| 179 |
-
character_count_display = gr.HTML(
|
| 180 |
-
f"""
|
| 181 |
-
<div class="character-count">
|
| 182 |
-
<span>0 / {MAXIMUM_INPUT_LENGTH}</span>
|
| 183 |
-
</div>
|
| 184 |
-
""",
|
| 185 |
-
visible=False
|
| 186 |
-
)
|
| 187 |
-
|
| 188 |
-
generate_button = gr.Button(
|
| 189 |
-
"Generate",
|
| 190 |
-
variant="primary",
|
| 191 |
-
size="lg",
|
| 192 |
-
interactive=False
|
| 193 |
-
)
|
| 194 |
-
|
| 195 |
-
stop_button = gr.Button(
|
| 196 |
-
"Stop",
|
| 197 |
-
variant="stop",
|
| 198 |
-
size="lg",
|
| 199 |
-
visible=False
|
| 200 |
-
)
|
| 201 |
-
|
| 202 |
-
clear_button = gr.Button(
|
| 203 |
-
"Clear",
|
| 204 |
-
variant="secondary",
|
| 205 |
-
size="lg",
|
| 206 |
-
visible=False
|
| 207 |
-
)
|
| 208 |
-
|
| 209 |
-
gr.HTML(
|
| 210 |
-
"""
|
| 211 |
-
<div class="example-prompts">
|
| 212 |
-
<h3>Example Prompts</h3>
|
| 213 |
-
<p>Click any example to generate speech with its assigned voice</p>
|
| 214 |
-
</div>
|
| 215 |
-
"""
|
| 216 |
-
)
|
| 217 |
-
|
| 218 |
-
example_buttons_list = []
|
| 219 |
-
num_examples = len(EXAMPLE_PROMPTS)
|
| 220 |
-
examples_per_row = 2
|
| 221 |
-
num_rows = math.ceil(num_examples / examples_per_row)
|
| 222 |
-
|
| 223 |
-
for row_idx in range(num_rows):
|
| 224 |
-
with gr.Row():
|
| 225 |
-
start_idx = row_idx * examples_per_row
|
| 226 |
-
end_idx = min(start_idx + examples_per_row, num_examples)
|
| 227 |
-
for i in range(start_idx, end_idx):
|
| 228 |
-
btn = gr.Button(
|
| 229 |
-
format_example_button_label(
|
| 230 |
-
EXAMPLE_PROMPTS[i]["text"],
|
| 231 |
-
EXAMPLE_PROMPTS[i]["voice"]
|
| 232 |
-
),
|
| 233 |
-
size="sm",
|
| 234 |
-
variant="secondary"
|
| 235 |
-
)
|
| 236 |
-
example_buttons_list.append(btn)
|
| 237 |
-
|
| 238 |
-
gr.HTML(FOOTER())
|
| 239 |
-
|
| 240 |
-
generation_inputs = [
|
| 241 |
-
text_input_component,
|
| 242 |
-
voice_mode_radio,
|
| 243 |
-
voice_preset_dropdown,
|
| 244 |
-
voice_clone_audio_input,
|
| 245 |
-
model_variant_textbox,
|
| 246 |
-
lsd_decode_steps_slider,
|
| 247 |
-
temperature_slider,
|
| 248 |
-
noise_clamp_slider,
|
| 249 |
-
eos_threshold_slider,
|
| 250 |
-
frames_after_eos_slider,
|
| 251 |
-
enable_custom_frames_checkbox
|
| 252 |
-
]
|
| 253 |
-
|
| 254 |
-
voice_mode_radio.change(
|
| 255 |
-
fn=update_voice_mode_visibility,
|
| 256 |
-
inputs=[voice_mode_radio],
|
| 257 |
-
outputs=[
|
| 258 |
-
preset_voice_container,
|
| 259 |
-
clone_voice_container
|
| 260 |
-
]
|
| 261 |
-
)
|
| 262 |
-
|
| 263 |
-
text_input_component.change(
|
| 264 |
-
fn=calculate_character_count_display,
|
| 265 |
-
inputs=[text_input_component],
|
| 266 |
-
outputs=[character_count_display]
|
| 267 |
-
)
|
| 268 |
-
|
| 269 |
-
text_input_component.change(
|
| 270 |
-
fn=check_generate_button_state,
|
| 271 |
-
inputs=[
|
| 272 |
-
text_input_component,
|
| 273 |
-
ui_state
|
| 274 |
-
],
|
| 275 |
-
outputs=[generate_button]
|
| 276 |
-
)
|
| 277 |
-
|
| 278 |
-
text_input_component.change(
|
| 279 |
-
fn=determine_clear_button_visibility,
|
| 280 |
-
inputs=[
|
| 281 |
-
text_input_component,
|
| 282 |
-
ui_state
|
| 283 |
-
],
|
| 284 |
-
outputs=[clear_button]
|
| 285 |
-
)
|
| 286 |
-
|
| 287 |
-
generate_button.click(
|
| 288 |
-
fn=switch_to_generating_state,
|
| 289 |
-
inputs=[ui_state],
|
| 290 |
-
outputs=[
|
| 291 |
-
generate_button,
|
| 292 |
-
stop_button,
|
| 293 |
-
clear_button,
|
| 294 |
-
ui_state
|
| 295 |
-
]
|
| 296 |
-
).then(
|
| 297 |
-
fn=perform_speech_generation,
|
| 298 |
-
inputs=generation_inputs,
|
| 299 |
-
outputs=[audio_output_component]
|
| 300 |
-
).then(
|
| 301 |
-
fn=switch_to_idle_state,
|
| 302 |
-
inputs=[
|
| 303 |
-
text_input_component,
|
| 304 |
-
ui_state
|
| 305 |
-
],
|
| 306 |
-
outputs=[
|
| 307 |
-
generate_button,
|
| 308 |
-
stop_button,
|
| 309 |
-
clear_button,
|
| 310 |
-
ui_state
|
| 311 |
-
]
|
| 312 |
-
)
|
| 313 |
-
|
| 314 |
-
stop_button.click(
|
| 315 |
-
fn=request_generation_stop,
|
| 316 |
-
outputs=[stop_button]
|
| 317 |
-
)
|
| 318 |
-
|
| 319 |
-
clear_button.click(
|
| 320 |
-
fn=perform_clear_action,
|
| 321 |
-
outputs=[
|
| 322 |
-
text_input_component,
|
| 323 |
-
audio_output_component,
|
| 324 |
-
clear_button,
|
| 325 |
-
voice_mode_radio,
|
| 326 |
-
voice_preset_dropdown,
|
| 327 |
-
voice_clone_audio_input
|
| 328 |
-
]
|
| 329 |
-
)
|
| 330 |
-
|
| 331 |
-
for button_index, example_button in enumerate(example_buttons_list):
|
| 332 |
-
example_text = EXAMPLE_PROMPTS[button_index]["text"]
|
| 333 |
-
example_voice = EXAMPLE_PROMPTS[button_index]["voice"]
|
| 334 |
-
|
| 335 |
-
example_button.click(
|
| 336 |
-
fn=switch_to_generating_state,
|
| 337 |
-
inputs=[ui_state],
|
| 338 |
-
outputs=[
|
| 339 |
-
generate_button,
|
| 340 |
-
stop_button,
|
| 341 |
-
clear_button,
|
| 342 |
-
ui_state
|
| 343 |
-
]
|
| 344 |
-
).then(
|
| 345 |
-
fn=create_example_handler(example_text, example_voice),
|
| 346 |
-
outputs=[
|
| 347 |
-
text_input_component,
|
| 348 |
-
voice_mode_radio,
|
| 349 |
-
voice_preset_dropdown
|
| 350 |
-
]
|
| 351 |
-
).then(
|
| 352 |
-
fn=perform_speech_generation,
|
| 353 |
-
inputs=generation_inputs,
|
| 354 |
-
outputs=[audio_output_component]
|
| 355 |
-
).then(
|
| 356 |
-
fn=switch_to_idle_state,
|
| 357 |
-
inputs=[
|
| 358 |
-
text_input_component,
|
| 359 |
-
ui_state
|
| 360 |
-
],
|
| 361 |
-
outputs=[
|
| 362 |
-
generate_button,
|
| 363 |
-
stop_button,
|
| 364 |
-
clear_button,
|
| 365 |
-
ui_state
|
| 366 |
-
]
|
| 367 |
-
)
|
| 368 |
-
|
| 369 |
-
app.launch(
|
| 370 |
-
server_name="0.0.0.0",
|
| 371 |
-
max_file_size="1mb"
|
| 372 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
assets/css/styles.py
DELETED
|
@@ -1,161 +0,0 @@
|
|
| 1 |
-
#
|
| 2 |
-
# https://huggingface.co/spaces/D3vShoaib/pocket-tts
|
| 3 |
-
#
|
| 4 |
-
|
| 5 |
-
CSS = """
|
| 6 |
-
footer {
|
| 7 |
-
visibility: hidden;
|
| 8 |
-
}
|
| 9 |
-
|
| 10 |
-
.gradio-container {
|
| 11 |
-
max-width: 100% !important;
|
| 12 |
-
padding: 0 !important;
|
| 13 |
-
}
|
| 14 |
-
|
| 15 |
-
@media (min-width: 768px) {
|
| 16 |
-
.gradio-container {
|
| 17 |
-
padding-left: 2% !important;
|
| 18 |
-
padding-right: 2% !important;
|
| 19 |
-
}
|
| 20 |
-
}
|
| 21 |
-
|
| 22 |
-
.header-section {
|
| 23 |
-
text-align: left;
|
| 24 |
-
margin-bottom: 1.5rem;
|
| 25 |
-
}
|
| 26 |
-
|
| 27 |
-
.main-title {
|
| 28 |
-
color: #10b981;
|
| 29 |
-
font-weight: 800;
|
| 30 |
-
font-size: 1.8rem;
|
| 31 |
-
margin: 5px 0;
|
| 32 |
-
}
|
| 33 |
-
|
| 34 |
-
@media (min-width: 768px) {
|
| 35 |
-
.main-title {
|
| 36 |
-
font-size: 2.2rem;
|
| 37 |
-
}
|
| 38 |
-
}
|
| 39 |
-
|
| 40 |
-
.logo-container {
|
| 41 |
-
display: flex;
|
| 42 |
-
justify-content: flex-start;
|
| 43 |
-
align-items: center;
|
| 44 |
-
gap: 10px;
|
| 45 |
-
margin-bottom: 0;
|
| 46 |
-
}
|
| 47 |
-
|
| 48 |
-
.logo-img {
|
| 49 |
-
height: 40px;
|
| 50 |
-
border-radius: 8px;
|
| 51 |
-
}
|
| 52 |
-
|
| 53 |
-
@media (min-width: 768px) {
|
| 54 |
-
.logo-img {
|
| 55 |
-
height: 50px;
|
| 56 |
-
}
|
| 57 |
-
|
| 58 |
-
.logo-container {
|
| 59 |
-
gap: 15px;
|
| 60 |
-
}
|
| 61 |
-
}
|
| 62 |
-
|
| 63 |
-
.links-row {
|
| 64 |
-
display: flex;
|
| 65 |
-
flex-wrap: wrap;
|
| 66 |
-
justify-content: flex-start;
|
| 67 |
-
gap: 8px;
|
| 68 |
-
margin: 5px 0 10px 0;
|
| 69 |
-
font-size: 0.85rem;
|
| 70 |
-
}
|
| 71 |
-
|
| 72 |
-
@media (min-width: 768px) {
|
| 73 |
-
.links-row {
|
| 74 |
-
gap: 10px;
|
| 75 |
-
font-size: 0.9rem;
|
| 76 |
-
}
|
| 77 |
-
}
|
| 78 |
-
|
| 79 |
-
.links-row a {
|
| 80 |
-
color: #10b981;
|
| 81 |
-
text-decoration: none;
|
| 82 |
-
padding: 3px 10px;
|
| 83 |
-
border: 1px solid #10b981;
|
| 84 |
-
border-radius: 15px;
|
| 85 |
-
transition: all 0.2s;
|
| 86 |
-
white-space: nowrap;
|
| 87 |
-
}
|
| 88 |
-
|
| 89 |
-
.links-row a:hover {
|
| 90 |
-
background-color: #10b981;
|
| 91 |
-
color: white;
|
| 92 |
-
}
|
| 93 |
-
|
| 94 |
-
.disclaimer {
|
| 95 |
-
text-align: center;
|
| 96 |
-
font-size: 10px;
|
| 97 |
-
line-height: 1.4;
|
| 98 |
-
color: #9ca3af;
|
| 99 |
-
margin-top: 30px;
|
| 100 |
-
padding: 15px;
|
| 101 |
-
border-top: 1px solid currentColor;
|
| 102 |
-
}
|
| 103 |
-
|
| 104 |
-
@media (min-width: 768px) {
|
| 105 |
-
.disclaimer {
|
| 106 |
-
margin-top: 40px;
|
| 107 |
-
padding: 20px;
|
| 108 |
-
}
|
| 109 |
-
}
|
| 110 |
-
|
| 111 |
-
.disclaimer-copyright {
|
| 112 |
-
opacity: 0.8;
|
| 113 |
-
}
|
| 114 |
-
|
| 115 |
-
.disclaimer-warning {
|
| 116 |
-
font-size: 8px;
|
| 117 |
-
opacity: 0.7;
|
| 118 |
-
}
|
| 119 |
-
|
| 120 |
-
.accent-link {
|
| 121 |
-
color: #10b981;
|
| 122 |
-
text-decoration: none;
|
| 123 |
-
}
|
| 124 |
-
|
| 125 |
-
#voice-mode .wrap {
|
| 126 |
-
display: flex !important;
|
| 127 |
-
flex-direction: row !important;
|
| 128 |
-
width: 100% !important;
|
| 129 |
-
}
|
| 130 |
-
|
| 131 |
-
#voice-mode .wrap label {
|
| 132 |
-
flex: 1 !important;
|
| 133 |
-
justify-content: center !important;
|
| 134 |
-
text-align: center !important;
|
| 135 |
-
}
|
| 136 |
-
|
| 137 |
-
.example-prompts {
|
| 138 |
-
padding: 16px 0 8px 0;
|
| 139 |
-
}
|
| 140 |
-
|
| 141 |
-
.example-prompts h3 {
|
| 142 |
-
margin: 0 0 8px 0;
|
| 143 |
-
font-size: 1.1em;
|
| 144 |
-
}
|
| 145 |
-
|
| 146 |
-
.example-prompts p {
|
| 147 |
-
margin: 0;
|
| 148 |
-
opacity: 0.7;
|
| 149 |
-
font-size: 0.9em;
|
| 150 |
-
}
|
| 151 |
-
|
| 152 |
-
.character-count {
|
| 153 |
-
text-align: right;
|
| 154 |
-
padding: 4px 0;
|
| 155 |
-
}
|
| 156 |
-
|
| 157 |
-
.character-count span {
|
| 158 |
-
color: var(--body-text-color-subdued);
|
| 159 |
-
font-size: 0.85em;
|
| 160 |
-
}
|
| 161 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
assets/static/footer.py
DELETED
|
@@ -1,32 +0,0 @@
|
|
| 1 |
-
#
|
| 2 |
-
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
# SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
#
|
| 5 |
-
|
| 6 |
-
from config import (
|
| 7 |
-
COPYRIGHT_NAME,
|
| 8 |
-
COPYRIGHT_URL,
|
| 9 |
-
DESIGN_BY_NAME,
|
| 10 |
-
DESIGN_BY_URL
|
| 11 |
-
)
|
| 12 |
-
|
| 13 |
-
def FOOTER():
|
| 14 |
-
return f"""
|
| 15 |
-
<div class="disclaimer">
|
| 16 |
-
<br>
|
| 17 |
-
<p class="disclaimer-copyright">
|
| 18 |
-
Copyright © 2026
|
| 19 |
-
<a href="{COPYRIGHT_URL}" target="_blank" class="accent-link">
|
| 20 |
-
{COPYRIGHT_NAME}
|
| 21 |
-
</a>,
|
| 22 |
-
design inspired by
|
| 23 |
-
<a href="{DESIGN_BY_URL}" target="_blank" class="accent-link">
|
| 24 |
-
{DESIGN_BY_NAME}
|
| 25 |
-
</a>.
|
| 26 |
-
</p>
|
| 27 |
-
|
| 28 |
-
<p class="disclaimer-warning">
|
| 29 |
-
⚠️ This Space is not affiliated with Kyutai TTS and is provided for demonstration purposes only.
|
| 30 |
-
</p>
|
| 31 |
-
</div>
|
| 32 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
assets/static/header.py
DELETED
|
@@ -1,18 +0,0 @@
|
|
| 1 |
-
#
|
| 2 |
-
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
# SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
#
|
| 5 |
-
|
| 6 |
-
from config import HEADER_LINKS
|
| 7 |
-
|
| 8 |
-
def HEADER():
|
| 9 |
-
data = ""
|
| 10 |
-
|
| 11 |
-
for link in HEADER_LINKS:
|
| 12 |
-
data += f'<a href="{link["url"]}" target="_blank">{link["icon"]} {link["text"]}</a>\n'
|
| 13 |
-
|
| 14 |
-
return f"""
|
| 15 |
-
<div class="links-row">
|
| 16 |
-
{data}
|
| 17 |
-
</div>
|
| 18 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
assets/static/sidebar.py
DELETED
|
@@ -1,44 +0,0 @@
|
|
| 1 |
-
#
|
| 2 |
-
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
# SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
#
|
| 5 |
-
|
| 6 |
-
def SIDEBAR():
|
| 7 |
-
return f"""
|
| 8 |
-
<h1>
|
| 9 |
-
Audio Generation Playground part of the
|
| 10 |
-
<a href="https://huggingface.co/spaces/hadadxyz/ai" target="_blank" class="accent-link">
|
| 11 |
-
Demo Playground
|
| 12 |
-
</a>,
|
| 13 |
-
and the
|
| 14 |
-
<a href="https://huggingface.co/umint" target="_blank" class="accent-link">
|
| 15 |
-
UltimaX Intelligence
|
| 16 |
-
</a>
|
| 17 |
-
project.
|
| 18 |
-
</h1><br />
|
| 19 |
-
|
| 20 |
-
<p>
|
| 21 |
-
This Space runs the
|
| 22 |
-
<b>
|
| 23 |
-
<a href="https://huggingface.co/kyutai/pocket-tts" target="_blank" class="accent-link">
|
| 24 |
-
Pocket TTS
|
| 25 |
-
</a>
|
| 26 |
-
</b>
|
| 27 |
-
model from <b>Kyutai</b>.<br /><br />
|
| 28 |
-
|
| 29 |
-
A lightweight text-to-speech (TTS) application designed to run
|
| 30 |
-
efficiently on CPUs. Forget about the hassle of using GPUs and
|
| 31 |
-
web APIs serving TTS models.<br /><br />
|
| 32 |
-
|
| 33 |
-
Additionally, this Space uses a custom Docker image to
|
| 34 |
-
maximize model performance and is optimized for the
|
| 35 |
-
constraints of Hugging Face Spaces.
|
| 36 |
-
</p><br />
|
| 37 |
-
|
| 38 |
-
<p>
|
| 39 |
-
<b>Like this project?</b> You can support me by buying a
|
| 40 |
-
<a href="https://ko-fi.com/hadad" target="_blank" class="accent-link">
|
| 41 |
-
coffee
|
| 42 |
-
</a>.
|
| 43 |
-
</p>
|
| 44 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
assets/static/title.py
DELETED
|
@@ -1,15 +0,0 @@
|
|
| 1 |
-
#
|
| 2 |
-
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
# SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
#
|
| 5 |
-
|
| 6 |
-
from config import KYUTAI_LOGO_URL, POCKET_TTS_LOGO_URL, SPACE_INFO
|
| 7 |
-
|
| 8 |
-
def TITLE():
|
| 9 |
-
return f"""
|
| 10 |
-
<div class="logo-container">
|
| 11 |
-
<img src="{KYUTAI_LOGO_URL}" class="logo-img" alt="Kyutai Logo">
|
| 12 |
-
<img src="{POCKET_TTS_LOGO_URL}" class="logo-img" alt="PocketTTS Logo">
|
| 13 |
-
<h1 class='main-title'>{SPACE_INFO}</h1>
|
| 14 |
-
</div>
|
| 15 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
config.py
DELETED
|
@@ -1,126 +0,0 @@
|
|
| 1 |
-
#
|
| 2 |
-
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
# SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
#
|
| 5 |
-
|
| 6 |
-
import os
|
| 7 |
-
|
| 8 |
-
HF_TOKEN = os.getenv("HF_TOKEN", None)
|
| 9 |
-
|
| 10 |
-
AVAILABLE_VOICES = [
|
| 11 |
-
"alba",
|
| 12 |
-
"marius",
|
| 13 |
-
"javert",
|
| 14 |
-
"jean",
|
| 15 |
-
"fantine",
|
| 16 |
-
"cosette",
|
| 17 |
-
"eponine",
|
| 18 |
-
"azelma"
|
| 19 |
-
]
|
| 20 |
-
|
| 21 |
-
DEFAULT_VOICE = "alba"
|
| 22 |
-
DEFAULT_MODEL_VARIANT = "b6369a24"
|
| 23 |
-
DEFAULT_TEMPERATURE = 0.7
|
| 24 |
-
DEFAULT_LSD_DECODE_STEPS = 1
|
| 25 |
-
DEFAULT_EOS_THRESHOLD = -4.0
|
| 26 |
-
DEFAULT_NOISE_CLAMP = 0.0
|
| 27 |
-
DEFAULT_FRAMES_AFTER_EOS = 10
|
| 28 |
-
|
| 29 |
-
VOICE_MODE_PRESET = "Preset Voices"
|
| 30 |
-
VOICE_MODE_CLONE = "Voice Cloning"
|
| 31 |
-
|
| 32 |
-
VOICE_STATE_CACHE_MAXIMUM_SIZE = 8
|
| 33 |
-
VOICE_STATE_CACHE_CLEANUP_THRESHOLD = 4
|
| 34 |
-
|
| 35 |
-
BACKGROUND_CLEANUP_INTERVAL = 300
|
| 36 |
-
|
| 37 |
-
MAXIMUM_INPUT_LENGTH = 1000
|
| 38 |
-
|
| 39 |
-
TEMPORARY_FILE_LIFETIME_SECONDS = 7200
|
| 40 |
-
|
| 41 |
-
MAXIMUM_MEMORY_USAGE = 1 * 1024 * 1024 * 1024
|
| 42 |
-
MEMORY_WARNING_THRESHOLD = int(0.7 * MAXIMUM_MEMORY_USAGE)
|
| 43 |
-
MEMORY_CRITICAL_THRESHOLD = int(0.85 * MAXIMUM_MEMORY_USAGE)
|
| 44 |
-
MEMORY_CHECK_INTERVAL = 30
|
| 45 |
-
MEMORY_IDLE_TARGET = int(0.5 * MAXIMUM_MEMORY_USAGE)
|
| 46 |
-
|
| 47 |
-
MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES = 1 * 1024 * 1024
|
| 48 |
-
|
| 49 |
-
AUDIO_CONVERSION_QUEUE_TIMEOUT_SECONDS = 60
|
| 50 |
-
|
| 51 |
-
MODEL_LOAD_RETRY_ATTEMPTS = 3
|
| 52 |
-
|
| 53 |
-
SUPPORTED_AUDIO_EXTENSIONS = [
|
| 54 |
-
".wav",
|
| 55 |
-
".mp3",
|
| 56 |
-
".flac",
|
| 57 |
-
".ogg",
|
| 58 |
-
".m4a",
|
| 59 |
-
".aac",
|
| 60 |
-
".wma",
|
| 61 |
-
".aiff",
|
| 62 |
-
".aif",
|
| 63 |
-
".opus",
|
| 64 |
-
".webm",
|
| 65 |
-
".mp4",
|
| 66 |
-
".mkv",
|
| 67 |
-
".avi",
|
| 68 |
-
".mov",
|
| 69 |
-
".3gp"
|
| 70 |
-
]
|
| 71 |
-
|
| 72 |
-
AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES = {
|
| 73 |
-
"m4a": "M4A/AAC",
|
| 74 |
-
"aif": "AIFF",
|
| 75 |
-
"3gp": "3GP"
|
| 76 |
-
}
|
| 77 |
-
|
| 78 |
-
EXAMPLE_PROMPTS = [
|
| 79 |
-
{
|
| 80 |
-
"text": "The quick brown fox jumps over the lazy dog near the riverbank.",
|
| 81 |
-
"voice": "alba"
|
| 82 |
-
},
|
| 83 |
-
{
|
| 84 |
-
"text": "Welcome to the future of text to speech technology powered by artificial intelligence.",
|
| 85 |
-
"voice": "marius"
|
| 86 |
-
},
|
| 87 |
-
{
|
| 88 |
-
"text": "Technology continues to push the boundaries of what we thought was possible.",
|
| 89 |
-
"voice": "javert"
|
| 90 |
-
},
|
| 91 |
-
{
|
| 92 |
-
"text": "The weather today is absolutely beautiful and perfect for a relaxing walk outside.",
|
| 93 |
-
"voice": "fantine"
|
| 94 |
-
},
|
| 95 |
-
{
|
| 96 |
-
"text": "Science and innovation are transforming how we interact with the world around us.",
|
| 97 |
-
"voice": "jean"
|
| 98 |
-
}
|
| 99 |
-
]
|
| 100 |
-
|
| 101 |
-
KYUTAI_LOGO_URL = "https://cdn-avatars.huggingface.co/v1/production/uploads/6355a3c1805be5a8f30fea49/8xGdIOlfkopZfhbMitw_k.jpeg"
|
| 102 |
-
POCKET_TTS_LOGO_URL = "https://raw.githubusercontent.com/kyutai-labs/pocket-tts/refs/heads/main/docs/logo.png"
|
| 103 |
-
|
| 104 |
-
SPACE_INFO = "Pocket TTS"
|
| 105 |
-
|
| 106 |
-
HEADER_LINKS = [
|
| 107 |
-
{"icon": "🔊", "text": "Demo", "url": "https://kyutai.org/tts"},
|
| 108 |
-
{"icon": "🐱💻", "text": "GitHub", "url": "https://github.com/kyutai-labs/pocket-tts"},
|
| 109 |
-
{"icon": "🤗", "text": "Model Card", "url": "https://huggingface.co/kyutai/pocket-tts"},
|
| 110 |
-
{"icon": "🤗", "text": "Space", "url": "https://huggingface.co/spaces/hadadxyz/pocket-tts-hf-cpu-optimized"},
|
| 111 |
-
{"icon": "📄", "text": "Paper", "url": "https://arxiv.org/abs/2509.06926"},
|
| 112 |
-
{"icon": "📚", "text": "Docs", "url": "https://github.com/kyutai-labs/pocket-tts/tree/main/docs"},
|
| 113 |
-
]
|
| 114 |
-
|
| 115 |
-
COPYRIGHT_NAME = "Hadad Darajat"
|
| 116 |
-
COPYRIGHT_URL = "https://www.linkedin.com/in/hadadrjt"
|
| 117 |
-
|
| 118 |
-
DESIGN_BY_NAME = "D3vShoaib/pocket-tts"
|
| 119 |
-
DESIGN_BY_URL = f"https://huggingface.co/spaces/{DESIGN_BY_NAME}"
|
| 120 |
-
|
| 121 |
-
ACCELERATOR_SOCKET_PATH = "/app/pocket_tts_accelerator.sock"
|
| 122 |
-
ACCELERATOR_BINARY_PATH = "/app/bin/pocket_tts_accelerator"
|
| 123 |
-
ACCELERATOR_WORKER_THREADS = 1
|
| 124 |
-
ACCELERATOR_MEMORY_POOL_MB = 64
|
| 125 |
-
ACCELERATOR_LOG_PREFIX = "[ACCELERATOR]"
|
| 126 |
-
ACCELERATOR_ENABLED = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/accelerator/client.py
DELETED
|
@@ -1,583 +0,0 @@
|
|
| 1 |
-
#
|
| 2 |
-
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
# SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
#
|
| 5 |
-
|
| 6 |
-
import os
|
| 7 |
-
import socket
|
| 8 |
-
import struct
|
| 9 |
-
import subprocess
|
| 10 |
-
import tempfile
|
| 11 |
-
import threading
|
| 12 |
-
import sys
|
| 13 |
-
from typing import Optional, Tuple, Dict, Any
|
| 14 |
-
from config import (
|
| 15 |
-
ACCELERATOR_SOCKET_PATH,
|
| 16 |
-
ACCELERATOR_BINARY_PATH,
|
| 17 |
-
ACCELERATOR_WORKER_THREADS,
|
| 18 |
-
ACCELERATOR_MEMORY_POOL_MB,
|
| 19 |
-
ACCELERATOR_LOG_PREFIX
|
| 20 |
-
)
|
| 21 |
-
from ..core.state import (
|
| 22 |
-
accelerator_log_lock,
|
| 23 |
-
accelerator_log_thread,
|
| 24 |
-
accelerator_log_stop_event
|
| 25 |
-
)
|
| 26 |
-
|
| 27 |
-
PROTOCOL_MAGIC_NUMBER = 0x50545453
|
| 28 |
-
|
| 29 |
-
COMMAND_PING = 0
|
| 30 |
-
COMMAND_PROCESS_AUDIO = 1
|
| 31 |
-
COMMAND_CONVERT_TO_MONO = 2
|
| 32 |
-
COMMAND_CONVERT_TO_PCM = 3
|
| 33 |
-
COMMAND_RESAMPLE_AUDIO = 4
|
| 34 |
-
COMMAND_GET_MEMORY_STATS = 5
|
| 35 |
-
COMMAND_CLEAR_MEMORY_POOL = 6
|
| 36 |
-
COMMAND_SHUTDOWN = 7
|
| 37 |
-
|
| 38 |
-
RESPONSE_SUCCESS = 0
|
| 39 |
-
RESPONSE_ERROR_INVALID_COMMAND = 1
|
| 40 |
-
RESPONSE_ERROR_FILE_NOT_FOUND = 2
|
| 41 |
-
RESPONSE_ERROR_PROCESSING_FAILED = 3
|
| 42 |
-
RESPONSE_ERROR_MEMORY_ALLOCATION = 4
|
| 43 |
-
RESPONSE_ERROR_INTERNAL = 5
|
| 44 |
-
|
| 45 |
-
REQUEST_HEADER_FORMAT = "=IIII"
|
| 46 |
-
RESPONSE_HEADER_FORMAT = "=IIII"
|
| 47 |
-
REQUEST_HEADER_SIZE = struct.calcsize(REQUEST_HEADER_FORMAT)
|
| 48 |
-
RESPONSE_HEADER_SIZE = struct.calcsize(RESPONSE_HEADER_FORMAT)
|
| 49 |
-
|
| 50 |
-
PROCESS_AUDIO_REQUEST_FORMAT = "=512s512sII"
|
| 51 |
-
PROCESS_AUDIO_REQUEST_SIZE = struct.calcsize(PROCESS_AUDIO_REQUEST_FORMAT)
|
| 52 |
-
|
| 53 |
-
MEMORY_STATS_RESPONSE_FORMAT = "=QQQ"
|
| 54 |
-
MEMORY_STATS_RESPONSE_SIZE = struct.calcsize(MEMORY_STATS_RESPONSE_FORMAT)
|
| 55 |
-
|
| 56 |
-
accelerator_process_handle = None
|
| 57 |
-
accelerator_process_lock = threading.Lock()
|
| 58 |
-
request_id_counter = 0
|
| 59 |
-
request_id_lock = threading.Lock()
|
| 60 |
-
|
| 61 |
-
def log_accelerator_message(message: str):
|
| 62 |
-
with accelerator_log_lock:
|
| 63 |
-
print(f"{ACCELERATOR_LOG_PREFIX} {message}", flush=True)
|
| 64 |
-
|
| 65 |
-
def stream_accelerator_output(process_handle: subprocess.Popen):
|
| 66 |
-
try:
|
| 67 |
-
while not accelerator_log_stop_event.is_set():
|
| 68 |
-
if process_handle.poll() is not None:
|
| 69 |
-
break
|
| 70 |
-
|
| 71 |
-
if process_handle.stdout:
|
| 72 |
-
line = process_handle.stdout.readline()
|
| 73 |
-
|
| 74 |
-
if line:
|
| 75 |
-
decoded_line = line.decode("utf-8", errors="replace").rstrip()
|
| 76 |
-
|
| 77 |
-
if decoded_line:
|
| 78 |
-
log_accelerator_message(decoded_line)
|
| 79 |
-
|
| 80 |
-
except Exception as stream_error:
|
| 81 |
-
log_accelerator_message(f"Log stream error: {str(stream_error)}")
|
| 82 |
-
|
| 83 |
-
def stream_accelerator_stderr(process_handle: subprocess.Popen):
|
| 84 |
-
try:
|
| 85 |
-
while not accelerator_log_stop_event.is_set():
|
| 86 |
-
if process_handle.poll() is not None:
|
| 87 |
-
break
|
| 88 |
-
|
| 89 |
-
if process_handle.stderr:
|
| 90 |
-
line = process_handle.stderr.readline()
|
| 91 |
-
|
| 92 |
-
if line:
|
| 93 |
-
decoded_line = line.decode("utf-8", errors="replace").rstrip()
|
| 94 |
-
|
| 95 |
-
if decoded_line:
|
| 96 |
-
log_accelerator_message(f"[STDERR] {decoded_line}")
|
| 97 |
-
|
| 98 |
-
except Exception as stream_error:
|
| 99 |
-
log_accelerator_message(f"Stderr stream error: {str(stream_error)}")
|
| 100 |
-
|
| 101 |
-
class AcceleratorClient:
|
| 102 |
-
def __init__(self, socket_path: str = ACCELERATOR_SOCKET_PATH):
|
| 103 |
-
self.socket_path = socket_path
|
| 104 |
-
self.connection_timeout = 5.0
|
| 105 |
-
self.read_timeout = 30.0
|
| 106 |
-
|
| 107 |
-
def is_connected(self) -> bool:
|
| 108 |
-
try:
|
| 109 |
-
response = self.send_ping()
|
| 110 |
-
return response is not None and response.startswith(b"PONG")
|
| 111 |
-
|
| 112 |
-
except Exception:
|
| 113 |
-
return False
|
| 114 |
-
|
| 115 |
-
def send_ping(self) -> Optional[bytes]:
|
| 116 |
-
return self._send_command(COMMAND_PING, b"")
|
| 117 |
-
|
| 118 |
-
def process_audio(
|
| 119 |
-
self,
|
| 120 |
-
input_file_path: str,
|
| 121 |
-
output_file_path: str,
|
| 122 |
-
target_sample_rate: int = 0,
|
| 123 |
-
options_flags: int = 0
|
| 124 |
-
) -> Tuple[bool, str]:
|
| 125 |
-
payload = self._pack_process_audio_request(
|
| 126 |
-
input_file_path,
|
| 127 |
-
output_file_path,
|
| 128 |
-
target_sample_rate,
|
| 129 |
-
options_flags
|
| 130 |
-
)
|
| 131 |
-
|
| 132 |
-
log_accelerator_message(f"Processing audio: {input_file_path} -> {output_file_path}")
|
| 133 |
-
|
| 134 |
-
response = self._send_command(COMMAND_PROCESS_AUDIO, payload)
|
| 135 |
-
|
| 136 |
-
if response is None:
|
| 137 |
-
log_accelerator_message("Failed to communicate with accelerator for process_audio")
|
| 138 |
-
return False, "Failed to communicate with accelerator"
|
| 139 |
-
|
| 140 |
-
response_string = response.decode("utf-8", errors="ignore")
|
| 141 |
-
|
| 142 |
-
if response_string.startswith("SUCCESS:"):
|
| 143 |
-
log_accelerator_message(f"Audio processing succeeded: {response_string[8:]}")
|
| 144 |
-
return True, response_string[8:]
|
| 145 |
-
|
| 146 |
-
elif response_string.startswith("ERROR:"):
|
| 147 |
-
log_accelerator_message(f"Audio processing failed: {response_string[6:]}")
|
| 148 |
-
return False, response_string[6:]
|
| 149 |
-
|
| 150 |
-
else:
|
| 151 |
-
log_accelerator_message(f"Audio processing unknown response: {response_string}")
|
| 152 |
-
return False, response_string
|
| 153 |
-
|
| 154 |
-
def convert_to_mono(
    self,
    input_file_path: str,
    output_file_path: str
) -> Tuple[bool, str]:
    """Send a CONVERT_TO_MONO command for the given input/output paths.

    The request carries 0 for both the sample-rate and flags fields.

    Returns:
        ``(True, detail)`` for a ``SUCCESS:`` reply, ``(False, detail)`` for
        an ``ERROR:`` reply, ``(False, <full reply text>)`` otherwise, and
        ``(False, "Failed to communicate with accelerator")`` when no reply
        is received.
    """
    request_body = self._pack_process_audio_request(
        input_file_path,
        output_file_path,
        0,
        0
    )

    log_accelerator_message(f"Converting to mono: {input_file_path} -> {output_file_path}")

    reply = self._send_command(COMMAND_CONVERT_TO_MONO, request_body)

    if reply is None:
        log_accelerator_message("Failed to communicate with accelerator for convert_to_mono")
        return False, "Failed to communicate with accelerator"

    # Undecodable bytes are dropped rather than raising.
    reply_text = reply.decode("utf-8", errors="ignore")

    if reply_text.startswith("SUCCESS:"):
        detail = reply_text[len("SUCCESS:"):]
        log_accelerator_message(f"Mono conversion succeeded: {detail}")
        return True, detail

    if reply_text.startswith("ERROR:"):
        detail = reply_text[len("ERROR:"):]
        log_accelerator_message(f"Mono conversion failed: {detail}")
        return False, detail

    log_accelerator_message(f"Mono conversion unknown response: {reply_text}")
    return False, reply_text
|
| 187 |
-
|
| 188 |
-
def convert_to_pcm(
    self,
    input_file_path: str,
    output_file_path: str
) -> Tuple[bool, str]:
    """Send a CONVERT_TO_PCM command for the given input/output paths.

    The request carries 0 for both the sample-rate and flags fields.

    Returns:
        ``(True, detail)`` for a ``SUCCESS:`` reply, ``(False, detail)`` for
        an ``ERROR:`` reply, ``(False, <full reply text>)`` otherwise, and
        ``(False, "Failed to communicate with accelerator")`` when no reply
        is received.
    """
    request_body = self._pack_process_audio_request(
        input_file_path,
        output_file_path,
        0,
        0
    )

    log_accelerator_message(f"Converting to PCM: {input_file_path} -> {output_file_path}")

    reply = self._send_command(COMMAND_CONVERT_TO_PCM, request_body)

    if reply is None:
        log_accelerator_message("Failed to communicate with accelerator for convert_to_pcm")
        return False, "Failed to communicate with accelerator"

    # Undecodable bytes are dropped rather than raising.
    reply_text = reply.decode("utf-8", errors="ignore")

    if reply_text.startswith("SUCCESS:"):
        detail = reply_text[len("SUCCESS:"):]
        log_accelerator_message(f"PCM conversion succeeded: {detail}")
        return True, detail

    if reply_text.startswith("ERROR:"):
        detail = reply_text[len("ERROR:"):]
        log_accelerator_message(f"PCM conversion failed: {detail}")
        return False, detail

    log_accelerator_message(f"PCM conversion unknown response: {reply_text}")
    return False, reply_text
|
| 221 |
-
|
| 222 |
-
def resample_audio(
    self,
    input_file_path: str,
    output_file_path: str,
    target_sample_rate: int
) -> Tuple[bool, str]:
    """Send a RESAMPLE_AUDIO command for the given paths and sample rate.

    The request carries ``target_sample_rate`` and 0 for the flags field.

    Returns:
        ``(True, detail)`` for a ``SUCCESS:`` reply, ``(False, detail)`` for
        an ``ERROR:`` reply, ``(False, <full reply text>)`` otherwise, and
        ``(False, "Failed to communicate with accelerator")`` when no reply
        is received.
    """
    request_body = self._pack_process_audio_request(
        input_file_path,
        output_file_path,
        target_sample_rate,
        0
    )

    log_accelerator_message(f"Resampling audio to {target_sample_rate}Hz: {input_file_path} -> {output_file_path}")

    reply = self._send_command(COMMAND_RESAMPLE_AUDIO, request_body)

    if reply is None:
        log_accelerator_message("Failed to communicate with accelerator for resample_audio")
        return False, "Failed to communicate with accelerator"

    # Undecodable bytes are dropped rather than raising.
    reply_text = reply.decode("utf-8", errors="ignore")

    if reply_text.startswith("SUCCESS:"):
        detail = reply_text[len("SUCCESS:"):]
        log_accelerator_message(f"Resampling succeeded: {detail}")
        return True, detail

    if reply_text.startswith("ERROR:"):
        detail = reply_text[len("ERROR:"):]
        log_accelerator_message(f"Resampling failed: {detail}")
        return False, detail

    log_accelerator_message(f"Resampling unknown response: {reply_text}")
    return False, reply_text
|
| 256 |
-
|
| 257 |
-
def get_memory_stats(self) -> Optional[Dict[str, int]]:
    """Query the accelerator for its memory-pool counters.

    Returns:
        A dict with ``total_allocated_bytes``, ``total_used_bytes`` and
        ``block_count``, unpacked from the first
        ``MEMORY_STATS_RESPONSE_SIZE`` bytes of the reply; None when there is
        no reply or it is shorter than that size.
    """
    raw_reply = self._send_command(COMMAND_GET_MEMORY_STATS, b"")

    if raw_reply is None or len(raw_reply) < MEMORY_STATS_RESPONSE_SIZE:
        log_accelerator_message("Failed to get memory stats from accelerator")
        return None

    # Trailing bytes beyond the fixed-size record are ignored.
    allocated, used, blocks = struct.unpack(
        MEMORY_STATS_RESPONSE_FORMAT,
        raw_reply[:MEMORY_STATS_RESPONSE_SIZE]
    )

    log_accelerator_message(f"Memory stats: allocated={allocated}, used={used}, blocks={blocks}")

    return {
        "total_allocated_bytes": allocated,
        "total_used_bytes": used,
        "block_count": blocks
    }
|
| 278 |
-
|
| 279 |
-
def clear_memory_pool(self) -> bool:
    """Send a CLEAR_MEMORY_POOL command with an empty payload.

    Returns:
        True when ``_send_command`` returned anything other than None.
    """
    log_accelerator_message("Clearing accelerator memory pool")

    if self._send_command(COMMAND_CLEAR_MEMORY_POOL, b"") is None:
        log_accelerator_message("Failed to clear memory pool")
        return False

    log_accelerator_message("Memory pool cleared successfully")
    return True
|
| 288 |
-
|
| 289 |
-
def shutdown_accelerator(self) -> bool:
    """Send a SHUTDOWN command with an empty payload.

    Returns:
        True when ``_send_command`` returned anything other than None.
    """
    log_accelerator_message("Sending shutdown command to accelerator")
    reply = self._send_command(COMMAND_SHUTDOWN, b"")
    acknowledged = reply is not None
    return acknowledged
|
| 293 |
-
|
| 294 |
-
def _get_next_request_id(self) -> int:
    """Increment the module-level request counter under its lock and return it."""
    global request_id_counter

    with request_id_lock:
        next_id = request_id_counter + 1
        request_id_counter = next_id

    return next_id
|
| 300 |
-
|
| 301 |
-
def _pack_process_audio_request(
    self,
    input_path: str,
    output_path: str,
    target_sample_rate: int,
    options_flags: int
) -> bytes:
    """Serialize an audio request using ``PROCESS_AUDIO_REQUEST_FORMAT``.

    Each path is UTF-8 encoded, cut to at most 511 bytes, and NUL-padded to
    exactly 512 bytes, so at least one terminating NUL is always present.
    The two integers are passed to ``struct.pack`` unchanged.
    """
    def as_fixed_field(path: str) -> bytes:
        # 511 payload bytes max + NUL padding up to the 512-byte field.
        return path.encode("utf-8")[:511].ljust(512, b"\x00")

    input_field = as_fixed_field(input_path)
    output_field = as_fixed_field(output_path)

    return struct.pack(
        PROCESS_AUDIO_REQUEST_FORMAT,
        input_field,
        output_field,
        target_sample_rate,
        options_flags
    )
|
| 321 |
-
|
| 322 |
-
def _send_command(
    self,
    command_type: int,
    payload: bytes
) -> Optional[bytes]:
    """Perform one request/response exchange over a fresh Unix stream socket.

    A request header (magic number, command type, payload length, request id)
    is sent, followed by the payload if it is non-empty. The response header
    is then read and validated against the magic number and request id before
    the response payload is read.

    Args:
        command_type: Command code placed in the request header.
        payload: Request body; may be empty.

    Returns:
        The response payload bytes (possibly ``b""`` on success). None when
        the connection, send, or receive fails, when the response header is
        invalid, or when a non-success status arrives with an empty payload.
    """
    try:
        # The context manager closes the socket on every exit path, including
        # exceptions raised by connect/sendall/struct, which previously leaked it.
        with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as client_socket:
            client_socket.settimeout(self.connection_timeout)
            client_socket.connect(self.socket_path)

            request_id = self._get_next_request_id()

            request_header = struct.pack(
                REQUEST_HEADER_FORMAT,
                PROTOCOL_MAGIC_NUMBER,
                command_type,
                len(payload),
                request_id
            )

            client_socket.sendall(request_header)

            if payload:
                client_socket.sendall(payload)

            # Switch from the connect timeout to the read timeout for the reply.
            client_socket.settimeout(self.read_timeout)

            response_header_data = self._receive_exactly(client_socket, RESPONSE_HEADER_SIZE)

            if response_header_data is None:
                return None

            magic_number, status_code, payload_size, response_request_id = struct.unpack(
                RESPONSE_HEADER_FORMAT,
                response_header_data
            )

            if magic_number != PROTOCOL_MAGIC_NUMBER:
                log_accelerator_message(f"Invalid magic number in response: {magic_number}")
                return None

            if response_request_id != request_id:
                log_accelerator_message(f"Request ID mismatch: expected {request_id}, got {response_request_id}")
                return None

            response_payload = b""

            if payload_size > 0:
                response_payload = self._receive_exactly(client_socket, payload_size)

                if response_payload is None:
                    return None

        if status_code != RESPONSE_SUCCESS:
            # A failure status still surfaces its payload (callers parse the
            # text); an empty one is reported as "no response".
            return response_payload if response_payload else None

        return response_payload

    except socket.timeout:
        log_accelerator_message("Socket timeout while communicating with accelerator")
        return None

    except socket.error as socket_err:
        log_accelerator_message(f"Socket error: {str(socket_err)}")
        return None

    except Exception as general_error:
        log_accelerator_message(f"Unexpected error: {str(general_error)}")
        return None
|
| 397 |
-
|
| 398 |
-
def _receive_exactly(
|
| 399 |
-
self,
|
| 400 |
-
client_socket: socket.socket,
|
| 401 |
-
num_bytes: int
|
| 402 |
-
) -> Optional[bytes]:
|
| 403 |
-
received_data = b""
|
| 404 |
-
remaining_bytes = num_bytes
|
| 405 |
-
|
| 406 |
-
while remaining_bytes > 0:
|
| 407 |
-
try:
|
| 408 |
-
chunk = client_socket.recv(remaining_bytes)
|
| 409 |
-
|
| 410 |
-
if not chunk:
|
| 411 |
-
return None
|
| 412 |
-
|
| 413 |
-
received_data += chunk
|
| 414 |
-
remaining_bytes -= len(chunk)
|
| 415 |
-
|
| 416 |
-
except socket.timeout:
|
| 417 |
-
return None
|
| 418 |
-
|
| 419 |
-
except socket.error:
|
| 420 |
-
return None
|
| 421 |
-
|
| 422 |
-
return received_data
|
| 423 |
-
|
| 424 |
-
def is_accelerator_available() -> bool:
    """Return True when the socket path exists and a new client gets a PONG.

    The filesystem check comes first so no connection is attempted when the
    socket file at ``ACCELERATOR_SOCKET_PATH`` is absent.
    """
    socket_present = os.path.exists(ACCELERATOR_SOCKET_PATH)
    if socket_present:
        return AcceleratorClient().is_connected()

    return False
|
| 430 |
-
|
| 431 |
-
def start_accelerator_daemon() -> bool:
    """Launch the accelerator binary and wait for it to answer pings.

    If a previously started process is still running, returns True without
    launching another. Otherwise the binary is spawned in a new session with
    its stdout/stderr piped to two daemon reader threads, and readiness is
    polled up to 50 times.

    Returns:
        True when the daemon is running and responding; False when the binary
        is missing, the launch raises, or the daemon never responds.
    """
    global accelerator_process_handle

    # Function-scope import: only this readiness loop needs ``time``.
    import time

    from ..core import state as global_state

    with accelerator_process_lock:
        if accelerator_process_handle is not None:
            if accelerator_process_handle.poll() is None:
                return True

        if not os.path.exists(ACCELERATOR_BINARY_PATH):
            log_accelerator_message(f"Accelerator binary not found: {ACCELERATOR_BINARY_PATH}")
            return False

        try:
            log_accelerator_message("Starting accelerator daemon...")

            global_state.accelerator_log_stop_event.clear()

            accelerator_process_handle = subprocess.Popen(
                [
                    ACCELERATOR_BINARY_PATH,
                    "--socket", ACCELERATOR_SOCKET_PATH,
                    "--threads", str(ACCELERATOR_WORKER_THREADS),
                    "--memory", str(ACCELERATOR_MEMORY_POOL_MB)
                ],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                start_new_session=True
            )

            stdout_thread = threading.Thread(
                target=stream_accelerator_output,
                args=(accelerator_process_handle,),
                daemon=True,
                name="AcceleratorStdoutThread"
            )
            stdout_thread.start()

            stderr_thread = threading.Thread(
                target=stream_accelerator_stderr,
                args=(accelerator_process_handle,),
                daemon=True,
                name="AcceleratorStderrThread"
            )
            stderr_thread.start()

            for attempt_index in range(50):
                if is_accelerator_available():
                    log_accelerator_message("Accelerator daemon started and responding")
                    return True

                # Stop polling if the child has already exited.
                if accelerator_process_handle.poll() is not None:
                    break

                # Pause between attempts; without a delay all 50 probes finish
                # before the daemon has had time to create its socket.
                time.sleep(0.1)

            available = is_accelerator_available()
            if available:
                log_accelerator_message("Accelerator daemon started successfully")
            else:
                log_accelerator_message("Accelerator daemon started but not responding")

            return available

        except Exception as start_error:
            log_accelerator_message(f"Failed to start accelerator daemon: {str(start_error)}")
            return False
|
| 494 |
-
|
| 495 |
-
def stop_accelerator_daemon() -> bool:
    """Ask the accelerator to shut down, then terminate the child process.

    Sets the log stop event, sends a shutdown command if the daemon responds,
    then terminates the tracked process (killing it if it does not exit
    within 5 seconds) and clears the module-level handle.

    Returns:
        Always True.
    """
    global accelerator_process_handle

    from ..core import state as global_state

    with accelerator_process_lock:
        global_state.accelerator_log_stop_event.set()

        if is_accelerator_available():
            try:
                log_accelerator_message("Sending shutdown command to accelerator...")
                AcceleratorClient().shutdown_accelerator()

            except Exception as shutdown_error:
                log_accelerator_message(f"Error during shutdown command: {str(shutdown_error)}")

        process = accelerator_process_handle

        if process is not None:
            still_running = process.poll() is None

            if still_running:
                try:
                    log_accelerator_message("Terminating accelerator process...")
                    process.terminate()
                    process.wait(timeout=5)
                    log_accelerator_message("Accelerator process terminated")

                except subprocess.TimeoutExpired:
                    # Graceful termination timed out; force the process down.
                    log_accelerator_message("Accelerator process did not terminate, killing...")
                    process.kill()
                    process.wait()
                    log_accelerator_message("Accelerator process killed")

            accelerator_process_handle = None

        return True
|
| 529 |
-
|
| 530 |
-
def process_audio_with_accelerator(
    input_file_path: str,
    output_file_path: str
) -> Tuple[bool, str]:
    """Run ``AcceleratorClient.process_audio`` if the accelerator responds.

    Returns:
        The client's ``(success, message)`` tuple, or
        ``(False, "Accelerator not available")`` when the availability check fails.
    """
    if is_accelerator_available():
        return AcceleratorClient().process_audio(input_file_path, output_file_path)

    return False, "Accelerator not available"
|
| 539 |
-
|
| 540 |
-
def convert_to_mono_with_accelerator(
    input_file_path: str,
    output_file_path: str
) -> Tuple[bool, str]:
    """Run ``AcceleratorClient.convert_to_mono`` if the accelerator responds.

    Returns:
        The client's ``(success, message)`` tuple, or
        ``(False, "Accelerator not available")`` when the availability check fails.
    """
    if is_accelerator_available():
        return AcceleratorClient().convert_to_mono(input_file_path, output_file_path)

    return False, "Accelerator not available"
|
| 549 |
-
|
| 550 |
-
def convert_to_pcm_with_accelerator(
    input_file_path: str,
    output_file_path: str
) -> Tuple[bool, str]:
    """Run ``AcceleratorClient.convert_to_pcm`` if the accelerator responds.

    Returns:
        The client's ``(success, message)`` tuple, or
        ``(False, "Accelerator not available")`` when the availability check fails.
    """
    if is_accelerator_available():
        return AcceleratorClient().convert_to_pcm(input_file_path, output_file_path)

    return False, "Accelerator not available"
|
| 559 |
-
|
| 560 |
-
def resample_audio_with_accelerator(
    input_file_path: str,
    output_file_path: str,
    target_sample_rate: int
) -> Tuple[bool, str]:
    """Run ``AcceleratorClient.resample_audio`` if the accelerator responds.

    Returns:
        The client's ``(success, message)`` tuple, or
        ``(False, "Accelerator not available")`` when the availability check fails.
    """
    if is_accelerator_available():
        return AcceleratorClient().resample_audio(
            input_file_path,
            output_file_path,
            target_sample_rate
        )

    return False, "Accelerator not available"
|
| 570 |
-
|
| 571 |
-
def get_accelerator_memory_stats() -> Optional[Dict[str, int]]:
    """Return the accelerator's memory stats, or None if it is unavailable."""
    if is_accelerator_available():
        return AcceleratorClient().get_memory_stats()

    return None
|
| 577 |
-
|
| 578 |
-
def clear_accelerator_memory_pool() -> bool:
    """Clear the accelerator memory pool; False if the accelerator is unavailable."""
    if is_accelerator_available():
        return AcceleratorClient().clear_memory_pool()

    return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/audio/converter.py
DELETED
|
@@ -1,344 +0,0 @@
|
|
| 1 |
-
#
|
| 2 |
-
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
# SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
#
|
| 5 |
-
|
| 6 |
-
import os
|
| 7 |
-
import time
|
| 8 |
-
import tempfile
|
| 9 |
-
import numpy as np
|
| 10 |
-
import scipy.io.wavfile
|
| 11 |
-
from config import AUDIO_CONVERSION_QUEUE_TIMEOUT_SECONDS
|
| 12 |
-
from ..core.state import (
|
| 13 |
-
temporary_files_registry,
|
| 14 |
-
temporary_files_lock,
|
| 15 |
-
audio_conversion_semaphore,
|
| 16 |
-
increment_audio_conversion_active,
|
| 17 |
-
decrement_audio_conversion_active,
|
| 18 |
-
increment_audio_conversion_waiting,
|
| 19 |
-
decrement_audio_conversion_waiting,
|
| 20 |
-
is_audio_conversion_queue_busy
|
| 21 |
-
)
|
| 22 |
-
from ..core.memory import trigger_background_cleanup_check
|
| 23 |
-
from ..accelerator.client import (
|
| 24 |
-
is_accelerator_available,
|
| 25 |
-
convert_to_pcm_with_accelerator,
|
| 26 |
-
process_audio_with_accelerator,
|
| 27 |
-
log_accelerator_message
|
| 28 |
-
)
|
| 29 |
-
|
| 30 |
-
class AudioConversionQueueError(Exception):
    """Base class for errors raised while acquiring an audio conversion slot."""
|
| 32 |
-
|
| 33 |
-
class AudioConversionQueueBusyError(AudioConversionQueueError):
    """Raised when a conversion slot is taken and the caller chose not to wait."""
|
| 35 |
-
|
| 36 |
-
class AudioConversionQueueTimeoutError(AudioConversionQueueError):
    """Raised when waiting for a conversion slot exceeds the queue timeout."""
|
| 38 |
-
|
| 39 |
-
def convert_audio_data_to_pcm_int16(audio_data):
    """Convert a NumPy sample array to 16-bit signed integer samples.

    Conversion rules by input dtype:
      * any floating dtype: clip to [-1.0, 1.0], scale by 32767, truncate;
      * int32: keep the top 16 bits (arithmetic shift right by 16);
      * uint8: re-centre around zero (subtract 128) and scale by 256;
      * int16: returned unchanged (same object);
      * int64: keep the top 16 bits (arithmetic shift right by 48);
      * anything else: plain ``astype(np.int16)`` cast.

    Args:
        audio_data: NumPy array of samples.

    Returns:
        An ``np.int16`` array with the same shape as the input.
    """
    sample_dtype = audio_data.dtype

    if sample_dtype == np.int16:
        return audio_data

    # Cover every floating dtype, not only float32/float64: float16,
    # longdouble and non-native-endian floats previously fell through to the
    # raw cast below, which collapses [-1, 1] audio to near silence.
    if np.issubdtype(sample_dtype, np.floating):
        if sample_dtype.itemsize < 4:
            # float16 cannot represent 32767 exactly (it rounds to 32768 and
            # would overflow int16), so scale in float32 instead.
            audio_data = audio_data.astype(np.float32)
        audio_data_clipped = np.clip(audio_data, -1.0, 1.0)
        return (audio_data_clipped * 32767).astype(np.int16)

    if sample_dtype == np.int32:
        return (audio_data >> 16).astype(np.int16)

    if sample_dtype == np.uint8:
        return ((audio_data.astype(np.int16) - 128) * 256).astype(np.int16)

    if sample_dtype == np.int64:
        return (audio_data >> 48).astype(np.int16)

    return audio_data.astype(np.int16)
|
| 61 |
-
|
| 62 |
-
def convert_stereo_to_mono(audio_data):
    """Collapse a 2-D sample array to one channel by averaging.

    Only 2-D input is changed; any other rank is returned as-is. A 2-D array
    with more rows than columns is transposed first, so the shorter axis is
    treated as the channel axis. With more than one channel the channels are
    averaged and cast back to the input dtype; with a single channel that
    row is returned.
    """
    if audio_data.ndim != 2:
        return audio_data

    row_count, column_count = audio_data.shape
    channels_first = audio_data.T if row_count > column_count else audio_data

    if channels_first.shape[0] <= 1:
        return channels_first[0]

    averaged = np.mean(channels_first, axis=0)
    return averaged.astype(channels_first.dtype)
|
| 77 |
-
|
| 78 |
-
def register_temporary_file(file_path):
    """Record ``file_path`` in the temporary-file registry with the current time.

    The registry entry is written while holding ``temporary_files_lock``;
    afterwards a background cleanup check is triggered.
    """
    registered_at = time.time()

    with temporary_files_lock:
        temporary_files_registry[file_path] = registered_at

    # NOTE(review): the source's indentation is lost; this call is assumed to
    # run after the lock is released. Confirm against the original file.
    trigger_background_cleanup_check()
|
| 82 |
-
|
| 83 |
-
def acquire_audio_conversion_slot(wait_for_slot=True):
    """Acquire the audio conversion semaphore and mark a conversion as active.

    If the queue already reports busy, or an immediate non-blocking acquire
    fails, the caller either gets ``AudioConversionQueueBusyError`` (when
    ``wait_for_slot`` is False) or blocks for up to
    ``AUDIO_CONVERSION_QUEUE_TIMEOUT_SECONDS`` while counted as waiting.

    Args:
        wait_for_slot: Whether to block for a slot instead of failing fast.

    Raises:
        AudioConversionQueueBusyError: Slot unavailable and waiting disabled.
        AudioConversionQueueTimeoutError: The blocking wait timed out.
    """
    # Short-circuit keeps the original order: the non-blocking acquire is only
    # attempted when the queue does not already report busy.
    must_wait = is_audio_conversion_queue_busy() or not audio_conversion_semaphore.acquire(blocking=False)

    if must_wait:
        if not wait_for_slot:
            raise AudioConversionQueueBusyError(
                "Audio conversion is currently in progress for another user. Please wait a moment and try again."
            )

        increment_audio_conversion_waiting()

        try:
            got_slot = audio_conversion_semaphore.acquire(
                blocking=True,
                timeout=AUDIO_CONVERSION_QUEUE_TIMEOUT_SECONDS
            )

            if not got_slot:
                raise AudioConversionQueueTimeoutError(
                    "Audio conversion queue timed out. The server is busy processing other requests. Please try again in a moment."
                )

        finally:
            # The waiting counter is restored whether or not the wait succeeded.
            decrement_audio_conversion_waiting()

    increment_audio_conversion_active()
|
| 132 |
-
|
| 133 |
-
def release_audio_conversion_slot():
    """Undo ``acquire_audio_conversion_slot``.

    Decrements the active-conversion counter, then releases the semaphore.
    """
    decrement_audio_conversion_active()
    audio_conversion_semaphore.release()
|
| 136 |
-
|
| 137 |
-
def convert_wav_file_to_pcm_format_with_accelerator(input_path):
    """Convert a WAV file to PCM via the accelerator, writing to a new temp file.

    Returns:
        ``(output_path, None)`` on success, with the output registered as a
        temporary file; ``(None, message)`` on failure, after a best-effort
        removal of the empty output file.
    """
    # Reserve a unique output path; the handle is closed before the path is used.
    with tempfile.NamedTemporaryFile(suffix="_accel_pcm_converted.wav", delete=False) as reserved:
        output_path = reserved.name

    succeeded, message = convert_to_pcm_with_accelerator(input_path, output_path)

    if not succeeded:
        if os.path.exists(output_path):
            try:
                os.remove(output_path)

            except Exception:
                # Cleanup is best effort; the conversion error is what matters.
                pass

        return None, message

    register_temporary_file(output_path)
    return output_path, None
|
| 157 |
-
|
| 158 |
-
def convert_wav_file_to_pcm_format(input_path):
    """Convert a WAV file to mono 16-bit PCM, preferring the accelerator.

    When the accelerator is available it is tried first; if it fails, the
    conversion falls back to reading the file with SciPy, down-mixing to one
    channel, converting to int16, and writing a new temporary WAV file.

    Args:
        input_path: Path of the WAV file to convert.

    Returns:
        ``(output_path, None)`` on success (output registered as a temporary
        file) or ``(None, error_message)`` on failure.
    """
    if is_accelerator_available():
        log_accelerator_message(f"Using accelerator for PCM conversion: {input_path}")
        accelerated_result, accelerated_error = convert_wav_file_to_pcm_format_with_accelerator(input_path)
        if accelerated_result is not None:
            return accelerated_result, None
        log_accelerator_message(f"Accelerator PCM conversion failed, falling back to Python: {accelerated_error}")

    try:
        sample_rate, audio_data = scipy.io.wavfile.read(input_path)

        if len(audio_data.shape) > 1:
            audio_data = convert_stereo_to_mono(audio_data)

        audio_data_pcm = convert_audio_data_to_pcm_int16(audio_data)

        # Close the handle right away: only the unique name is needed, and an
        # unclosed handle leaks a descriptor per call.
        with tempfile.NamedTemporaryFile(suffix="_pcm_converted.wav", delete=False) as output_file:
            output_path = output_file.name

        try:
            scipy.io.wavfile.write(output_path, sample_rate, audio_data_pcm)

        except Exception:
            # Do not leave an unregistered temp file behind on a failed write.
            try:
                os.remove(output_path)
            except OSError:
                pass
            raise

        register_temporary_file(output_path)

        return output_path, None

    except Exception as conversion_error:
        return None, f"Failed to convert WAV to PCM format: {str(conversion_error)}"
|
| 183 |
-
|
| 184 |
-
def convert_audio_using_pydub(input_path, target_sample_rate=None):
    """Convert an audio file to mono 16-bit WAV using pydub.

    Args:
        input_path: Path of the audio file to convert.
        target_sample_rate: Optional frame rate to resample to; None leaves
            the rate unchanged.

    Returns:
        ``(output_path, None)`` on success (output registered as a temporary
        file). On failure ``(None, reason)`` where reason is
        ``"pydub_library_not_available"`` if pydub cannot be imported,
        ``"ffmpeg_not_available"`` if the error text mentions ffmpeg/ffprobe,
        or a descriptive message otherwise.
    """
    try:
        from pydub import AudioSegment

        audio_segment = AudioSegment.from_file(input_path)

        audio_segment = audio_segment.set_channels(1)
        audio_segment = audio_segment.set_sample_width(2)

        if target_sample_rate is not None:
            audio_segment = audio_segment.set_frame_rate(target_sample_rate)

        # Close the handle right away: only the unique name is needed, and an
        # unclosed handle leaks a descriptor per call.
        with tempfile.NamedTemporaryFile(suffix="_pydub_converted.wav", delete=False) as output_file:
            output_path = output_file.name

        try:
            audio_segment.export(output_path, format="wav")

        except Exception:
            # Do not leave an unregistered temp file behind on a failed export.
            try:
                os.remove(output_path)
            except OSError:
                pass
            raise

        register_temporary_file(output_path)

        return output_path, None

    except ImportError:
        return None, "pydub_library_not_available"

    except Exception as conversion_error:
        error_message = str(conversion_error)
        if "ffmpeg" in error_message.lower() or "ffprobe" in error_message.lower():
            return None, "ffmpeg_not_available"
        return None, f"Failed to convert audio using pydub: {error_message}"
|
| 211 |
-
|
| 212 |
-
def convert_audio_using_soundfile(input_path):
    """Convert an audio file to mono 16-bit WAV using the soundfile library.

    The file is read as float32, multi-channel data is averaged along axis 1,
    converted to int16, and written to a new temporary WAV file.

    Args:
        input_path: Path of the audio file to convert.

    Returns:
        ``(output_path, None)`` on success (output registered as a temporary
        file); ``(None, "soundfile_library_not_available")`` if soundfile
        cannot be imported; ``(None, message)`` for any other failure.
    """
    try:
        import soundfile

        audio_data, sample_rate = soundfile.read(input_path, dtype='float32')

        if len(audio_data.shape) > 1:
            audio_data = np.mean(audio_data, axis=1)

        audio_data_pcm = convert_audio_data_to_pcm_int16(audio_data)

        # Close the handle right away: only the unique name is needed, and an
        # unclosed handle leaks a descriptor per call.
        with tempfile.NamedTemporaryFile(suffix="_soundfile_converted.wav", delete=False) as output_file:
            output_path = output_file.name

        try:
            scipy.io.wavfile.write(output_path, sample_rate, audio_data_pcm)

        except Exception:
            # Do not leave an unregistered temp file behind on a failed write.
            try:
                os.remove(output_path)
            except OSError:
                pass
            raise

        register_temporary_file(output_path)

        return output_path, None

    except ImportError:
        return None, "soundfile_library_not_available"

    except Exception as conversion_error:
        return None, f"Failed to convert audio using soundfile: {str(conversion_error)}"
|
| 235 |
-
|
| 236 |
-
def convert_audio_using_librosa(input_path):
    """Convert an audio file to mono 16-bit WAV using librosa.

    The file is loaded as mono at its native sample rate (``sr=None``),
    converted to int16, and written to a new temporary WAV file.

    Args:
        input_path: Path of the audio file to convert.

    Returns:
        ``(output_path, None)`` on success (output registered as a temporary
        file); ``(None, "librosa_library_not_available")`` if librosa cannot
        be imported; ``(None, message)`` for any other failure.
    """
    try:
        import librosa

        audio_data, sample_rate = librosa.load(input_path, sr=None, mono=True)

        audio_data_pcm = convert_audio_data_to_pcm_int16(audio_data)

        # Close the handle right away: only the unique name is needed, and an
        # unclosed handle leaks a descriptor per call.
        with tempfile.NamedTemporaryFile(suffix="_librosa_converted.wav", delete=False) as output_file:
            output_path = output_file.name

        try:
            scipy.io.wavfile.write(output_path, sample_rate, audio_data_pcm)

        except Exception:
            # Do not leave an unregistered temp file behind on a failed write.
            try:
                os.remove(output_path)
            except OSError:
                pass
            raise

        register_temporary_file(output_path)

        return output_path, None

    except ImportError:
        return None, "librosa_library_not_available"

    except Exception as conversion_error:
        return None, f"Failed to convert audio using librosa: {str(conversion_error)}"
|
| 256 |
-
|
| 257 |
-
def convert_non_wav_audio_to_wav(input_path):
    """Convert a non-WAV file to WAV, trying pydub, soundfile, then librosa.

    Returns:
        ``(path, None, backend_name)`` from the first backend that succeeds.
        If all fail: ``(None, message, None)``, where the message says no
        library is available (every backend reported itself unavailable),
        lists the errors of backends that were available, or falls back to
        a generic failure text.
    """
    # (label, converter, error codes meaning "this backend is not installed")
    backends = (
        ("pydub", convert_audio_using_pydub, ("pydub_library_not_available", "ffmpeg_not_available")),
        ("soundfile", convert_audio_using_soundfile, ("soundfile_library_not_available",)),
        ("librosa", convert_audio_using_librosa, ("librosa_library_not_available",)),
    )

    failures = []
    for label, converter, unavailable_codes in backends:
        converted_path, backend_error = converter(input_path)
        if converted_path is not None:
            return converted_path, None, label
        failures.append((label, backend_error, backend_error in unavailable_codes))

    if all(unavailable for _, _, unavailable in failures):
        return None, "No audio conversion library is available on the server. Please upload a WAV file directly.", None

    # Report only backends that were installed but still failed.
    all_errors = [
        f"{label}: {backend_error}"
        for label, backend_error, unavailable in failures
        if not unavailable and backend_error
    ]

    if all_errors:
        combined_error = " | ".join(all_errors)
        return None, f"Audio conversion failed with all available methods. {combined_error}", None

    return None, "Audio conversion failed. Please try uploading a different audio file or use WAV format.", None
|
| 292 |
-
|
| 293 |
-
def prepare_audio_file_for_voice_cloning_internal(input_path):
    """Validate an uploaded audio file and normalize it to PCM WAV.

    Returns:
        A 4-tuple ``(path, error, was_converted, detected_format)``:
          * validation failure: ``(None, validation_error, False, detected_format)``;
          * WAV input: ``(pcm_path, None, False, 'wav')`` or
            ``(None, conversion_error, False, 'wav')``;
          * other formats: ``(path, None, True, detected_format)`` where path
            is the PCM-normalized file, or the intermediate WAV if PCM
            normalization failed; ``(None, conversion_error, True,
            detected_format)`` if conversion to WAV failed.
    """
    from .validator import perform_comprehensive_audio_validation, get_format_display_name

    is_valid, is_wav_format, detected_format, validation_error = perform_comprehensive_audio_validation(input_path)

    if not is_valid:
        return None, validation_error, False, detected_format

    if is_wav_format:
        pcm_path, pcm_failure = convert_wav_file_to_pcm_format(input_path)
        if pcm_path is None:
            return None, pcm_failure, False, 'wav'
        return pcm_path, None, False, 'wav'

    # The display name is computed but not used further in this function.
    format_display_name = get_format_display_name(detected_format)

    wav_path, wav_failure, conversion_method = convert_non_wav_audio_to_wav(input_path)
    if wav_path is None:
        return None, wav_failure, True, detected_format

    normalized_path, pcm_error = convert_wav_file_to_pcm_format(wav_path)
    if normalized_path is None:
        # PCM normalization failed; fall back to the intermediate WAV.
        return wav_path, None, True, detected_format

    return normalized_path, None, True, detected_format
|
| 317 |
-
|
| 318 |
-
def prepare_audio_file_for_voice_cloning(input_path, wait_for_queue=True):
    """Run audio preparation while holding an audio-conversion slot.

    Args:
        input_path: Path of the audio file to prepare.
        wait_for_queue: Forwarded as ``wait_for_slot`` when acquiring the slot.

    Returns:
        ``(path, error_message, was_converted, detected_format)``. If the slot
        cannot be acquired (queue busy or timed out) the result is
        ``(None, <error text>, False, None)``.
    """
    try:
        acquire_audio_conversion_slot(wait_for_slot=wait_for_queue)
    except (AudioConversionQueueBusyError, AudioConversionQueueTimeoutError) as queue_error:
        return None, str(queue_error), False, None

    # The slot is held from here on; always give it back.
    try:
        prepared_path, failure, converted, audio_format = prepare_audio_file_for_voice_cloning_internal(input_path)
        return prepared_path, failure, converted, audio_format
    finally:
        release_audio_conversion_slot()
|
| 334 |
-
|
| 335 |
-
def convert_audio_to_pcm_wav(input_path):
    """Return a PCM WAV path for ``input_path``, or ``input_path`` itself on failure.

    A failure with a non-empty error message is reported with a printed
    warning; the caller always receives a path.
    """
    prepared_path, failure, _converted, _audio_format = prepare_audio_file_for_voice_cloning(input_path)

    if prepared_path is None:
        if failure:
            print(f"Warning: Audio conversion failed - {failure}")
        return input_path

    return prepared_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/audio/validator.py
DELETED
|
@@ -1,268 +0,0 @@
|
|
| 1 |
-
#
|
| 2 |
-
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
# SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
#
|
| 5 |
-
|
| 6 |
-
import os
|
| 7 |
-
import wave
|
| 8 |
-
from config import (
|
| 9 |
-
SUPPORTED_AUDIO_EXTENSIONS,
|
| 10 |
-
AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES,
|
| 11 |
-
MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES
|
| 12 |
-
)
|
| 13 |
-
|
| 14 |
-
def build_format_display_names_from_supported_extensions():
    """Build the mapping from format code to human-readable name.

    Each supported extension (leading dots stripped) maps to its configured
    override when one exists, otherwise to its upper-cased code. An
    ``"unknown"`` entry is always set last.
    """
    format_codes = (suffix.lstrip(".") for suffix in SUPPORTED_AUDIO_EXTENSIONS)
    display_names = {
        code: (
            AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES[code]
            if code in AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES
            else code.upper()
        )
        for code in format_codes
    }
    display_names["unknown"] = "Unknown"
    return display_names
|
| 28 |
-
|
| 29 |
-
FORMAT_DISPLAY_NAMES = build_format_display_names_from_supported_extensions()
|
| 30 |
-
|
| 31 |
-
def get_audio_file_extension(file_path):
    """Return the lower-cased extension of ``file_path`` (including the dot).

    Returns ``None`` for a falsy path and ``""`` when the path has no
    extension.
    """
    if not file_path:
        return None
    return os.path.splitext(file_path)[1].lower()
|
| 38 |
-
|
| 39 |
-
def is_supported_audio_extension(file_path):
    """Tell whether the extension of ``file_path`` is in SUPPORTED_AUDIO_EXTENSIONS."""
    suffix = get_audio_file_extension(file_path)
    return suffix is not None and suffix in SUPPORTED_AUDIO_EXTENSIONS
|
| 46 |
-
|
| 47 |
-
def format_file_size_for_display(size_bytes):
    """Format a byte count as ``"N bytes"``, ``"N.N KB"`` or ``"N.NN MB"``.

    Units are binary: 1 KB is 1024 bytes and 1 MB is 1024 * 1024 bytes.
    """
    bytes_per_kb = 1024
    bytes_per_mb = 1024 * 1024

    if size_bytes < bytes_per_kb:
        return f"{size_bytes} bytes"
    if size_bytes < bytes_per_mb:
        return f"{size_bytes / bytes_per_kb:.1f} KB"
    return f"{size_bytes / bytes_per_mb:.2f} MB"
|
| 56 |
-
|
| 57 |
-
def validate_file_size_for_voice_cloning(file_path):
    """Check that the file does not exceed MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES.

    Returns:
        ``(True, None)`` when the size is acceptable, otherwise
        ``(False, <user-facing message>)``.
    """
    if not file_path:
        return False, "No audio file provided."

    try:
        actual_size = os.path.getsize(file_path)
    except OSError as size_error:
        return False, f"Cannot read file size: {str(size_error)}"

    if actual_size <= MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES:
        return True, None

    max_size_display = format_file_size_for_display(MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES)
    actual_size_display = format_file_size_for_display(actual_size)
    return False, f"Audio file size ({actual_size_display}) exceeds the maximum allowed size of {max_size_display}. Please upload a smaller audio file."
|
| 73 |
-
|
| 74 |
-
def validate_file_exists_and_readable(file_path):
    """Check that ``file_path`` names a regular, non-trivial, readable file.

    The file must exist, be a regular file, be at least 44 bytes long and
    allow reading its first byte.

    Returns:
        ``(True, None)`` on success, otherwise ``(False, <message>)``.
    """
    if not file_path:
        return False, "No audio file provided."
    if not os.path.exists(file_path):
        return False, "Audio file does not exist."
    if not os.path.isfile(file_path):
        return False, "The provided path is not a valid file."

    try:
        byte_count = os.path.getsize(file_path)
    except OSError as size_error:
        return False, f"Cannot read file size: {str(size_error)}"

    if byte_count == 0:
        return False, "Audio file is empty (0 bytes)."
    if byte_count < 44:
        return False, "Audio file is too small to be a valid audio file."

    # Probe readability by pulling a single byte.
    try:
        with open(file_path, "rb") as probe:
            probe.read(1)
    except IOError as read_error:
        return False, f"Audio file is not readable: {str(read_error)}"

    return True, None
|
| 103 |
-
|
| 104 |
-
def detect_audio_format_from_header(file_path):
    """Guess the audio container from the first 32 bytes of the file.

    Signatures are checked in a fixed order (WAV, MP3, FLAC, Ogg, AIFF,
    ``ftyp`` box, EBML, other MP4-style boxes). If none match, the file
    extension is used when it is a supported one.

    Returns:
        ``(format_code, None)`` on a match, ``("unknown", <message>)`` when
        nothing matched, or ``(None, <message>)`` when the header could not
        be read or is shorter than 4 bytes.
    """
    try:
        with open(file_path, "rb") as stream:
            head = stream.read(32)

        if len(head) < 4:
            return None, "File is too small to determine audio format."

        # Slices past the end of a short header are simply shorter and can
        # never equal a 4-byte tag, so no explicit length checks are needed.
        magic = head[:4]
        box_type = head[4:8]
        form_type = head[8:12]

        if magic == b"RIFF" and form_type == b"WAVE":
            return "wav", None

        if head[:3] == b"ID3":
            return "mp3", None

        mp3_frame_syncs = (
            b"\xff\xfb",
            b"\xff\xfa",
            b"\xff\xf3",
            b"\xff\xf2",
            b"\xff\xe0",
            b"\xff\xe2",
            b"\xff\xe3",
        )
        if head[:2] in mp3_frame_syncs:
            return "mp3", None

        if magic == b"fLaC":
            return "flac", None

        if magic == b"OggS":
            return "ogg", None

        if magic == b"FORM" and form_type in (b"AIFF", b"AIFC"):
            return "aiff", None

        if box_type == b"ftyp":
            return "m4a", None

        if magic == b"\x1aE\xdf\xa3":
            return "webm", None

        if box_type in (b"mdat", b"moov", b"free", b"skip", b"wide"):
            return "m4a", None

        # No signature matched: trust the extension if it is a supported one.
        suffix = get_audio_file_extension(file_path)
        if suffix and suffix in SUPPORTED_AUDIO_EXTENSIONS:
            return suffix.lstrip("."), None

        return "unknown", "Could not determine audio format from file header. The file may be corrupted or in an unsupported format."

    except IOError as io_error:
        return None, f"Error reading file header: {str(io_error)}"

    except Exception as detection_error:
        return None, f"Unexpected error detecting audio format: {str(detection_error)}"
|
| 169 |
-
|
| 170 |
-
def validate_wav_file_structure(file_path):
    """Open a WAV file with the ``wave`` module and sanity-check its parameters.

    Accepted ranges: 1-16 channels, 1-4 bytes per sample, 100-384000 Hz,
    at least one frame, and a duration between 0.1 and 60 seconds.

    Returns:
        ``(True, None)`` when every check passes, else ``(False, <message>)``.
    """
    try:
        with wave.open(file_path, "rb") as reader:
            channels = reader.getnchannels()
            bytes_per_sample = reader.getsampwidth()
            rate_hz = reader.getframerate()
            frame_count = reader.getnframes()

            if channels < 1:
                return False, "WAV file has no audio channels."
            if channels > 16:
                return False, f"WAV file has too many channels ({channels}). Maximum supported is 16."

            if bytes_per_sample < 1:
                return False, "WAV file has invalid sample width (less than 1 byte)."
            if bytes_per_sample > 4:
                return False, f"WAV file has unsupported sample width ({bytes_per_sample} bytes). Maximum supported is 4 bytes (32-bit)."

            if rate_hz < 100:
                return False, f"WAV file has invalid sample rate ({rate_hz} Hz). Minimum supported is 100 Hz."
            if rate_hz > 384000:
                return False, f"WAV file has unsupported sample rate ({rate_hz} Hz). Maximum supported is 384000 Hz."

            if frame_count < 1:
                return False, "WAV file contains no audio frames."

            # Safe to divide: the rate was checked to be at least 100 above.
            duration_seconds = frame_count / rate_hz
            if duration_seconds < 0.1:
                return False, f"Audio is too short ({duration_seconds:.2f} seconds). Minimum duration is 0.1 seconds."
            if duration_seconds > 60:
                return False, f"Audio is too long ({duration_seconds:.0f} seconds). Maximum duration is 1 minute."

            return True, None

    except wave.Error as wav_error:
        detail = str(wav_error)
        if "file does not start with RIFF id" in detail:
            return False, "File has .wav extension but is not a valid WAV file. It may be a different audio format renamed to .wav."
        if "unknown format" in detail.lower():
            return False, "WAV file uses an unsupported audio encoding format."
        return False, f"Invalid WAV file structure: {detail}"

    except EOFError:
        return False, "WAV file is truncated or corrupted (unexpected end of file)."

    except Exception as validation_error:
        return False, f"Error validating WAV file: {str(validation_error)}"
|
| 225 |
-
|
| 226 |
-
def perform_comprehensive_audio_validation(file_path):
    """Run existence, extension, header and (for WAV) structure checks.

    Returns:
        ``(is_valid, is_wav_format, detected_format, error_message)``.
        On success the error is ``None``; on failure ``is_valid`` is ``False``
        and the message explains which check failed.
    """
    readable, readable_error = validate_file_exists_and_readable(file_path)
    if not readable:
        return False, False, None, readable_error

    file_extension = get_audio_file_extension(file_path)
    if not is_supported_audio_extension(file_path):
        supported_formats_list = ", ".join(SUPPORTED_AUDIO_EXTENSIONS)
        return False, False, None, f"Unsupported file format '{file_extension}'. Supported formats are: {supported_formats_list}"

    detected_format, detection_error = detect_audio_format_from_header(file_path)
    if detected_format is None:
        return False, False, None, detection_error

    looks_like_wav = detected_format == "wav"
    if looks_like_wav:
        structure_ok, structure_error = validate_wav_file_structure(file_path)
        if not structure_ok:
            return False, True, "wav", structure_error

    return True, looks_like_wav, detected_format, None
|
| 252 |
-
|
| 253 |
-
def perform_voice_clone_file_validation(file_path):
    """Apply the voice-clone size limit, then the comprehensive validation.

    Returns the same 4-tuple as ``perform_comprehensive_audio_validation``;
    a size failure yields ``(False, False, None, <message>)``.
    """
    size_ok, size_error = validate_file_size_for_voice_cloning(file_path)
    if size_ok:
        return perform_comprehensive_audio_validation(file_path)
    return False, False, None, size_error
|
| 260 |
-
|
| 261 |
-
def get_format_display_name(format_code):
    """Return the display name for ``format_code``.

    ``None`` maps to ``"Unknown"``; codes missing from FORMAT_DISPLAY_NAMES
    fall back to their upper-cased form.
    """
    if format_code is None:
        return "Unknown"

    try:
        return FORMAT_DISPLAY_NAMES[format_code]
    except KeyError:
        return format_code.upper()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/core/authentication.py
DELETED
|
@@ -1,23 +0,0 @@
|
|
| 1 |
-
#
|
| 2 |
-
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
# SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
#
|
| 5 |
-
|
| 6 |
-
from config import HF_TOKEN
|
| 7 |
-
from huggingface_hub import login
|
| 8 |
-
|
| 9 |
-
def authenticate_huggingface():
    """Log in to Hugging Face with HF_TOKEN and print the outcome.

    Login failures are caught and reported on stdout; nothing is raised or
    returned.
    """
    if not HF_TOKEN:
        print("Missing Hugging Face authentication required for the license agreement", flush=True)
        return

    try:
        login(token=HF_TOKEN, add_to_git_credential=False)
        print("Authenticated with Hugging Face", flush=True)
    except Exception as authentication_error:
        print(f"Hugging Face authentication failed: {authentication_error}", flush=True)
        print("Voice cloning may not be available", flush=True)
|
| 21 |
-
|
| 22 |
-
def get_huggingface_token():
    """Return the HF_TOKEN value imported from the config module."""
    return HF_TOKEN
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/core/memory.py
DELETED
|
@@ -1,394 +0,0 @@
|
|
| 1 |
-
#
|
| 2 |
-
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
# SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
#
|
| 5 |
-
|
| 6 |
-
import os
|
| 7 |
-
import gc
|
| 8 |
-
import time
|
| 9 |
-
import atexit
|
| 10 |
-
import threading
|
| 11 |
-
import torch
|
| 12 |
-
from config import (
|
| 13 |
-
TEMPORARY_FILE_LIFETIME_SECONDS,
|
| 14 |
-
BACKGROUND_CLEANUP_INTERVAL,
|
| 15 |
-
MEMORY_WARNING_THRESHOLD,
|
| 16 |
-
MEMORY_CRITICAL_THRESHOLD,
|
| 17 |
-
MEMORY_CHECK_INTERVAL,
|
| 18 |
-
MEMORY_IDLE_TARGET,
|
| 19 |
-
MAXIMUM_MEMORY_USAGE
|
| 20 |
-
)
|
| 21 |
-
from ..core.state import (
|
| 22 |
-
temporary_files_registry,
|
| 23 |
-
temporary_files_lock,
|
| 24 |
-
memory_enforcement_lock,
|
| 25 |
-
background_cleanup_thread,
|
| 26 |
-
background_cleanup_stop_event,
|
| 27 |
-
background_cleanup_trigger_event,
|
| 28 |
-
check_if_generation_is_currently_active,
|
| 29 |
-
get_text_to_speech_manager,
|
| 30 |
-
is_model_in_use
|
| 31 |
-
)
|
| 32 |
-
|
| 33 |
-
def get_current_memory_usage():
    """Return this process's resident memory in bytes, or 0 if it cannot be read.

    Three sources are tried in order: the ``VmRSS`` line of
    ``/proc/self/status``, the resident page count in ``/proc/self/statm``,
    and finally ``resource.getrusage`` (``ru_maxrss``). Any exception from a
    source simply moves on to the next one.
    """
    def _from_proc_status():
        with open('/proc/self/status', 'r') as status_file:
            for line in status_file:
                if line.startswith('VmRSS:'):
                    # The value on this line is multiplied by 1024 to get bytes.
                    return int(line.split()[1]) * 1024
        return None

    def _from_proc_statm():
        with open('/proc/self/statm', 'r') as statm_file:
            fields = statm_file.read().split()
        return int(fields[1]) * os.sysconf('SC_PAGE_SIZE')

    def _from_rusage():
        import resource
        import platform
        max_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        # Used unscaled on Darwin, multiplied by 1024 everywhere else.
        if platform.system() == "Darwin":
            return max_rss
        return max_rss * 1024

    for probe in (_from_proc_status, _from_proc_statm, _from_rusage):
        try:
            reading = probe()
        except Exception:
            continue
        if reading is not None:
            return reading

    return 0
|
| 69 |
-
|
| 70 |
-
def is_memory_usage_within_limit():
    """True while current memory usage is strictly below MAXIMUM_MEMORY_USAGE."""
    return get_current_memory_usage() < MAXIMUM_MEMORY_USAGE
|
| 73 |
-
|
| 74 |
-
def is_memory_usage_approaching_limit():
    """True once current memory usage reaches MEMORY_WARNING_THRESHOLD."""
    return get_current_memory_usage() >= MEMORY_WARNING_THRESHOLD
|
| 77 |
-
|
| 78 |
-
def is_memory_usage_critical():
    """True once current memory usage reaches MEMORY_CRITICAL_THRESHOLD."""
    return get_current_memory_usage() >= MEMORY_CRITICAL_THRESHOLD
|
| 81 |
-
|
| 82 |
-
def is_memory_above_idle_target():
    """True when current memory usage is strictly above MEMORY_IDLE_TARGET."""
    return get_current_memory_usage() > MEMORY_IDLE_TARGET
|
| 85 |
-
|
| 86 |
-
def force_garbage_collection():
    """Collect every GC generation, then empty the CUDA cache if CUDA is available.

    CUDA errors are swallowed; this is a best-effort cleanup.
    """
    for generation in (0, 1, 2):
        gc.collect(generation)

    if not torch.cuda.is_available():
        return

    try:
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
    except Exception:
        pass
|
| 98 |
-
|
| 99 |
-
def memory_cleanup():
    """Garbage-collect, call ``malloc_trim(0)`` from ``libc.so.6``, then collect again.

    If the library cannot be loaded or the call fails, that step is skipped
    silently.
    """
    force_garbage_collection()

    try:
        import ctypes
        ctypes.CDLL("libc.so.6").malloc_trim(0)
    except Exception:
        pass

    force_garbage_collection()
|
| 111 |
-
|
| 112 |
-
def perform_memory_cleanup():
    """Collect garbage, evict least-recently-used voice states, then run memory_cleanup().

    Eviction is skipped when no TTS manager exists, and eviction errors are
    ignored.
    """
    force_garbage_collection()

    manager = get_text_to_speech_manager()
    if manager is not None:
        try:
            manager.evict_least_recently_used_voice_states()
        except Exception:
            pass

    memory_cleanup()
|
| 124 |
-
|
| 125 |
-
def cleanup_expired_temporary_files():
    """Delete registered temporary files older than TEMPORARY_FILE_LIFETIME_SECONDS.

    Runs under ``temporary_files_lock``. Each expired path is removed from
    disk if it still exists and then dropped from the registry; an error on
    one path is ignored so the remaining paths are still processed.
    """
    now = time.time()

    with temporary_files_lock:
        stale_paths = [
            path
            for path, created_at in list(temporary_files_registry.items())
            if now - created_at > TEMPORARY_FILE_LIFETIME_SECONDS
        ]

        for path in stale_paths:
            try:
                if os.path.exists(path):
                    os.remove(path)
                del temporary_files_registry[path]
            except Exception:
                pass
|
| 142 |
-
|
| 143 |
-
def cleanup_all_temporary_files_immediately():
    """Delete every registered temporary file regardless of age.

    Runs under ``temporary_files_lock``; per-file errors are ignored.
    """
    with temporary_files_lock:
        # Snapshot the keys because the registry is mutated while iterating.
        registered_paths = list(temporary_files_registry.keys())
        for path in registered_paths:
            try:
                if os.path.exists(path):
                    os.remove(path)
                del temporary_files_registry[path]
            except Exception:
                pass
|
| 153 |
-
|
| 154 |
-
def has_temporary_files_pending_cleanup():
    """True if at least one registered temporary file has outlived its lifetime."""
    with temporary_files_lock:
        if not temporary_files_registry:
            return False

        now = time.time()
        return any(
            now - created_at > TEMPORARY_FILE_LIFETIME_SECONDS
            for created_at in temporary_files_registry.values()
        )
|
| 166 |
-
|
| 167 |
-
def has_any_temporary_files_registered():
    """True when the temporary-file registry holds at least one entry."""
    with temporary_files_lock:
        return len(temporary_files_registry) != 0
|
| 170 |
-
|
| 171 |
-
def calculate_time_until_next_file_expiration():
    """Return the seconds until the soonest registered file expires.

    Returns ``None`` when nothing is registered and ``0`` when at least one
    file has already expired.
    """
    with temporary_files_lock:
        if not temporary_files_registry:
            return None

        now = time.time()
        soonest = min(
            TEMPORARY_FILE_LIFETIME_SECONDS - (now - created_at)
            for created_at in temporary_files_registry.values()
        )

        # An already-expired file clamps the answer to zero.
        return 0 if soonest <= 0 else soonest
|
| 190 |
-
|
| 191 |
-
def enforce_memory_limit_if_exceeded():
    """Escalate cleanup steps until memory usage drops below the thresholds.

    Runs under ``memory_enforcement_lock``. Steps, each followed by a fresh
    measurement:

    1. Nothing, if usage is already below MEMORY_WARNING_THRESHOLD.
    2. Garbage collection (compared against the warning threshold again).
    3. Evict least-recently-used voice states plus memory_cleanup()
       (compared against MEMORY_CRITICAL_THRESHOLD).
    4. Clear the voice-state cache completely, delete all temporary files
       and run memory_cleanup() again.

    Returns:
        ``True`` if usage ended below the threshold checked at the step that
        stopped the escalation; after the final step, whether usage is below
        MAXIMUM_MEMORY_USAGE.
    """
    with memory_enforcement_lock:
        if get_current_memory_usage() < MEMORY_WARNING_THRESHOLD:
            return True

        force_garbage_collection()
        if get_current_memory_usage() < MEMORY_WARNING_THRESHOLD:
            return True

        tts_manager = get_text_to_speech_manager()
        if tts_manager is not None:
            try:
                tts_manager.evict_least_recently_used_voice_states()
            except Exception:
                pass

        memory_cleanup()
        if get_current_memory_usage() < MEMORY_CRITICAL_THRESHOLD:
            return True

        # Last resort. The previous version branched here on whether a
        # generation or the model was active, but both branches ran exactly
        # these steps; the "active" branch then returned a reading taken
        # before the cleanup. Always measure afterwards.
        if tts_manager is not None:
            try:
                tts_manager.clear_voice_state_cache_completely()
            except Exception:
                pass

        cleanup_all_temporary_files_immediately()
        memory_cleanup()

        return get_current_memory_usage() < MAXIMUM_MEMORY_USAGE
|
| 249 |
-
|
| 250 |
-
def perform_idle_memory_reduction():
    """Shrink memory towards MEMORY_IDLE_TARGET while the system is idle.

    Does nothing when a generation is active or the model is in use. Inside
    ``memory_enforcement_lock`` it escalates from garbage collection, to LRU
    voice-state eviction, to clearing the whole voice-state cache, stopping
    as soon as usage is at or below the target or activity is detected.
    """
    if check_if_generation_is_currently_active() or is_model_in_use():
        return

    with memory_enforcement_lock:
        if get_current_memory_usage() <= MEMORY_IDLE_TARGET:
            return

        force_garbage_collection()
        if get_current_memory_usage() <= MEMORY_IDLE_TARGET:
            return

        # Re-check activity before touching the voice-state cache.
        if check_if_generation_is_currently_active() or is_model_in_use():
            return

        manager = get_text_to_speech_manager()
        if manager is not None:
            try:
                manager.evict_least_recently_used_voice_states()
            except Exception:
                pass

        memory_cleanup()
        if get_current_memory_usage() <= MEMORY_IDLE_TARGET:
            return

        if check_if_generation_is_currently_active() or is_model_in_use():
            return

        if manager is not None:
            try:
                manager.clear_voice_state_cache_completely()
            except Exception:
                pass

        memory_cleanup()
|
| 297 |
-
|
| 298 |
-
def perform_background_cleanup_cycle():
    """Body of the background cleanup thread.

    Loops until ``background_cleanup_stop_event`` is set. Each pass picks a
    wait time from the next temporary-file expiration and the memory check
    interval, sleeps on the stop event, removes expired temporary files, and
    at most once per MEMORY_CHECK_INTERVAL runs memory enforcement or idle
    reduction when nothing is generating.
    """
    last_memory_check_timestamp = 0

    while not background_cleanup_stop_event.is_set():
        seconds_to_expiry = calculate_time_until_next_file_expiration()

        if seconds_to_expiry is not None:
            # Files are registered: wake up shortly after the next one expires.
            if seconds_to_expiry <= 0:
                wait_duration = 1
            else:
                wait_duration = min(
                    seconds_to_expiry + 1,
                    MEMORY_CHECK_INTERVAL,
                    BACKGROUND_CLEANUP_INTERVAL
                )
        else:
            idle_and_above_target = (
                is_memory_above_idle_target() and
                not check_if_generation_is_currently_active() and
                not is_model_in_use()
            )

            if idle_and_above_target:
                wait_duration = MEMORY_CHECK_INTERVAL
            else:
                # Nothing to do: park on the trigger event until poked or
                # until the cleanup interval elapses.
                background_cleanup_trigger_event.clear()
                was_triggered = background_cleanup_trigger_event.wait(timeout=BACKGROUND_CLEANUP_INTERVAL)

                if background_cleanup_stop_event.is_set():
                    break

                if (
                    not was_triggered
                    and not check_if_generation_is_currently_active()
                    and not is_model_in_use()
                ):
                    perform_idle_memory_reduction()
                continue

        background_cleanup_stop_event.wait(timeout=wait_duration)

        if background_cleanup_stop_event.is_set():
            break

        if has_temporary_files_pending_cleanup():
            cleanup_expired_temporary_files()

        now = time.time()
        if now - last_memory_check_timestamp >= MEMORY_CHECK_INTERVAL:
            system_is_idle = (
                not check_if_generation_is_currently_active()
                and not is_model_in_use()
            )

            if system_is_idle:
                if is_memory_usage_critical():
                    enforce_memory_limit_if_exceeded()
                elif is_memory_above_idle_target():
                    perform_idle_memory_reduction()

            last_memory_check_timestamp = now
|
| 364 |
-
|
| 365 |
-
def trigger_background_cleanup_check():
    """Set ``background_cleanup_trigger_event`` to wake the cleanup loop."""
    background_cleanup_trigger_event.set()
|
| 367 |
-
|
| 368 |
-
def start_background_cleanup_thread():
    """Start the daemon cleanup thread unless one is already alive.

    The thread object is stored on the ``state`` module so other modules see
    the same reference.
    """
    # Declared as in the original; this function never rebinds the name and
    # keeps the live reference on the state module instead.
    global background_cleanup_thread

    from ..core import state as global_state

    current_thread = global_state.background_cleanup_thread
    if current_thread is not None and current_thread.is_alive():
        return

    background_cleanup_stop_event.clear()
    background_cleanup_trigger_event.clear()

    global_state.background_cleanup_thread = threading.Thread(
        target=perform_background_cleanup_cycle,
        daemon=True,
        name="BackgroundCleanupThread"
    )
    global_state.background_cleanup_thread.start()
|
| 384 |
-
|
| 385 |
-
def stop_background_cleanup_thread():
    """Signal the cleanup thread to stop and wait up to 5 seconds for it."""
    from ..core import state as global_state

    # Set both events so the loop wakes up whichever one it is waiting on.
    background_cleanup_stop_event.set()
    background_cleanup_trigger_event.set()

    worker = global_state.background_cleanup_thread
    if worker is not None and worker.is_alive():
        global_state.background_cleanup_thread.join(timeout=5)
|
| 393 |
-
|
| 394 |
-
atexit.register(stop_background_cleanup_thread)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/core/state.py
DELETED
|
@@ -1,147 +0,0 @@
|
|
| 1 |
-
#
|
| 2 |
-
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
# SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
#
|
| 5 |
-
|
| 6 |
-
import threading
|
| 7 |
-
|
| 8 |
-
# Generation flags; the accessor functions in this module read and write
# them while holding generation_state_lock.
generation_state_lock = threading.Lock()
is_currently_generating = False
stop_generation_requested = False

# Temporary-file bookkeeping and memory-enforcement lock.
temporary_files_registry = {}
temporary_files_lock = threading.Lock()
memory_enforcement_lock = threading.Lock()

# Background cleanup thread handle and its control events.
background_cleanup_thread = None
background_cleanup_stop_event = threading.Event()
background_cleanup_trigger_event = threading.Event()

# Registered text-to-speech manager (see set/get_text_to_speech_manager).
text_to_speech_manager = None

# Audio-conversion queue: a single-slot semaphore plus counters that are
# updated under audio_conversion_queue_lock.
audio_conversion_semaphore = threading.Semaphore(1)
audio_conversion_queue_lock = threading.Lock()
audio_conversion_active_count = 0
audio_conversion_waiting_count = 0

# Accelerator log thread state.
accelerator_log_lock = threading.Lock()
accelerator_log_thread = None
accelerator_log_stop_event = threading.Event()

# Model usage counter, updated under model_usage_lock.
model_usage_lock = threading.Lock()
model_usage_count = 0

# Re-entrant generation protection lock and its nesting counter.
generation_protection_lock = threading.RLock()
generation_protection_count = 0
|
| 33 |
-
|
| 34 |
-
def set_text_to_speech_manager(manager_instance):
    """Store ``manager_instance`` in the module-level ``text_to_speech_manager``."""
    globals()["text_to_speech_manager"] = manager_instance
|
| 37 |
-
|
| 38 |
-
def get_text_to_speech_manager():
    """Return the module-level ``text_to_speech_manager`` (``None`` until one is set)."""
    registered_manager = text_to_speech_manager
    return registered_manager
|
| 41 |
-
|
| 42 |
-
def check_if_generation_is_currently_active():
    """Return ``is_currently_generating``, read while holding ``generation_state_lock``."""
    generation_state_lock.acquire()
    try:
        return is_currently_generating
    finally:
        generation_state_lock.release()
|
| 45 |
-
|
| 46 |
-
def set_generation_active(is_active):
    """Assign ``is_active`` to ``is_currently_generating`` under ``generation_state_lock``."""
    global is_currently_generating

    generation_state_lock.acquire()
    try:
        is_currently_generating = is_active
    finally:
        generation_state_lock.release()
|
| 50 |
-
|
| 51 |
-
def set_stop_generation_requested(requested):
    """Assign ``requested`` to ``stop_generation_requested`` under ``generation_state_lock``."""
    global stop_generation_requested

    generation_state_lock.acquire()
    try:
        stop_generation_requested = requested
    finally:
        generation_state_lock.release()
|
| 55 |
-
|
| 56 |
-
def get_stop_generation_requested():
    """Return ``stop_generation_requested``, read while holding ``generation_state_lock``."""
    with generation_state_lock:
        stop_flag = stop_generation_requested
    return stop_flag
|
| 59 |
-
|
| 60 |
-
def increment_audio_conversion_active():
    """Add one to ``audio_conversion_active_count`` under the queue lock and return the new value."""
    global audio_conversion_active_count

    with audio_conversion_queue_lock:
        updated_total = audio_conversion_active_count + 1
        audio_conversion_active_count = updated_total

    return updated_total
|
| 65 |
-
|
| 66 |
-
def decrement_audio_conversion_active():
    """Subtract one from ``audio_conversion_active_count`` (floored at 0) and return the new value."""
    global audio_conversion_active_count

    with audio_conversion_queue_lock:
        remaining = audio_conversion_active_count - 1
        if remaining < 0:
            remaining = 0
        audio_conversion_active_count = remaining

    return remaining
|
| 71 |
-
|
| 72 |
-
def get_audio_conversion_active_count():
    """Return ``audio_conversion_active_count``, read under ``audio_conversion_queue_lock``."""
    with audio_conversion_queue_lock:
        active_snapshot = audio_conversion_active_count
    return active_snapshot
|
| 75 |
-
|
| 76 |
-
def increment_audio_conversion_waiting():
    """Add one to ``audio_conversion_waiting_count`` under the queue lock and return the new value."""
    global audio_conversion_waiting_count

    with audio_conversion_queue_lock:
        updated_total = audio_conversion_waiting_count + 1
        audio_conversion_waiting_count = updated_total

    return updated_total
|
| 81 |
-
|
| 82 |
-
def decrement_audio_conversion_waiting():
    """Subtract one from ``audio_conversion_waiting_count`` (floored at 0) and return the new value."""
    global audio_conversion_waiting_count

    with audio_conversion_queue_lock:
        remaining = audio_conversion_waiting_count - 1
        if remaining < 0:
            remaining = 0
        audio_conversion_waiting_count = remaining

    return remaining
|
| 87 |
-
|
| 88 |
-
def get_audio_conversion_waiting_count():
    """Return ``audio_conversion_waiting_count``, read under ``audio_conversion_queue_lock``."""
    with audio_conversion_queue_lock:
        waiting_snapshot = audio_conversion_waiting_count
    return waiting_snapshot
|
| 91 |
-
|
| 92 |
-
def is_audio_conversion_queue_busy():
    """Return ``True`` when ``audio_conversion_active_count`` is greater than zero."""
    with audio_conversion_queue_lock:
        active_snapshot = audio_conversion_active_count
    return active_snapshot > 0
|
| 95 |
-
|
| 96 |
-
def increment_model_usage():
    """Add one to ``model_usage_count`` under ``model_usage_lock`` and return the new value."""
    global model_usage_count

    with model_usage_lock:
        updated_total = model_usage_count + 1
        model_usage_count = updated_total

    return updated_total
|
| 101 |
-
|
| 102 |
-
def decrement_model_usage():
    """Subtract one from ``model_usage_count`` (floored at 0) and return the new value."""
    global model_usage_count

    model_usage_lock.acquire()
    try:
        lowered = model_usage_count - 1
        model_usage_count = lowered if lowered > 0 else 0
        return model_usage_count
    finally:
        model_usage_lock.release()
|
| 108 |
-
|
| 109 |
-
def get_model_usage_count():
    """Return ``model_usage_count``, read while holding ``model_usage_lock``."""
    with model_usage_lock:
        usage_snapshot = model_usage_count
    return usage_snapshot
|
| 112 |
-
|
| 113 |
-
def is_model_in_use():
    """Return ``True`` when ``model_usage_count`` is greater than zero."""
    with model_usage_lock:
        usage_snapshot = model_usage_count
    return usage_snapshot > 0
|
| 116 |
-
|
| 117 |
-
def acquire_generation_protection():
    """Acquire ``generation_protection_lock`` and bump the protection counter.

    The lock stays held on return; ``release_generation_protection`` is the
    matching call. Returns the counter value after the increment.
    """
    global generation_protection_count

    generation_protection_lock.acquire()
    nesting_depth = generation_protection_count + 1
    generation_protection_count = nesting_depth
    return nesting_depth
|
| 122 |
-
|
| 123 |
-
def release_generation_protection():
    """Lower the protection counter (floored at 0), then release ``generation_protection_lock``."""
    global generation_protection_count

    lowered = generation_protection_count - 1
    generation_protection_count = lowered if lowered > 0 else 0
    generation_protection_lock.release()
|
| 127 |
-
|
| 128 |
-
def is_generation_protected():
    """Report whether generation protection is currently in effect.

    Returns ``True`` when the lock cannot be taken without blocking;
    otherwise returns whether the protection counter is above zero.
    """
    if not generation_protection_lock.acquire(blocking=False):
        return True

    try:
        return generation_protection_count > 0
    finally:
        generation_protection_lock.release()
|
| 134 |
-
|
| 135 |
-
def try_acquire_generation_protection_for_cleanup(timeout_seconds=0.1):
    """Try to take ``generation_protection_lock`` on behalf of a cleanup pass.

    Returns ``True`` with the lock still held when it was acquired within
    ``timeout_seconds`` and the protection counter is zero. Returns ``False``
    (lock not held) on timeout or when the counter is above zero.
    """
    if not generation_protection_lock.acquire(blocking=True, timeout=timeout_seconds):
        return False

    if not generation_protection_count > 0:
        # Lock intentionally stays held for the caller.
        return True

    generation_protection_lock.release()
    return False
|
| 145 |
-
|
| 146 |
-
def release_generation_protection_for_cleanup():
    """Release ``generation_protection_lock`` without touching the protection counter."""
    held_lock = generation_protection_lock
    held_lock.release()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/generation/handler.py
DELETED
|
@@ -1,309 +0,0 @@
|
|
| 1 |
-
#
|
| 2 |
-
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
# SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
#
|
| 5 |
-
|
| 6 |
-
import gradio as gr
|
| 7 |
-
from config import VOICE_MODE_CLONE, MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES
|
| 8 |
-
from ..core.state import (
|
| 9 |
-
generation_state_lock,
|
| 10 |
-
get_stop_generation_requested,
|
| 11 |
-
set_stop_generation_requested,
|
| 12 |
-
is_audio_conversion_queue_busy,
|
| 13 |
-
get_audio_conversion_waiting_count,
|
| 14 |
-
acquire_generation_protection,
|
| 15 |
-
release_generation_protection
|
| 16 |
-
)
|
| 17 |
-
from ..core.authentication import get_huggingface_token
|
| 18 |
-
from ..core.memory import (
|
| 19 |
-
has_temporary_files_pending_cleanup,
|
| 20 |
-
cleanup_expired_temporary_files,
|
| 21 |
-
perform_memory_cleanup,
|
| 22 |
-
memory_cleanup,
|
| 23 |
-
trigger_background_cleanup_check
|
| 24 |
-
)
|
| 25 |
-
from ..tts.manager import text_to_speech_manager, ModelNotLoadedError, ModelLoadingError
|
| 26 |
-
from ..validation.text import validate_text_input
|
| 27 |
-
from ..audio.validator import (
|
| 28 |
-
perform_voice_clone_file_validation,
|
| 29 |
-
get_format_display_name,
|
| 30 |
-
format_file_size_for_display,
|
| 31 |
-
validate_file_size_for_voice_cloning
|
| 32 |
-
)
|
| 33 |
-
from ..audio.converter import (
|
| 34 |
-
prepare_audio_file_for_voice_cloning,
|
| 35 |
-
AudioConversionQueueBusyError,
|
| 36 |
-
AudioConversionQueueTimeoutError
|
| 37 |
-
)
|
| 38 |
-
|
| 39 |
-
def check_if_generating():
    """Return whether a generation is currently flagged as running.

    The flag is read from the ``core.state`` module while holding
    ``generation_state_lock``. Importing the name with
    ``from ..core.state import is_currently_generating`` would copy its value
    before the lock is taken, so the lock would not protect the read.
    """
    from ..core import state as global_state

    with generation_state_lock:
        return global_state.is_currently_generating
|
| 43 |
-
|
| 44 |
-
def request_generation_stop():
    """Set the stop-requested flag and return a Gradio update with ``interactive=False``."""
    set_stop_generation_requested(True)
    disabled_component_update = gr.update(interactive=False)
    return disabled_component_update
|
| 47 |
-
|
| 48 |
-
def validate_voice_clone_file_size(voice_clone_audio_file):
    """Check the size of an optional voice-clone upload.

    Returns ``(True, None)`` when no file is given or the size check passes,
    otherwise ``(False, error)`` with the error reported by
    ``validate_file_size_for_voice_cloning``.
    """
    if not voice_clone_audio_file:
        return True, None

    size_is_acceptable, size_problem = validate_file_size_for_voice_cloning(voice_clone_audio_file)
    return (True, None) if size_is_acceptable else (False, size_problem)
|
| 58 |
-
|
| 59 |
-
def validate_and_prepare_voice_clone_audio(voice_clone_audio_file):
    """Validate an uploaded voice-clone file and prepare it for cloning.

    Returns a 4-tuple ``(prepared_path, error_message, was_converted, format)``:

    * on success ``prepared_path`` is set, ``error_message`` is ``None`` and
      ``was_converted`` is ``False`` for WAV input, ``True`` otherwise;
    * on failure ``prepared_path`` and ``was_converted`` are ``None`` and
      ``error_message`` holds the text to show to the user.

    Queue busy/timeout exceptions raised by the converter are turned into an
    error tuple instead of propagating.
    """
    if not voice_clone_audio_file:
        return None, "Please upload an audio file for voice cloning.", None, None

    file_size_valid, file_size_error = validate_file_size_for_voice_cloning(voice_clone_audio_file)

    if not file_size_valid:
        return None, file_size_error, None, None

    is_valid, is_wav_format, detected_format, validation_error = perform_voice_clone_file_validation(voice_clone_audio_file)

    if not is_valid:
        format_display_name = get_format_display_name(detected_format) if detected_format else "Unknown"

        if not validation_error:
            return None, "The uploaded file could not be validated as a valid audio file.", None, detected_format

        # Lowercase once; every check below is a case-insensitive substring match.
        validation_text = validation_error.lower()

        if "too short" in validation_text:
            return None, "The uploaded audio file is too short. Please upload a longer audio sample for better voice cloning results.", None, detected_format

        if "too long" in validation_text:
            return None, "The uploaded audio file is too long. Please upload a shorter audio sample (maximum 1 hour).", None, detected_format

        if "empty" in validation_text or "0 bytes" in validation_text:
            return None, "The uploaded audio file is empty. Please upload a valid audio file.", None, detected_format

        if "corrupted" in validation_text or "truncated" in validation_text:
            return None, f"The uploaded {format_display_name} file appears to be corrupted or incomplete. Please upload a valid audio file.", None, detected_format

        if "unsupported" in validation_text or "exceeds" in validation_text or "maximum" in validation_text:
            # These validator messages are already user-facing; pass them through unchanged.
            return None, validation_error, None, detected_format

        return None, f"Invalid audio file: {validation_error}", None, detected_format

    format_display_name = get_format_display_name(detected_format)

    if is_audio_conversion_queue_busy():
        waiting_count = get_audio_conversion_waiting_count()

        if waiting_count > 0:
            gr.Warning(f"Audio conversion queue is busy. Your request is queued (position: {waiting_count + 1}). Please wait...")
        else:
            gr.Warning("Audio conversion is in progress for another user. Your request has been queued. Please wait...")

    # WAV and non-WAV inputs go through the same preparation call; only the
    # interpretation of its result differs.
    try:
        prepared_path, preparation_error, _was_converted, _final_format = prepare_audio_file_for_voice_cloning(
            voice_clone_audio_file,
            wait_for_queue=True
        )
    except (AudioConversionQueueBusyError, AudioConversionQueueTimeoutError) as queue_error:
        return None, str(queue_error), None, detected_format

    if is_wav_format:
        if prepared_path is None:
            return None, f"Failed to process WAV file: {preparation_error}", None, 'wav'

        return prepared_path, None, False, 'wav'

    if prepared_path is None:
        # The converter may report failure without a message; treat that as
        # an empty string so the substring checks cannot raise.
        preparation_text = (preparation_error or "").lower()

        if "no audio conversion library" in preparation_text:
            return None, f"Cannot convert {format_display_name} format. Please upload a WAV file directly.", None, detected_format

        if "queue" in preparation_text or "busy" in preparation_text:
            return None, preparation_error, None, detected_format

        return None, f"Failed to convert {format_display_name} to WAV format: {preparation_error}", None, detected_format

    return prepared_path, None, True, detected_format
|
| 141 |
-
|
| 142 |
-
def perform_speech_generation(
    text_input,
    voice_mode_selection,
    voice_preset_selection,
    voice_clone_audio_file,
    model_variant,
    lsd_decode_steps,
    temperature,
    noise_clamp,
    eos_threshold,
    frames_after_eos,
    enable_custom_frames
):
    """Generate speech for ``text_input`` and return the path of the saved audio file.

    Steps: clean up expired temporary files, validate the text, validate and
    prepare the clone audio (clone mode only), mark a generation as running,
    load the model, obtain the voice state, generate and save the audio.

    Returns ``None`` when a stop was requested between steps. Every failure is
    reported as ``gr.Error``. The running/stop flags, the generation
    protection lock and the memory cleanup are always handled in ``finally``.
    """
    from ..core import state as global_state

    def stop_was_requested():
        # Read the stop flag under the shared state lock.
        with generation_state_lock:
            return global_state.stop_generation_requested

    if has_temporary_files_pending_cleanup():
        cleanup_expired_temporary_files()

    is_valid, validation_result = validate_text_input(text_input)

    if not is_valid:
        raise gr.Error(validation_result if validation_result else "Please enter valid text to generate speech.")

    clone_mode_selected = voice_mode_selection == VOICE_MODE_CLONE

    prepared_audio_path = None
    was_audio_converted = False
    original_audio_format = None

    if clone_mode_selected:
        if not voice_clone_audio_file:
            raise gr.Error("Please upload an audio file for voice cloning.")

        file_size_valid, file_size_error = validate_voice_clone_file_size(voice_clone_audio_file)
        if not file_size_valid:
            max_size_display = format_file_size_for_display(MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES)
            raise gr.Error(f"File size exceeds maximum limit of {max_size_display}. {file_size_error}")

        if not get_huggingface_token():
            raise gr.Error("Voice cloning is not configured properly at the moment. Please try again later.")

        (
            prepared_audio_path,
            audio_error,
            was_audio_converted,
            original_audio_format,
        ) = validate_and_prepare_voice_clone_audio(voice_clone_audio_file)

        if prepared_audio_path is None:
            raise gr.Error(audio_error)

        if was_audio_converted:
            format_display_name = get_format_display_name(original_audio_format)
            gr.Warning(f"Audio converted from {format_display_name} to WAV format for voice cloning.")

    # Only one generation may run at a time; claim the slot under the lock.
    with generation_state_lock:
        if global_state.is_currently_generating:
            raise gr.Error("A generation is already in progress. Please wait.")

        global_state.is_currently_generating = True
        global_state.stop_generation_requested = False

    acquire_generation_protection()

    generated_audio_tensor = None
    cloned_voice_state_tensor = None

    try:
        perform_memory_cleanup()

        loaded_model = text_to_speech_manager.load_or_get_model(
            model_variant,
            temperature,
            lsd_decode_steps,
            noise_clamp,
            eos_threshold
        )

        if loaded_model is None:
            raise gr.Error("Failed to load TTS model. Please try again.")

        if stop_was_requested():
            return None

        if clone_mode_selected:
            cloned_voice_state_tensor = text_to_speech_manager.get_voice_state_for_clone(
                voice_clone_audio_file,
                prepared_audio_path=prepared_audio_path
            )
            voice_state = cloned_voice_state_tensor
        else:
            voice_state = text_to_speech_manager.get_voice_state_for_preset(voice_preset_selection)

        if stop_was_requested():
            return None

        generated_audio_tensor = text_to_speech_manager.generate_audio(
            validation_result,
            voice_state,
            frames_after_eos,
            enable_custom_frames
        )

        if stop_was_requested():
            return None

        return text_to_speech_manager.save_audio_to_file(generated_audio_tensor)

    except gr.Error:
        raise

    except ModelNotLoadedError as model_not_loaded_error:
        raise gr.Error(str(model_not_loaded_error))

    except ModelLoadingError as model_loading_error:
        raise gr.Error(f"Failed to load TTS model: {str(model_loading_error)}")

    except RuntimeError as runtime_error:
        error_message = str(runtime_error)

        if "not loaded" in error_message.lower():
            if text_to_speech_manager.ensure_model_loaded():
                raise gr.Error("Model was temporarily unavailable. Please try again.")
            raise gr.Error("TTS model could not be loaded. Please try again later.")

        raise gr.Error(error_message)

    except Exception as generation_error:
        error_message = str(generation_error)

        if "file does not start with RIFF id" in error_message:
            raise gr.Error("The audio file format is not supported. Please upload a valid WAV file or a common audio format (MP3, FLAC, OGG, M4A).")

        if "unknown format" in error_message.lower():
            raise gr.Error("The audio file uses an unsupported encoding format. Please convert it to a standard format and try again.")

        raise gr.Error(f"Speech generation failed: {error_message}")

    finally:
        release_generation_protection()

        with generation_state_lock:
            global_state.is_currently_generating = False
            global_state.stop_generation_requested = False

        # Drop the local tensor references before the memory cleanup runs.
        generated_audio_tensor = None
        cloned_voice_state_tensor = None

        memory_cleanup()
        trigger_background_cleanup_check()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/tts/manager.py
DELETED
|
@@ -1,341 +0,0 @@
|
|
| 1 |
-
#
|
| 2 |
-
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
# SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
#
|
| 5 |
-
|
| 6 |
-
import time
|
| 7 |
-
import tempfile
|
| 8 |
-
import threading
|
| 9 |
-
import torch
|
| 10 |
-
import scipy.io.wavfile
|
| 11 |
-
from pocket_tts import TTSModel
|
| 12 |
-
from config import (
|
| 13 |
-
AVAILABLE_VOICES,
|
| 14 |
-
DEFAULT_VOICE,
|
| 15 |
-
DEFAULT_MODEL_VARIANT,
|
| 16 |
-
DEFAULT_TEMPERATURE,
|
| 17 |
-
DEFAULT_LSD_DECODE_STEPS,
|
| 18 |
-
DEFAULT_EOS_THRESHOLD,
|
| 19 |
-
VOICE_STATE_CACHE_MAXIMUM_SIZE,
|
| 20 |
-
VOICE_STATE_CACHE_CLEANUP_THRESHOLD,
|
| 21 |
-
MODEL_LOAD_RETRY_ATTEMPTS
|
| 22 |
-
)
|
| 23 |
-
from ..core.state import (
|
| 24 |
-
temporary_files_registry,
|
| 25 |
-
temporary_files_lock,
|
| 26 |
-
set_text_to_speech_manager,
|
| 27 |
-
increment_model_usage,
|
| 28 |
-
decrement_model_usage,
|
| 29 |
-
is_model_in_use
|
| 30 |
-
)
|
| 31 |
-
from ..core.memory import (
|
| 32 |
-
force_garbage_collection,
|
| 33 |
-
memory_cleanup,
|
| 34 |
-
perform_memory_cleanup,
|
| 35 |
-
trigger_background_cleanup_check,
|
| 36 |
-
is_memory_usage_approaching_limit
|
| 37 |
-
)
|
| 38 |
-
|
| 39 |
-
class ModelNotLoadedError(RuntimeError):
    """``RuntimeError`` subclass for reporting that the TTS model is not loaded."""
|
| 41 |
-
|
| 42 |
-
class ModelLoadingError(RuntimeError):
    """``RuntimeError`` subclass raised when loading the TTS model fails."""
|
| 44 |
-
|
| 45 |
-
class TextToSpeechManager:
|
| 46 |
-
def __init__(self):
|
| 47 |
-
self.loaded_model = None
|
| 48 |
-
self.current_configuration = {}
|
| 49 |
-
self.voice_state_cache = {}
|
| 50 |
-
self.voice_state_cache_access_timestamps = {}
|
| 51 |
-
self.voice_state_cache_lock = threading.Lock()
|
| 52 |
-
self.model_lock = threading.RLock()
|
| 53 |
-
self.model_loading_in_progress = False
|
| 54 |
-
self.last_successful_configuration = None
|
| 55 |
-
|
| 56 |
-
def is_model_loaded(self):
    """Return ``True`` when ``self.loaded_model`` is set (checked under ``model_lock``)."""
    with self.model_lock:
        current_model = self.loaded_model
    return current_model is not None
|
| 59 |
-
|
| 60 |
-
def is_model_loading(self):
    """Return ``self.model_loading_in_progress`` (read under ``model_lock``)."""
    with self.model_lock:
        loading_flag = self.model_loading_in_progress
    return loading_flag
|
| 63 |
-
|
| 64 |
-
def _clear_voice_state_cache_internal(self):
|
| 65 |
-
with self.voice_state_cache_lock:
|
| 66 |
-
for voice_name in list(self.voice_state_cache.keys()):
|
| 67 |
-
voice_state_tensor = self.voice_state_cache.pop(voice_name, None)
|
| 68 |
-
|
| 69 |
-
if voice_state_tensor is not None:
|
| 70 |
-
try:
|
| 71 |
-
del voice_state_tensor
|
| 72 |
-
|
| 73 |
-
except Exception:
|
| 74 |
-
pass
|
| 75 |
-
|
| 76 |
-
self.voice_state_cache.clear()
|
| 77 |
-
self.voice_state_cache_access_timestamps.clear()
|
| 78 |
-
|
| 79 |
-
def load_or_get_model(
    self,
    model_variant,
    temperature,
    lsd_decode_steps,
    noise_clamp,
    eos_threshold
):
    """Return a model matching the requested settings, loading one if needed.

    Missing values fall back to the configured defaults; a falsy or
    non-positive ``noise_clamp`` is normalised to ``None``. When the loaded
    model already uses the same configuration it is returned as is, otherwise
    ``_load_model_with_retry`` is called while holding ``model_lock``.
    """
    if model_variant:
        variant_name = str(model_variant).strip()
    else:
        variant_name = str(DEFAULT_MODEL_VARIANT).strip()

    if temperature is None:
        temperature_value = DEFAULT_TEMPERATURE
    else:
        temperature_value = float(temperature)

    if lsd_decode_steps is None:
        decode_step_count = DEFAULT_LSD_DECODE_STEPS
    else:
        decode_step_count = int(lsd_decode_steps)

    clamp_value = None
    if noise_clamp and float(noise_clamp) > 0:
        clamp_value = float(noise_clamp)

    if eos_threshold is None:
        eos_threshold_value = DEFAULT_EOS_THRESHOLD
    else:
        eos_threshold_value = float(eos_threshold)

    requested_configuration = dict(
        variant=variant_name,
        temp=temperature_value,
        lsd_decode_steps=decode_step_count,
        noise_clamp=clamp_value,
        eos_threshold=eos_threshold_value,
    )

    with self.model_lock:
        already_loaded = (
            self.loaded_model is not None
            and self.current_configuration == requested_configuration
        )
        if already_loaded:
            return self.loaded_model

        return self._load_model_with_retry(requested_configuration)
|
| 106 |
-
|
| 107 |
-
def _load_model_with_retry(self, requested_configuration):
|
| 108 |
-
last_exception = None
|
| 109 |
-
|
| 110 |
-
for attempt_number in range(MODEL_LOAD_RETRY_ATTEMPTS):
|
| 111 |
-
try:
|
| 112 |
-
self.model_loading_in_progress = True
|
| 113 |
-
|
| 114 |
-
if self.loaded_model is not None:
|
| 115 |
-
self._clear_voice_state_cache_internal()
|
| 116 |
-
|
| 117 |
-
try:
|
| 118 |
-
del self.loaded_model
|
| 119 |
-
|
| 120 |
-
except Exception:
|
| 121 |
-
pass
|
| 122 |
-
|
| 123 |
-
self.loaded_model = None
|
| 124 |
-
memory_cleanup()
|
| 125 |
-
|
| 126 |
-
perform_memory_cleanup()
|
| 127 |
-
|
| 128 |
-
self.loaded_model = TTSModel.load_model(**requested_configuration)
|
| 129 |
-
|
| 130 |
-
if self.loaded_model is None:
|
| 131 |
-
raise ModelLoadingError("TTSModel.load_model returned None")
|
| 132 |
-
|
| 133 |
-
self.current_configuration = requested_configuration.copy()
|
| 134 |
-
self.last_successful_configuration = requested_configuration.copy()
|
| 135 |
-
self.voice_state_cache = {}
|
| 136 |
-
|
| 137 |
-
self.model_loading_in_progress = False
|
| 138 |
-
|
| 139 |
-
return self.loaded_model
|
| 140 |
-
|
| 141 |
-
except Exception as loading_exception:
|
| 142 |
-
last_exception = loading_exception
|
| 143 |
-
self.loaded_model = None
|
| 144 |
-
self.model_loading_in_progress = False
|
| 145 |
-
|
| 146 |
-
if attempt_number < MODEL_LOAD_RETRY_ATTEMPTS - 1:
|
| 147 |
-
memory_cleanup()
|
| 148 |
-
|
| 149 |
-
self.model_loading_in_progress = False
|
| 150 |
-
raise ModelLoadingError(f"Failed to load TTS model after {MODEL_LOAD_RETRY_ATTEMPTS} attempts: {str(last_exception)}")
|
| 151 |
-
|
| 152 |
-
def ensure_model_loaded(self):
    """Make sure a model is loaded, reloading one if necessary.

    Tries the last successful configuration first (when there is one) and
    then the default configuration. Returns ``True`` when a model is loaded
    afterwards and ``False`` when loading the defaults raised.
    """
    with self.model_lock:
        if self.loaded_model is not None:
            return True

        if self.last_successful_configuration is not None:
            try:
                self._load_model_with_retry(self.last_successful_configuration)
            except Exception:
                # Fall through to the default configuration below.
                pass
            else:
                return self.loaded_model is not None

        fallback_configuration = dict(
            variant=DEFAULT_MODEL_VARIANT,
            temp=DEFAULT_TEMPERATURE,
            lsd_decode_steps=DEFAULT_LSD_DECODE_STEPS,
            noise_clamp=None,
            eos_threshold=DEFAULT_EOS_THRESHOLD,
        )

        try:
            self._load_model_with_retry(fallback_configuration)
        except Exception:
            return False

        return self.loaded_model is not None
|
| 179 |
-
|
| 180 |
-
def clear_voice_state_cache_completely(self):
    """Empty the voice-state cache under ``model_lock``, then force a garbage collection."""
    self.model_lock.acquire()
    try:
        self._clear_voice_state_cache_internal()
    finally:
        self.model_lock.release()

    force_garbage_collection()
|
| 185 |
-
|
| 186 |
-
def evict_least_recently_used_voice_states(self):
    """
    Remove the least recently accessed entries from the voice state cache.

    Runs under ``self.voice_state_cache_lock``. Nothing happens when the cache
    is empty. Otherwise the number of entries to evict depends on the cache size:

    * at or below ``VOICE_STATE_CACHE_CLEANUP_THRESHOLD``: half of the entries
      (integer division), but never fewer than one;
    * above the threshold: as many as needed to get back down to the threshold.

    Candidates are the names recorded in
    ``self.voice_state_cache_access_timestamps``, oldest timestamp first. Each
    evicted name is removed from both the cache and the timestamp map, after
    which a garbage collection pass is forced.
    """
    with self.voice_state_cache_lock:
        cached_entry_count = len(self.voice_state_cache)

        if cached_entry_count == 0:
            return

        if cached_entry_count <= VOICE_STATE_CACHE_CLEANUP_THRESHOLD:
            eviction_quota = max(1, cached_entry_count // 2)
        else:
            eviction_quota = cached_entry_count - VOICE_STATE_CACHE_CLEANUP_THRESHOLD

        access_timestamps = self.voice_state_cache_access_timestamps
        oldest_first = sorted(
            access_timestamps,
            key=lambda cached_name: access_timestamps.get(cached_name, 0),
        )

        # Slicing caps the quota at the number of tracked names.
        for stale_voice_name in oldest_first[:eviction_quota]:
            # The popped state is never bound, so its reference is released here.
            self.voice_state_cache.pop(stale_voice_name, None)
            access_timestamps.pop(stale_voice_name, None)

        force_garbage_collection()
|
| 237 |
-
|
| 238 |
-
def get_voice_state_for_preset(self, voice_name):
    """
    Return the model state for a preset voice, computing and caching it on a miss.

    Args:
        voice_name: Requested preset. Any value not found in
            ``AVAILABLE_VOICES`` is replaced by ``DEFAULT_VOICE``.

    Returns:
        The cached state when one exists for the voice; otherwise the result of
        ``self.loaded_model.get_state_for_audio_prompt(...)``, which is also
        stored in ``self.voice_state_cache`` before being returned.

    Raises:
        ModelNotLoadedError: when no model is loaded and
            ``self.ensure_model_loaded()`` reports failure.
    """
    if voice_name in AVAILABLE_VOICES:
        validated_voice = voice_name
    else:
        validated_voice = DEFAULT_VOICE

    with self.voice_state_cache_lock:
        if validated_voice in self.voice_state_cache:
            # Refresh the access time so this entry counts as recently used.
            self.voice_state_cache_access_timestamps[validated_voice] = time.time()
            return self.voice_state_cache[validated_voice]

    # Make room before computing a new state: first on memory pressure,
    # then again if the cache is still at or above its maximum size.
    if is_memory_usage_approaching_limit():
        self.evict_least_recently_used_voice_states()

    if len(self.voice_state_cache) >= VOICE_STATE_CACHE_MAXIMUM_SIZE:
        self.evict_least_recently_used_voice_states()

    increment_model_usage()

    try:
        with self.model_lock:
            if self.loaded_model is None and not self.ensure_model_loaded():
                raise ModelNotLoadedError("TTS model is not loaded and could not be reloaded. Please try again.")

            computed_voice_state = self.loaded_model.get_state_for_audio_prompt(
                audio_conditioning=validated_voice,
                truncate=False,
            )

        with self.voice_state_cache_lock:
            self.voice_state_cache[validated_voice] = computed_voice_state
            self.voice_state_cache_access_timestamps[validated_voice] = time.time()

        return computed_voice_state
    finally:
        # Always balance the increment above, even when loading or inference fails.
        decrement_model_usage()
|
| 274 |
-
|
| 275 |
-
def get_voice_state_for_clone(self, audio_file_path, prepared_audio_path=None):
    """
    Compute a model state from a user-supplied audio prompt.

    Args:
        audio_file_path: Path used when ``prepared_audio_path`` is None.
        prepared_audio_path: Optional path that takes precedence over
            ``audio_file_path`` whenever it is not None.

    Returns:
        The value returned by ``self.loaded_model.get_state_for_audio_prompt``
        for the chosen path. The result is not written to the voice state cache.

    Raises:
        ModelNotLoadedError: when no model is loaded and
            ``self.ensure_model_loaded()`` reports failure.
    """
    if prepared_audio_path is None:
        audio_path_to_use = audio_file_path
    else:
        audio_path_to_use = prepared_audio_path

    increment_model_usage()

    try:
        with self.model_lock:
            if self.loaded_model is None and not self.ensure_model_loaded():
                raise ModelNotLoadedError("TTS model is not loaded and could not be reloaded. Please try again.")

            return self.loaded_model.get_state_for_audio_prompt(
                audio_conditioning=audio_path_to_use,
                truncate=False,
            )
    finally:
        # Always balance the increment above, even when loading or inference fails.
        decrement_model_usage()
|
| 295 |
-
|
| 296 |
-
def generate_audio(self, text_content, voice_state, frames_after_eos, enable_custom_frames):
    """
    Synthesize audio for ``text_content`` with the loaded model.

    Args:
        text_content: Text passed to the model as ``text_to_generate``.
        voice_state: Model state passed as ``model_state``; the model is asked
            to copy it (``copy_state=True``).
        frames_after_eos: Converted with ``int()`` and forwarded only when
            ``enable_custom_frames`` is truthy.
        enable_custom_frames: When falsy, ``frames_after_eos=None`` is forwarded
            to the model instead.

    Returns:
        Whatever ``self.loaded_model.generate_audio(...)`` returns.

    Raises:
        ModelNotLoadedError: when no model is loaded and
            ``self.ensure_model_loaded()`` reports failure.
    """
    increment_model_usage()

    try:
        with self.model_lock:
            if self.loaded_model is None and not self.ensure_model_loaded():
                raise ModelNotLoadedError("TTS model is not loaded and could not be reloaded. Please try again.")

            if enable_custom_frames:
                processed_frames = int(frames_after_eos)
            else:
                processed_frames = None

            generated_audio = self.loaded_model.generate_audio(
                model_state=voice_state,
                text_to_generate=text_content,
                frames_after_eos=processed_frames,
                copy_state=True,
            )

            force_garbage_collection()

            return generated_audio
    finally:
        # Always balance the increment above, even when generation fails.
        decrement_model_usage()
|
| 320 |
-
|
| 321 |
-
def save_audio_to_file(self, audio_tensor):
    """
    Write generated audio to a temporary WAV file and return its path.

    The sample rate is read from ``self.loaded_model.sample_rate`` while
    ``self.model_lock`` is held. The new file's path is recorded in
    ``temporary_files_registry`` with the current time, and
    ``trigger_background_cleanup_check()`` is called before returning.

    Args:
        audio_tensor: Object exposing ``.numpy()``; the resulting array is
            handed to ``scipy.io.wavfile.write``.

    Returns:
        str: Path of the written ``.wav`` file. The file is created with
        ``delete=False``, so it outlives this call.

    Raises:
        ModelNotLoadedError: when ``self.loaded_model`` is None.
    """
    import os

    with self.model_lock:
        if self.loaded_model is None:
            raise ModelNotLoadedError("TTS model is not loaded. Cannot determine sample rate.")

        audio_sample_rate = self.loaded_model.sample_rate

    audio_numpy_data = audio_tensor.numpy()

    # Reserve a unique path and close the handle immediately. Leaving it open
    # leaks one file descriptor per call and prevents reopening by name on Windows.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
        output_file_path = output_file.name

    try:
        scipy.io.wavfile.write(output_file_path, audio_sample_rate, audio_numpy_data)
    except Exception:
        # The file is not registered yet, so nothing else would ever remove it.
        try:
            os.remove(output_file_path)
        except OSError:
            pass
        raise

    with temporary_files_lock:
        temporary_files_registry[output_file_path] = time.time()

    trigger_background_cleanup_check()

    return output_file_path
|
| 339 |
-
|
| 340 |
-
# Create the shared TextToSpeechManager instance when this module is imported
# and pass it to set_text_to_speech_manager.
text_to_speech_manager = TextToSpeechManager()
|
| 341 |
-
set_text_to_speech_manager(text_to_speech_manager)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/ui/handlers.py
DELETED
|
@@ -1,58 +0,0 @@
|
|
| 1 |
-
#
|
| 2 |
-
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
# SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
#
|
| 5 |
-
|
| 6 |
-
import gradio as gr
|
| 7 |
-
from config import VOICE_MODE_PRESET, DEFAULT_VOICE
|
| 8 |
-
from ..validation.text import validate_text_input
|
| 9 |
-
|
| 10 |
-
def switch_to_generating_state(ui_state):
    """
    Build the UI updates emitted when generation starts.

    Args:
        ui_state: Accepted for signature compatibility; it is not read here.

    Returns:
        tuple: Three ``gr.update`` payloads (hidden; visible and interactive;
        hidden) followed by a fresh state dict ``{"generating": True}``.
    """
    first_update = gr.update(visible=False)
    second_update = gr.update(visible=True, interactive=True)
    third_update = gr.update(visible=False)

    return first_update, second_update, third_update, {"generating": True}
|
| 19 |
-
|
| 20 |
-
def switch_to_idle_state(text_content, ui_state):
    """
    Build the UI updates emitted when generation is no longer running.

    Args:
        text_content: Current input text; it decides whether the third
            component is shown and whether the first one is interactive.
        ui_state: Accepted for signature compatibility; it is not read here.

    Returns:
        tuple: Three ``gr.update`` payloads followed by ``{"generating": False}``:
        a visible component that is interactive only when ``validate_text_input``
        accepts the text, a hidden component, and a component that is visible
        only when the text contains non-whitespace characters.
    """
    # Visible only when there is something other than whitespace to act on.
    should_show_clear = bool(text_content and text_content.strip())

    is_valid_text, _ignored_message = validate_text_input(text_content)

    first_update = gr.update(visible=True, interactive=is_valid_text)
    second_update = gr.update(visible=False)
    third_update = gr.update(visible=should_show_clear)

    return first_update, second_update, third_update, {"generating": False}
|
| 34 |
-
|
| 35 |
-
def perform_clear_action():
    """
    Produce the values that reset the form to its initial state.

    Returns:
        tuple: Six items in order: an empty string, ``None``, a ``gr.update``
        that hides a component, ``VOICE_MODE_PRESET``, ``DEFAULT_VOICE`` and
        ``None``.
    """
    # NOTE(review): the order presumably mirrors the outputs list wired to
    # this handler in the UI code - confirm against the caller.
    reset_values = [
        "",
        None,
        gr.update(visible=False),
        VOICE_MODE_PRESET,
        DEFAULT_VOICE,
        None,
    ]

    return tuple(reset_values)
|
| 44 |
-
|
| 45 |
-
def create_example_handler(example_text, example_voice):
    """
    Create a zero-argument callback bound to one example.

    Args:
        example_text: Text the callback returns as its first value.
        example_voice: Voice the callback returns as its third value.

    Returns:
        callable: A function that, when called, returns
        ``(example_text, VOICE_MODE_PRESET, example_voice)``.
    """
    def set_example_values():
        # VOICE_MODE_PRESET is looked up when the callback runs, not when it is created.
        selected_values = (example_text, VOICE_MODE_PRESET, example_voice)
        return selected_values

    return set_example_values
|
| 50 |
-
|
| 51 |
-
def format_example_button_label(example_text, example_voice, max_text_length=40):
    """
    Format the caption for an example button.

    Args:
        example_text: Example sentence to show.
        example_voice: Voice name placed in square brackets before the text.
        max_text_length: Longest text shown in full; longer text is cut to this
            many characters and suffixed with ``"..."``.

    Returns:
        str: ``"[<voice>] <text>"`` with the text truncated when necessary.
    """
    if len(example_text) > max_text_length:
        label_text = example_text[:max_text_length] + "..."
    else:
        label_text = example_text

    return "[{}] {}".format(example_voice, label_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/ui/state.py
DELETED
|
@@ -1,43 +0,0 @@
|
|
| 1 |
-
#
|
| 2 |
-
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
# SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
#
|
| 5 |
-
|
| 6 |
-
import gradio as gr
|
| 7 |
-
from config import MAXIMUM_INPUT_LENGTH, VOICE_MODE_CLONE
|
| 8 |
-
from ..validation.text import validate_text_input
|
| 9 |
-
|
| 10 |
-
def check_generate_button_state(text_content, ui_state):
    """
    Decide whether the generate control should be interactive.

    Args:
        text_content: Current input text, checked with ``validate_text_input``.
        ui_state: Mapping whose ``"generating"`` entry (default False) marks a
            generation in progress.

    Returns:
        A ``gr.update`` with ``interactive=False`` while generating; otherwise
        ``interactive`` follows the validation result.
    """
    if ui_state.get("generating", False):
        # The text is not validated while a generation is in progress.
        can_generate = False
    else:
        can_generate, _ignored_message = validate_text_input(text_content)

    return gr.update(interactive=can_generate)
|
| 17 |
-
|
| 18 |
-
def calculate_character_count_display(text_content):
    """
    Render the "<count> / <maximum>" character counter as an HTML snippet.

    Args:
        text_content: Current input text; a falsy value counts as 0 characters.

    Returns:
        str: A right-aligned ``<div>`` whose text uses the error colour when the
        count exceeds ``MAXIMUM_INPUT_LENGTH`` and the subdued body colour
        otherwise.
    """
    if text_content:
        character_count = len(text_content)
    else:
        character_count = 0

    if character_count > MAXIMUM_INPUT_LENGTH:
        display_color = "var(--error-text-color)"
    else:
        display_color = "var(--body-text-color-subdued)"

    return f"<div style='text-align: right; padding: 4px 0;'><span style='color: {display_color}; font-size: 0.85em;'>{character_count} / {MAXIMUM_INPUT_LENGTH}</span></div>"
|
| 28 |
-
|
| 29 |
-
def determine_clear_button_visibility(text_content, ui_state):
    """
    Decide whether the clear control should be visible.

    Args:
        text_content: Current input text.
        ui_state: Mapping whose ``"generating"`` entry (default False) marks a
            generation in progress.

    Returns:
        A ``gr.update`` with ``visible=False`` while generating; otherwise
        visible only when the text contains non-whitespace characters.
    """
    is_generating = ui_state.get("generating", False)

    # Short-circuits: the text is not inspected while a generation is running.
    should_show_clear = (not is_generating) and bool(text_content and text_content.strip())

    return gr.update(visible=should_show_clear)
|
| 37 |
-
|
| 38 |
-
def update_voice_mode_visibility(voice_mode_value):
    """
    Toggle two components depending on the selected voice mode.

    Args:
        voice_mode_value: Selected mode, compared against ``VOICE_MODE_CLONE``.

    Returns:
        tuple: Two ``gr.update`` payloads. In clone mode the first is hidden and
        the second visible; in any other mode it is the other way round.
    """
    clone_selected = bool(voice_mode_value == VOICE_MODE_CLONE)

    first_update = gr.update(visible=not clone_selected)
    second_update = gr.update(visible=clone_selected)

    return first_update, second_update
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/validation/text.py
DELETED
|
@@ -1,20 +0,0 @@
|
|
| 1 |
-
#
|
| 2 |
-
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
|
| 3 |
-
# SPDX-License-Identifier: Apache-2.0
|
| 4 |
-
#
|
| 5 |
-
|
| 6 |
-
from config import MAXIMUM_INPUT_LENGTH
|
| 7 |
-
|
| 8 |
-
def validate_text_input(text_content):
    """
    Validate user text and return it stripped of surrounding whitespace.

    Args:
        text_content: Candidate input; anything falsy or not a ``str`` is rejected.

    Returns:
        tuple: ``(True, stripped_text)`` when the input is acceptable;
        ``(False, "")`` when it is missing, not a string, or only whitespace;
        ``(False, message)`` when the stripped text is longer than
        ``MAXIMUM_INPUT_LENGTH``.
    """
    rejected_without_message = (False, "")

    if not (text_content and isinstance(text_content, str)):
        return rejected_without_message

    stripped_text = text_content.strip()

    if len(stripped_text) == 0:
        return rejected_without_message

    if len(stripped_text) <= MAXIMUM_INPUT_LENGTH:
        return True, stripped_text

    return False, f"Input exceeds maximum length of {MAXIMUM_INPUT_LENGTH} characters."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|