tts

Sleeping

App Files Files Community

hadadrjt committed on Jan 21

Commit

cc372ac

1 Parent(s): dae9fa5

Pocket TTS: System stability tuning.

Browse files

Files changed (17) hide show

Dockerfile +11 -1
accelerator/CMakeLists.txt +36 -0
accelerator/include/accelerator_core.hpp +69 -0
accelerator/include/audio_processor.hpp +84 -0
accelerator/include/ipc_handler.hpp +107 -0
accelerator/include/memory_pool.hpp +79 -0
accelerator/include/thread_pool.hpp +83 -0
accelerator/src/accelerator_core.cpp +485 -0
accelerator/src/audio_processor.cpp +352 -0
accelerator/src/ipc_handler.cpp +226 -0
accelerator/src/main.cpp +79 -0
accelerator/src/memory_pool.cpp +216 -0
accelerator/src/thread_pool.cpp +84 -0
app.py +15 -5
config.py +7 -1
src/accelerator/client.py +442 -0
src/audio/converter.py +28 -0

Dockerfile CHANGED Viewed

@@ -7,4 +7,14 @@ FROM hadadrjt/pocket-tts:hf
 WORKDIR /app
-COPY . .

 WORKDIR /app
+COPY . .
+# Build the C++ accelerator out-of-tree in ./build (Release configuration),
+# move the resulting pocket_tts_accelerator binary into ./bin next to the
+# sources ("$PWD/../bin" is resolved from inside ./build), mark it executable,
+# and finally delete the accelerator source directory. The ./build directory
+# itself is not removed by this step.
+RUN mkdir build \
+    && cd build \
+    && cmake -DCMAKE_BUILD_TYPE=Release ../accelerator \
+    && make -j$(nproc) \
+    && mkdir -p "$PWD/../bin" \
+    && mv pocket_tts_accelerator "$PWD/../bin/" \
+    && chmod +x "$PWD/../bin/pocket_tts_accelerator" \
+    && cd .. \
+    && rm -rf accelerator

accelerator/CMakeLists.txt ADDED Viewed

	@@ -0,0 +1,36 @@

+#
+# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+# SPDX-License-Identifier: Apache-2.0
+#
+# Build script for the pocket_tts_accelerator executable (C++17, compiler
+# extensions disabled).
+cmake_minimum_required(VERSION 3.31.6)
+project(pocket_tts_accelerator VERSION 0.0.0 LANGUAGES CXX)
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+# These flags are appended to CMAKE_CXX_FLAGS, so they apply to every build type.
+# NOTE(review): -march=native targets the CPU of the machine that runs the
+# build; confirm the image is always executed on a compatible CPU.
+# NOTE(review): -ffast-math relaxes IEEE floating-point rules; confirm the
+# audio code does not rely on NaN/Inf handling.
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -ffast-math -funroll-loops")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wpedantic")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
+find_package(Threads REQUIRED)
+set(ACCELERATOR_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include)
+# Every translation unit of the accelerator; paths are relative to this file.
+set(ACCELERATOR_SOURCES
+    src/main.cpp
+    src/accelerator_core.cpp
+    src/audio_processor.cpp
+    src/ipc_handler.cpp
+    src/memory_pool.cpp
+    src/thread_pool.cpp
+)
+add_executable(pocket_tts_accelerator ${ACCELERATOR_SOURCES})
+target_include_directories(pocket_tts_accelerator PRIVATE ${ACCELERATOR_INCLUDE_DIR})
+target_link_libraries(pocket_tts_accelerator PRIVATE Threads::Threads)
+install(TARGETS pocket_tts_accelerator DESTINATION bin)

accelerator/include/accelerator_core.hpp ADDED Viewed

	@@ -0,0 +1,69 @@

+//
+// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+// SPDX-License-Identifier: Apache-2.0
+//
+#ifndef POCKET_TTS_ACCELERATOR_CORE_HPP
+#define POCKET_TTS_ACCELERATOR_CORE_HPP
+#include "audio_processor.hpp"
+#include "ipc_handler.hpp"
+#include "memory_pool.hpp"
+#include "thread_pool.hpp"
+#include <atomic>
+#include <memory>
+#include <string>
+namespace pocket_tts_accelerator {
+// Settings consumed by AcceleratorCore when it builds its subsystems.
+// The members have no default initializers; see
+// AcceleratorCore::get_default_configuration() for a fully populated value.
+struct AcceleratorConfiguration {
+    // Thread count handed to the ThreadPool constructor.
+    std::size_t number_of_worker_threads;
+    // Size in bytes handed to the MemoryPool constructor.
+    std::size_t memory_pool_size_bytes;
+    // Socket path handed to the IpcHandler constructor.
+    std::string ipc_socket_path;
+    // When false, AcceleratorCore::log_message() prints nothing.
+    bool enable_verbose_logging;
+};
+// Top-level object of the accelerator process. It owns the memory pool, the
+// thread pool, the audio processor and the IPC handler, and maps IPC commands
+// onto the handle_*_command member functions. Non-copyable.
+class AcceleratorCore {
+public:
+    // Stores a copy of the configuration; no subsystem is created until initialize().
+    explicit AcceleratorCore(const AcceleratorConfiguration& configuration);
+    // Calls shutdown().
+    ~AcceleratorCore();
+    AcceleratorCore(const AcceleratorCore&) = delete;
+    AcceleratorCore& operator=(const AcceleratorCore&) = delete;
+    // Creates all subsystems and starts the IPC server.
+    // Returns false when the IPC server cannot be started.
+    bool initialize();
+    // Blocks, polling every 100 ms, until a shutdown has been requested.
+    void run();
+    // Stops the IPC server and thread pool and resets the memory pool.
+    // Only the first call has any effect.
+    void shutdown();
+    // True while initialized and no shutdown has been requested.
+    bool is_running() const;
+    // Returns "Not initialized", "Shutting down" or "Running".
+    std::string get_status_string() const;
+    // 2 worker threads, 64 MiB pool, /tmp/pocket_tts_accelerator.sock, verbose logging on.
+    static AcceleratorConfiguration get_default_configuration();
+private:
+    void register_all_command_handlers();
+    void setup_signal_handlers();
+    // Command handlers: each receives the raw request payload and returns the
+    // raw response payload.
+    std::vector<std::uint8_t> handle_ping_command(const std::vector<std::uint8_t>& payload);
+    std::vector<std::uint8_t> handle_process_audio_command(const std::vector<std::uint8_t>& payload);
+    std::vector<std::uint8_t> handle_convert_to_mono_command(const std::vector<std::uint8_t>& payload);
+    std::vector<std::uint8_t> handle_convert_to_pcm_command(const std::vector<std::uint8_t>& payload);
+    std::vector<std::uint8_t> handle_resample_audio_command(const std::vector<std::uint8_t>& payload);
+    std::vector<std::uint8_t> handle_get_memory_stats_command(const std::vector<std::uint8_t>& payload);
+    std::vector<std::uint8_t> handle_clear_memory_pool_command(const std::vector<std::uint8_t>& payload);
+    std::vector<std::uint8_t> handle_shutdown_command(const std::vector<std::uint8_t>& payload);
+    // Prints a timestamped line to stdout when verbose logging is enabled.
+    void log_message(const std::string& message) const;
+    AcceleratorConfiguration config;
+    // Subsystems are null until initialize() has created them.
+    std::unique_ptr<MemoryPool> memory_pool;
+    std::unique_ptr<ThreadPool> thread_pool;
+    std::unique_ptr<AudioProcessor> audio_processor;
+    std::unique_ptr<IpcHandler> ipc_handler;
+    std::atomic<bool> is_initialized;
+    // Set by the first shutdown() call; initialize() does not clear it again.
+    std::atomic<bool> should_shutdown;
+};
+}
+#endif

accelerator/include/audio_processor.hpp ADDED Viewed

	@@ -0,0 +1,84 @@

+//
+// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+// SPDX-License-Identifier: Apache-2.0
+//
+#ifndef POCKET_TTS_AUDIO_PROCESSOR_HPP
+#define POCKET_TTS_AUDIO_PROCESSOR_HPP
+#include "memory_pool.hpp"
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <vector>
+namespace pocket_tts_accelerator {
+// Header fields of a WAV file. The member sizes add up to 44 bytes and every
+// member sits on its natural alignment.
+// NOTE(review): presumably read from / written to disk verbatim as the
+// canonical RIFF/WAVE PCM header - confirm in audio_processor.cpp, including
+// the byte-order assumption.
+struct WavFileHeader {
+    char riff_marker[4];
+    std::uint32_t file_size;
+    char wave_marker[4];
+    char format_marker[4];
+    std::uint32_t format_chunk_size;
+    std::uint16_t audio_format;
+    std::uint16_t number_of_channels;
+    std::uint32_t sample_rate;
+    std::uint32_t byte_rate;
+    std::uint16_t block_align;
+    std::uint16_t bits_per_sample;
+    char data_marker[4];
+    std::uint32_t data_size;
+};
+// Decoded audio held as 16-bit samples plus the format it was read with or
+// should be written with. Scalar members carry default initializers so that a
+// default-constructed AudioData is a well-defined "invalid, empty" value
+// instead of holding indeterminate data.
+struct AudioData {
+    std::vector<std::int16_t> samples;
+    std::uint32_t sample_rate = 0;
+    std::uint16_t number_of_channels = 0;
+    std::uint16_t bits_per_sample = 0;
+    // False until a producer explicitly marks the data as usable.
+    bool is_valid = false;
+    // Human-readable reason, reported to clients when is_valid is false.
+    std::string error_message;
+};
+// Outcome of one AudioProcessor operation. Scalar members carry default
+// initializers so that a default-constructed result reads as "failed, no
+// output" instead of holding indeterminate data.
+struct AudioProcessingResult {
+    std::vector<std::int16_t> processed_samples;
+    std::uint32_t output_sample_rate = 0;
+    // False until the operation explicitly reports success.
+    bool success = false;
+    // Human-readable reason, reported to clients when success is false.
+    std::string error_message;
+};
+// WAV file reading/writing and sample-level conversions. Holds a reference to
+// a MemoryPool supplied by the caller, so that pool must outlive this object.
+// Non-copyable. Operations report failure through the is_valid / success flag
+// and error_message of their return value.
+class AudioProcessor {
+public:
+    explicit AudioProcessor(MemoryPool& shared_memory_pool);
+    ~AudioProcessor();
+    AudioProcessor(const AudioProcessor&) = delete;
+    AudioProcessor& operator=(const AudioProcessor&) = delete;
+    AudioData read_wav_file(const std::string& file_path);
+    // Returns false when the file could not be written.
+    bool write_wav_file(const std::string& file_path, const AudioData& audio_data);
+    AudioProcessingResult convert_to_mono(const AudioData& input_audio);
+    AudioProcessingResult convert_to_pcm_int16(const AudioData& input_audio);
+    AudioProcessingResult resample_audio(const AudioData& input_audio, std::uint32_t target_sample_rate);
+    AudioProcessingResult normalize_audio(const AudioData& input_audio, float target_peak_level);
+    // Reads input_file_path, processes it and writes output_file_path.
+    // NOTE(review): the exact pipeline is defined in audio_processor.cpp.
+    AudioProcessingResult process_audio_for_voice_cloning(
+        const std::string& input_file_path,
+        const std::string& output_file_path
+    );
+    static bool validate_wav_header(const WavFileHeader& header);
+    static std::size_t calculate_audio_duration_milliseconds(const AudioData& audio_data);
+private:
+    // Sample-format helpers; each converts sample_count input samples into
+    // the caller-provided int16 output buffer.
+    void convert_float32_to_int16(const float* input, std::int16_t* output, std::size_t sample_count);
+    void convert_int32_to_int16(const std::int32_t* input, std::int16_t* output, std::size_t sample_count);
+    void convert_uint8_to_int16(const std::uint8_t* input, std::int16_t* output, std::size_t sample_count);
+    void mix_channels_to_mono(const std::int16_t* input, std::int16_t* output, std::size_t frame_count, std::uint16_t channel_count);
+    MemoryPool& memory_pool;
+};
+}
+#endif

accelerator/include/ipc_handler.hpp ADDED Viewed

	@@ -0,0 +1,107 @@

+//
+// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+// SPDX-License-Identifier: Apache-2.0
+//
+#ifndef POCKET_TTS_IPC_HANDLER_HPP
+#define POCKET_TTS_IPC_HANDLER_HPP
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <thread>
+#include <unordered_map>
+#include <vector>
+namespace pocket_tts_accelerator {
+// Command identifiers carried in RequestHeader::command_type. The numeric
+// values are part of the wire protocol shared with the client.
+enum class CommandType : std::uint32_t {
+    PING = 0,
+    PROCESS_AUDIO = 1,
+    CONVERT_TO_MONO = 2,
+    CONVERT_TO_PCM = 3,
+    RESAMPLE_AUDIO = 4,
+    GET_MEMORY_STATS = 5,
+    CLEAR_MEMORY_POOL = 6,
+    SHUTDOWN = 7,
+    UNKNOWN = 255
+};
+// Status codes carried in ResponseHeader::status_code. The numeric values
+// are part of the wire protocol shared with the client.
+enum class ResponseStatus : std::uint32_t {
+    SUCCESS = 0,
+    ERROR_INVALID_COMMAND = 1,
+    ERROR_FILE_NOT_FOUND = 2,
+    ERROR_PROCESSING_FAILED = 3,
+    ERROR_MEMORY_ALLOCATION = 4,
+    ERROR_INTERNAL = 5
+};
+// Fixed 16-byte header (four 32-bit fields) that precedes a request payload.
+struct RequestHeader {
+    // NOTE(review): presumably compared with IpcHandler::PROTOCOL_MAGIC_NUMBER - confirm in ipc_handler.cpp.
+    std::uint32_t magic_number;
+    // Numeric value of a CommandType enumerator.
+    std::uint32_t command_type;
+    // Number of payload bytes that follow the header.
+    std::uint32_t payload_size;
+    std::uint32_t request_id;
+};
+// Fixed 16-byte header (four 32-bit fields) that precedes a response payload.
+struct ResponseHeader {
+    std::uint32_t magic_number;
+    // Numeric value of a ResponseStatus enumerator.
+    std::uint32_t status_code;
+    // Number of payload bytes that follow the header.
+    std::uint32_t payload_size;
+    // NOTE(review): presumably echoes RequestHeader::request_id - confirm in ipc_handler.cpp.
+    std::uint32_t request_id;
+};
+// Payload of the audio commands (PROCESS_AUDIO, CONVERT_TO_MONO,
+// CONVERT_TO_PCM, RESAMPLE_AUDIO): 1032 bytes copied verbatim from the request.
+struct ProcessAudioRequest {
+    // File paths in fixed 512-byte fields. The bytes come straight from the
+    // client, so a terminating NUL inside the field is not guaranteed.
+    char input_file_path[512];
+    char output_file_path[512];
+    // Read only by the RESAMPLE_AUDIO handler in accelerator_core.cpp.
+    std::uint32_t target_sample_rate;
+    // Not read by any handler in accelerator_core.cpp.
+    std::uint32_t options_flags;
+};
+// Payload returned for GET_MEMORY_STATS: three 64-bit counters (24 bytes)
+// copied verbatim into the response, filled from the MemoryPool getters of
+// the same names.
+struct MemoryStatsResponse {
+    std::uint64_t total_allocated_bytes;
+    std::uint64_t total_used_bytes;
+    std::uint64_t block_count;
+};
+// Socket server that receives framed requests and dispatches them to the
+// handlers registered per CommandType. Non-copyable.
+// NOTE(review): transport details (socket type, threading of client
+// connections) live in ipc_handler.cpp - confirm there.
+class IpcHandler {
+public:
+    // A handler receives the request payload and returns the response payload.
+    using CommandHandlerFunction = std::function<std::vector<std::uint8_t>(const std::vector<std::uint8_t>&)>;
+    explicit IpcHandler(const std::string& socket_path);
+    ~IpcHandler();
+    IpcHandler(const IpcHandler&) = delete;
+    IpcHandler& operator=(const IpcHandler&) = delete;
+    // Returns false when the server could not be started.
+    bool start_server();
+    void stop_server();
+    bool is_running() const;
+    void register_command_handler(CommandType command_type, CommandHandlerFunction handler);
+    // NOTE(review): presumably invoked when a SHUTDOWN command arrives - confirm in ipc_handler.cpp.
+    void set_shutdown_callback(std::function<void()> callback);
+    // 0x50545453 is the ASCII byte sequence 'P' 'T' 'T' 'S'.
+    static constexpr std::uint32_t PROTOCOL_MAGIC_NUMBER = 0x50545453;
+    // 16 MiB.
+    static constexpr std::size_t MAXIMUM_PAYLOAD_SIZE = 16 * 1024 * 1024;
+    static constexpr int CONNECTION_BACKLOG = 5;
+private:
+    void accept_connections_loop();
+    void handle_client_connection(int client_socket_fd);
+    bool send_response(int socket_fd, const ResponseHeader& header, const std::vector<std::uint8_t>& payload);
+    bool receive_request(int socket_fd, RequestHeader& header, std::vector<std::uint8_t>& payload);
+    std::string socket_file_path;
+    int server_socket_fd;
+    std::atomic<bool> is_server_running;
+    std::thread accept_thread;
+    // NOTE(review): presumably guards command_handlers - confirm in ipc_handler.cpp.
+    std::mutex handlers_mutex;
+    std::unordered_map<CommandType, CommandHandlerFunction> command_handlers;
+    std::function<void()> shutdown_callback;
+};
+}
+#endif

accelerator/include/memory_pool.hpp ADDED Viewed

	@@ -0,0 +1,79 @@

+//
+// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+// SPDX-License-Identifier: Apache-2.0
+//
+#ifndef POCKET_TTS_MEMORY_POOL_HPP
+#define POCKET_TTS_MEMORY_POOL_HPP
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+#include <vector>
+namespace pocket_tts_accelerator {
+// One buffer owned by a MemoryPool, together with its bookkeeping.
+struct MemoryBlock {
+    // Owning pointer to the block's bytes.
+    std::unique_ptr<std::uint8_t[]> data;
+    std::size_t block_size;
+    bool is_in_use;
+    // NOTE(review): unit and clock are defined by MemoryPool::get_current_timestamp() in memory_pool.cpp.
+    std::uint64_t last_access_timestamp;
+};
+// Pool of byte buffers handed out through allocate() and returned through
+// deallocate(). Neither copyable nor movable.
+// NOTE(review): reuse policy, size limit enforcement and locking are
+// implemented in memory_pool.cpp - confirm there before relying on them.
+class MemoryPool {
+public:
+    // The default size is 64 MiB.
+    explicit MemoryPool(std::size_t initial_pool_size_bytes = 64 * 1024 * 1024);
+    ~MemoryPool();
+    MemoryPool(const MemoryPool&) = delete;
+    MemoryPool& operator=(const MemoryPool&) = delete;
+    MemoryPool(MemoryPool&&) = delete;
+    MemoryPool& operator=(MemoryPool&&) = delete;
+    std::uint8_t* allocate(std::size_t requested_size_bytes);
+    void deallocate(std::uint8_t* pointer);
+    void clear_unused_blocks();
+    void reset_pool();
+    std::size_t get_total_allocated_bytes() const;
+    std::size_t get_total_used_bytes() const;
+    std::size_t get_block_count() const;
+private:
+    std::size_t find_suitable_block_index(std::size_t requested_size) const;
+    void create_new_block(std::size_t block_size);
+    std::uint64_t get_current_timestamp() const;
+    std::vector<MemoryBlock> memory_blocks;
+    // Maps a pointer returned by allocate() to its index in memory_blocks.
+    std::unordered_map<std::uint8_t*, std::size_t> pointer_to_block_index;
+    // mutable so that the const getters are able to lock it.
+    mutable std::mutex pool_mutex;
+    std::size_t total_allocated_bytes;
+    std::size_t total_used_bytes;
+    std::size_t maximum_pool_size_bytes;
+};
+// Move-only handle tying one MemoryPool allocation to an object's lifetime.
+// NOTE(review): the destructor presumably returns the buffer to the pool -
+// confirm in memory_pool.cpp. The pool is held by raw pointer, so it must
+// outlive every handle created from it.
+class ScopedMemoryAllocation {
+public:
+    ScopedMemoryAllocation(MemoryPool& pool, std::size_t size);
+    ~ScopedMemoryAllocation();
+    ScopedMemoryAllocation(const ScopedMemoryAllocation&) = delete;
+    ScopedMemoryAllocation& operator=(const ScopedMemoryAllocation&) = delete;
+    ScopedMemoryAllocation(ScopedMemoryAllocation&& other) noexcept;
+    ScopedMemoryAllocation& operator=(ScopedMemoryAllocation&& other) noexcept;
+    std::uint8_t* get() const;
+    std::size_t size() const;
+private:
+    MemoryPool* memory_pool_pointer;
+    std::uint8_t* allocated_pointer;
+    std::size_t allocation_size;
+};
+}
+#endif

accelerator/include/thread_pool.hpp ADDED Viewed

	@@ -0,0 +1,83 @@

+//
+// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+// SPDX-License-Identifier: Apache-2.0
+//
+#ifndef POCKET_TTS_THREAD_POOL_HPP
+#define POCKET_TTS_THREAD_POOL_HPP
+#include <atomic>
+#include <condition_variable>
+#include <functional>
+#include <future>
+#include <memory>
+#include <mutex>
+#include <queue>
+#include <thread>
+#include <vector>
+namespace pocket_tts_accelerator {
+// Fixed-size pool of worker threads fed from a task queue. Neither copyable
+// nor movable.
+// NOTE(review): this header uses std::runtime_error, std::invoke_result and
+// std::size_t but does not include <stdexcept>, <type_traits> or <cstddef>
+// itself; it currently depends on other headers pulling them in.
+class ThreadPool {
+public:
+    explicit ThreadPool(std::size_t number_of_threads);
+    ~ThreadPool();
+    ThreadPool(const ThreadPool&) = delete;
+    ThreadPool& operator=(const ThreadPool&) = delete;
+    ThreadPool(ThreadPool&&) = delete;
+    ThreadPool& operator=(ThreadPool&&) = delete;
+    // Queues function(arguments...) and returns a future for its result.
+    // Throws std::runtime_error once the pool has been asked to stop.
+    template<typename FunctionType, typename... ArgumentTypes>
+    auto submit_task(FunctionType&& function, ArgumentTypes&&... arguments)
+        -> std::future<typename std::invoke_result<FunctionType, ArgumentTypes...>::type>;
+    void shutdown();
+    bool is_running() const;
+    std::size_t get_pending_task_count() const;
+    std::size_t get_thread_count() const;
+private:
+    void worker_thread_function();
+    std::vector<std::thread> worker_threads;
+    std::queue<std::function<void()>> task_queue;
+    // Locked by submit_task() around every access to task_queue.
+    mutable std::mutex queue_mutex;
+    // Notified by submit_task() after a task has been queued.
+    std::condition_variable task_available_condition;
+    std::atomic<bool> should_stop;
+    std::atomic<bool> is_stopped;
+    std::size_t thread_count;
+};
+// Binds `function` to `arguments`, queues the call for a worker thread and
+// returns the future that will receive its result (or its exception).
+// Throws std::runtime_error when the pool has already been asked to stop.
+template<typename FunctionType, typename... ArgumentTypes>
+auto ThreadPool::submit_task(FunctionType&& function, ArgumentTypes&&... arguments)
+    -> std::future<typename std::invoke_result<FunctionType, ArgumentTypes...>::type> {
+    using ResultType = typename std::invoke_result<FunctionType, ArgumentTypes...>::type;
+    using TaskType = std::packaged_task<ResultType()>;
+    // std::function needs a copyable callable while packaged_task is
+    // move-only, hence the shared_ptr indirection.
+    auto shared_task = std::make_shared<TaskType>(
+        std::bind(std::forward<FunctionType>(function), std::forward<ArgumentTypes>(arguments)...)
+    );
+    std::future<ResultType> pending_result = shared_task->get_future();
+    {
+        std::lock_guard<std::mutex> queue_guard(queue_mutex);
+        // Checked under the lock so that a task is never queued after the
+        // stop flag has been observed.
+        if (should_stop.load()) {
+            throw std::runtime_error("Cannot submit task to stopped thread pool");
+        }
+        task_queue.push([shared_task]() {
+            (*shared_task)();
+        });
+    }
+    // Wake a single worker; the lock is already released at this point.
+    task_available_condition.notify_one();
+    return pending_result;
+}
+}
+#endif

accelerator/src/accelerator_core.cpp ADDED Viewed

	@@ -0,0 +1,485 @@

+//
+// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+// SPDX-License-Identifier: Apache-2.0
+//
+#include "accelerator_core.hpp"
+#include <chrono>
+#include <cstring>
+#include <ctime>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+#include <signal.h>
+namespace pocket_tts_accelerator {
+// Number of the most recently delivered SIGINT/SIGTERM, or 0 when none is
+// pending. Written by the signal handler, consumed by AcceleratorCore::run().
+static volatile sig_atomic_t last_received_signal = 0;
+// Signal handler. It only records the signal number: the shutdown sequence
+// logs through iostreams and stops the IPC server and the thread pool, none of
+// which is async-signal-safe, so that work is left to AcceleratorCore::run().
+static void signal_handler_function(int signal_number) {
+    last_received_signal = signal_number;
+}
+// Stores the configuration; subsystems are created later by initialize().
+AcceleratorCore::AcceleratorCore(const AcceleratorConfiguration& configuration)
+    : config(configuration)
+    , is_initialized(false)
+    , should_shutdown(false) {
+}
+// Makes sure the subsystems are stopped; shutdown() is a no-op when it has
+// already run.
+AcceleratorCore::~AcceleratorCore() {
+    shutdown();
+}
+// Creates the memory pool, thread pool, audio processor and IPC handler,
+// registers the command handlers, starts the IPC server and installs the
+// signal handlers.
+// Returns true on success (or when already initialized) and false when the
+// IPC server cannot be started.
+bool AcceleratorCore::initialize() {
+    if (is_initialized.load()) {
+        return true;
+    }
+    log_message("Initializing Pocket TTS Accelerator...");
+    memory_pool = std::make_unique<MemoryPool>(config.memory_pool_size_bytes);
+    log_message("Memory pool initialized with " + std::to_string(config.memory_pool_size_bytes / (1024 * 1024)) + " MB");
+    thread_pool = std::make_unique<ThreadPool>(config.number_of_worker_threads);
+    log_message("Thread pool initialized with " + std::to_string(config.number_of_worker_threads) + " worker threads");
+    audio_processor = std::make_unique<AudioProcessor>(*memory_pool);
+    log_message("Audio processor initialized");
+    ipc_handler = std::make_unique<IpcHandler>(config.ipc_socket_path);
+    log_message("IPC handler created for socket: " + config.ipc_socket_path);
+    register_all_command_handlers();
+    ipc_handler->set_shutdown_callback([this]() {
+        this->shutdown();
+    });
+    if (!ipc_handler->start_server()) {
+        log_message("ERROR: Failed to start IPC server");
+        return false;
+    }
+    log_message("IPC server started successfully");
+    setup_signal_handlers();
+    is_initialized.store(true);
+    log_message("Pocket TTS Accelerator initialized successfully");
+    return true;
+}
+// Main loop: wakes up every 100 ms until a shutdown has been requested. A
+// signal recorded by the handler is turned into a regular shutdown() call
+// here, on a normal thread, where logging and joining threads is safe.
+// NOTE(review): signal-triggered shutdown therefore requires the process to
+// be inside run(); confirm that main.cpp drives the accelerator through run().
+void AcceleratorCore::run() {
+    if (!is_initialized.load()) {
+        log_message("ERROR: Accelerator not initialized");
+        return;
+    }
+    log_message("Accelerator running and waiting for commands...");
+    while (!should_shutdown.load()) {
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+        const int pending_signal = last_received_signal;
+        if (pending_signal != 0) {
+            last_received_signal = 0;
+            log_message("Received signal: " + std::to_string(pending_signal));
+            shutdown();
+        }
+    }
+    log_message("Accelerator main loop exited");
+}
+// Stops the accelerator: IPC server first, then the thread pool, then the
+// memory pool is reset. Subsystems that were never created are skipped.
+void AcceleratorCore::shutdown() {
+    // exchange() makes the call idempotent: only the first caller proceeds.
+    if (should_shutdown.exchange(true)) {
+        return;
+    }
+    log_message("Shutting down Pocket TTS Accelerator...");
+    if (ipc_handler) {
+        ipc_handler->stop_server();
+        log_message("IPC server stopped");
+    }
+    if (thread_pool) {
+        thread_pool->shutdown();
+        log_message("Thread pool shut down");
+    }
+    if (memory_pool) {
+        memory_pool->reset_pool();
+        log_message("Memory pool reset");
+    }
+    is_initialized.store(false);
+    log_message("Pocket TTS Accelerator shut down complete");
+}
+// True only while the accelerator is initialized and no shutdown has been
+// requested.
+bool AcceleratorCore::is_running() const {
+    if (!is_initialized.load()) {
+        return false;
+    }
+    return !should_shutdown.load();
+}
+// Returns a short human-readable state: "Not initialized", "Shutting down"
+// or "Running". The initialization check takes precedence.
+std::string AcceleratorCore::get_status_string() const {
+    const char* status_text = "Running";
+    if (!is_initialized.load()) {
+        status_text = "Not initialized";
+    } else if (should_shutdown.load()) {
+        status_text = "Shutting down";
+    }
+    return status_text;
+}
+// Builds the default configuration: 2 worker threads, a 64 MiB memory pool,
+// the socket /tmp/pocket_tts_accelerator.sock and verbose logging enabled.
+AcceleratorConfiguration AcceleratorCore::get_default_configuration() {
+    constexpr std::size_t default_worker_threads = 2;
+    constexpr std::size_t default_pool_size_bytes = 64 * 1024 * 1024;
+    // Aggregate initialization follows the member order of
+    // AcceleratorConfiguration.
+    return AcceleratorConfiguration{
+        default_worker_threads,
+        default_pool_size_bytes,
+        "/tmp/pocket_tts_accelerator.sock",
+        true
+    };
+}
+// Registers one IPC handler per supported command, each forwarding the
+// payload to the matching handle_*_command member function.
+// CommandType::SHUTDOWN is not registered here.
+void AcceleratorCore::register_all_command_handlers() {
+    using HandlerMethod = std::vector<std::uint8_t> (AcceleratorCore::*)(const std::vector<std::uint8_t>&);
+    // Wraps a member function in a callable bound to this instance and
+    // registers it for the given command.
+    const auto bind_handler = [this](CommandType command, HandlerMethod method) {
+        ipc_handler->register_command_handler(
+            command,
+            [this, method](const std::vector<std::uint8_t>& payload) {
+                return (this->*method)(payload);
+            }
+        );
+    };
+    bind_handler(CommandType::PING, &AcceleratorCore::handle_ping_command);
+    bind_handler(CommandType::PROCESS_AUDIO, &AcceleratorCore::handle_process_audio_command);
+    bind_handler(CommandType::CONVERT_TO_MONO, &AcceleratorCore::handle_convert_to_mono_command);
+    bind_handler(CommandType::CONVERT_TO_PCM, &AcceleratorCore::handle_convert_to_pcm_command);
+    bind_handler(CommandType::RESAMPLE_AUDIO, &AcceleratorCore::handle_resample_audio_command);
+    bind_handler(CommandType::GET_MEMORY_STATS, &AcceleratorCore::handle_get_memory_stats_command);
+    bind_handler(CommandType::CLEAR_MEMORY_POOL, &AcceleratorCore::handle_clear_memory_pool_command);
+    log_message("All command handlers registered");
+}
+// Installs signal_handler_function for SIGINT and SIGTERM.
+void AcceleratorCore::setup_signal_handlers() {
+    const int handled_signals[] = {SIGINT, SIGTERM};
+    for (const int signal_number : handled_signals) {
+        signal(signal_number, signal_handler_function);
+    }
+}
+// PING handler: replies "PONG", or "PONG:<payload>" when the request carried
+// a payload, which is echoed back as text.
+std::vector<std::uint8_t> AcceleratorCore::handle_ping_command(const std::vector<std::uint8_t>& payload) {
+    const std::string echoed_text(payload.begin(), payload.end());
+    const bool has_payload = !echoed_text.empty();
+    if (has_payload) {
+        log_message("Received PING command with payload: " + echoed_text);
+    } else {
+        log_message("Received PING command");
+    }
+    const std::string reply = has_payload ? "PONG:" + echoed_text : std::string("PONG");
+    return std::vector<std::uint8_t>(reply.begin(), reply.end());
+}
+std::vector<std::uint8_t> AcceleratorCore::handle_process_audio_command(const std::vector<std::uint8_t>& payload) {
+    log_message("Received PROCESS_AUDIO command with payload size: " + std::to_string(payload.size()) + " bytes");
+    if (payload.size() < sizeof(ProcessAudioRequest)) {
+        std::string error_message = "ERROR:Invalid payload size, expected " + std::to_string(sizeof(ProcessAudioRequest)) + " bytes";
+        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
+    }
+    ProcessAudioRequest request;
+    std::memcpy(&request, payload.data(), sizeof(ProcessAudioRequest));
+    std::string input_path(request.input_file_path);
+    std::string output_path(request.output_file_path);
+    log_message("Processing audio from: " + input_path + " to: " + output_path);
+    auto future_result = thread_pool->submit_task([this, input_path, output_path]() {
+        return this->audio_processor->process_audio_for_voice_cloning(input_path, output_path);
+    });
+    AudioProcessingResult result = future_result.get();
+    if (result.success) {
+        log_message("Audio processing completed successfully");
+        std::string success_message = "SUCCESS:" + output_path;
+        return std::vector<std::uint8_t>(success_message.begin(), success_message.end());
+    } else {
+        log_message("Audio processing failed: " + result.error_message);
+        std::string error_message = "ERROR:" + result.error_message;
+        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
+    }
+}
+std::vector<std::uint8_t> AcceleratorCore::handle_convert_to_mono_command(const std::vector<std::uint8_t>& payload) {
+    log_message("Received CONVERT_TO_MONO command with payload size: " + std::to_string(payload.size()) + " bytes");
+    if (payload.size() < sizeof(ProcessAudioRequest)) {
+        std::string error_message = "ERROR:Invalid payload size, expected " + std::to_string(sizeof(ProcessAudioRequest)) + " bytes";
+        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
+    }
+    ProcessAudioRequest request;
+    std::memcpy(&request, payload.data(), sizeof(ProcessAudioRequest));
+    std::string input_path(request.input_file_path);
+    std::string output_path(request.output_file_path);
+    log_message("Converting to mono from: " + input_path + " to: " + output_path);
+    AudioData audio_data = audio_processor->read_wav_file(input_path);
+    if (!audio_data.is_valid) {
+        std::string error_message = "ERROR:" + audio_data.error_message;
+        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
+    }
+    AudioProcessingResult result = audio_processor->convert_to_mono(audio_data);
+    if (!result.success) {
+        std::string error_message = "ERROR:" + result.error_message;
+        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
+    }
+    AudioData output_audio;
+    output_audio.samples = std::move(result.processed_samples);
+    output_audio.sample_rate = result.output_sample_rate;
+    output_audio.number_of_channels = 1;
+    output_audio.bits_per_sample = 16;
+    output_audio.is_valid = true;
+    if (!audio_processor->write_wav_file(output_path, output_audio)) {
+        std::string error_message = "ERROR:Failed to write output file";
+        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
+    }
+    log_message("Mono conversion completed successfully");
+    std::string success_message = "SUCCESS:" + output_path;
+    return std::vector<std::uint8_t>(success_message.begin(), success_message.end());
+}
+std::vector<std::uint8_t> AcceleratorCore::handle_convert_to_pcm_command(const std::vector<std::uint8_t>& payload) {
+    log_message("Received CONVERT_TO_PCM command with payload size: " + std::to_string(payload.size()) + " bytes");
+    if (payload.size() < sizeof(ProcessAudioRequest)) {
+        std::string error_message = "ERROR:Invalid payload size, expected " + std::to_string(sizeof(ProcessAudioRequest)) + " bytes";
+        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
+    }
+    ProcessAudioRequest request;
+    std::memcpy(&request, payload.data(), sizeof(ProcessAudioRequest));
+    std::string input_path(request.input_file_path);
+    std::string output_path(request.output_file_path);
+    log_message("Converting to PCM from: " + input_path + " to: " + output_path);
+    AudioData audio_data = audio_processor->read_wav_file(input_path);
+    if (!audio_data.is_valid) {
+        std::string error_message = "ERROR:" + audio_data.error_message;
+        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
+    }
+    AudioData mono_audio;
+    if (audio_data.number_of_channels > 1) {
+        log_message("Input has " + std::to_string(audio_data.number_of_channels) + " channels, converting to mono");
+        AudioProcessingResult mono_result = audio_processor->convert_to_mono(audio_data);
+        if (!mono_result.success) {
+            std::string error_message = "ERROR:" + mono_result.error_message;
+            return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
+        }
+        mono_audio.samples = std::move(mono_result.processed_samples);
+        mono_audio.sample_rate = mono_result.output_sample_rate;
+    } else {
+        mono_audio.samples = std::move(audio_data.samples);
+        mono_audio.sample_rate = audio_data.sample_rate;
+    }
+    mono_audio.number_of_channels = 1;
+    mono_audio.bits_per_sample = 16;
+    mono_audio.is_valid = true;
+    if (!audio_processor->write_wav_file(output_path, mono_audio)) {
+        std::string error_message = "ERROR:Failed to write output file";
+        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
+    }
+    log_message("PCM conversion completed successfully");
+    std::string success_message = "SUCCESS:" + output_path;
+    return std::vector<std::uint8_t>(success_message.begin(), success_message.end());
+}
+std::vector<std::uint8_t> AcceleratorCore::handle_resample_audio_command(const std::vector<std::uint8_t>& payload) {
+    log_message("Received RESAMPLE_AUDIO command with payload size: " + std::to_string(payload.size()) + " bytes");
+    if (payload.size() < sizeof(ProcessAudioRequest)) {
+        std::string error_message = "ERROR:Invalid payload size, expected " + std::to_string(sizeof(ProcessAudioRequest)) + " bytes";
+        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
+    }
+    ProcessAudioRequest request;
+    std::memcpy(&request, payload.data(), sizeof(ProcessAudioRequest));
+    std::string input_path(request.input_file_path);
+    std::string output_path(request.output_file_path);
+    std::uint32_t target_sample_rate = request.target_sample_rate;
+    log_message("Resampling audio from: " + input_path + " to: " + output_path + " at " + std::to_string(target_sample_rate) + " Hz");
+    AudioData audio_data = audio_processor->read_wav_file(input_path);
+    if (!audio_data.is_valid) {
+        std::string error_message = "ERROR:" + audio_data.error_message;
+        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
+    }
+    AudioProcessingResult result = audio_processor->resample_audio(audio_data, target_sample_rate);
+    if (!result.success) {
+        std::string error_message = "ERROR:" + result.error_message;
+        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
+    }
+    AudioData output_audio;
+    output_audio.samples = std::move(result.processed_samples);
+    output_audio.sample_rate = result.output_sample_rate;
+    output_audio.number_of_channels = audio_data.number_of_channels;
+    output_audio.bits_per_sample = 16;
+    output_audio.is_valid = true;
+    if (!audio_processor->write_wav_file(output_path, output_audio)) {
+        std::string error_message = "ERROR:Failed to write output file";
+        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
+    }
+    log_message("Resampling completed successfully");
+    std::string success_message = "SUCCESS:" + output_path;
+    return std::vector<std::uint8_t>(success_message.begin(), success_message.end());
+}
+// GET_MEMORY_STATS handler: snapshots the memory pool counters and returns
+// them as the raw bytes of a MemoryStatsResponse. The request payload is only
+// used for the log line.
+std::vector<std::uint8_t> AcceleratorCore::handle_get_memory_stats_command(const std::vector<std::uint8_t>& payload) {
+    log_message("Received GET_MEMORY_STATS command with payload size: " + std::to_string(payload.size()) + " bytes");
+    // Initializers are evaluated in member order: allocated, used, blocks.
+    const MemoryStatsResponse snapshot{
+        memory_pool->get_total_allocated_bytes(),
+        memory_pool->get_total_used_bytes(),
+        memory_pool->get_block_count()
+    };
+    log_message("Memory stats - Allocated: " + std::to_string(snapshot.total_allocated_bytes) +
+                " bytes, Used: " + std::to_string(snapshot.total_used_bytes) +
+                " bytes, Blocks: " + std::to_string(snapshot.block_count));
+    const auto* raw_bytes = reinterpret_cast<const std::uint8_t*>(&snapshot);
+    return std::vector<std::uint8_t>(raw_bytes, raw_bytes + sizeof(MemoryStatsResponse));
+}
+// CLEAR_MEMORY_POOL handler: releases the pool's unused blocks and replies
+// "SUCCESS:Freed <n> blocks (<m> bytes)". The request payload is only used
+// for the log line.
+std::vector<std::uint8_t> AcceleratorCore::handle_clear_memory_pool_command(const std::vector<std::uint8_t>& payload) {
+    log_message("Received CLEAR_MEMORY_POOL command with payload size: " + std::to_string(payload.size()) + " bytes");
+    const std::size_t blocks_before = memory_pool->get_block_count();
+    const std::size_t allocated_before = memory_pool->get_total_allocated_bytes();
+    memory_pool->clear_unused_blocks();
+    const std::size_t blocks_after = memory_pool->get_block_count();
+    const std::size_t allocated_after = memory_pool->get_total_allocated_bytes();
+    // The counters are sampled in separate calls, so another thread may grow
+    // the pool in between; clamp to zero instead of letting the unsigned
+    // subtraction wrap around to a huge value.
+    const std::size_t blocks_freed = (blocks_before > blocks_after) ? blocks_before - blocks_after : 0;
+    const std::size_t bytes_freed = (allocated_before > allocated_after) ? allocated_before - allocated_after : 0;
+    const std::string freed_summary = "Freed " + std::to_string(blocks_freed) +
+                                      " blocks (" + std::to_string(bytes_freed) + " bytes)";
+    log_message("Memory pool cleared - " + freed_summary);
+    const std::string success_message = "SUCCESS:" + freed_summary;
+    return std::vector<std::uint8_t>(success_message.begin(), success_message.end());
+}
+// Handles SHUTDOWN. The payload, when present, is treated as a free-text reason
+// that is logged and echoed back in the reply. shutdown() is invoked before the
+// reply text is built.
+std::vector<std::uint8_t> AcceleratorCore::handle_shutdown_command(const std::vector<std::uint8_t>& payload) {
+    const std::string reason(payload.begin(), payload.end());
+    const bool has_reason = !reason.empty();
+    if (has_reason) {
+        log_message("Received SHUTDOWN command with reason: " + reason);
+    } else {
+        log_message("Received SHUTDOWN command");
+    }
+    shutdown();
+    std::string reply_text = "SUCCESS:Shutting down";
+    if (has_reason) {
+        reply_text += " (reason: " + reason + ")";
+    }
+    return std::vector<std::uint8_t>(reply_text.begin(), reply_text.end());
+}
+// Writes one timestamped line to stdout when verbose logging is enabled:
+//   [YYYY-MM-DD HH:MM:SS.mmm] [ACCELERATOR] <message>
+// The timestamp is local time (localtime_r) with millisecond resolution.
+void AcceleratorCore::log_message(const std::string& message) const {
+    if (!config.enable_verbose_logging) {
+        return;
+    }
+    const auto now = std::chrono::system_clock::now();
+    const std::time_t seconds_since_epoch = std::chrono::system_clock::to_time_t(now);
+    // Sub-second part of the current time, in whole milliseconds (0..999).
+    const auto millisecond_part =
+        std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count() % 1000;
+    std::tm broken_down_time;
+    localtime_r(&seconds_since_epoch, &broken_down_time);
+    std::ostringstream line;
+    line << "[" << std::put_time(&broken_down_time, "%Y-%m-%d %H:%M:%S")
+         << '.' << std::setfill('0') << std::setw(3) << millisecond_part
+         << "] [ACCELERATOR] " << message;
+    std::cout << line.str() << std::endl;
+}
+}

accelerator/src/audio_processor.cpp ADDED Viewed

	@@ -0,0 +1,352 @@

+//
+// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+// SPDX-License-Identifier: Apache-2.0
+//
+#include "audio_processor.hpp"
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <fstream>
+namespace pocket_tts_accelerator {
+AudioProcessor::AudioProcessor(MemoryPool& shared_memory_pool)
+    : memory_pool(shared_memory_pool) {
+}
+// Nothing to release explicitly; members clean up after themselves.
+AudioProcessor::~AudioProcessor() = default;
+// Reads a WAV file whose header matches WavFileHeader and whose sample data
+// follows the header directly, converting the samples to 16-bit PCM.
+//
+// Supported inputs (as accepted by validate_wav_header): 8-bit unsigned,
+// 16-bit signed, 32-bit signed and 32-bit float (audio_format == 3).
+//
+// Returns an AudioData with is_valid == true on success. On failure is_valid
+// stays false and error_message describes the problem.
+//
+// The data size stored in the header is not trusted: it is clamped to the
+// number of bytes actually present after the header and rounded down to a
+// whole number of samples, so the read can never run past the destination
+// buffer and a bogus size cannot trigger a huge allocation.
+AudioData AudioProcessor::read_wav_file(const std::string& file_path) {
+    AudioData result;
+    result.is_valid = false;
+    std::ifstream file_stream(file_path, std::ios::binary);
+    if (!file_stream.is_open()) {
+        result.error_message = "Failed to open file: " + file_path;
+        return result;
+    }
+    WavFileHeader header;
+    file_stream.read(reinterpret_cast<char*>(&header), sizeof(WavFileHeader));
+    if (file_stream.gcount() < static_cast<std::streamsize>(sizeof(WavFileHeader))) {
+        result.error_message = "File is too small to be a valid WAV file";
+        return result;
+    }
+    if (!validate_wav_header(header)) {
+        result.error_message = "Invalid WAV file header";
+        return result;
+    }
+    result.sample_rate = header.sample_rate;
+    result.number_of_channels = header.number_of_channels;
+    result.bits_per_sample = header.bits_per_sample;
+
+    // Work out how many bytes really follow the header.
+    const std::streamoff data_start = file_stream.tellg();
+    file_stream.seekg(0, std::ios::end);
+    const std::streamoff file_end = file_stream.tellg();
+    file_stream.seekg(data_start, std::ios::beg);
+    if (!file_stream.good() || data_start < 0 || file_end < data_start) {
+        result.error_message = "Failed to determine audio data size";
+        return result;
+    }
+    const std::size_t available_bytes = static_cast<std::size_t>(file_end - data_start);
+
+    // validate_wav_header only lets 8, 16 and 32 through, so this is 1, 2 or 4.
+    const std::size_t bytes_per_sample = header.bits_per_sample / 8;
+    const std::size_t usable_bytes = std::min<std::size_t>(header.data_size, available_bytes);
+    const std::size_t sample_count = usable_bytes / bytes_per_sample;
+    const std::size_t bytes_to_read = sample_count * bytes_per_sample;
+
+    // Reads exactly bytes_to_read bytes into destination; false on a short read.
+    const auto read_sample_bytes = [&file_stream, bytes_to_read](void* destination) {
+        if (bytes_to_read == 0) {
+            return true;
+        }
+        file_stream.read(static_cast<char*>(destination), static_cast<std::streamsize>(bytes_to_read));
+        return static_cast<std::size_t>(file_stream.gcount()) == bytes_to_read;
+    };
+
+    result.samples.resize(sample_count);
+    bool read_succeeded = true;
+    if (header.bits_per_sample == 16) {
+        read_succeeded = read_sample_bytes(result.samples.data());
+    } else if (header.bits_per_sample == 8) {
+        std::vector<std::uint8_t> raw_data(sample_count);
+        read_succeeded = read_sample_bytes(raw_data.data());
+        if (read_succeeded) {
+            convert_uint8_to_int16(raw_data.data(), result.samples.data(), sample_count);
+        }
+    } else if (header.bits_per_sample == 32) {
+        if (header.audio_format == 3) {
+            std::vector<float> raw_data(sample_count);
+            read_succeeded = read_sample_bytes(raw_data.data());
+            if (read_succeeded) {
+                convert_float32_to_int16(raw_data.data(), result.samples.data(), sample_count);
+            }
+        } else {
+            std::vector<std::int32_t> raw_data(sample_count);
+            read_succeeded = read_sample_bytes(raw_data.data());
+            if (read_succeeded) {
+                convert_int32_to_int16(raw_data.data(), result.samples.data(), sample_count);
+            }
+        }
+    }
+    if (!read_succeeded) {
+        result.samples.clear();
+        result.error_message = "Failed to read audio data from file: " + file_path;
+        return result;
+    }
+    result.is_valid = true;
+    return result;
+}
+// Writes audio_data as a 16-bit PCM WAV file: one WavFileHeader followed by the
+// raw sample buffer. Returns false if the file cannot be opened or if the
+// stream is not in a good state after both writes.
+bool AudioProcessor::write_wav_file(const std::string& file_path, const AudioData& audio_data) {
+    std::ofstream output(file_path, std::ios::binary);
+    if (!output.is_open()) {
+        return false;
+    }
+    const std::uint32_t payload_bytes =
+        static_cast<std::uint32_t>(audio_data.samples.size() * sizeof(std::int16_t));
+
+    WavFileHeader wav_header;
+    // Chunk markers.
+    std::memcpy(wav_header.riff_marker, "RIFF", 4);
+    std::memcpy(wav_header.wave_marker, "WAVE", 4);
+    std::memcpy(wav_header.format_marker, "fmt ", 4);
+    std::memcpy(wav_header.data_marker, "data", 4);
+    // Sizes: the RIFF size field is the payload plus 36 bytes.
+    wav_header.file_size = payload_bytes + 36;
+    wav_header.data_size = payload_bytes;
+    // Format description: integer PCM (format 1), 16 bits, i.e. 2 bytes per sample.
+    wav_header.format_chunk_size = 16;
+    wav_header.audio_format = 1;
+    wav_header.bits_per_sample = 16;
+    wav_header.number_of_channels = audio_data.number_of_channels;
+    wav_header.sample_rate = audio_data.sample_rate;
+    wav_header.byte_rate = audio_data.sample_rate * audio_data.number_of_channels * 2;
+    wav_header.block_align = audio_data.number_of_channels * 2;
+
+    output.write(reinterpret_cast<const char*>(&wav_header), sizeof(WavFileHeader));
+    output.write(reinterpret_cast<const char*>(audio_data.samples.data()), payload_bytes);
+    return output.good();
+}
+// Down-mixes interleaved multi-channel audio to a single channel by averaging
+// the channels of each frame (see mix_channels_to_mono). Mono input is copied
+// through unchanged. The sample rate is preserved.
+//
+// Fails (success == false, error_message set) when the input is flagged invalid
+// or reports zero channels; the latter would otherwise divide by zero below.
+AudioProcessingResult AudioProcessor::convert_to_mono(const AudioData& input_audio) {
+    AudioProcessingResult result;
+    result.success = false;
+    if (!input_audio.is_valid) {
+        result.error_message = "Invalid input audio";
+        return result;
+    }
+    if (input_audio.number_of_channels == 0) {
+        result.error_message = "Invalid input audio: channel count is zero";
+        return result;
+    }
+    if (input_audio.number_of_channels == 1) {
+        result.processed_samples = input_audio.samples;
+        result.output_sample_rate = input_audio.sample_rate;
+        result.success = true;
+        return result;
+    }
+    // Integer division drops any trailing samples that do not form a full frame.
+    std::size_t frame_count = input_audio.samples.size() / input_audio.number_of_channels;
+    result.processed_samples.resize(frame_count);
+    mix_channels_to_mono(
+        input_audio.samples.data(),
+        result.processed_samples.data(),
+        frame_count,
+        input_audio.number_of_channels
+    );
+    result.output_sample_rate = input_audio.sample_rate;
+    result.success = true;
+    return result;
+}
+// Returns the input samples and sample rate unchanged, wrapped in a successful
+// AudioProcessingResult. Input flagged invalid is rejected with an error message.
+AudioProcessingResult AudioProcessor::convert_to_pcm_int16(const AudioData& input_audio) {
+    AudioProcessingResult outcome;
+    if (input_audio.is_valid) {
+        outcome.processed_samples = input_audio.samples;
+        outcome.output_sample_rate = input_audio.sample_rate;
+        outcome.success = true;
+    } else {
+        outcome.success = false;
+        outcome.error_message = "Invalid input audio";
+    }
+    return outcome;
+}
+// Resamples 16-bit audio to target_sample_rate using linear interpolation.
+//
+// Samples are treated as interleaved frames of number_of_channels samples and
+// each channel is interpolated independently, so left/right (etc.) are never
+// blended into each other. For mono input this is the plain sample-by-sample
+// interpolation. A channel count of zero is treated as one channel. Trailing
+// samples that do not form a complete frame are ignored.
+//
+// Returns success == false with error_message set when the input is flagged
+// invalid or when either sample rate is zero (the ratio would be undefined).
+// When source and target rates are equal the samples are copied unchanged.
+AudioProcessingResult AudioProcessor::resample_audio(const AudioData& input_audio, std::uint32_t target_sample_rate) {
+    AudioProcessingResult result;
+    result.success = false;
+    if (!input_audio.is_valid) {
+        result.error_message = "Invalid input audio";
+        return result;
+    }
+    if (input_audio.sample_rate == target_sample_rate) {
+        result.processed_samples = input_audio.samples;
+        result.output_sample_rate = target_sample_rate;
+        result.success = true;
+        return result;
+    }
+    if (input_audio.sample_rate == 0 || target_sample_rate == 0) {
+        result.error_message = "Sample rates must be greater than zero";
+        return result;
+    }
+    const std::size_t channel_count =
+        input_audio.number_of_channels == 0 ? 1 : input_audio.number_of_channels;
+    const std::size_t input_frame_count = input_audio.samples.size() / channel_count;
+    const double ratio = static_cast<double>(target_sample_rate) / static_cast<double>(input_audio.sample_rate);
+    const std::size_t output_frame_count = static_cast<std::size_t>(input_frame_count * ratio);
+    result.processed_samples.resize(output_frame_count * channel_count);
+    if (input_frame_count > 0) {
+        const std::size_t last_input_frame = input_frame_count - 1;
+        for (std::size_t output_frame = 0; output_frame < output_frame_count; ++output_frame) {
+            const double source_position = output_frame / ratio;
+            // Clamp both neighbours so rounding can never index past the input.
+            const std::size_t floor_frame =
+                std::min(static_cast<std::size_t>(source_position), last_input_frame);
+            const std::size_t ceil_frame = std::min(floor_frame + 1, last_input_frame);
+            const double fractional_part = source_position - floor_frame;
+            for (std::size_t channel = 0; channel < channel_count; ++channel) {
+                const double interpolated_value =
+                    input_audio.samples[floor_frame * channel_count + channel] * (1.0 - fractional_part) +
+                    input_audio.samples[ceil_frame * channel_count + channel] * fractional_part;
+                result.processed_samples[output_frame * channel_count + channel] = static_cast<std::int16_t>(
+                    std::clamp(interpolated_value, -32768.0, 32767.0)
+                );
+            }
+        }
+    }
+    result.output_sample_rate = target_sample_rate;
+    result.success = true;
+    return result;
+}
+// Scales the samples so that the loudest one reaches target_peak_level * 32767.
+// Silent input (peak of zero) is returned unchanged. Scaled values are clamped
+// to the int16 range. The sample rate is preserved.
+//
+// The peak is tracked in 32 bits: the magnitude of -32768 is 32768, which does
+// not fit in int16 and would otherwise wrap to a negative number and be missed.
+AudioProcessingResult AudioProcessor::normalize_audio(const AudioData& input_audio, float target_peak_level) {
+    AudioProcessingResult result;
+    result.success = false;
+    if (!input_audio.is_valid) {
+        result.error_message = "Invalid input audio";
+        return result;
+    }
+    std::int32_t max_absolute_value = 0;
+    for (const std::int16_t sample : input_audio.samples) {
+        const std::int32_t widened_sample = sample;
+        const std::int32_t absolute_value = widened_sample < 0 ? -widened_sample : widened_sample;
+        if (absolute_value > max_absolute_value) {
+            max_absolute_value = absolute_value;
+        }
+    }
+    if (max_absolute_value == 0) {
+        result.processed_samples = input_audio.samples;
+        result.output_sample_rate = input_audio.sample_rate;
+        result.success = true;
+        return result;
+    }
+    const float normalization_factor = (target_peak_level * 32767.0f) / static_cast<float>(max_absolute_value);
+    result.processed_samples.resize(input_audio.samples.size());
+    for (std::size_t index = 0; index < input_audio.samples.size(); ++index) {
+        const float normalized_sample = static_cast<float>(input_audio.samples[index]) * normalization_factor;
+        result.processed_samples[index] = static_cast<std::int16_t>(
+            std::clamp(normalized_sample, -32768.0f, 32767.0f)
+        );
+    }
+    result.output_sample_rate = input_audio.sample_rate;
+    result.success = true;
+    return result;
+}
+// Pipeline: read a WAV file, down-mix it to mono, and write the mono audio to
+// output_file_path as 16-bit PCM. On success the result carries the mono
+// samples and their sample rate; on failure error_message names the stage
+// that failed.
+AudioProcessingResult AudioProcessor::process_audio_for_voice_cloning(
+    const std::string& input_file_path,
+    const std::string& output_file_path
+) {
+    AudioProcessingResult outcome;
+    outcome.success = false;
+
+    // Stage 1: load.
+    const AudioData source_audio = read_wav_file(input_file_path);
+    if (!source_audio.is_valid) {
+        outcome.error_message = "Failed to read input file: " + source_audio.error_message;
+        return outcome;
+    }
+
+    // Stage 2: down-mix.
+    AudioProcessingResult downmixed = convert_to_mono(source_audio);
+    if (!downmixed.success) {
+        outcome.error_message = "Failed to convert to mono: " + downmixed.error_message;
+        return outcome;
+    }
+
+    // Stage 3: write the mono audio back out.
+    AudioData mono_audio;
+    mono_audio.is_valid = true;
+    mono_audio.bits_per_sample = 16;
+    mono_audio.number_of_channels = 1;
+    mono_audio.sample_rate = downmixed.output_sample_rate;
+    mono_audio.samples = std::move(downmixed.processed_samples);
+    if (!write_wav_file(output_file_path, mono_audio)) {
+        outcome.error_message = "Failed to write output file";
+        return outcome;
+    }
+
+    outcome.output_sample_rate = mono_audio.sample_rate;
+    outcome.processed_samples = std::move(mono_audio.samples);
+    outcome.success = true;
+    return outcome;
+}
+// Returns true when the header describes a WAV layout that read_wav_file can
+// decode:
+//   - "RIFF", "WAVE", "fmt " and "data" markers at their expected positions
+//   - audio_format 1 (integer PCM) or 3 (IEEE float)
+//   - 1..16 channels, sample rate 100..384000 Hz
+//   - 8, 16 or 32 bits per sample; float data must be 32-bit
+//
+// The "data" marker is checked because read_wav_file takes data_size straight
+// from this header and reads the bytes that follow it as samples; if another
+// chunk sits where the data chunk is expected, that size and those bytes are
+// not audio. Float data at 8 or 16 bits is rejected because only the 32-bit
+// branch of the reader interprets samples as float.
+bool AudioProcessor::validate_wav_header(const WavFileHeader& header) {
+    if (std::memcmp(header.riff_marker, "RIFF", 4) != 0) {
+        return false;
+    }
+    if (std::memcmp(header.wave_marker, "WAVE", 4) != 0) {
+        return false;
+    }
+    if (std::memcmp(header.format_marker, "fmt ", 4) != 0) {
+        return false;
+    }
+    if (std::memcmp(header.data_marker, "data", 4) != 0) {
+        return false;
+    }
+    if (header.audio_format != 1 && header.audio_format != 3) {
+        return false;
+    }
+    if (header.number_of_channels < 1 || header.number_of_channels > 16) {
+        return false;
+    }
+    if (header.sample_rate < 100 || header.sample_rate > 384000) {
+        return false;
+    }
+    if (header.bits_per_sample != 8 && header.bits_per_sample != 16 && header.bits_per_sample != 32) {
+        return false;
+    }
+    if (header.audio_format == 3 && header.bits_per_sample != 32) {
+        return false;
+    }
+    return true;
+}
+// Returns the duration of audio_data in whole milliseconds (rounded down),
+// computed as frames * 1000 / sample_rate where a frame is one sample per
+// channel. Returns 0 for audio flagged invalid, or when the sample rate or the
+// channel count is zero (either would divide by zero).
+std::size_t AudioProcessor::calculate_audio_duration_milliseconds(const AudioData& audio_data) {
+    if (!audio_data.is_valid || audio_data.sample_rate == 0 || audio_data.number_of_channels == 0) {
+        return 0;
+    }
+    const std::size_t frame_count = audio_data.samples.size() / audio_data.number_of_channels;
+    return (frame_count * 1000) / audio_data.sample_rate;
+}
+// Converts sample_count float samples to int16: each value is clamped to
+// [-1.0, 1.0], multiplied by 32767 and truncated toward zero by the cast.
+void AudioProcessor::convert_float32_to_int16(const float* input, std::int16_t* output, std::size_t sample_count) {
+    std::transform(input, input + sample_count, output, [](float value) {
+        const float limited = std::clamp(value, -1.0f, 1.0f);
+        return static_cast<std::int16_t>(limited * 32767.0f);
+    });
+}
+// Converts sample_count 32-bit samples to int16 by shifting each value right
+// by 16 bits, i.e. keeping the upper half of every sample.
+void AudioProcessor::convert_int32_to_int16(const std::int32_t* input, std::int16_t* output, std::size_t sample_count) {
+    std::transform(input, input + sample_count, output, [](std::int32_t wide_sample) {
+        return static_cast<std::int16_t>(wide_sample >> 16);
+    });
+}
+// Converts sample_count unsigned 8-bit samples to int16: the 128 offset is
+// removed and the result is scaled by 256, mapping 0..255 to -32768..32512.
+void AudioProcessor::convert_uint8_to_int16(const std::uint8_t* input, std::int16_t* output, std::size_t sample_count) {
+    const std::uint8_t* const input_end = input + sample_count;
+    for (const std::uint8_t* source = input; source != input_end; ++source, ++output) {
+        const std::int16_t centered = static_cast<std::int16_t>(*source);
+        *output = static_cast<std::int16_t>((centered - 128) * 256);
+    }
+}
+// Averages the channel_count interleaved samples of each frame into one output
+// sample. input must hold frame_count * channel_count samples and output
+// frame_count samples. The sum is accumulated in 32 bits and divided with
+// integer division (truncating toward zero).
+void AudioProcessor::mix_channels_to_mono(
+    const std::int16_t* input,
+    std::int16_t* output,
+    std::size_t frame_count,
+    std::uint16_t channel_count
+) {
+    const std::int16_t* frame_start = input;
+    for (std::size_t frame = 0; frame < frame_count; ++frame, frame_start += channel_count) {
+        std::int32_t accumulated = 0;
+        for (std::uint16_t channel = 0; channel < channel_count; ++channel) {
+            accumulated += frame_start[channel];
+        }
+        output[frame] = static_cast<std::int16_t>(accumulated / channel_count);
+    }
+}
+}

accelerator/src/ipc_handler.cpp ADDED Viewed

	@@ -0,0 +1,226 @@

+//
+// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+// SPDX-License-Identifier: Apache-2.0
+//
+#include "ipc_handler.hpp"
+#include <cerrno>
+#include <cstring>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <unistd.h>
+namespace pocket_tts_accelerator {
+// Records the socket path; no socket is created until start_server() is called.
+// The descriptor starts at -1 (no socket) and the server is marked not running.
+IpcHandler::IpcHandler(const std::string& socket_path)
+    : socket_file_path(socket_path),
+      server_socket_fd(-1),
+      is_server_running(false) {}
+// Stops the server (if it is running) before the handler goes away.
+IpcHandler::~IpcHandler() {
+    this->stop_server();
+}
+// Creates the Unix-domain stream socket at socket_file_path, starts listening
+// and launches the accept thread. Returns true if the server is running when
+// the call returns (including when it already was), false on any failure.
+//
+// A path that does not fit in sockaddr_un::sun_path (including its terminating
+// NUL) is rejected up front: truncating it would bind a different path than the
+// one clients were told to connect to. The check happens before unlink() so a
+// rejected path never removes an existing file.
+bool IpcHandler::start_server() {
+    if (is_server_running.load()) {
+        return true;
+    }
+    struct sockaddr_un server_address;
+    std::memset(&server_address, 0, sizeof(server_address));
+    if (socket_file_path.size() >= sizeof(server_address.sun_path)) {
+        return false;
+    }
+    server_address.sun_family = AF_UNIX;
+    // The structure is zero-filled, so the copied path stays NUL-terminated.
+    std::memcpy(server_address.sun_path, socket_file_path.data(), socket_file_path.size());
+    // Remove a stale socket file left behind by a previous run.
+    unlink(socket_file_path.c_str());
+    server_socket_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+    if (server_socket_fd < 0) {
+        return false;
+    }
+    if (bind(server_socket_fd, reinterpret_cast<struct sockaddr*>(&server_address), sizeof(server_address)) < 0) {
+        close(server_socket_fd);
+        server_socket_fd = -1;
+        return false;
+    }
+    if (listen(server_socket_fd, CONNECTION_BACKLOG) < 0) {
+        close(server_socket_fd);
+        server_socket_fd = -1;
+        return false;
+    }
+    is_server_running.store(true);
+    accept_thread = std::thread(&IpcHandler::accept_connections_loop, this);
+    return true;
+}
+// Stops the server: clears the running flag, shuts down and closes the
+// listening socket (which unblocks accept()), joins the accept thread and
+// removes the socket file. Safe to call repeatedly.
+//
+// Two details matter here:
+//   - The running flag is cleared with an atomic exchange so that only one
+//     caller performs the socket teardown.
+//   - The shutdown callback is invoked on the accept thread (see
+//     handle_client_connection). If that callback ends up here, joining would
+//     mean a thread joining itself, so the join is skipped for that caller.
+//     The join is deliberately not gated on the running flag: a later call from
+//     another thread (for example the destructor) still joins the thread.
+void IpcHandler::stop_server() {
+    const bool was_running = is_server_running.exchange(false);
+    if (was_running && server_socket_fd >= 0) {
+        shutdown(server_socket_fd, SHUT_RDWR);
+        close(server_socket_fd);
+        server_socket_fd = -1;
+    }
+    if (accept_thread.joinable() && accept_thread.get_id() != std::this_thread::get_id()) {
+        accept_thread.join();
+    }
+    if (was_running) {
+        unlink(socket_file_path.c_str());
+    }
+}
+// Reports the current value of the running flag.
+bool IpcHandler::is_running() const {
+    const bool currently_running = is_server_running.load();
+    return currently_running;
+}
+// Installs (or replaces) the handler for command_type. The handler table is
+// updated while holding handlers_mutex.
+void IpcHandler::register_command_handler(CommandType command_type, CommandHandlerFunction handler) {
+    std::lock_guard<std::mutex> guard(handlers_mutex);
+    auto& slot = command_handlers[command_type];
+    slot = std::move(handler);
+}
+// Stores the callable that handle_client_connection invokes after it has
+// acknowledged a SHUTDOWN command.
+void IpcHandler::set_shutdown_callback(std::function<void()> callback) {
+    shutdown_callback.swap(callback);
+}
+// Body of the accept thread. Accepts one client at a time, serves exactly one
+// request on that connection, closes it, and repeats until the running flag is
+// cleared. A failed accept() ends the loop only when the server is stopping;
+// otherwise the loop simply tries again.
+void IpcHandler::accept_connections_loop() {
+    for (;;) {
+        if (!is_server_running.load()) {
+            return;
+        }
+        struct sockaddr_un peer_address;
+        socklen_t peer_address_length = sizeof(peer_address);
+        const int connection_fd = accept(
+            server_socket_fd,
+            reinterpret_cast<struct sockaddr*>(&peer_address),
+            &peer_address_length
+        );
+        if (connection_fd >= 0) {
+            handle_client_connection(connection_fd);
+            close(connection_fd);
+        } else if (!is_server_running.load()) {
+            return;
+        }
+    }
+}
+// Serves a single request on an accepted connection:
+//   1. Read the request; if that fails, return without replying.
+//   2. Wrong magic number -> reply ERROR_INVALID_COMMAND with no payload.
+//   3. SHUTDOWN -> reply SUCCESS with no payload, then run the shutdown
+//      callback (if one is set). Registered handlers are not consulted.
+//   4. Otherwise look up the registered handler and run it while holding
+//      handlers_mutex. A missing handler yields ERROR_INVALID_COMMAND; any
+//      exception from the handler yields ERROR_INTERNAL with an empty payload.
+// Every reply echoes the request_id of the request.
+void IpcHandler::handle_client_connection(int client_socket_fd) {
+    RequestHeader incoming_header;
+    std::vector<std::uint8_t> incoming_payload;
+    if (!receive_request(client_socket_fd, incoming_header, incoming_payload)) {
+        return;
+    }
+
+    // Builds a response header for this request and sends it with the payload.
+    const auto reply = [&](ResponseStatus reply_status, const std::vector<std::uint8_t>& reply_payload) {
+        ResponseHeader outgoing_header;
+        outgoing_header.magic_number = PROTOCOL_MAGIC_NUMBER;
+        outgoing_header.status_code = static_cast<std::uint32_t>(reply_status);
+        outgoing_header.payload_size = static_cast<std::uint32_t>(reply_payload.size());
+        outgoing_header.request_id = incoming_header.request_id;
+        send_response(client_socket_fd, outgoing_header, reply_payload);
+    };
+
+    if (incoming_header.magic_number != PROTOCOL_MAGIC_NUMBER) {
+        reply(ResponseStatus::ERROR_INVALID_COMMAND, {});
+        return;
+    }
+
+    const CommandType requested_command = static_cast<CommandType>(incoming_header.command_type);
+    if (requested_command == CommandType::SHUTDOWN) {
+        // Acknowledge first: the callback may tear the server down.
+        reply(ResponseStatus::SUCCESS, {});
+        if (shutdown_callback) {
+            shutdown_callback();
+        }
+        return;
+    }
+
+    std::vector<std::uint8_t> outgoing_payload;
+    ResponseStatus outcome = ResponseStatus::SUCCESS;
+    {
+        std::unique_lock<std::mutex> lock(handlers_mutex);
+        const auto entry = command_handlers.find(requested_command);
+        if (entry == command_handlers.end()) {
+            outcome = ResponseStatus::ERROR_INVALID_COMMAND;
+        } else {
+            try {
+                outgoing_payload = entry->second(incoming_payload);
+            } catch (...) {
+                outcome = ResponseStatus::ERROR_INTERNAL;
+            }
+        }
+    }
+    reply(outcome, outgoing_payload);
+}
+// Sends the response header followed by the payload (if any) on socket_fd.
+// Returns true only when every byte of both has been written.
+//
+// write() on a stream socket may accept fewer bytes than requested or be
+// interrupted by a signal, so each part is written in a loop until it is
+// complete; a hard error or a zero-length write ends the attempt.
+bool IpcHandler::send_response(
+    int socket_fd,
+    const ResponseHeader& header,
+    const std::vector<std::uint8_t>& payload
+) {
+    const auto write_all = [socket_fd](const void* buffer, std::size_t length) {
+        const auto* cursor = static_cast<const std::uint8_t*>(buffer);
+        std::size_t remaining = length;
+        while (remaining > 0) {
+            const ssize_t bytes_written = write(socket_fd, cursor, remaining);
+            if (bytes_written < 0) {
+                if (errno == EINTR) {
+                    continue;  // Interrupted before anything was written; retry.
+                }
+                return false;
+            }
+            if (bytes_written == 0) {
+                return false;
+            }
+            cursor += bytes_written;
+            remaining -= static_cast<std::size_t>(bytes_written);
+        }
+        return true;
+    };
+    if (!write_all(&header, sizeof(ResponseHeader))) {
+        return false;
+    }
+    if (!payload.empty() && !write_all(payload.data(), payload.size())) {
+        return false;
+    }
+    return true;
+}
+// Reads one request from socket_fd: a RequestHeader followed by
+// header.payload_size payload bytes. Returns false if the peer closes the
+// connection early, a read fails, or the announced payload exceeds
+// MAXIMUM_PAYLOAD_SIZE. The magic number is not checked here.
+//
+// read() on a stream socket may return fewer bytes than requested (routinely
+// so for large payloads) or be interrupted by a signal, so each part is read
+// in a loop until it is complete.
+bool IpcHandler::receive_request(
+    int socket_fd,
+    RequestHeader& header,
+    std::vector<std::uint8_t>& payload
+) {
+    const auto read_all = [socket_fd](void* buffer, std::size_t length) {
+        auto* cursor = static_cast<std::uint8_t*>(buffer);
+        std::size_t remaining = length;
+        while (remaining > 0) {
+            const ssize_t bytes_read = read(socket_fd, cursor, remaining);
+            if (bytes_read < 0) {
+                if (errno == EINTR) {
+                    continue;  // Interrupted before anything was read; retry.
+                }
+                return false;
+            }
+            if (bytes_read == 0) {
+                return false;  // End of stream before the full message arrived.
+            }
+            cursor += bytes_read;
+            remaining -= static_cast<std::size_t>(bytes_read);
+        }
+        return true;
+    };
+    if (!read_all(&header, sizeof(RequestHeader))) {
+        return false;
+    }
+    if (header.payload_size > MAXIMUM_PAYLOAD_SIZE) {
+        return false;
+    }
+    if (header.payload_size > 0) {
+        payload.resize(header.payload_size);
+        if (!read_all(payload.data(), payload.size())) {
+            return false;
+        }
+    }
+    return true;
+}
+}

accelerator/src/main.cpp ADDED Viewed

	@@ -0,0 +1,79 @@

+//
+// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+// SPDX-License-Identifier: Apache-2.0
+//
+#include "accelerator_core.hpp"
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <string>
+// Prints the command-line help text to stdout.
+void print_usage(const char* program_name) {
+    static const char* const option_lines[] = {
+        "  --socket PATH     IPC socket path (default: /tmp/pocket_tts_accelerator.sock)",
+        "  --threads N       Number of worker threads (default: 2)",
+        "  --memory MB       Memory pool size in megabytes (default: 64)",
+        "  --quiet           Disable verbose logging",
+        "  --help            Show this help message",
+    };
+    std::cout << "Usage: " << program_name << " [options]" << std::endl;
+    std::cout << std::endl;
+    std::cout << "Options:" << std::endl;
+    for (const char* option_line : option_lines) {
+        std::cout << option_line << std::endl;
+    }
+}
+// Entry point: parses the command line into an AcceleratorConfiguration,
+// clamps the worker thread count to the range 1..2, then initializes and runs
+// the accelerator.
+//
+// Exit codes: 0 after --help or a normal run, 1 on a bad command line or when
+// initialization fails.
+//
+// Numeric option values must be plain non-negative decimal integers. Malformed
+// values, missing option values and memory sizes that would overflow when
+// converted to bytes are reported on stderr together with the usage text,
+// rather than terminating the process through an uncaught exception.
+int main(int argc, char* argv[]) {
+    pocket_tts_accelerator::AcceleratorConfiguration configuration =
+        pocket_tts_accelerator::AcceleratorCore::get_default_configuration();
+
+    // Parses a decimal unsigned integer; rejects empty text, signs, leading
+    // whitespace and trailing garbage.
+    const auto parse_unsigned = [](const char* text, unsigned long& parsed_value) {
+        if (text == nullptr || *text < '0' || *text > '9') {
+            return false;
+        }
+        char* parse_end = nullptr;
+        parsed_value = std::strtoul(text, &parse_end, 10);
+        return parse_end != text && *parse_end == '\0';
+    };
+    const auto fail_with_usage = [argv](const std::string& problem) {
+        std::cerr << problem << std::endl;
+        print_usage(argv[0]);
+        return 1;
+    };
+
+    for (int argument_index = 1; argument_index < argc; ++argument_index) {
+        const std::string argument(argv[argument_index]);
+        if (argument == "--help" || argument == "-h") {
+            print_usage(argv[0]);
+            return 0;
+        }
+        if (argument == "--quiet" || argument == "-q") {
+            configuration.enable_verbose_logging = false;
+            continue;
+        }
+        const bool takes_value =
+            argument == "--socket" || argument == "--threads" || argument == "--memory";
+        if (!takes_value) {
+            return fail_with_usage("Unknown argument: " + argument);
+        }
+        if (argument_index + 1 >= argc) {
+            return fail_with_usage("Missing value for argument: " + argument);
+        }
+        const char* value_text = argv[++argument_index];
+        if (argument == "--socket") {
+            configuration.ipc_socket_path = value_text;
+            continue;
+        }
+        unsigned long numeric_value = 0;
+        if (!parse_unsigned(value_text, numeric_value)) {
+            return fail_with_usage("Invalid value for " + argument + ": " + value_text);
+        }
+        if (argument == "--threads") {
+            configuration.number_of_worker_threads = numeric_value;
+            continue;
+        }
+        // --memory: convert megabytes to bytes, refusing values that overflow.
+        const std::size_t bytes_per_megabyte = static_cast<std::size_t>(1024) * 1024;
+        const std::size_t largest_size = static_cast<std::size_t>(-1);
+        if (numeric_value > largest_size / bytes_per_megabyte) {
+            return fail_with_usage("Invalid value for " + argument + ": " + value_text);
+        }
+        configuration.memory_pool_size_bytes = static_cast<std::size_t>(numeric_value) * bytes_per_megabyte;
+    }
+    if (configuration.number_of_worker_threads < 1) {
+        configuration.number_of_worker_threads = 1;
+    }
+    if (configuration.number_of_worker_threads > 2) {
+        configuration.number_of_worker_threads = 2;
+    }
+    pocket_tts_accelerator::AcceleratorCore accelerator(configuration);
+    if (!accelerator.initialize()) {
+        std::cerr << "Failed to initialize accelerator" << std::endl;
+        return 1;
+    }
+    accelerator.run();
+    return 0;
+}

accelerator/src/memory_pool.cpp ADDED Viewed

	@@ -0,0 +1,216 @@

+//
+// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+// SPDX-License-Identifier: Apache-2.0
+//
+#include "memory_pool.hpp"
+#include <algorithm>
+#include <chrono>
+#include <cstring>
+namespace pocket_tts_accelerator {
+// Starts with no blocks and zeroed counters. Note that the constructor argument
+// is stored as the pool's maximum size; nothing is allocated up front.
+MemoryPool::MemoryPool(std::size_t initial_pool_size_bytes)
+    : total_allocated_bytes(0),
+      total_used_bytes(0),
+      maximum_pool_size_bytes(initial_pool_size_bytes) {}
+// Releases every block and clears the bookkeeping via reset_pool().
+MemoryPool::~MemoryPool() {
+    this->reset_pool();
+}
+namespace {
+// Drops every block that is not in use, subtracts its size from
+// allocated_bytes and rebuilds the pointer -> index table so it matches the
+// compacted block list. Does no locking: the caller must already hold the
+// pool mutex. Kept as a file-local template so that both allocate() (which
+// holds the lock) and clear_unused_blocks() (which takes it) can share it.
+template <typename BlockContainer, typename PointerIndex, typename ByteCounter>
+void release_unused_blocks(BlockContainer& blocks, PointerIndex& pointer_index, ByteCounter& allocated_bytes) {
+    for (const auto& block : blocks) {
+        if (!block.is_in_use) {
+            allocated_bytes -= block.block_size;
+            pointer_index.erase(block.data.get());
+        }
+    }
+    // Single compaction pass instead of erasing blocks one at a time.
+    blocks.erase(
+        std::remove_if(blocks.begin(), blocks.end(), [](const auto& block) { return !block.is_in_use; }),
+        blocks.end()
+    );
+    for (std::size_t index = 0; index < blocks.size(); ++index) {
+        pointer_index[blocks[index].data.get()] = index;
+    }
+}
+}  // namespace
+// Hands out a buffer of at least requested_size_bytes.
+//
+// An idle block that is large enough is reused (smallest such block wins).
+// Otherwise a new block is created, rounded up to a multiple of 64 bytes. If
+// creating it would push the pool past its maximum size, idle blocks are
+// released first; the limit is soft, so the new block is created regardless.
+// The returned pointer stays owned by the pool and must be handed back with
+// deallocate().
+std::uint8_t* MemoryPool::allocate(std::size_t requested_size_bytes) {
+    std::unique_lock<std::mutex> lock(pool_mutex);
+    std::size_t block_index = find_suitable_block_index(requested_size_bytes);
+    if (block_index != static_cast<std::size_t>(-1)) {
+        MemoryBlock& existing_block = memory_blocks[block_index];
+        existing_block.is_in_use = true;
+        existing_block.last_access_timestamp = get_current_timestamp();
+        total_used_bytes += existing_block.block_size;
+        return existing_block.data.get();
+    }
+    if (total_allocated_bytes + requested_size_bytes > maximum_pool_size_bytes) {
+        // The lock is already held, so use the unlocked helper directly.
+        release_unused_blocks(memory_blocks, pointer_to_block_index, total_allocated_bytes);
+    }
+    std::size_t aligned_size = ((requested_size_bytes + 63) / 64) * 64;
+    memory_blocks.push_back(MemoryBlock{
+        std::make_unique<std::uint8_t[]>(aligned_size),
+        aligned_size,
+        true,
+        get_current_timestamp()
+    });
+    std::uint8_t* allocated_pointer = memory_blocks.back().data.get();
+    pointer_to_block_index[allocated_pointer] = memory_blocks.size() - 1;
+    total_allocated_bytes += aligned_size;
+    total_used_bytes += aligned_size;
+    return allocated_pointer;
+}
+// Marks the block behind pointer as idle so it can be reused. The memory is
+// not freed here. Null pointers, pointers the pool does not know and blocks
+// that are already idle are ignored.
+void MemoryPool::deallocate(std::uint8_t* pointer) {
+    if (pointer == nullptr) {
+        return;
+    }
+    std::unique_lock<std::mutex> lock(pool_mutex);
+    auto iterator = pointer_to_block_index.find(pointer);
+    if (iterator == pointer_to_block_index.end()) {
+        return;
+    }
+    std::size_t block_index = iterator->second;
+    if (block_index >= memory_blocks.size()) {
+        return;
+    }
+    MemoryBlock& block = memory_blocks[block_index];
+    if (block.is_in_use) {
+        block.is_in_use = false;
+        block.last_access_timestamp = get_current_timestamp();
+        total_used_bytes -= block.block_size;
+    }
+}
+// Frees every idle block and updates the allocation counter and the pointer
+// table accordingly. Takes the pool mutex, like the other public operations,
+// so it can be called while other threads allocate or deallocate.
+void MemoryPool::clear_unused_blocks() {
+    std::unique_lock<std::mutex> lock(pool_mutex);
+    release_unused_blocks(memory_blocks, pointer_to_block_index, total_allocated_bytes);
+}
+// Drops every block, in use or not, and zeroes both byte counters, all while
+// holding the pool mutex. Pointers previously returned by allocate() are no
+// longer tracked afterwards.
+void MemoryPool::reset_pool() {
+    std::lock_guard<std::mutex> guard(pool_mutex);
+    total_used_bytes = 0;
+    total_allocated_bytes = 0;
+    pointer_to_block_index.clear();
+    memory_blocks.clear();
+}
+// Returns the allocation counter (sum of the sizes of all current blocks),
+// read under the pool mutex.
+std::size_t MemoryPool::get_total_allocated_bytes() const {
+    std::lock_guard<std::mutex> guard(pool_mutex);
+    const std::size_t allocated_now = total_allocated_bytes;
+    return allocated_now;
+}
+// Returns the usage counter (sum of the sizes of blocks currently handed out),
+// read under the pool mutex.
+std::size_t MemoryPool::get_total_used_bytes() const {
+    std::lock_guard<std::mutex> guard(pool_mutex);
+    const std::size_t used_now = total_used_bytes;
+    return used_now;
+}
+// Returns how many blocks the pool currently holds (idle and in use), read
+// under the pool mutex.
+std::size_t MemoryPool::get_block_count() const {
+    std::lock_guard<std::mutex> guard(pool_mutex);
+    const std::size_t blocks_now = memory_blocks.size();
+    return blocks_now;
+}
+// Best-fit search: returns the index of the smallest idle block whose size is
+// at least requested_size, or static_cast<std::size_t>(-1) when there is none.
+// Among equally sized candidates the first one wins. Takes no lock itself.
+std::size_t MemoryPool::find_suitable_block_index(std::size_t requested_size) const {
+    const std::size_t no_block = static_cast<std::size_t>(-1);
+    std::size_t chosen_index = no_block;
+    std::size_t chosen_size = no_block;
+    std::size_t position = 0;
+    for (const MemoryBlock& candidate : memory_blocks) {
+        const bool usable = !candidate.is_in_use && candidate.block_size >= requested_size;
+        if (usable && candidate.block_size < chosen_size) {
+            chosen_size = candidate.block_size;
+            chosen_index = position;
+        }
+        ++position;
+    }
+    return chosen_index;
+}
+// Appends a new idle block of block_size bytes, rounded up to a multiple of
+// 64, registers its pointer in the lookup table and adds its size to the
+// allocation counter. Takes no lock itself.
+void MemoryPool::create_new_block(std::size_t block_size) {
+    const std::size_t rounded_size = ((block_size + 63) / 64) * 64;
+    const std::size_t new_index = memory_blocks.size();
+    memory_blocks.push_back(MemoryBlock{
+        std::make_unique<std::uint8_t[]>(rounded_size),
+        rounded_size,
+        false,
+        get_current_timestamp()
+    });
+    std::uint8_t* const block_start = memory_blocks[new_index].data.get();
+    pointer_to_block_index[block_start] = new_index;
+    total_allocated_bytes += rounded_size;
+}
+// Returns the steady_clock time since its epoch in whole milliseconds. The
+// value is monotonic and is not wall-clock time.
+std::uint64_t MemoryPool::get_current_timestamp() const {
+    using std::chrono::duration_cast;
+    using std::chrono::milliseconds;
+    const auto since_epoch = std::chrono::steady_clock::now().time_since_epoch();
+    return duration_cast<milliseconds>(since_epoch).count();
+}
+// Allocates size bytes from pool and remembers the pool so the destructor can
+// hand the buffer back. allocation_size records the requested size.
+ScopedMemoryAllocation::ScopedMemoryAllocation(MemoryPool& pool, std::size_t size)
+    : memory_pool_pointer(&pool),
+      allocated_pointer(pool.allocate(size)),
+      allocation_size(size) {}
+// Returns the buffer to its pool. A moved-from object (null pool or null
+// buffer) has nothing to return.
+ScopedMemoryAllocation::~ScopedMemoryAllocation() {
+    const bool owns_buffer = memory_pool_pointer != nullptr && allocated_pointer != nullptr;
+    if (!owns_buffer) {
+        return;
+    }
+    memory_pool_pointer->deallocate(allocated_pointer);
+}
+// Move constructor: takes over other's pool, buffer and size, and leaves other
+// empty so its destructor does not return the buffer a second time.
+ScopedMemoryAllocation::ScopedMemoryAllocation(ScopedMemoryAllocation&& other) noexcept
+    : memory_pool_pointer(other.memory_pool_pointer),
+      allocated_pointer(other.allocated_pointer),
+      allocation_size(other.allocation_size) {
+    other.allocation_size = 0;
+    other.allocated_pointer = nullptr;
+    other.memory_pool_pointer = nullptr;
+}
+// Move assignment: returns the buffer currently held (if any) to its pool,
+// then takes over other's pool, buffer and size and leaves other empty.
+// Self-assignment is a no-op.
+ScopedMemoryAllocation& ScopedMemoryAllocation::operator=(ScopedMemoryAllocation&& other) noexcept {
+    if (this == &other) {
+        return *this;
+    }
+    const bool owns_buffer = memory_pool_pointer != nullptr && allocated_pointer != nullptr;
+    if (owns_buffer) {
+        memory_pool_pointer->deallocate(allocated_pointer);
+    }
+    memory_pool_pointer = other.memory_pool_pointer;
+    allocated_pointer = other.allocated_pointer;
+    allocation_size = other.allocation_size;
+    other.allocation_size = 0;
+    other.allocated_pointer = nullptr;
+    other.memory_pool_pointer = nullptr;
+    return *this;
+}
+// Returns the held buffer pointer (nullptr after the object has been moved from).
+std::uint8_t* ScopedMemoryAllocation::get() const {
+    std::uint8_t* const buffer = allocated_pointer;
+    return buffer;
+}
+// Returns the size that was requested for the held buffer (0 after a move).
+std::size_t ScopedMemoryAllocation::size() const {
+    const std::size_t requested_size = allocation_size;
+    return requested_size;
+}
+}

accelerator/src/thread_pool.cpp ADDED Viewed

	@@ -0,0 +1,84 @@

+//
+// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+// SPDX-License-Identifier: Apache-2.0
+//
+#include "thread_pool.hpp"
+namespace pocket_tts_accelerator {
+// Launches number_of_threads workers, each running worker_thread_function().
+ThreadPool::ThreadPool(std::size_t number_of_threads)
+    : should_stop(false),
+      is_stopped(false),
+      thread_count(number_of_threads) {
+    worker_threads.reserve(number_of_threads);
+    std::size_t remaining_to_start = number_of_threads;
+    while (remaining_to_start > 0) {
+        worker_threads.emplace_back(&ThreadPool::worker_thread_function, this);
+        --remaining_to_start;
+    }
+}
+// Stops and joins the workers via shutdown().
+ThreadPool::~ThreadPool() {
+    this->shutdown();
+}
+// Requests the workers to stop, wakes all of them and joins each one. Workers
+// finish the tasks still in the queue before exiting (see
+// worker_thread_function). Calling this again after it has completed is a
+// no-op.
+void ThreadPool::shutdown() {
+    {
+        std::lock_guard<std::mutex> guard(queue_mutex);
+        const bool already_stopped = is_stopped.load();
+        if (already_stopped) {
+            return;
+        }
+        // Set under the queue mutex so a worker cannot miss the wake-up.
+        should_stop.store(true);
+    }
+    task_available_condition.notify_all();
+    for (auto worker = worker_threads.begin(); worker != worker_threads.end(); ++worker) {
+        if (!worker->joinable()) {
+            continue;
+        }
+        worker->join();
+    }
+    is_stopped.store(true);
+}
+// True while neither a stop has been requested nor the pool has finished
+// stopping.
+bool ThreadPool::is_running() const {
+    if (should_stop.load()) {
+        return false;
+    }
+    return !is_stopped.load();
+}
+// Returns the number of tasks waiting in the queue, read under the queue
+// mutex. Tasks already taken by a worker are not counted.
+std::size_t ThreadPool::get_pending_task_count() const {
+    std::lock_guard<std::mutex> guard(queue_mutex);
+    const std::size_t waiting_tasks = task_queue.size();
+    return waiting_tasks;
+}
+// Returns the worker count the pool was constructed with.
+std::size_t ThreadPool::get_thread_count() const {
+    const std::size_t configured_threads = thread_count;
+    return configured_threads;
+}
+// Worker loop: sleep until a task is queued or a stop is requested, take one
+// task while holding the queue mutex, then run it with the mutex released.
+// After a stop request the worker keeps draining the queue and exits only
+// once it is empty.
+void ThreadPool::worker_thread_function() {
+    for (;;) {
+        std::function<void()> next_task;
+        {
+            std::unique_lock<std::mutex> lock(queue_mutex);
+            while (!should_stop.load() && task_queue.empty()) {
+                task_available_condition.wait(lock);
+            }
+            if (task_queue.empty()) {
+                // The wait only ends with an empty queue when a stop was requested.
+                return;
+            }
+            next_task = std::move(task_queue.front());
+            task_queue.pop();
+        }
+        next_task();
+    }
+}
+}

app.py CHANGED Viewed

@@ -3,11 +3,10 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 import math
 import torch
 import gradio as gr
-torch.set_num_threads(1)
-torch.set_num_interop_threads(1)
 from config import (
     AVAILABLE_VOICES,
     DEFAULT_VOICE,
@@ -20,10 +19,22 @@ from config import (
     MAXIMUM_INPUT_LENGTH,
     VOICE_MODE_PRESET,
     VOICE_MODE_CLONE,
-    EXAMPLE_PROMPTS
 )
 from src.core.authentication import authenticate_huggingface
 authenticate_huggingface()
 from src.core.memory import start_background_cleanup_thread
 start_background_cleanup_thread()
 from src.generation.handler import (
@@ -64,8 +75,7 @@ with gr.Blocks(css=CSS, fill_height=False, fill_width=True) as app:
             audio_output_component = gr.Audio(
                 label="Generated Speech Output",
                 type="filepath",
-                interactive=False,
-                autoplay=False
             )
             with gr.Accordion("Voice Selection", open=True):

 # SPDX-License-Identifier: Apache-2.0
 #
+import atexit
 import math
 import torch
 import gradio as gr
 from config import (
     AVAILABLE_VOICES,
     DEFAULT_VOICE,
     MAXIMUM_INPUT_LENGTH,
     VOICE_MODE_PRESET,
     VOICE_MODE_CLONE,
+    EXAMPLE_PROMPTS,
+    ACCELERATOR_WORKER_THREADS,
+    ACCELERATOR_ENABLED
 )
+torch.set_num_threads(ACCELERATOR_WORKER_THREADS)
+torch.set_num_interop_threads(ACCELERATOR_WORKER_THREADS)
 from src.core.authentication import authenticate_huggingface
 authenticate_huggingface()
+if ACCELERATOR_ENABLED:  # daemon startup is skipped entirely when the config flag is false
+    from src.accelerator.client import start_accelerator_daemon, stop_accelerator_daemon
+    accelerator_started = start_accelerator_daemon()
+    if accelerator_started:
+        print("Accelerator daemon started successfully", flush=True)
+    else:
+        print("Accelerator daemon not available, using Python fallback", flush=True)
+    atexit.register(stop_accelerator_daemon)  # registered whether or not startup succeeded
 from src.core.memory import start_background_cleanup_thread
 start_background_cleanup_thread()
 from src.generation.handler import (
             audio_output_component = gr.Audio(
                 label="Generated Speech Output",
                 type="filepath",
+                interactive=False
             )
             with gr.Accordion("Voice Selection", open=True):

config.py CHANGED Viewed

@@ -110,4 +110,10 @@ COPYRIGHT_NAME = "Hadad Darajat"
 COPYRIGHT_URL = "https://www.linkedin.com/in/hadadrjt"
 DESIGN_BY_NAME = "D3vShoaib/pocket-tts"
-DESIGN_BY_URL = f"https://huggingface.co/spaces/{DESIGN_BY_NAME}"

 COPYRIGHT_URL = "https://www.linkedin.com/in/hadadrjt"
 DESIGN_BY_NAME = "D3vShoaib/pocket-tts"
+DESIGN_BY_URL = f"https://huggingface.co/spaces/{DESIGN_BY_NAME}"
+ACCELERATOR_SOCKET_PATH = "/app/pocket_tts_accelerator.sock"  # Unix socket path: passed to the daemon as --socket and used by the Python client to connect
+ACCELERATOR_BINARY_PATH = "/app/bin/pocket_tts_accelerator"  # daemon executable launched by src/accelerator/client.py
+ACCELERATOR_WORKER_THREADS = 2  # passed to the daemon as --threads; app.py also uses it for the torch thread counts
+ACCELERATOR_MEMORY_POOL_MB = 64  # passed to the daemon as --memory
+ACCELERATOR_ENABLED = True  # when False, app.py does not start the daemon

src/accelerator/client.py ADDED Viewed

	@@ -0,0 +1,442 @@

+#
+# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+# SPDX-License-Identifier: Apache-2.0
+#
+import os
+import socket
+import struct
+import subprocess
+import tempfile
+import threading
+import time
+from typing import Optional, Tuple, Dict, Any
+from config import (
+    ACCELERATOR_SOCKET_PATH,
+    ACCELERATOR_BINARY_PATH,
+    ACCELERATOR_WORKER_THREADS,
+    ACCELERATOR_MEMORY_POOL_MB
+)
+PROTOCOL_MAGIC_NUMBER = 0x50545453  # ASCII bytes "PTTS"; first field of every request and response header
+COMMAND_PING = 0  # COMMAND_* values are sent as the command_type field of the request header
+COMMAND_PROCESS_AUDIO = 1
+COMMAND_CONVERT_TO_MONO = 2
+COMMAND_CONVERT_TO_PCM = 3
+COMMAND_RESAMPLE_AUDIO = 4
+COMMAND_GET_MEMORY_STATS = 5
+COMMAND_CLEAR_MEMORY_POOL = 6
+COMMAND_SHUTDOWN = 7
+RESPONSE_SUCCESS = 0  # RESPONSE_* values are read from the status_code field of the response header
+RESPONSE_ERROR_INVALID_COMMAND = 1
+RESPONSE_ERROR_FILE_NOT_FOUND = 2
+RESPONSE_ERROR_PROCESSING_FAILED = 3
+RESPONSE_ERROR_MEMORY_ALLOCATION = 4
+RESPONSE_ERROR_INTERNAL = 5
+REQUEST_HEADER_FORMAT = "=IIII"  # magic, command type, payload length, request id (four unpadded unsigned 32-bit ints, native byte order)
+RESPONSE_HEADER_FORMAT = "=IIII"  # magic, status code, payload size, echoed request id
+REQUEST_HEADER_SIZE = struct.calcsize(REQUEST_HEADER_FORMAT)
+RESPONSE_HEADER_SIZE = struct.calcsize(RESPONSE_HEADER_FORMAT)
+PROCESS_AUDIO_REQUEST_FORMAT = "=512s512sII"  # 512-byte input path, 512-byte output path, target sample rate, option flags
+PROCESS_AUDIO_REQUEST_SIZE = struct.calcsize(PROCESS_AUDIO_REQUEST_FORMAT)
+MEMORY_STATS_RESPONSE_FORMAT = "=QQQ"  # total allocated bytes, total used bytes, block count (unsigned 64-bit each)
+MEMORY_STATS_RESPONSE_SIZE = struct.calcsize(MEMORY_STATS_RESPONSE_FORMAT)
+accelerator_process_handle = None  # subprocess.Popen of the daemon started by this module, or None
+accelerator_process_lock = threading.Lock()  # guards accelerator_process_handle
+request_id_counter = 0  # last request id handed out; incremented before each use
+request_id_lock = threading.Lock()  # guards request_id_counter
+class AcceleratorClient:
+    def __init__(self, socket_path: str = ACCELERATOR_SOCKET_PATH):
+        self.socket_path = socket_path
+        self.connection_timeout = 5.0
+        self.read_timeout = 30.0
+    def is_connected(self) -> bool:
+        try:
+            response = self.send_ping()
+            return response is not None and response.startswith(b"PONG")
+        except Exception:
+            return False
+    def send_ping(self) -> Optional[bytes]:
+        return self._send_command(COMMAND_PING, b"")
+    def process_audio(
+        self,
+        input_file_path: str,
+        output_file_path: str,
+        target_sample_rate: int = 0,
+        options_flags: int = 0
+    ) -> Tuple[bool, str]:
+        payload = self._pack_process_audio_request(
+            input_file_path,
+            output_file_path,
+            target_sample_rate,
+            options_flags
+        )
+        response = self._send_command(COMMAND_PROCESS_AUDIO, payload)
+        if response is None:
+            return False, "Failed to communicate with accelerator"
+        response_string = response.decode("utf-8", errors="ignore")
+        if response_string.startswith("SUCCESS:"):
+            return True, response_string[8:]
+        elif response_string.startswith("ERROR:"):
+            return False, response_string[6:]
+        else:
+            return False, response_string
+    def convert_to_mono(
+        self,
+        input_file_path: str,
+        output_file_path: str
+    ) -> Tuple[bool, str]:
+        payload = self._pack_process_audio_request(
+            input_file_path,
+            output_file_path,
+            0,
+            0
+        )
+        response = self._send_command(COMMAND_CONVERT_TO_MONO, payload)
+        if response is None:
+            return False, "Failed to communicate with accelerator"
+        response_string = response.decode("utf-8", errors="ignore")
+        if response_string.startswith("SUCCESS:"):
+            return True, response_string[8:]
+        elif response_string.startswith("ERROR:"):
+            return False, response_string[6:]
+        else:
+            return False, response_string
+    def convert_to_pcm(
+        self,
+        input_file_path: str,
+        output_file_path: str
+    ) -> Tuple[bool, str]:
+        payload = self._pack_process_audio_request(
+            input_file_path,
+            output_file_path,
+            0,
+            0
+        )
+        response = self._send_command(COMMAND_CONVERT_TO_PCM, payload)
+        if response is None:
+            return False, "Failed to communicate with accelerator"
+        response_string = response.decode("utf-8", errors="ignore")
+        if response_string.startswith("SUCCESS:"):
+            return True, response_string[8:]
+        elif response_string.startswith("ERROR:"):
+            return False, response_string[6:]
+        else:
+            return False, response_string
+    def resample_audio(
+        self,
+        input_file_path: str,
+        output_file_path: str,
+        target_sample_rate: int
+    ) -> Tuple[bool, str]:
+        payload = self._pack_process_audio_request(
+            input_file_path,
+            output_file_path,
+            target_sample_rate,
+            0
+        )
+        response = self._send_command(COMMAND_RESAMPLE_AUDIO, payload)
+        if response is None:
+            return False, "Failed to communicate with accelerator"
+        response_string = response.decode("utf-8", errors="ignore")
+        if response_string.startswith("SUCCESS:"):
+            return True, response_string[8:]
+        elif response_string.startswith("ERROR:"):
+            return False, response_string[6:]
+        else:
+            return False, response_string
+    def get_memory_stats(self) -> Optional[Dict[str, int]]:
+        response = self._send_command(COMMAND_GET_MEMORY_STATS, b"")
+        if response is None or len(response) < MEMORY_STATS_RESPONSE_SIZE:
+            return None
+        total_allocated, total_used, block_count = struct.unpack(
+            MEMORY_STATS_RESPONSE_FORMAT,
+            response[:MEMORY_STATS_RESPONSE_SIZE]
+        )
+        return {
+            "total_allocated_bytes": total_allocated,
+            "total_used_bytes": total_used,
+            "block_count": block_count
+        }
+    def clear_memory_pool(self) -> bool:
+        response = self._send_command(COMMAND_CLEAR_MEMORY_POOL, b"")
+        return response is not None
+    def shutdown_accelerator(self) -> bool:
+        response = self._send_command(COMMAND_SHUTDOWN, b"")
+        return response is not None
+    def _get_next_request_id(self) -> int:
+        global request_id_counter
+        with request_id_lock:
+            request_id_counter += 1
+            return request_id_counter
+    def _pack_process_audio_request(
+        self,
+        input_path: str,
+        output_path: str,
+        target_sample_rate: int,
+        options_flags: int
+    ) -> bytes:
+        input_path_bytes = input_path.encode("utf-8")[:511] + b"\x00"
+        output_path_bytes = output_path.encode("utf-8")[:511] + b"\x00"
+        input_path_padded = input_path_bytes.ljust(512, b"\x00")
+        output_path_padded = output_path_bytes.ljust(512, b"\x00")
+        return struct.pack(
+            PROCESS_AUDIO_REQUEST_FORMAT,
+            input_path_padded,
+            output_path_padded,
+            target_sample_rate,
+            options_flags
+        )
+    def _send_command(
+        self,
+        command_type: int,
+        payload: bytes
+    ) -> Optional[bytes]:
+        try:
+            client_socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+            client_socket.settimeout(self.connection_timeout)
+            client_socket.connect(self.socket_path)
+            request_id = self._get_next_request_id()
+            request_header = struct.pack(
+                REQUEST_HEADER_FORMAT,
+                PROTOCOL_MAGIC_NUMBER,
+                command_type,
+                len(payload),
+                request_id
+            )
+            client_socket.sendall(request_header)
+            if payload:
+                client_socket.sendall(payload)
+            client_socket.settimeout(self.read_timeout)
+            response_header_data = self._receive_exactly(client_socket, RESPONSE_HEADER_SIZE)
+            if response_header_data is None:
+                client_socket.close()
+                return None
+            magic_number, status_code, payload_size, response_request_id = struct.unpack(
+                RESPONSE_HEADER_FORMAT,
+                response_header_data
+            )
+            if magic_number != PROTOCOL_MAGIC_NUMBER:
+                client_socket.close()
+                return None
+            if response_request_id != request_id:
+                client_socket.close()
+                return None
+            response_payload = b""
+            if payload_size > 0:
+                response_payload = self._receive_exactly(client_socket, payload_size)
+                if response_payload is None:
+                    client_socket.close()
+                    return None
+            client_socket.close()
+            if status_code != RESPONSE_SUCCESS:
+                return response_payload if response_payload else None
+            return response_payload
+        except socket.timeout:
+            return None
+        except socket.error:
+            return None
+        except Exception:
+            return None
+    def _receive_exactly(
+        self,
+        client_socket: socket.socket,
+        num_bytes: int
+    ) -> Optional[bytes]:
+        received_data = b""
+        remaining_bytes = num_bytes
+        while remaining_bytes > 0:
+            try:
+                chunk = client_socket.recv(remaining_bytes)
+                if not chunk:
+                    return None
+                received_data += chunk
+                remaining_bytes -= len(chunk)
+            except socket.timeout:
+                return None
+            except socket.error:
+                return None
+        return received_data
+def is_accelerator_available() -> bool:
+    if not os.path.exists(ACCELERATOR_SOCKET_PATH):
+        return False
+    client = AcceleratorClient()
+    return client.is_connected()
+def start_accelerator_daemon() -> bool:
+    global accelerator_process_handle
+    with accelerator_process_lock:
+        if accelerator_process_handle is not None:
+            if accelerator_process_handle.poll() is None:
+                return True
+        if not os.path.exists(ACCELERATOR_BINARY_PATH):
+            return False
+        try:
+            accelerator_process_handle = subprocess.Popen(
+                [
+                    ACCELERATOR_BINARY_PATH,
+                    "--socket", ACCELERATOR_SOCKET_PATH,
+                    "--threads", str(ACCELERATOR_WORKER_THREADS),
+                    "--memory", str(ACCELERATOR_MEMORY_POOL_MB)
+                ],
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+                start_new_session=True
+            )
+            for attempt_index in range(50):
+                time.sleep(0.1)
+                if is_accelerator_available():
+                    return True
+            return is_accelerator_available()
+        except Exception:
+            return False
+def stop_accelerator_daemon() -> bool:
+    global accelerator_process_handle
+    with accelerator_process_lock:
+        if is_accelerator_available():
+            try:
+                client = AcceleratorClient()
+                client.shutdown_accelerator()
+                time.sleep(0.5)
+            except Exception:
+                pass
+        if accelerator_process_handle is not None:
+            if accelerator_process_handle.poll() is None:
+                try:
+                    accelerator_process_handle.terminate()
+                    accelerator_process_handle.wait(timeout=5)
+                except subprocess.TimeoutExpired:
+                    accelerator_process_handle.kill()
+                    accelerator_process_handle.wait()
+            accelerator_process_handle = None
+        return True
+def process_audio_with_accelerator(
+    input_file_path: str,
+    output_file_path: str
+) -> Tuple[bool, str]:
+    if not is_accelerator_available():
+        return False, "Accelerator not available"
+    client = AcceleratorClient()
+    return client.process_audio(input_file_path, output_file_path)
+def convert_to_mono_with_accelerator(
+    input_file_path: str,
+    output_file_path: str
+) -> Tuple[bool, str]:
+    if not is_accelerator_available():
+        return False, "Accelerator not available"
+    client = AcceleratorClient()
+    return client.convert_to_mono(input_file_path, output_file_path)
+def convert_to_pcm_with_accelerator(
+    input_file_path: str,
+    output_file_path: str
+) -> Tuple[bool, str]:
+    if not is_accelerator_available():
+        return False, "Accelerator not available"
+    client = AcceleratorClient()
+    return client.convert_to_pcm(input_file_path, output_file_path)
+def get_accelerator_memory_stats() -> Optional[Dict[str, int]]:
+    if not is_accelerator_available():
+        return None
+    client = AcceleratorClient()
+    return client.get_memory_stats()

src/audio/converter.py CHANGED Viewed

@@ -10,6 +10,11 @@ import numpy as np
 import scipy.io.wavfile
 from ..core.state import temporary_files_registry, temporary_files_lock
 from ..core.memory import trigger_background_cleanup_check
 def convert_audio_data_to_pcm_int16(audio_data):
     if audio_data.dtype == np.float32 or audio_data.dtype == np.float64:
@@ -55,7 +60,30 @@ def register_temporary_file(file_path):
         temporary_files_registry[file_path] = time.time()
     trigger_background_cleanup_check()
 def convert_wav_file_to_pcm_format(input_path):
     try:
         sample_rate, audio_data = scipy.io.wavfile.read(input_path)

 import scipy.io.wavfile
 from ..core.state import temporary_files_registry, temporary_files_lock
 from ..core.memory import trigger_background_cleanup_check
+from ..accelerator.client import (
+    is_accelerator_available,
+    convert_to_pcm_with_accelerator,
+    process_audio_with_accelerator
+)
 def convert_audio_data_to_pcm_int16(audio_data):
     if audio_data.dtype == np.float32 or audio_data.dtype == np.float64:
         temporary_files_registry[file_path] = time.time()
     trigger_background_cleanup_check()
+def convert_wav_file_to_pcm_format_with_accelerator(input_path):
+    output_file = tempfile.NamedTemporaryFile(suffix="_accel_pcm_converted.wav", delete=False)
+    output_path = output_file.name
+    output_file.close()
+    success, result_message = convert_to_pcm_with_accelerator(input_path, output_path)
+    if success:
+        register_temporary_file(output_path)
+        return output_path, None
+    else:
+        if os.path.exists(output_path):
+            try:
+                os.remove(output_path)
+            except Exception:
+                pass
+        return None, result_message
 def convert_wav_file_to_pcm_format(input_path):
+    if is_accelerator_available():
+        accelerated_result, accelerated_error = convert_wav_file_to_pcm_format_with_accelerator(input_path)
+        if accelerated_result is not None:
+            return accelerated_result, None
     try:
         sample_rate, audio_data = scipy.io.wavfile.read(input_path)