pocket-tts-hf-cpu-optimized

Sleeping

App Files Files Community

hadadrjt committed on Jan 24

Commit

1a63d8d

1 Parent(s): d011224

Pocket TTS: Switch to simple demo.

Browse files

Files changed (33) hide show

.dockerignore +0 -3
Dockerfile +1 -16
README.md +3 -0
accelerator/CMakeLists.txt +0 -36
accelerator/include/accelerator_core.hpp +0 -69
accelerator/include/audio_processor.hpp +0 -84
accelerator/include/ipc_handler.hpp +0 -107
accelerator/include/memory_pool.hpp +0 -79
accelerator/include/thread_pool.hpp +0 -83
accelerator/src/accelerator_core.cpp +0 -558
accelerator/src/audio_processor.cpp +0 -352
accelerator/src/ipc_handler.cpp +0 -226
accelerator/src/main.cpp +0 -83
accelerator/src/memory_pool.cpp +0 -216
accelerator/src/thread_pool.cpp +0 -84
app.py +0 -372
assets/css/styles.py +0 -161
assets/static/footer.py +0 -32
assets/static/header.py +0 -18
assets/static/sidebar.py +0 -44
assets/static/title.py +0 -15
config.py +0 -126
src/accelerator/client.py +0 -583
src/audio/converter.py +0 -344
src/audio/validator.py +0 -268
src/core/authentication.py +0 -23
src/core/memory.py +0 -394
src/core/state.py +0 -147
src/generation/handler.py +0 -309
src/tts/manager.py +0 -341
src/ui/handlers.py +0 -58
src/ui/state.py +0 -43
src/validation/text.py +0 -20

.dockerignore DELETED Viewed

@@ -1,3 +0,0 @@
-Dockerfile
-LICENSE
-README.md

Dockerfile CHANGED Viewed

@@ -3,19 +3,4 @@
 # SPDX-License-Identifier: Apache-2.0
 #
-FROM hadadrjt/pocket-tts:hf-20260121
-WORKDIR /app
-COPY . .
-RUN mkdir build \
-    && cd build \
-    && cmake \
-       -DCMAKE_BUILD_TYPE=Release \
-       -DCMAKE_INSTALL_PREFIX=/app \
-       ../accelerator \
-    && make -j$(nproc) \
-    && make install \
-    && cd .. \
-    && rm -rf accelerator build

 # SPDX-License-Identifier: Apache-2.0
 #
+FROM hadadrjt/pocket-tts:hf-simple-demo

README.md CHANGED Viewed

@@ -8,4 +8,7 @@ colorTo: yellow
 sdk: docker
 app_port: 7860
 pinned: false
 ---

 sdk: docker
 app_port: 7860
 pinned: false
+models:
+ - kyutai/pocket-tts
+ - kyutai/tts-voices
 ---

accelerator/CMakeLists.txt DELETED Viewed

@@ -1,36 +0,0 @@
-#
-# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-# SPDX-License-Identifier: Apache-2.0
-#
-# Build script for the pocket_tts_accelerator executable.
-#
-# Nothing below needs a CMake newer than 3.16; the upper bound only opts in to
-# the policy behaviour that has been exercised with 3.31.
-cmake_minimum_required(VERSION 3.16...3.31)
-project(pocket_tts_accelerator VERSION 0.0.0 LANGUAGES CXX)
-#
-# -march=native ties the binary to the CPU of the machine that compiled it and
-# can fault with an illegal instruction on a different host (for example when
-# the image is built on one machine and run on another), so it is opt-in.
-option(POCKET_TTS_ACCELERATOR_NATIVE_ARCH
-    "Compile with -march=native (binary only runs on CPUs like the build host)"
-    OFF
-)
-#
-# Ask FindThreads for -pthread (compile and link) rather than a bare -lpthread.
-set(THREADS_PREFER_PTHREAD_FLAG ON)
-find_package(Threads REQUIRED)
-add_executable(pocket_tts_accelerator
-    src/main.cpp
-    src/accelerator_core.cpp
-    src/audio_processor.cpp
-    src/ipc_handler.cpp
-    src/memory_pool.cpp
-    src/thread_pool.cpp
-)
-#
-# Strict ISO C++17 (-std=c++17, not -std=gnu++17), scoped to this target.
-set_target_properties(pocket_tts_accelerator PROPERTIES
-    CXX_STANDARD 17
-    CXX_STANDARD_REQUIRED ON
-    CXX_EXTENSIONS OFF
-)
-target_include_directories(pocket_tts_accelerator
-    PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include"
-)
-#
-# Warnings apply to every configuration; the aggressive optimisation flags are
-# skipped for Debug so that a debug build stays debuggable.
-target_compile_options(pocket_tts_accelerator PRIVATE
-    "$<$<CXX_COMPILER_ID:GNU,Clang,AppleClang>:-Wall;-Wextra;-Wpedantic>"
-    "$<$<AND:$<CXX_COMPILER_ID:GNU,Clang,AppleClang>,$<NOT:$<CONFIG:Debug>>>:-O3;-ffast-math;-funroll-loops>"
-    "$<$<AND:$<CXX_COMPILER_ID:GNU,Clang,AppleClang>,$<BOOL:${POCKET_TTS_ACCELERATOR_NATIVE_ARCH}>>:-march=native>"
-)
-target_link_libraries(pocket_tts_accelerator PRIVATE Threads::Threads)
-install(TARGETS pocket_tts_accelerator DESTINATION bin)

accelerator/include/accelerator_core.hpp DELETED Viewed

@@ -1,69 +0,0 @@
-//
-// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-// SPDX-License-Identifier: Apache-2.0
-//
-#ifndef POCKET_TTS_ACCELERATOR_CORE_HPP
-#define POCKET_TTS_ACCELERATOR_CORE_HPP
-#include "audio_processor.hpp"
-#include "ipc_handler.hpp"
-#include "memory_pool.hpp"
-#include "thread_pool.hpp"
-// <cstddef>, <cstdint> and <vector> are included directly because this header
-// names std::size_t, std::uint8_t and std::vector itself.
-#include <atomic>
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-namespace pocket_tts_accelerator {
-// Start-up settings consumed by AcceleratorCore.
-struct AcceleratorConfiguration {
-    std::size_t number_of_worker_threads;  // passed to the ThreadPool constructor
-    std::size_t memory_pool_size_bytes;    // passed to the MemoryPool constructor
-    std::string ipc_socket_path;           // passed to the IpcHandler constructor
-    bool enable_verbose_logging;
-};
-// Owns the memory pool, thread pool, audio processor and IPC handler, and
-// implements one handler per IPC command. Non-copyable.
-class AcceleratorCore {
-public:
-    explicit AcceleratorCore(const AcceleratorConfiguration& configuration);
-    ~AcceleratorCore();
-    AcceleratorCore(const AcceleratorCore&) = delete;
-    AcceleratorCore& operator=(const AcceleratorCore&) = delete;
-    // Creates the components and starts the IPC server; returns false when the
-    // server cannot be started.
-    bool initialize();
-    // Blocks until shutdown has been requested.
-    void run();
-    // Stops the components; only the first call does any work.
-    void shutdown();
-    // True once initialized and before shutdown has been requested.
-    bool is_running() const;
-    // "Not initialized", "Shutting down" or "Running".
-    std::string get_status_string() const;
-    static AcceleratorConfiguration get_default_configuration();
-private:
-    void register_all_command_handlers();
-    void setup_signal_handlers();
-    // Command handlers: each takes the raw request payload and returns the raw
-    // response payload.
-    std::vector<std::uint8_t> handle_ping_command(const std::vector<std::uint8_t>& payload);
-    std::vector<std::uint8_t> handle_process_audio_command(const std::vector<std::uint8_t>& payload);
-    std::vector<std::uint8_t> handle_convert_to_mono_command(const std::vector<std::uint8_t>& payload);
-    std::vector<std::uint8_t> handle_convert_to_pcm_command(const std::vector<std::uint8_t>& payload);
-    std::vector<std::uint8_t> handle_resample_audio_command(const std::vector<std::uint8_t>& payload);
-    std::vector<std::uint8_t> handle_get_memory_stats_command(const std::vector<std::uint8_t>& payload);
-    std::vector<std::uint8_t> handle_clear_memory_pool_command(const std::vector<std::uint8_t>& payload);
-    std::vector<std::uint8_t> handle_shutdown_command(const std::vector<std::uint8_t>& payload);
-    void log_message(const std::string& message) const;
-    AcceleratorConfiguration config;
-    std::unique_ptr<MemoryPool> memory_pool;
-    std::unique_ptr<ThreadPool> thread_pool;
-    std::unique_ptr<AudioProcessor> audio_processor;
-    std::unique_ptr<IpcHandler> ipc_handler;
-    std::atomic<bool> is_initialized;
-    std::atomic<bool> should_shutdown;
-};
-}
-#endif

accelerator/include/audio_processor.hpp DELETED Viewed

@@ -1,84 +0,0 @@
-//
-// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-// SPDX-License-Identifier: Apache-2.0
-//
-#ifndef POCKET_TTS_AUDIO_PROCESSOR_HPP
-#define POCKET_TTS_AUDIO_PROCESSOR_HPP
-#include "memory_pool.hpp"
-#include <cstddef>
-#include <cstdint>
-#include <string>
-#include <vector>
-namespace pocket_tts_accelerator {
-struct WavFileHeader { // Fixed-size WAV header record; the listed fields add up to 44 bytes
-    char riff_marker[4]; // NOTE(review): presumably the "RIFF" tag - confirm in audio_processor.cpp
-    std::uint32_t file_size;
-    char wave_marker[4]; // NOTE(review): presumably the "WAVE" tag - confirm
-    char format_marker[4]; // NOTE(review): presumably the "fmt " chunk tag - confirm
-    std::uint32_t format_chunk_size;
-    std::uint16_t audio_format;
-    std::uint16_t number_of_channels;
-    std::uint32_t sample_rate;
-    std::uint32_t byte_rate;
-    std::uint16_t block_align;
-    std::uint16_t bits_per_sample;
-    char data_marker[4]; // NOTE(review): presumably the "data" chunk tag - confirm
-    std::uint32_t data_size;
-};
-struct AudioData { // Audio samples plus format fields and a validity flag; no member has a default initializer
-    std::vector<std::int16_t> samples; // 16-bit samples; NOTE(review): presumably interleaved when multi-channel - confirm
-    std::uint32_t sample_rate;
-    std::uint16_t number_of_channels;
-    std::uint16_t bits_per_sample;
-    bool is_valid; // checked by the command handlers after read_wav_file()
-    std::string error_message; // reported back to the client when is_valid is false
-};
-struct AudioProcessingResult { // Outcome of one AudioProcessor operation; no member has a default initializer
-    std::vector<std::int16_t> processed_samples; // moved into an AudioData by the command handlers on success
-    std::uint32_t output_sample_rate;
-    bool success; // checked by the command handlers before using processed_samples
-    std::string error_message; // reported back to the client when success is false
-};
-class AudioProcessor { // WAV read/write and sample conversion operations; non-copyable
-public:
-    explicit AudioProcessor(MemoryPool& shared_memory_pool); // keeps a reference, so the pool must outlive this object
-    ~AudioProcessor();
-    AudioProcessor(const AudioProcessor&) = delete;
-    AudioProcessor& operator=(const AudioProcessor&) = delete;
-    AudioData read_wav_file(const std::string& file_path); // callers check AudioData::is_valid on the result
-    bool write_wav_file(const std::string& file_path, const AudioData& audio_data); // callers treat false as a write failure
-    AudioProcessingResult convert_to_mono(const AudioData& input_audio);
-    AudioProcessingResult convert_to_pcm_int16(const AudioData& input_audio);
-    AudioProcessingResult resample_audio(const AudioData& input_audio, std::uint32_t target_sample_rate);
-    AudioProcessingResult normalize_audio(const AudioData& input_audio, float target_peak_level);
-    AudioProcessingResult process_audio_for_voice_cloning( // file-to-file entry point used by the PROCESS_AUDIO command
-        const std::string& input_file_path,
-        const std::string& output_file_path
-    );
-    static bool validate_wav_header(const WavFileHeader& header);
-    static std::size_t calculate_audio_duration_milliseconds(const AudioData& audio_data);
-private:
-    void convert_float32_to_int16(const float* input, std::int16_t* output, std::size_t sample_count); // NOTE(review): presumably input and output both hold sample_count elements - confirm
-    void convert_int32_to_int16(const std::int32_t* input, std::int16_t* output, std::size_t sample_count);
-    void convert_uint8_to_int16(const std::uint8_t* input, std::int16_t* output, std::size_t sample_count);
-    void mix_channels_to_mono(const std::int16_t* input, std::int16_t* output, std::size_t frame_count, std::uint16_t channel_count);
-    MemoryPool& memory_pool; // non-owning reference supplied at construction
-};
-}
-#endif

accelerator/include/ipc_handler.hpp DELETED Viewed

@@ -1,107 +0,0 @@
-//
-// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-// SPDX-License-Identifier: Apache-2.0
-//
-#ifndef POCKET_TTS_IPC_HANDLER_HPP
-#define POCKET_TTS_IPC_HANDLER_HPP
-// <unordered_map> is required by IpcHandler::command_handlers below.
-#include <atomic>
-#include <cstddef>
-#include <cstdint>
-#include <functional>
-#include <memory>
-#include <mutex>
-#include <string>
-#include <thread>
-#include <unordered_map>
-#include <vector>
-namespace pocket_tts_accelerator {
-// Command identifiers carried in RequestHeader::command_type.
-enum class CommandType : std::uint32_t {
-    PING = 0,
-    PROCESS_AUDIO = 1,
-    CONVERT_TO_MONO = 2,
-    CONVERT_TO_PCM = 3,
-    RESAMPLE_AUDIO = 4,
-    GET_MEMORY_STATS = 5,
-    CLEAR_MEMORY_POOL = 6,
-    SHUTDOWN = 7,
-    UNKNOWN = 255
-};
-// Status identifiers carried in ResponseHeader::status_code.
-enum class ResponseStatus : std::uint32_t {
-    SUCCESS = 0,
-    ERROR_INVALID_COMMAND = 1,
-    ERROR_FILE_NOT_FOUND = 2,
-    ERROR_PROCESSING_FAILED = 3,
-    ERROR_MEMORY_ALLOCATION = 4,
-    ERROR_INTERNAL = 5
-};
-// Fixed header of a request: four 32-bit fields.
-struct RequestHeader {
-    std::uint32_t magic_number;
-    std::uint32_t command_type;
-    std::uint32_t payload_size;
-    std::uint32_t request_id;
-};
-// Fixed header of a response: four 32-bit fields.
-struct ResponseHeader {
-    std::uint32_t magic_number;
-    std::uint32_t status_code;
-    std::uint32_t payload_size;
-    std::uint32_t request_id;
-};
-// Payload layout copied byte-for-byte out of the request by the audio command
-// handlers. The path buffers come from the client, so readers must not assume
-// they are NUL-terminated.
-struct ProcessAudioRequest {
-    char input_file_path[512];
-    char output_file_path[512];
-    std::uint32_t target_sample_rate;
-    std::uint32_t options_flags;
-};
-// Payload layout of the GET_MEMORY_STATS response.
-struct MemoryStatsResponse {
-    std::uint64_t total_allocated_bytes;
-    std::uint64_t total_used_bytes;
-    std::uint64_t block_count;
-};
-// Socket server that maps each CommandType to a registered handler.
-// Non-copyable.
-class IpcHandler {
-public:
-    // A handler receives the request payload and returns the response payload.
-    using CommandHandlerFunction = std::function<std::vector<std::uint8_t>(const std::vector<std::uint8_t>&)>;
-    explicit IpcHandler(const std::string& socket_path);
-    ~IpcHandler();
-    IpcHandler(const IpcHandler&) = delete;
-    IpcHandler& operator=(const IpcHandler&) = delete;
-    // Returns false when the server could not be started.
-    bool start_server();
-    void stop_server();
-    bool is_running() const;
-    void register_command_handler(CommandType command_type, CommandHandlerFunction handler);
-    void set_shutdown_callback(std::function<void()> callback);
-    // 0x50545453 is the byte sequence 'P' 'T' 'T' 'S' in ASCII.
-    static constexpr std::uint32_t PROTOCOL_MAGIC_NUMBER = 0x50545453;
-    // 16 MiB.
-    static constexpr std::size_t MAXIMUM_PAYLOAD_SIZE = 16 * 1024 * 1024;
-    static constexpr int CONNECTION_BACKLOG = 5;
-private:
-    void accept_connections_loop();
-    void handle_client_connection(int client_socket_fd);
-    bool send_response(int socket_fd, const ResponseHeader& header, const std::vector<std::uint8_t>& payload);
-    bool receive_request(int socket_fd, RequestHeader& header, std::vector<std::uint8_t>& payload);
-    std::string socket_file_path;
-    int server_socket_fd;
-    std::atomic<bool> is_server_running;
-    std::thread accept_thread;
-    std::mutex handlers_mutex;
-    std::unordered_map<CommandType, CommandHandlerFunction> command_handlers;
-    std::function<void()> shutdown_callback;
-};
-}
-#endif

accelerator/include/memory_pool.hpp DELETED Viewed

@@ -1,79 +0,0 @@
-//
-// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-// SPDX-License-Identifier: Apache-2.0
-//
-#ifndef POCKET_TTS_MEMORY_POOL_HPP
-#define POCKET_TTS_MEMORY_POOL_HPP
-#include <atomic>
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-#include <mutex>
-#include <unordered_map>
-#include <vector>
-namespace pocket_tts_accelerator {
-struct MemoryBlock { // One buffer owned by MemoryPool together with its bookkeeping fields
-    std::unique_ptr<std::uint8_t[]> data; // owning pointer to the buffer
-    std::size_t block_size; // NOTE(review): presumably the byte length of data - confirm in memory_pool.cpp
-    bool is_in_use;
-    std::uint64_t last_access_timestamp; // NOTE(review): unit and clock are not visible here - confirm
-};
-class MemoryPool { // Hands out byte buffers backed by MemoryBlock entries; neither copyable nor movable
-public:
-    explicit MemoryPool(std::size_t initial_pool_size_bytes = 64 * 1024 * 1024); // default argument is 64 MiB
-    ~MemoryPool();
-    MemoryPool(const MemoryPool&) = delete;
-    MemoryPool& operator=(const MemoryPool&) = delete;
-    MemoryPool(MemoryPool&&) = delete;
-    MemoryPool& operator=(MemoryPool&&) = delete;
-    std::uint8_t* allocate(std::size_t requested_size_bytes); // NOTE(review): failure behaviour (nullptr vs. throw) is not visible here - confirm
-    void deallocate(std::uint8_t* pointer);
-    void clear_unused_blocks();
-    void reset_pool(); // called by AcceleratorCore::shutdown()
-    std::size_t get_total_allocated_bytes() const;
-    std::size_t get_total_used_bytes() const;
-    std::size_t get_block_count() const;
-private:
-    std::size_t find_suitable_block_index(std::size_t requested_size) const;
-    void create_new_block(std::size_t block_size);
-    std::uint64_t get_current_timestamp() const;
-    std::vector<MemoryBlock> memory_blocks;
-    std::unordered_map<std::uint8_t*, std::size_t> pointer_to_block_index; // maps a handed-out pointer to an index into memory_blocks
-    mutable std::mutex pool_mutex; // mutable so that const member functions can lock it
-    std::size_t total_allocated_bytes;
-    std::size_t total_used_bytes;
-    std::size_t maximum_pool_size_bytes;
-};
-class ScopedMemoryAllocation { // Move-only handle pairing a MemoryPool with one allocation from it
-public:
-    ScopedMemoryAllocation(MemoryPool& pool, std::size_t size);
-    ~ScopedMemoryAllocation(); // NOTE(review): presumably returns the allocation to the pool - confirm in memory_pool.cpp
-    ScopedMemoryAllocation(const ScopedMemoryAllocation&) = delete;
-    ScopedMemoryAllocation& operator=(const ScopedMemoryAllocation&) = delete;
-    ScopedMemoryAllocation(ScopedMemoryAllocation&& other) noexcept;
-    ScopedMemoryAllocation& operator=(ScopedMemoryAllocation&& other) noexcept;
-    std::uint8_t* get() const;
-    std::size_t size() const;
-private:
-    MemoryPool* memory_pool_pointer; // stored as a pointer, which allows the move operations to reseat it
-    std::uint8_t* allocated_pointer;
-    std::size_t allocation_size;
-};
-}
-#endif

accelerator/include/thread_pool.hpp DELETED Viewed

@@ -1,83 +0,0 @@
-//
-// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-// SPDX-License-Identifier: Apache-2.0
-//
-#ifndef POCKET_TTS_THREAD_POOL_HPP
-#define POCKET_TTS_THREAD_POOL_HPP
-// <cstddef>, <stdexcept>, <type_traits> and <utility> are included directly
-// because this header names std::size_t, std::runtime_error,
-// std::invoke_result and std::forward itself.
-#include <atomic>
-#include <condition_variable>
-#include <cstddef>
-#include <functional>
-#include <future>
-#include <memory>
-#include <mutex>
-#include <queue>
-#include <stdexcept>
-#include <thread>
-#include <type_traits>
-#include <utility>
-#include <vector>
-namespace pocket_tts_accelerator {
-// Fixed set of worker threads draining a shared task queue. Neither copyable
-// nor movable.
-class ThreadPool {
-public:
-    explicit ThreadPool(std::size_t number_of_threads);
-    ~ThreadPool();
-    ThreadPool(const ThreadPool&) = delete;
-    ThreadPool& operator=(const ThreadPool&) = delete;
-    ThreadPool(ThreadPool&&) = delete;
-    ThreadPool& operator=(ThreadPool&&) = delete;
-    // Queues function(arguments...) and returns a future for its result.
-    // Throws std::runtime_error when the pool has already been told to stop.
-    template<typename FunctionType, typename... ArgumentTypes>
-    auto submit_task(FunctionType&& function, ArgumentTypes&&... arguments)
-        -> std::future<typename std::invoke_result<FunctionType, ArgumentTypes...>::type>;
-    void shutdown();
-    bool is_running() const;
-    std::size_t get_pending_task_count() const;
-    std::size_t get_thread_count() const;
-private:
-    void worker_thread_function();
-    std::vector<std::thread> worker_threads;
-    std::queue<std::function<void()>> task_queue;
-    // mutable so that const member functions can lock it.
-    mutable std::mutex queue_mutex;
-    std::condition_variable task_available_condition;
-    std::atomic<bool> should_stop;
-    std::atomic<bool> is_stopped;
-    std::size_t thread_count;
-};
-template<typename FunctionType, typename... ArgumentTypes>
-auto ThreadPool::submit_task(FunctionType&& function, ArgumentTypes&&... arguments)
-    -> std::future<typename std::invoke_result<FunctionType, ArgumentTypes...>::type> {
-    using ReturnType = typename std::invoke_result<FunctionType, ArgumentTypes...>::type;
-    // std::function requires a copyable callable but std::packaged_task is
-    // move-only, so the task is held through a shared_ptr that the queued
-    // lambda copies.
-    auto packaged_task = std::make_shared<std::packaged_task<ReturnType()>>(
-        std::bind(std::forward<FunctionType>(function), std::forward<ArgumentTypes>(arguments)...)
-    );
-    std::future<ReturnType> result_future = packaged_task->get_future();
-    {
-        std::unique_lock<std::mutex> lock(queue_mutex);
-        if (should_stop.load()) {
-            throw std::runtime_error("Cannot submit task to stopped thread pool");
-        }
-        task_queue.emplace([packaged_task]() {
-            (*packaged_task)();
-        });
-    }
-    // Notify after releasing the lock so the woken worker can take it at once.
-    task_available_condition.notify_one();
-    return result_future;
-}
-}
-#endif

accelerator/src/accelerator_core.cpp DELETED Viewed

@@ -1,558 +0,0 @@
-//
-// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-// SPDX-License-Identifier: Apache-2.0
-//
-#include "accelerator_core.hpp"
-#include <chrono>
-#include <cstring>
-#include <ctime>
-#include <iomanip>
-#include <iostream>
-#include <sstream>
-#include <signal.h>
-namespace pocket_tts_accelerator {
-// Instance that installed the signal handlers; cleared again by the destructor
-// so that it never refers to a destroyed object.
-static AcceleratorCore* global_accelerator_instance = nullptr;
-// Number of the most recent signal, or 0 when none is pending. Written by the
-// signal handler and consumed by AcceleratorCore::run().
-static volatile sig_atomic_t last_received_signal = 0;
-// Signal handler. It only records the signal number: shutdown() logs, stops
-// the IPC server and shuts down the thread pool, none of which is safe to do
-// from inside a signal handler. The actual shutdown is carried out by run().
-static void signal_handler_function(int signal_number) {
-    last_received_signal = signal_number;
-}
-// Stores the configuration; no component is created until initialize().
-AcceleratorCore::AcceleratorCore(const AcceleratorConfiguration& configuration)
-    : config(configuration)
-    , is_initialized(false)
-    , should_shutdown(false) {
-}
-// Shuts the components down and unregisters this object as the signal target.
-AcceleratorCore::~AcceleratorCore() {
-    shutdown();
-    if (global_accelerator_instance == this) {
-        global_accelerator_instance = nullptr;
-    }
-}
-// Creates the memory pool, thread pool, audio processor and IPC handler,
-// registers the command handlers, starts the IPC server and installs the
-// signal handlers. Returns true on success (or when already initialized) and
-// false when the IPC server cannot be started.
-bool AcceleratorCore::initialize() {
-    if (is_initialized.load()) {
-        return true;
-    }
-    log_message("Initializing Pocket TTS Accelerator...");
-    memory_pool = std::make_unique<MemoryPool>(config.memory_pool_size_bytes);
-    log_message("Memory pool initialized with " + std::to_string(config.memory_pool_size_bytes / (1024 * 1024)) + " MB");
-    thread_pool = std::make_unique<ThreadPool>(config.number_of_worker_threads);
-    log_message("Thread pool initialized with " + std::to_string(config.number_of_worker_threads) + " worker threads");
-    audio_processor = std::make_unique<AudioProcessor>(*memory_pool);
-    log_message("Audio processor initialized");
-    ipc_handler = std::make_unique<IpcHandler>(config.ipc_socket_path);
-    log_message("IPC handler created for socket: " + config.ipc_socket_path);
-    register_all_command_handlers();
-    ipc_handler->set_shutdown_callback([this]() {
-        this->shutdown();
-    });
-    if (!ipc_handler->start_server()) {
-        log_message("ERROR: Failed to start IPC server");
-        return false;
-    }
-    log_message("IPC server started successfully");
-    global_accelerator_instance = this;
-    setup_signal_handlers();
-    is_initialized.store(true);
-    log_message("Pocket TTS Accelerator initialized successfully");
-    return true;
-}
-// Blocks until shutdown has been requested, polling every 100 ms. A signal
-// recorded by the handler is logged here and turned into a regular shutdown()
-// call on this thread. Returns immediately when initialize() has not succeeded.
-void AcceleratorCore::run() {
-    if (!is_initialized.load()) {
-        log_message("ERROR: Accelerator not initialized");
-        return;
-    }
-    log_message("Accelerator running and waiting for commands...");
-    while (!should_shutdown.load()) {
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-        const sig_atomic_t pending_signal = last_received_signal;
-        if (pending_signal != 0) {
-            last_received_signal = 0;
-            log_message("Received signal: " + std::to_string(pending_signal));
-            // shutdown() sets should_shutdown, which ends this loop.
-            shutdown();
-        }
-    }
-    log_message("Accelerator main loop exited");
-}
-// Stops the IPC server, the thread pool and the memory pool, in that order.
-// Only the first call does any work; later calls return immediately.
-void AcceleratorCore::shutdown() {
-    // exchange() yields the previous value, so exactly one caller sees false.
-    const bool shutdown_already_requested = should_shutdown.exchange(true);
-    if (shutdown_already_requested) {
-        return;
-    }
-    log_message("Shutting down Pocket TTS Accelerator...");
-    if (ipc_handler != nullptr) {
-        ipc_handler->stop_server();
-        log_message("IPC server stopped");
-    }
-    if (thread_pool != nullptr) {
-        thread_pool->shutdown();
-        log_message("Thread pool shut down");
-    }
-    if (memory_pool != nullptr) {
-        memory_pool->reset_pool();
-        log_message("Memory pool reset");
-    }
-    is_initialized.store(false);
-    log_message("Pocket TTS Accelerator shut down complete");
-}
-// True once initialize() has succeeded and shutdown has not been requested.
-bool AcceleratorCore::is_running() const {
-    if (!is_initialized.load()) {
-        return false;
-    }
-    return !should_shutdown.load();
-}
-// Human-readable state: "Not initialized", "Shutting down" or "Running".
-std::string AcceleratorCore::get_status_string() const {
-    if (!is_initialized.load()) {
-        return "Not initialized";
-    }
-    return should_shutdown.load() ? "Shutting down" : "Running";
-}
-// Default settings: 2 worker threads, a 64 MiB memory pool, the socket at
-// /tmp/pocket_tts_accelerator.sock and verbose logging enabled.
-AcceleratorConfiguration AcceleratorCore::get_default_configuration() {
-    // Initializers follow the member order of AcceleratorConfiguration.
-    return AcceleratorConfiguration{
-        2,
-        64 * 1024 * 1024,
-        "/tmp/pocket_tts_accelerator.sock",
-        true
-    };
-}
-// Registers one IPC handler per command, each forwarding the payload to the
-// matching handle_*_command member function of this object.
-void AcceleratorCore::register_all_command_handlers() {
-    using PayloadBytes = std::vector<std::uint8_t>;
-    using MemberHandler = PayloadBytes (AcceleratorCore::*)(const PayloadBytes&);
-    // Wraps a member function in the callable shape the IPC handler expects.
-    const auto attach = [this](CommandType command, MemberHandler member) {
-        ipc_handler->register_command_handler(
-            command,
-            [this, member](const PayloadBytes& payload) {
-                return (this->*member)(payload);
-            }
-        );
-    };
-    attach(CommandType::PING, &AcceleratorCore::handle_ping_command);
-    attach(CommandType::PROCESS_AUDIO, &AcceleratorCore::handle_process_audio_command);
-    attach(CommandType::CONVERT_TO_MONO, &AcceleratorCore::handle_convert_to_mono_command);
-    attach(CommandType::CONVERT_TO_PCM, &AcceleratorCore::handle_convert_to_pcm_command);
-    attach(CommandType::RESAMPLE_AUDIO, &AcceleratorCore::handle_resample_audio_command);
-    attach(CommandType::GET_MEMORY_STATS, &AcceleratorCore::handle_get_memory_stats_command);
-    attach(CommandType::CLEAR_MEMORY_POOL, &AcceleratorCore::handle_clear_memory_pool_command);
-    attach(CommandType::SHUTDOWN, &AcceleratorCore::handle_shutdown_command);
-    log_message("All command handlers registered");
-}
-// Installs signal_handler_function for SIGINT and SIGTERM.
-void AcceleratorCore::setup_signal_handlers() {
-    const int handled_signals[] = {SIGINT, SIGTERM};
-    for (const int signal_number : handled_signals) {
-        signal(signal_number, signal_handler_function);
-    }
-}
-// Replies "PONG" to an empty payload and "PONG:<payload>" otherwise.
-std::vector<std::uint8_t> AcceleratorCore::handle_ping_command(const std::vector<std::uint8_t>& payload) {
-    const std::string echoed_text(payload.begin(), payload.end());
-    const bool has_payload = !echoed_text.empty();
-    if (has_payload) {
-        log_message("Received PING command with payload: " + echoed_text);
-    } else {
-        log_message("Received PING command");
-    }
-    std::string reply = "PONG";
-    if (has_payload) {
-        reply += ":";
-        reply += echoed_text;
-    }
-    return std::vector<std::uint8_t>(reply.begin(), reply.end());
-}
-// PROCESS_AUDIO: runs process_audio_for_voice_cloning() on the thread pool for
-// the input/output paths in the payload and waits for the result.
-// The payload must hold at least one ProcessAudioRequest. The response is the
-// text "SUCCESS:<output path>" or "ERROR:<reason>".
-std::vector<std::uint8_t> AcceleratorCore::handle_process_audio_command(const std::vector<std::uint8_t>& payload) {
-    log_message("Received PROCESS_AUDIO command with payload size: " + std::to_string(payload.size()) + " bytes");
-    if (payload.size() < sizeof(ProcessAudioRequest)) {
-        std::string error_message = "ERROR:Invalid payload size, expected " + std::to_string(sizeof(ProcessAudioRequest)) + " bytes";
-        log_message(error_message);
-        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
-    }
-    ProcessAudioRequest request;
-    std::memcpy(&request, payload.data(), sizeof(ProcessAudioRequest));
-    // The path buffers come straight from the client and may lack a
-    // terminator; force one so the std::string constructors cannot read past
-    // the end of the arrays.
-    request.input_file_path[sizeof(request.input_file_path) - 1] = '\0';
-    request.output_file_path[sizeof(request.output_file_path) - 1] = '\0';
-    std::string input_path(request.input_file_path);
-    std::string output_path(request.output_file_path);
-    log_message("Processing audio from: " + input_path + " to: " + output_path);
-    auto future_result = thread_pool->submit_task([this, input_path, output_path]() {
-        return this->audio_processor->process_audio_for_voice_cloning(input_path, output_path);
-    });
-    // Blocks this handler until the worker thread has finished.
-    AudioProcessingResult result = future_result.get();
-    if (result.success) {
-        log_message("Audio processing completed successfully");
-        std::string success_message = "SUCCESS:" + output_path;
-        return std::vector<std::uint8_t>(success_message.begin(), success_message.end());
-    } else {
-        log_message("Audio processing failed: " + result.error_message);
-        std::string error_message = "ERROR:" + result.error_message;
-        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
-    }
-}
-// CONVERT_TO_MONO: reads the input WAV file, converts it to mono and writes the
-// result to the output path as 1-channel, 16-bit audio.
-// The payload must hold at least one ProcessAudioRequest. The response is the
-// text "SUCCESS:<output path>" or "ERROR:<reason>".
-std::vector<std::uint8_t> AcceleratorCore::handle_convert_to_mono_command(const std::vector<std::uint8_t>& payload) {
-    log_message("Received CONVERT_TO_MONO command with payload size: " + std::to_string(payload.size()) + " bytes");
-    if (payload.size() < sizeof(ProcessAudioRequest)) {
-        std::string error_message = "ERROR:Invalid payload size, expected " + std::to_string(sizeof(ProcessAudioRequest)) + " bytes";
-        log_message(error_message);
-        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
-    }
-    ProcessAudioRequest request;
-    std::memcpy(&request, payload.data(), sizeof(ProcessAudioRequest));
-    // The path buffers come straight from the client and may lack a
-    // terminator; force one so the std::string constructors cannot read past
-    // the end of the arrays.
-    request.input_file_path[sizeof(request.input_file_path) - 1] = '\0';
-    request.output_file_path[sizeof(request.output_file_path) - 1] = '\0';
-    std::string input_path(request.input_file_path);
-    std::string output_path(request.output_file_path);
-    log_message("Converting to mono from: " + input_path + " to: " + output_path);
-    AudioData audio_data = audio_processor->read_wav_file(input_path);
-    if (!audio_data.is_valid) {
-        log_message("Failed to read input file: " + audio_data.error_message);
-        std::string error_message = "ERROR:" + audio_data.error_message;
-        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
-    }
-    AudioProcessingResult result = audio_processor->convert_to_mono(audio_data);
-    if (!result.success) {
-        log_message("Mono conversion failed: " + result.error_message);
-        std::string error_message = "ERROR:" + result.error_message;
-        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
-    }
-    AudioData output_audio;
-    output_audio.samples = std::move(result.processed_samples);
-    output_audio.sample_rate = result.output_sample_rate;
-    output_audio.number_of_channels = 1;
-    output_audio.bits_per_sample = 16;
-    output_audio.is_valid = true;
-    if (!audio_processor->write_wav_file(output_path, output_audio)) {
-        log_message("Failed to write output file: " + output_path);
-        std::string error_message = "ERROR:Failed to write output file";
-        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
-    }
-    log_message("Mono conversion completed successfully: " + output_path);
-    std::string success_message = "SUCCESS:" + output_path;
-    return std::vector<std::uint8_t>(success_message.begin(), success_message.end());
-}
-// CONVERT_TO_PCM: reads the input WAV file, mixes it down to mono when it has
-// more than one channel, and writes the result to the output path as
-// 1-channel, 16-bit audio.
-// The payload must hold at least one ProcessAudioRequest. The response is the
-// text "SUCCESS:<output path>" or "ERROR:<reason>".
-std::vector<std::uint8_t> AcceleratorCore::handle_convert_to_pcm_command(const std::vector<std::uint8_t>& payload) {
-    log_message("Received CONVERT_TO_PCM command with payload size: " + std::to_string(payload.size()) + " bytes");
-    if (payload.size() < sizeof(ProcessAudioRequest)) {
-        std::string error_message = "ERROR:Invalid payload size, expected " + std::to_string(sizeof(ProcessAudioRequest)) + " bytes";
-        log_message(error_message);
-        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
-    }
-    ProcessAudioRequest request;
-    std::memcpy(&request, payload.data(), sizeof(ProcessAudioRequest));
-    // The path buffers come straight from the client and may lack a
-    // terminator; force one so the std::string constructors cannot read past
-    // the end of the arrays.
-    request.input_file_path[sizeof(request.input_file_path) - 1] = '\0';
-    request.output_file_path[sizeof(request.output_file_path) - 1] = '\0';
-    std::string input_path(request.input_file_path);
-    std::string output_path(request.output_file_path);
-    log_message("Converting to PCM from: " + input_path + " to: " + output_path);
-    AudioData audio_data = audio_processor->read_wav_file(input_path);
-    if (!audio_data.is_valid) {
-        log_message("Failed to read input file: " + audio_data.error_message);
-        std::string error_message = "ERROR:" + audio_data.error_message;
-        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
-    }
-    AudioData mono_audio;
-    if (audio_data.number_of_channels > 1) {
-        log_message("Input has " + std::to_string(audio_data.number_of_channels) + " channels, converting to mono");
-        AudioProcessingResult mono_result = audio_processor->convert_to_mono(audio_data);
-        if (!mono_result.success) {
-            log_message("Mono conversion failed: " + mono_result.error_message);
-            std::string error_message = "ERROR:" + mono_result.error_message;
-            return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
-        }
-        mono_audio.samples = std::move(mono_result.processed_samples);
-        mono_audio.sample_rate = mono_result.output_sample_rate;
-    } else {
-        // Already single-channel: reuse the samples as read.
-        mono_audio.samples = std::move(audio_data.samples);
-        mono_audio.sample_rate = audio_data.sample_rate;
-    }
-    mono_audio.number_of_channels = 1;
-    mono_audio.bits_per_sample = 16;
-    mono_audio.is_valid = true;
-    if (!audio_processor->write_wav_file(output_path, mono_audio)) {
-        log_message("Failed to write output file: " + output_path);
-        std::string error_message = "ERROR:Failed to write output file";
-        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
-    }
-    log_message("PCM conversion completed successfully: " + output_path);
-    std::string success_message = "SUCCESS:" + output_path;
-    return std::vector<std::uint8_t>(success_message.begin(), success_message.end());
-}
-// RESAMPLE_AUDIO: reads the input WAV file, resamples it to the requested rate
-// and writes the result to the output path as 16-bit audio with the input's
-// channel count.
-// The payload must hold at least one ProcessAudioRequest. The response is the
-// text "SUCCESS:<output path>" or "ERROR:<reason>".
-std::vector<std::uint8_t> AcceleratorCore::handle_resample_audio_command(const std::vector<std::uint8_t>& payload) {
-    log_message("Received RESAMPLE_AUDIO command with payload size: " + std::to_string(payload.size()) + " bytes");
-    if (payload.size() < sizeof(ProcessAudioRequest)) {
-        std::string error_message = "ERROR:Invalid payload size, expected " + std::to_string(sizeof(ProcessAudioRequest)) + " bytes";
-        log_message(error_message);
-        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
-    }
-    ProcessAudioRequest request;
-    std::memcpy(&request, payload.data(), sizeof(ProcessAudioRequest));
-    // The path buffers come straight from the client and may lack a
-    // terminator; force one so the std::string constructors cannot read past
-    // the end of the arrays.
-    request.input_file_path[sizeof(request.input_file_path) - 1] = '\0';
-    request.output_file_path[sizeof(request.output_file_path) - 1] = '\0';
-    std::string input_path(request.input_file_path);
-    std::string output_path(request.output_file_path);
-    std::uint32_t target_sample_rate = request.target_sample_rate;
-    log_message("Resampling audio from: " + input_path + " to: " + output_path + " at " + std::to_string(target_sample_rate) + " Hz");
-    AudioData audio_data = audio_processor->read_wav_file(input_path);
-    if (!audio_data.is_valid) {
-        log_message("Failed to read input file: " + audio_data.error_message);
-        std::string error_message = "ERROR:" + audio_data.error_message;
-        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
-    }
-    AudioProcessingResult result = audio_processor->resample_audio(audio_data, target_sample_rate);
-    if (!result.success) {
-        log_message("Resampling failed: " + result.error_message);
-        std::string error_message = "ERROR:" + result.error_message;
-        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
-    }
-    AudioData output_audio;
-    output_audio.samples = std::move(result.processed_samples);
-    output_audio.sample_rate = result.output_sample_rate;
-    output_audio.number_of_channels = audio_data.number_of_channels;
-    output_audio.bits_per_sample = 16;
-    output_audio.is_valid = true;
-    if (!audio_processor->write_wav_file(output_path, output_audio)) {
-        log_message("Failed to write output file: " + output_path);
-        std::string error_message = "ERROR:Failed to write output file";
-        return std::vector<std::uint8_t>(error_message.begin(), error_message.end());
-    }
-    log_message("Resampling completed successfully: " + output_path);
-    std::string success_message = "SUCCESS:" + output_path;
-    return std::vector<std::uint8_t>(success_message.begin(), success_message.end());
-}
-// Handles GET_MEMORY_STATS: logs the memory pool counters and returns them as the
-// raw bytes of a MemoryStatsResponse. When the payload carries at least four bytes,
-// they are read as a flag word; bit 0 selects the detailed log line.
-std::vector<std::uint8_t> AcceleratorCore::handle_get_memory_stats_command(const std::vector<std::uint8_t>& payload) {
-    std::uint32_t request_flags = 0;
-    const bool has_flag_word = payload.size() >= sizeof(std::uint32_t);
-    if (!has_flag_word) {
-        log_message("Received GET_MEMORY_STATS command with payload size: " + std::to_string(payload.size()) + " bytes");
-    } else {
-        std::memcpy(&request_flags, payload.data(), sizeof(std::uint32_t));
-        log_message("Received GET_MEMORY_STATS command with flags: " + std::to_string(request_flags));
-    }
-    MemoryStatsResponse stats;
-    stats.total_allocated_bytes = memory_pool->get_total_allocated_bytes();
-    stats.total_used_bytes = memory_pool->get_total_used_bytes();
-    stats.block_count = memory_pool->get_block_count();
-    const std::string allocated_text = std::to_string(stats.total_allocated_bytes);
-    const std::string used_text = std::to_string(stats.total_used_bytes);
-    const std::string blocks_text = std::to_string(stats.block_count);
-    if ((request_flags & 0x01) == 0) {
-        log_message("Memory stats - Allocated: " + allocated_text +
-                    " bytes, Used: " + used_text +
-                    " bytes, Blocks: " + blocks_text);
-    } else {
-        // Detailed form adds megabyte figures and a utilization percentage.
-        std::string summary = "Memory stats (detailed) - Allocated: " + allocated_text;
-        summary += " bytes (" + std::to_string(stats.total_allocated_bytes / (1024 * 1024)) + " MB)";
-        summary += ", Used: " + used_text;
-        summary += " bytes (" + std::to_string(stats.total_used_bytes / (1024 * 1024)) + " MB)";
-        summary += ", Blocks: " + blocks_text;
-        summary += ", Utilization: " + std::to_string(stats.total_allocated_bytes > 0 ?
-            (stats.total_used_bytes * 100 / stats.total_allocated_bytes) : 0) + "%";
-        log_message(summary);
-    }
-    // The response payload is the struct copied byte-for-byte.
-    std::vector<std::uint8_t> response(sizeof(MemoryStatsResponse));
-    std::memcpy(response.data(), &stats, sizeof(MemoryStatsResponse));
-    return response;
-}
-// Handles CLEAR_MEMORY_POOL: either drops unused blocks or, when bit 0 of the optional
-// leading 32-bit flag word is set, resets the whole pool. Logs the pool counters before
-// and after, and returns a "SUCCESS:..." text describing how much was freed.
-std::vector<std::uint8_t> AcceleratorCore::handle_clear_memory_pool_command(const std::vector<std::uint8_t>& payload) {
-    std::uint32_t clear_flags = 0;
-    if (payload.size() < sizeof(std::uint32_t)) {
-        log_message("Received CLEAR_MEMORY_POOL command with payload size: " + std::to_string(payload.size()) + " bytes");
-    } else {
-        std::memcpy(&clear_flags, payload.data(), sizeof(std::uint32_t));
-        log_message("Received CLEAR_MEMORY_POOL command with flags: " + std::to_string(clear_flags));
-    }
-    // Counters are sampled in the order: block count, allocated bytes, used bytes.
-    struct PoolSnapshot {
-        std::size_t blocks;
-        std::size_t allocated;
-        std::size_t used;
-    };
-    const auto take_snapshot = [this]() {
-        return PoolSnapshot{
-            memory_pool->get_block_count(),
-            memory_pool->get_total_allocated_bytes(),
-            memory_pool->get_total_used_bytes()
-        };
-    };
-    const PoolSnapshot before = take_snapshot();
-    const bool force_full_reset = (clear_flags & 0x01) != 0;
-    if (!force_full_reset) {
-        log_message("Clearing unused memory blocks");
-        memory_pool->clear_unused_blocks();
-    } else {
-        log_message("Performing full memory pool reset (force flag set)");
-        memory_pool->reset_pool();
-    }
-    const PoolSnapshot after = take_snapshot();
-    const std::size_t blocks_freed = before.blocks - after.blocks;
-    const std::size_t bytes_freed = before.allocated - after.allocated;
-    const std::string freed_text = std::to_string(blocks_freed) + " blocks (" +
-                                   std::to_string(bytes_freed) + " bytes)";
-    log_message("Memory pool cleared - Before: " + std::to_string(before.blocks) + " blocks (" +
-                std::to_string(before.allocated) + " bytes allocated, " +
-                std::to_string(before.used) + " bytes used) -> After: " +
-                std::to_string(after.blocks) + " blocks (" +
-                std::to_string(after.allocated) + " bytes allocated, " +
-                std::to_string(after.used) + " bytes used) -> Freed: " +
-                freed_text);
-    std::string success_message = "SUCCESS:Freed " + freed_text;
-    if (force_full_reset) {
-        success_message += " [full reset]";
-    }
-    return std::vector<std::uint8_t>(success_message.begin(), success_message.end());
-}
-// Handles SHUTDOWN: logs the request (the payload, if any, is treated as a textual
-// reason), schedules shutdown() on a detached thread after a 100 ms delay, and returns
-// a "SUCCESS:Shutting down" text that echoes the reason when one was given.
-std::vector<std::uint8_t> AcceleratorCore::handle_shutdown_command(const std::vector<std::uint8_t>& payload) {
-    const std::string shutdown_reason(payload.begin(), payload.end());
-    std::string success_message = "SUCCESS:Shutting down";
-    if (shutdown_reason.empty()) {
-        log_message("Received SHUTDOWN command");
-    } else {
-        log_message("Received SHUTDOWN command with reason: " + shutdown_reason);
-        success_message += " (reason: " + shutdown_reason + ")";
-    }
-    // The delay runs on a separate thread, so this function returns before shutdown() is called.
-    std::thread([this]() {
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-        this->shutdown();
-    }).detach();
-    return std::vector<std::uint8_t>(success_message.begin(), success_message.end());
-}
-// Writes "[YYYY-MM-DD HH:MM:SS.mmm] message" (local time) to stdout.
-// Does nothing unless verbose logging is enabled in the configuration.
-void AcceleratorCore::log_message(const std::string& message) const {
-    if (!config.enable_verbose_logging) {
-        return;
-    }
-    const auto current_instant = std::chrono::system_clock::now();
-    // Millisecond remainder within the current second.
-    const auto sub_second = std::chrono::duration_cast<std::chrono::milliseconds>(
-        current_instant.time_since_epoch()
-    ) % 1000;
-    const std::time_t calendar_seconds = std::chrono::system_clock::to_time_t(current_instant);
-    std::tm broken_down_time;
-    localtime_r(&calendar_seconds, &broken_down_time);
-    std::ostringstream stamp;
-    stamp << std::put_time(&broken_down_time, "%Y-%m-%d %H:%M:%S")
-          << '.' << std::setfill('0') << std::setw(3) << sub_second.count();
-    std::cout << "[" << stamp.str() << "] " << message << std::endl;
-    std::cout.flush();
-}
-}

accelerator/src/audio_processor.cpp DELETED Viewed

@@ -1,352 +0,0 @@
-//
-// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-// SPDX-License-Identifier: Apache-2.0
-//
-#include "audio_processor.hpp"
-#include <algorithm>
-#include <cmath>
-#include <cstring>
-#include <fstream>
-namespace pocket_tts_accelerator {
-AudioProcessor::AudioProcessor(MemoryPool& shared_memory_pool) // Binds the processor to a caller-provided memory pool.
-    : memory_pool(shared_memory_pool) {
-}
-AudioProcessor::~AudioProcessor() { // Nothing to do here; the body is empty.
-}
-// Reads a WAV file whose header matches WavFileHeader and returns its samples as 16-bit PCM.
-//
-// 8-bit unsigned, 32-bit integer and 32-bit float (audio_format == 3) data are converted
-// to int16. On failure the result has is_valid == false and error_message set.
-//
-// The data size declared in the header is not trusted: it is capped at the number of
-// bytes actually present after the header, and the sample vector is trimmed to what was
-// really read. This avoids oversized allocations and never-read samples for truncated
-// files or files that declare a placeholder size.
-AudioData AudioProcessor::read_wav_file(const std::string& file_path) {
-    AudioData result;
-    result.is_valid = false;
-    std::ifstream file_stream(file_path, std::ios::binary);
-    if (!file_stream.is_open()) {
-        result.error_message = "Failed to open file: " + file_path;
-        return result;
-    }
-    WavFileHeader header;
-    file_stream.read(reinterpret_cast<char*>(&header), sizeof(WavFileHeader));
-    if (file_stream.gcount() < static_cast<std::streamsize>(sizeof(WavFileHeader))) {
-        result.error_message = "File is too small to be a valid WAV file";
-        return result;
-    }
-    if (!validate_wav_header(header)) {
-        result.error_message = "Invalid WAV file header";
-        return result;
-    }
-    result.sample_rate = header.sample_rate;
-    result.number_of_channels = header.number_of_channels;
-    result.bits_per_sample = header.bits_per_sample;
-    // Cap the declared size at the bytes remaining in the file. If the stream cannot be
-    // seeked, fall back to the declared size; the post-read trim below still applies.
-    std::uint64_t readable_bytes = header.data_size;
-    const std::streampos data_start = file_stream.tellg();
-    if (data_start != std::streampos(-1)) {
-        file_stream.seekg(0, std::ios::end);
-        const std::streampos file_end = file_stream.tellg();
-        if (file_end != std::streampos(-1)) {
-            const std::streamoff remaining = file_end - data_start;
-            if (remaining >= 0) {
-                readable_bytes = std::min<std::uint64_t>(readable_bytes, static_cast<std::uint64_t>(remaining));
-            }
-        }
-        file_stream.clear();
-        file_stream.seekg(data_start);
-    }
-    file_stream.clear();
-    const std::size_t bytes_per_sample = header.bits_per_sample / 8;
-    const std::size_t sample_count = static_cast<std::size_t>(readable_bytes / bytes_per_sample);
-    // Reads up to sample_count samples into destination and reports how many whole samples arrived.
-    const auto read_samples = [&](void* destination) {
-        file_stream.read(
-            static_cast<char*>(destination),
-            static_cast<std::streamsize>(sample_count * bytes_per_sample)
-        );
-        return static_cast<std::size_t>(file_stream.gcount()) / bytes_per_sample;
-    };
-    if (header.bits_per_sample == 16) {
-        result.samples.resize(sample_count);
-        result.samples.resize(read_samples(result.samples.data()));
-    } else if (header.bits_per_sample == 8) {
-        std::vector<std::uint8_t> raw_data(sample_count);
-        const std::size_t samples_read = read_samples(raw_data.data());
-        result.samples.resize(samples_read);
-        convert_uint8_to_int16(raw_data.data(), result.samples.data(), samples_read);
-    } else if (header.bits_per_sample == 32) {
-        if (header.audio_format == 3) {
-            std::vector<float> raw_data(sample_count);
-            const std::size_t samples_read = read_samples(raw_data.data());
-            result.samples.resize(samples_read);
-            convert_float32_to_int16(raw_data.data(), result.samples.data(), samples_read);
-        } else {
-            std::vector<std::int32_t> raw_data(sample_count);
-            const std::size_t samples_read = read_samples(raw_data.data());
-            result.samples.resize(samples_read);
-            convert_int32_to_int16(raw_data.data(), result.samples.data(), samples_read);
-        }
-    }
-    result.is_valid = true;
-    return result;
-}
-// Writes audio_data as a 16-bit PCM WAV file: one WavFileHeader followed by the raw samples.
-// Returns false when the file cannot be opened or the stream is not good after writing.
-bool AudioProcessor::write_wav_file(const std::string& file_path, const AudioData& audio_data) {
-    std::ofstream output_stream(file_path, std::ios::binary);
-    if (!output_stream.is_open()) {
-        return false;
-    }
-    const std::uint32_t payload_bytes =
-        static_cast<std::uint32_t>(audio_data.samples.size() * sizeof(std::int16_t));
-    WavFileHeader wav_header;
-    // RIFF container: the size field is the payload plus 36.
-    std::memcpy(wav_header.riff_marker, "RIFF", 4);
-    wav_header.file_size = payload_bytes + 36;
-    std::memcpy(wav_header.wave_marker, "WAVE", 4);
-    // Format chunk: integer PCM, two bytes per sample.
-    std::memcpy(wav_header.format_marker, "fmt ", 4);
-    wav_header.format_chunk_size = 16;
-    wav_header.audio_format = 1;
-    wav_header.number_of_channels = audio_data.number_of_channels;
-    wav_header.sample_rate = audio_data.sample_rate;
-    wav_header.byte_rate = audio_data.sample_rate * audio_data.number_of_channels * 2;
-    wav_header.block_align = audio_data.number_of_channels * 2;
-    wav_header.bits_per_sample = 16;
-    // Data chunk.
-    std::memcpy(wav_header.data_marker, "data", 4);
-    wav_header.data_size = payload_bytes;
-    output_stream.write(reinterpret_cast<const char*>(&wav_header), sizeof(WavFileHeader));
-    output_stream.write(reinterpret_cast<const char*>(audio_data.samples.data()), payload_bytes);
-    return output_stream.good();
-}
-// Down-mixes interleaved multi-channel audio to one channel by averaging each frame.
-// Mono input is returned as a copy. The sample rate is unchanged.
-// Fails with an error message when the input is flagged invalid or reports zero
-// channels, which would otherwise cause a division by zero.
-AudioProcessingResult AudioProcessor::convert_to_mono(const AudioData& input_audio) {
-    AudioProcessingResult result;
-    result.success = false;
-    if (!input_audio.is_valid) {
-        result.error_message = "Invalid input audio";
-        return result;
-    }
-    if (input_audio.number_of_channels == 0) {
-        result.error_message = "Invalid channel count";
-        return result;
-    }
-    if (input_audio.number_of_channels == 1) {
-        result.processed_samples = input_audio.samples;
-        result.output_sample_rate = input_audio.sample_rate;
-        result.success = true;
-        return result;
-    }
-    // Integer division drops any trailing partial frame.
-    std::size_t frame_count = input_audio.samples.size() / input_audio.number_of_channels;
-    result.processed_samples.resize(frame_count);
-    mix_channels_to_mono(
-        input_audio.samples.data(),
-        result.processed_samples.data(),
-        frame_count,
-        input_audio.number_of_channels
-    );
-    result.output_sample_rate = input_audio.sample_rate;
-    result.success = true;
-    return result;
-}
-// Returns a copy of the input samples and sample rate; samples are already held as
-// int16, so no conversion takes place. Fails when the input is flagged invalid.
-AudioProcessingResult AudioProcessor::convert_to_pcm_int16(const AudioData& input_audio) {
-    AudioProcessingResult result;
-    if (input_audio.is_valid) {
-        result.processed_samples = input_audio.samples;
-        result.output_sample_rate = input_audio.sample_rate;
-        result.success = true;
-    } else {
-        result.success = false;
-        result.error_message = "Invalid input audio";
-    }
-    return result;
-}
-// Resamples audio to target_sample_rate using linear interpolation.
-//
-// Samples are treated as interleaved frames and each channel is interpolated
-// independently, so multi-channel input keeps its channels separate. Mono input yields
-// the same output length as a flat interpolation over the sample array.
-// A channel count of zero is treated as one channel. Any trailing partial frame is dropped.
-//
-// Fails with an error message when the input is flagged invalid or either sample rate is
-// zero. When the rates already match, the input samples are returned as a copy.
-AudioProcessingResult AudioProcessor::resample_audio(const AudioData& input_audio, std::uint32_t target_sample_rate) {
-    AudioProcessingResult result;
-    result.success = false;
-    if (!input_audio.is_valid) {
-        result.error_message = "Invalid input audio";
-        return result;
-    }
-    if (input_audio.sample_rate == 0 || target_sample_rate == 0) {
-        result.error_message = "Invalid sample rate";
-        return result;
-    }
-    if (input_audio.sample_rate == target_sample_rate) {
-        result.processed_samples = input_audio.samples;
-        result.output_sample_rate = target_sample_rate;
-        result.success = true;
-        return result;
-    }
-    const std::size_t channel_count =
-        input_audio.number_of_channels == 0 ? 1 : static_cast<std::size_t>(input_audio.number_of_channels);
-    const std::size_t input_frame_count = input_audio.samples.size() / channel_count;
-    const double ratio = static_cast<double>(target_sample_rate) / static_cast<double>(input_audio.sample_rate);
-    const std::size_t output_frame_count = static_cast<std::size_t>(input_frame_count * ratio);
-    result.processed_samples.resize(output_frame_count * channel_count);
-    for (std::size_t output_frame = 0; output_frame < output_frame_count; ++output_frame) {
-        const double source_position = output_frame / ratio;
-        // Clamp both neighbours so floating-point rounding can never index past the last frame.
-        const std::size_t last_frame = input_frame_count - 1;
-        const std::size_t floor_frame = std::min(static_cast<std::size_t>(source_position), last_frame);
-        const std::size_t ceil_frame = std::min(floor_frame + 1, last_frame);
-        const double fractional_part = source_position - static_cast<double>(floor_frame);
-        for (std::size_t channel = 0; channel < channel_count; ++channel) {
-            const double interpolated_value =
-                input_audio.samples[floor_frame * channel_count + channel] * (1.0 - fractional_part) +
-                input_audio.samples[ceil_frame * channel_count + channel] * fractional_part;
-            result.processed_samples[output_frame * channel_count + channel] = static_cast<std::int16_t>(
-                std::clamp(interpolated_value, -32768.0, 32767.0)
-            );
-        }
-    }
-    result.output_sample_rate = target_sample_rate;
-    result.success = true;
-    return result;
-}
-// Scales the samples so the loudest one reaches target_peak_level * 32767, clamping
-// the result to the int16 range. Silent input (all zeros) is returned unchanged.
-// Fails with an error message when the input is flagged invalid.
-//
-// The peak is tracked in 32 bits: the magnitude of -32768 does not fit in int16, so a
-// 16-bit peak would wrap negative and be ignored.
-AudioProcessingResult AudioProcessor::normalize_audio(const AudioData& input_audio, float target_peak_level) {
-    AudioProcessingResult result;
-    result.success = false;
-    if (!input_audio.is_valid) {
-        result.error_message = "Invalid input audio";
-        return result;
-    }
-    std::int32_t max_absolute_value = 0;
-    for (const std::int16_t sample : input_audio.samples) {
-        const std::int32_t absolute_value = std::abs(static_cast<std::int32_t>(sample));
-        if (absolute_value > max_absolute_value) {
-            max_absolute_value = absolute_value;
-        }
-    }
-    if (max_absolute_value == 0) {
-        result.processed_samples = input_audio.samples;
-        result.output_sample_rate = input_audio.sample_rate;
-        result.success = true;
-        return result;
-    }
-    float normalization_factor = (target_peak_level * 32767.0f) / static_cast<float>(max_absolute_value);
-    result.processed_samples.resize(input_audio.samples.size());
-    for (std::size_t index = 0; index < input_audio.samples.size(); ++index) {
-        float normalized_sample = static_cast<float>(input_audio.samples[index]) * normalization_factor;
-        result.processed_samples[index] = static_cast<std::int16_t>(
-            std::clamp(normalized_sample, -32768.0f, 32767.0f)
-        );
-    }
-    result.output_sample_rate = input_audio.sample_rate;
-    result.success = true;
-    return result;
-}
-// Reads the input WAV file, down-mixes it to mono, writes it to output_file_path as a
-// 16-bit mono WAV, and returns the mono samples with their sample rate.
-// Each failing stage is reported through error_message with success left false.
-AudioProcessingResult AudioProcessor::process_audio_for_voice_cloning(
-    const std::string& input_file_path,
-    const std::string& output_file_path
-) {
-    AudioProcessingResult outcome;
-    outcome.success = false;
-    const AudioData source_audio = read_wav_file(input_file_path);
-    if (!source_audio.is_valid) {
-        outcome.error_message = "Failed to read input file: " + source_audio.error_message;
-        return outcome;
-    }
-    AudioProcessingResult downmix = convert_to_mono(source_audio);
-    if (!downmix.success) {
-        outcome.error_message = "Failed to convert to mono: " + downmix.error_message;
-        return outcome;
-    }
-    // Package the down-mixed samples as an AudioData so they can be written out.
-    AudioData mono_track;
-    mono_track.is_valid = true;
-    mono_track.bits_per_sample = 16;
-    mono_track.number_of_channels = 1;
-    mono_track.sample_rate = downmix.output_sample_rate;
-    mono_track.samples = std::move(downmix.processed_samples);
-    const bool written = write_wav_file(output_file_path, mono_track);
-    if (!written) {
-        outcome.error_message = "Failed to write output file";
-        return outcome;
-    }
-    outcome.output_sample_rate = mono_track.sample_rate;
-    outcome.processed_samples = std::move(mono_track.samples);
-    outcome.success = true;
-    return outcome;
-}
-// Returns true when the header carries the RIFF/WAVE/"fmt " markers, an audio format of
-// 1 or 3, 1..16 channels, a sample rate of 100..384000 Hz, and 8, 16 or 32 bits per sample.
-bool AudioProcessor::validate_wav_header(const WavFileHeader& header) {
-    const bool markers_match =
-        std::memcmp(header.riff_marker, "RIFF", 4) == 0 &&
-        std::memcmp(header.wave_marker, "WAVE", 4) == 0 &&
-        std::memcmp(header.format_marker, "fmt ", 4) == 0;
-    if (!markers_match) {
-        return false;
-    }
-    const bool format_supported = header.audio_format == 1 || header.audio_format == 3;
-    const bool channels_in_range = header.number_of_channels >= 1 && header.number_of_channels <= 16;
-    const bool rate_in_range = header.sample_rate >= 100 && header.sample_rate <= 384000;
-    const bool depth_supported =
-        header.bits_per_sample == 8 || header.bits_per_sample == 16 || header.bits_per_sample == 32;
-    return format_supported && channels_in_range && rate_in_range && depth_supported;
-}
-// Returns the duration of the audio in whole milliseconds (rounded down).
-// Returns 0 when the audio is flagged invalid, or when the sample rate or the channel
-// count is zero (either would otherwise cause a division by zero).
-std::size_t AudioProcessor::calculate_audio_duration_milliseconds(const AudioData& audio_data) {
-    if (!audio_data.is_valid || audio_data.sample_rate == 0 || audio_data.number_of_channels == 0) {
-        return 0;
-    }
-    std::size_t frame_count = audio_data.samples.size() / audio_data.number_of_channels;
-    return (frame_count * 1000) / audio_data.sample_rate;
-}
-// Converts float samples to int16: each value is clamped to [-1, 1] and scaled by 32767.
-void AudioProcessor::convert_float32_to_int16(const float* input, std::int16_t* output, std::size_t sample_count) {
-    std::transform(input, input + sample_count, output, [](float sample) {
-        const float limited = std::clamp(sample, -1.0f, 1.0f);
-        return static_cast<std::int16_t>(limited * 32767.0f);
-    });
-}
-// Converts 32-bit integer samples to int16 by shifting each value right by 16 bits.
-void AudioProcessor::convert_int32_to_int16(const std::int32_t* input, std::int16_t* output, std::size_t sample_count) {
-    std::transform(input, input + sample_count, output, [](std::int32_t wide_sample) {
-        return static_cast<std::int16_t>(wide_sample >> 16);
-    });
-}
-// Converts unsigned 8-bit samples to int16: subtract 128, then multiply by 256.
-void AudioProcessor::convert_uint8_to_int16(const std::uint8_t* input, std::int16_t* output, std::size_t sample_count) {
-    std::transform(input, input + sample_count, output, [](std::uint8_t narrow_sample) {
-        const std::int16_t widened = static_cast<std::int16_t>(narrow_sample);
-        return static_cast<std::int16_t>((widened - 128) * 256);
-    });
-}
-// Averages the channel_count interleaved samples of every frame into one output sample.
-// The sum is held in 32 bits and divided by channel_count with integer division.
-void AudioProcessor::mix_channels_to_mono(
-    const std::int16_t* input,
-    std::int16_t* output,
-    std::size_t frame_count,
-    std::uint16_t channel_count
-) {
-    const std::int16_t* frame_start = input;
-    for (std::size_t frame = 0; frame != frame_count; ++frame, frame_start += channel_count) {
-        std::int32_t frame_total = 0;
-        for (std::uint16_t channel = 0; channel < channel_count; ++channel) {
-            frame_total += frame_start[channel];
-        }
-        output[frame] = static_cast<std::int16_t>(frame_total / channel_count);
-    }
-}
-}

accelerator/src/ipc_handler.cpp DELETED Viewed

@@ -1,226 +0,0 @@
-//
-// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-// SPDX-License-Identifier: Apache-2.0
-//
-#include "ipc_handler.hpp"
-#include <cstring>
-#include <iostream>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <unistd.h>
-namespace pocket_tts_accelerator {
-IpcHandler::IpcHandler(const std::string& socket_path) // Stores the socket path; no socket is opened here.
-    : socket_file_path(socket_path)
-    , server_socket_fd(-1) // -1 marks "no socket open".
-    , is_server_running(false) {
-}
-IpcHandler::~IpcHandler() { // Stops the server if it is still running.
-    stop_server();
-}
-// Creates the Unix-domain stream socket at socket_file_path, starts listening, and
-// launches the accept thread. Returns true on success or when already running.
-// Returns false (after logging to stderr) when the path does not fit in sockaddr_un
-// or when socket(), bind() or listen() fails.
-bool IpcHandler::start_server() {
-    if (is_server_running.load()) {
-        return true;
-    }
-    struct sockaddr_un server_address;
-    std::memset(&server_address, 0, sizeof(server_address));
-    server_address.sun_family = AF_UNIX;
-    // Reject paths that strncpy would silently truncate, which would bind a different
-    // path from the one clients connect to.
-    if (socket_file_path.size() >= sizeof(server_address.sun_path)) {
-        std::cerr << "Failed to bind socket: path too long: " << socket_file_path << std::endl;
-        return false;
-    }
-    std::strncpy(server_address.sun_path, socket_file_path.c_str(), sizeof(server_address.sun_path) - 1);
-    // Remove any stale socket file left at this path so bind() can succeed.
-    unlink(socket_file_path.c_str());
-    server_socket_fd = socket(AF_UNIX, SOCK_STREAM, 0);
-    if (server_socket_fd < 0) {
-        std::cerr << "Failed to create socket: " << strerror(errno) << std::endl;
-        return false;
-    }
-    if (bind(server_socket_fd, reinterpret_cast<struct sockaddr*>(&server_address), sizeof(server_address)) < 0) {
-        std::cerr << "Failed to bind socket: " << strerror(errno) << std::endl;
-        close(server_socket_fd);
-        server_socket_fd = -1;
-        return false;
-    }
-    if (listen(server_socket_fd, CONNECTION_BACKLOG) < 0) {
-        std::cerr << "Failed to listen on socket: " << strerror(errno) << std::endl;
-        close(server_socket_fd);
-        server_socket_fd = -1;
-        return false;
-    }
-    is_server_running.store(true);
-    accept_thread = std::thread(&IpcHandler::accept_connections_loop, this);
-    return true;
-}
-void IpcHandler::stop_server() { // Stops accepting, joins the accept thread, and removes the socket file.
-    if (!is_server_running.load()) {
-        return;
-    }
-    is_server_running.store(false); // Cleared first so the accept loop exits once accept() returns.
-    if (server_socket_fd >= 0) {
-        shutdown(server_socket_fd, SHUT_RDWR); // NOTE(review): presumably meant to unblock a pending accept() — confirm.
-        close(server_socket_fd);
-        server_socket_fd = -1;
-    }
-    if (accept_thread.joinable()) {
-        accept_thread.join();
-    }
-    unlink(socket_file_path.c_str());
-}
-bool IpcHandler::is_running() const { // Reports the running flag set by start_server/stop_server.
-    return is_server_running.load();
-}
-void IpcHandler::register_command_handler(CommandType command_type, CommandHandlerFunction handler) { // Installs or replaces the handler for command_type.
-    std::unique_lock<std::mutex> lock(handlers_mutex); // Same mutex is held while a handler is looked up and run.
-    command_handlers[command_type] = std::move(handler);
-}
-void IpcHandler::set_shutdown_callback(std::function<void()> callback) { // Stores the callback; no lock is taken here.
-    shutdown_callback = std::move(callback);
-}
-// Accept-thread body: accepts one client at a time, serves a single request on it, and
-// closes the connection. Leaves the loop once the running flag has been cleared.
-void IpcHandler::accept_connections_loop() {
-    while (is_server_running.load()) {
-        struct sockaddr_un peer_address;
-        socklen_t peer_address_length = sizeof(peer_address);
-        const int connection_fd = accept(
-            server_socket_fd,
-            reinterpret_cast<struct sockaddr*>(&peer_address),
-            &peer_address_length
-        );
-        if (connection_fd >= 0) {
-            handle_client_connection(connection_fd);
-            close(connection_fd);
-            continue;
-        }
-        // accept() failed: stop if the server is shutting down, otherwise try again.
-        if (!is_server_running.load()) {
-            break;
-        }
-    }
-}
-// Serves one request on an accepted connection: reads it, dispatches it to the
-// registered handler for its command type, and sends back a response header plus the
-// handler's payload. A wrong magic number or an unregistered command yields
-// ERROR_INVALID_COMMAND; a throwing handler yields ERROR_INTERNAL. Nothing is sent when
-// the request cannot be read.
-void IpcHandler::handle_client_connection(int client_socket_fd) {
-    RequestHeader request_header;
-    std::vector<std::uint8_t> request_payload;
-    if (!receive_request(client_socket_fd, request_header, request_payload)) {
-        return;
-    }
-    ResponseHeader response_header;
-    response_header.magic_number = PROTOCOL_MAGIC_NUMBER;
-    response_header.request_id = request_header.request_id;
-    std::vector<std::uint8_t> response_payload;
-    // Start from "invalid command"; only a registered handler can change the status.
-    ResponseStatus status = ResponseStatus::ERROR_INVALID_COMMAND;
-    if (request_header.magic_number == PROTOCOL_MAGIC_NUMBER) {
-        // The handler table stays locked for the whole handler call.
-        std::unique_lock<std::mutex> lock(handlers_mutex);
-        const auto handler_entry = command_handlers.find(static_cast<CommandType>(request_header.command_type));
-        if (handler_entry != command_handlers.end()) {
-            status = ResponseStatus::SUCCESS;
-            try {
-                response_payload = handler_entry->second(request_payload);
-            } catch (const std::exception& exception) {
-                std::cerr << "Handler exception: " << exception.what() << std::endl;
-                status = ResponseStatus::ERROR_INTERNAL;
-            } catch (...) {
-                std::cerr << "Handler unknown exception" << std::endl;
-                status = ResponseStatus::ERROR_INTERNAL;
-            }
-        }
-    }
-    response_header.status_code = static_cast<std::uint32_t>(status);
-    response_header.payload_size = static_cast<std::uint32_t>(response_payload.size());
-    send_response(client_socket_fd, response_header, response_payload);
-}
-// Sends the response header followed by the payload (if any) on socket_fd.
-// write() may accept fewer bytes than requested, so each part is written in a loop
-// until it is complete. Returns false as soon as write() reports an error or makes no progress.
-bool IpcHandler::send_response(
-    int socket_fd,
-    const ResponseHeader& header,
-    const std::vector<std::uint8_t>& payload
-) {
-    const auto write_all = [socket_fd](const void* data, std::size_t length) {
-        const char* cursor = static_cast<const char*>(data);
-        std::size_t remaining = length;
-        while (remaining > 0) {
-            const ssize_t bytes_written = write(socket_fd, cursor, remaining);
-            if (bytes_written <= 0) {
-                return false;
-            }
-            cursor += bytes_written;
-            remaining -= static_cast<std::size_t>(bytes_written);
-        }
-        return true;
-    };
-    if (!write_all(&header, sizeof(ResponseHeader))) {
-        return false;
-    }
-    return payload.empty() || write_all(payload.data(), payload.size());
-}
-// Reads one request from socket_fd: a RequestHeader followed by payload_size payload bytes.
-// Both parts are read in a loop because read() may return fewer bytes than requested.
-// payload is always resized to payload_size, so it is empty for payload-less requests.
-// Returns false on a read error, on end-of-stream before the request is complete, or
-// when payload_size exceeds MAXIMUM_PAYLOAD_SIZE.
-bool IpcHandler::receive_request(
-    int socket_fd,
-    RequestHeader& header,
-    std::vector<std::uint8_t>& payload
-) {
-    const auto read_all = [socket_fd](void* data, std::size_t length) {
-        char* cursor = static_cast<char*>(data);
-        std::size_t remaining = length;
-        while (remaining > 0) {
-            const ssize_t bytes_read = read(socket_fd, cursor, remaining);
-            if (bytes_read <= 0) {
-                return false;
-            }
-            cursor += bytes_read;
-            remaining -= static_cast<std::size_t>(bytes_read);
-        }
-        return true;
-    };
-    if (!read_all(&header, sizeof(RequestHeader))) {
-        return false;
-    }
-    // Check the limit before allocating anything for the payload.
-    if (header.payload_size > MAXIMUM_PAYLOAD_SIZE) {
-        return false;
-    }
-    payload.resize(header.payload_size);
-    return payload.empty() || read_all(payload.data(), payload.size());
-}
-}

accelerator/src/main.cpp DELETED Viewed

@@ -1,83 +0,0 @@
-//
-// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-// SPDX-License-Identifier: Apache-2.0
-//
-#include "accelerator_core.hpp"
-#include <cstdlib>
-#include <cstring>
-#include <iostream>
-#include <stdexcept>
-#include <string>
-// Prints the command-line help text to stdout, one flushed line at a time.
-void print_usage(const char* program_name) {
-    std::cout << "Usage: " << program_name << " [options]" << std::endl
-              << std::endl
-              << "Options:" << std::endl
-              << "  --socket PATH     IPC socket path (default: /tmp/pocket_tts_accelerator.sock)" << std::endl
-              << "  --threads N       Number of worker threads (default: 2)" << std::endl
-              << "  --memory MB       Memory pool size in megabytes (default: 64)" << std::endl
-              << "  --quiet           Disable verbose logging" << std::endl
-              << "  --help            Show this help message" << std::endl;
-    std::cout.flush();
-}
-// Parses text as a non-negative decimal integer made only of digits.
-// Returns false for empty text, a sign, trailing characters, or out-of-range values.
-static bool parse_unsigned_argument(const char* text, unsigned long& value) {
-    const std::string token(text);
-    if (token.empty() || token.find_first_not_of("0123456789") != std::string::npos) {
-        return false;
-    }
-    try {
-        value = std::stoul(token);
-    } catch (const std::logic_error&) {
-        // std::invalid_argument and std::out_of_range both derive from std::logic_error.
-        return false;
-    }
-    return true;
-}
-// Entry point: builds the configuration from the command line, then initializes and
-// runs the accelerator. Returns 0 on normal exit or after --help, and 1 on an unknown
-// argument, a missing or invalid option value, or an initialization failure.
-// The worker thread count is clamped to the range 1..2.
-int main(int argc, char* argv[]) {
-    // Unbuffered output so log lines appear immediately.
-    std::cout.setf(std::ios::unitbuf);
-    std::cerr.setf(std::ios::unitbuf);
-    pocket_tts_accelerator::AcceleratorConfiguration configuration =
-        pocket_tts_accelerator::AcceleratorCore::get_default_configuration();
-    for (int argument_index = 1; argument_index < argc; ++argument_index) {
-        const std::string argument(argv[argument_index]);
-        if (argument == "--help" || argument == "-h") {
-            print_usage(argv[0]);
-            return 0;
-        }
-        if (argument == "--quiet" || argument == "-q") {
-            configuration.enable_verbose_logging = false;
-            continue;
-        }
-        const bool takes_value =
-            argument == "--socket" || argument == "--threads" || argument == "--memory";
-        if (!takes_value) {
-            std::cerr << "Unknown argument: " << argument << std::endl;
-            print_usage(argv[0]);
-            return 1;
-        }
-        if (argument_index + 1 >= argc) {
-            std::cerr << "Missing value for argument: " << argument << std::endl;
-            print_usage(argv[0]);
-            return 1;
-        }
-        const char* value_text = argv[++argument_index];
-        if (argument == "--socket") {
-            configuration.ipc_socket_path = value_text;
-            continue;
-        }
-        // --threads and --memory both take a non-negative integer.
-        unsigned long numeric_value = 0;
-        if (!parse_unsigned_argument(value_text, numeric_value)) {
-            std::cerr << "Invalid value for " << argument << ": " << value_text << std::endl;
-            print_usage(argv[0]);
-            return 1;
-        }
-        if (argument == "--threads") {
-            configuration.number_of_worker_threads = numeric_value;
-        } else {
-            std::size_t memory_mb = numeric_value;
-            configuration.memory_pool_size_bytes = memory_mb * 1024 * 1024;
-        }
-    }
-    if (configuration.number_of_worker_threads < 1) {
-        configuration.number_of_worker_threads = 1;
-    }
-    if (configuration.number_of_worker_threads > 2) {
-        configuration.number_of_worker_threads = 2;
-    }
-    pocket_tts_accelerator::AcceleratorCore accelerator(configuration);
-    if (!accelerator.initialize()) {
-        std::cerr << "Failed to initialize accelerator" << std::endl;
-        return 1;
-    }
-    accelerator.run();
-    return 0;
-}

accelerator/src/memory_pool.cpp DELETED Viewed

@@ -1,216 +0,0 @@
-//
-// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-// SPDX-License-Identifier: Apache-2.0
-//
-#include "memory_pool.hpp"
-#include <algorithm>
-#include <chrono>
-#include <cstring>
-namespace pocket_tts_accelerator {
-MemoryPool::MemoryPool(std::size_t initial_pool_size_bytes) // Starts empty; no memory is allocated up front.
-    : total_allocated_bytes(0)
-    , total_used_bytes(0)
-    , maximum_pool_size_bytes(initial_pool_size_bytes) { // The "initial" size is stored as the pool's maximum.
-}
-MemoryPool::~MemoryPool() { // Releases every block.
-    reset_pool();
-}
-// Hands out a buffer of at least requested_size_bytes.
-// Reuses the smallest free block that fits; otherwise creates a new block whose size
-// is rounded up to a multiple of 64 bytes. Before creating a block that would push the
-// pool past its maximum, unused blocks are released; the new block is created either way.
-std::uint8_t* MemoryPool::allocate(std::size_t requested_size_bytes) {
-    std::unique_lock<std::mutex> lock(pool_mutex);
-    const std::size_t reusable_index = find_suitable_block_index(requested_size_bytes);
-    const bool found_reusable = reusable_index != static_cast<std::size_t>(-1);
-    if (!found_reusable) {
-        const bool would_exceed_maximum =
-            total_allocated_bytes + requested_size_bytes > maximum_pool_size_bytes;
-        if (would_exceed_maximum) {
-            clear_unused_blocks();
-        }
-        // Round up to the next multiple of 64.
-        const std::size_t rounded_size = (requested_size_bytes + 63) & ~static_cast<std::size_t>(63);
-        memory_blocks.push_back(MemoryBlock{
-            std::make_unique<std::uint8_t[]>(rounded_size),
-            rounded_size,
-            true,
-            get_current_timestamp()
-        });
-        const std::size_t new_index = memory_blocks.size() - 1;
-        std::uint8_t* const fresh_pointer = memory_blocks[new_index].data.get();
-        pointer_to_block_index[fresh_pointer] = new_index;
-        total_allocated_bytes += rounded_size;
-        total_used_bytes += rounded_size;
-        return fresh_pointer;
-    }
-    MemoryBlock& recycled_block = memory_blocks[reusable_index];
-    recycled_block.is_in_use = true;
-    recycled_block.last_access_timestamp = get_current_timestamp();
-    total_used_bytes += recycled_block.block_size;
-    return recycled_block.data.get();
-}
-// Marks the block that owns pointer as free so it can be reused; the memory itself is
-// kept. Null pointers, unknown pointers and blocks that are already free are ignored.
-void MemoryPool::deallocate(std::uint8_t* pointer) {
-    if (pointer == nullptr) {
-        return;
-    }
-    std::unique_lock<std::mutex> lock(pool_mutex);
-    const auto entry = pointer_to_block_index.find(pointer);
-    if (entry == pointer_to_block_index.end()) {
-        return;
-    }
-    const std::size_t owning_index = entry->second;
-    if (owning_index >= memory_blocks.size()) {
-        return;
-    }
-    MemoryBlock& owning_block = memory_blocks[owning_index];
-    if (!owning_block.is_in_use) {
-        return;
-    }
-    owning_block.is_in_use = false;
-    owning_block.last_access_timestamp = get_current_timestamp();
-    total_used_bytes -= owning_block.block_size;
-}
-// Releases every block that is not in use and rebuilds the pointer-to-index map for
-// the blocks that remain (their relative order is preserved).
-//
-// Blocks are compacted in a single pass rather than erased one by one, so the cost is
-// linear in the number of blocks.
-//
-// This function takes no lock itself; allocate() calls it while already holding pool_mutex.
-// NOTE(review): callers outside the pool appear to need their own synchronization — confirm.
-void MemoryPool::clear_unused_blocks() {
-    std::size_t released_bytes = 0;
-    for (const MemoryBlock& block : memory_blocks) {
-        if (!block.is_in_use) {
-            released_bytes += block.block_size;
-        }
-    }
-    memory_blocks.erase(
-        std::remove_if(
-            memory_blocks.begin(),
-            memory_blocks.end(),
-            [](const MemoryBlock& block) { return !block.is_in_use; }
-        ),
-        memory_blocks.end()
-    );
-    total_allocated_bytes -= released_bytes;
-    // Indices shifted during compaction, so the map is rebuilt from scratch.
-    pointer_to_block_index.clear();
-    for (std::size_t index = 0; index < memory_blocks.size(); ++index) {
-        pointer_to_block_index[memory_blocks[index].data.get()] = index;
-    }
-}
-// Releases every block, in use or not, and zeroes both byte counters.
-void MemoryPool::reset_pool() {
-    std::lock_guard<std::mutex> guard(pool_mutex);
-    pointer_to_block_index.clear();
-    memory_blocks.clear();
-    total_used_bytes = 0;
-    total_allocated_bytes = 0;
-}
-// Total size of all blocks currently held by the pool, read under the pool mutex.
-std::size_t MemoryPool::get_total_allocated_bytes() const {
-    std::lock_guard<std::mutex> guard(pool_mutex);
-    return total_allocated_bytes;
-}
-// Total size of the blocks currently marked in use, read under the pool mutex.
-std::size_t MemoryPool::get_total_used_bytes() const {
-    std::lock_guard<std::mutex> guard(pool_mutex);
-    return total_used_bytes;
-}
-// Number of blocks currently held by the pool, read under the pool mutex.
-std::size_t MemoryPool::get_block_count() const {
-    std::lock_guard<std::mutex> guard(pool_mutex);
-    return memory_blocks.size();
-}
-// Best-fit search: returns the index of the smallest free block of at least
-// requested_size bytes (the first such block on ties), or static_cast<std::size_t>(-1)
-// when no free block is large enough.
-std::size_t MemoryPool::find_suitable_block_index(std::size_t requested_size) const {
-    const std::size_t not_found = static_cast<std::size_t>(-1);
-    std::size_t chosen_index = not_found;
-    std::size_t chosen_size = not_found;
-    for (std::size_t candidate = 0; candidate < memory_blocks.size(); ++candidate) {
-        const MemoryBlock& block = memory_blocks[candidate];
-        if (block.is_in_use || block.block_size < requested_size) {
-            continue;
-        }
-        if (block.block_size >= chosen_size) {
-            continue;
-        }
-        chosen_size = block.block_size;
-        chosen_index = candidate;
-    }
-    return chosen_index;
-}
-// Appends a new free block whose size is block_size rounded up to a multiple of 64
-// bytes, registers it in the pointer map, and adds it to the allocated-bytes counter.
-void MemoryPool::create_new_block(std::size_t block_size) {
-    const std::size_t rounded_size = (block_size + 63) & ~static_cast<std::size_t>(63);
-    const std::size_t new_index = memory_blocks.size();
-    memory_blocks.push_back(MemoryBlock{
-        std::make_unique<std::uint8_t[]>(rounded_size),
-        rounded_size,
-        false,
-        get_current_timestamp()
-    });
-    pointer_to_block_index[memory_blocks[new_index].data.get()] = new_index;
-    total_allocated_bytes += rounded_size;
-}
-// Returns the steady clock's time since its epoch, in milliseconds.
-std::uint64_t MemoryPool::get_current_timestamp() const {
-    using std::chrono::milliseconds;
-    using std::chrono::steady_clock;
-    const steady_clock::duration since_epoch = steady_clock::now().time_since_epoch();
-    return std::chrono::duration_cast<milliseconds>(since_epoch).count();
-}
-ScopedMemoryAllocation::ScopedMemoryAllocation(MemoryPool& pool, std::size_t size) // Allocates size bytes from pool on construction.
-    : memory_pool_pointer(&pool)
-    , allocated_pointer(pool.allocate(size))
-    , allocation_size(size) { // Records the requested size, not the pool's rounded block size.
-}
-ScopedMemoryAllocation::~ScopedMemoryAllocation() { // Returns the buffer to the pool unless this object was moved from.
-    if (memory_pool_pointer != nullptr && allocated_pointer != nullptr) {
-        memory_pool_pointer->deallocate(allocated_pointer);
-    }
-}
-ScopedMemoryAllocation::ScopedMemoryAllocation(ScopedMemoryAllocation&& other) noexcept // Takes over other's buffer.
-    : memory_pool_pointer(other.memory_pool_pointer)
-    , allocated_pointer(other.allocated_pointer)
-    , allocation_size(other.allocation_size) {
-    other.memory_pool_pointer = nullptr; // Nulling the source makes its destructor a no-op.
-    other.allocated_pointer = nullptr;
-    other.allocation_size = 0;
-}
-// Move assignment: returns the currently held buffer (if any) to its pool, takes over
-// other's buffer, and leaves other empty. Self-assignment is a no-op.
-ScopedMemoryAllocation& ScopedMemoryAllocation::operator=(ScopedMemoryAllocation&& other) noexcept {
-    if (this == &other) {
-        return *this;
-    }
-    const bool holds_buffer = memory_pool_pointer != nullptr && allocated_pointer != nullptr;
-    if (holds_buffer) {
-        memory_pool_pointer->deallocate(allocated_pointer);
-    }
-    memory_pool_pointer = other.memory_pool_pointer;
-    other.memory_pool_pointer = nullptr;
-    allocated_pointer = other.allocated_pointer;
-    other.allocated_pointer = nullptr;
-    allocation_size = other.allocation_size;
-    other.allocation_size = 0;
-    return *this;
-}
-std::uint8_t* ScopedMemoryAllocation::get() const { // Held buffer, or nullptr after being moved from.
-    return allocated_pointer;
-}
-std::size_t ScopedMemoryAllocation::size() const { // Size requested at construction, or 0 after being moved from.
-    return allocation_size;
-}
-}

accelerator/src/thread_pool.cpp DELETED Viewed

@@ -1,84 +0,0 @@
-//
-// SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-// SPDX-License-Identifier: Apache-2.0
-//
-#include "thread_pool.hpp"
-namespace pocket_tts_accelerator {
-ThreadPool::ThreadPool(std::size_t number_of_threads)
-    : should_stop(false)
-    , is_stopped(false)
-    , thread_count(number_of_threads) {
-    worker_threads.reserve(number_of_threads);
-    for (std::size_t thread_index = 0; thread_index < number_of_threads; ++thread_index) {
-        worker_threads.emplace_back(&ThreadPool::worker_thread_function, this);
-    }
-}
-ThreadPool::~ThreadPool() {
-    shutdown();
-}
-void ThreadPool::shutdown() {
-    {
-        std::unique_lock<std::mutex> lock(queue_mutex);
-        if (is_stopped.load()) {
-            return;
-        }
-        should_stop.store(true);
-    }
-    task_available_condition.notify_all();
-    for (std::thread& worker_thread : worker_threads) {
-        if (worker_thread.joinable()) {
-            worker_thread.join();
-        }
-    }
-    is_stopped.store(true);
-}
-bool ThreadPool::is_running() const {
-    return !should_stop.load() && !is_stopped.load();
-}
-std::size_t ThreadPool::get_pending_task_count() const {
-    std::unique_lock<std::mutex> lock(queue_mutex);
-    return task_queue.size();
-}
-std::size_t ThreadPool::get_thread_count() const {
-    return thread_count;
-}
-void ThreadPool::worker_thread_function() {
-    while (true) {
-        std::function<void()> task_to_execute;
-        {
-            std::unique_lock<std::mutex> lock(queue_mutex);
-            task_available_condition.wait(lock, [this] {
-                return should_stop.load() || !task_queue.empty();
-            });
-            if (should_stop.load() && task_queue.empty()) {
-                return;
-            }
-            task_to_execute = std::move(task_queue.front());
-            task_queue.pop();
-        }
-        task_to_execute();
-    }
-}
-}

app.py DELETED Viewed

@@ -1,372 +0,0 @@
-#
-# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-# SPDX-License-Identifier: Apache-2.0
-#
-import atexit
-import math
-import torch
-import gradio as gr
-from config import (
-    AVAILABLE_VOICES,
-    DEFAULT_VOICE,
-    DEFAULT_MODEL_VARIANT,
-    DEFAULT_TEMPERATURE,
-    DEFAULT_LSD_DECODE_STEPS,
-    DEFAULT_EOS_THRESHOLD,
-    DEFAULT_NOISE_CLAMP,
-    DEFAULT_FRAMES_AFTER_EOS,
-    MAXIMUM_INPUT_LENGTH,
-    VOICE_MODE_PRESET,
-    VOICE_MODE_CLONE,
-    EXAMPLE_PROMPTS,
-    ACCELERATOR_ENABLED,
-    ACCELERATOR_WORKER_THREADS
-)
-from src.core.authentication import authenticate_huggingface
-authenticate_huggingface()
-if ACCELERATOR_ENABLED:
-    from src.accelerator.client import start_accelerator_daemon, stop_accelerator_daemon
-    accelerator_started = start_accelerator_daemon()
-    if accelerator_started:
-        print("Accelerator daemon started successfully", flush=True)
-    else:
-        print("Accelerator daemon not available, using Python fallback", flush=True)
-    atexit.register(stop_accelerator_daemon)
-from src.core.memory import start_background_cleanup_thread
-start_background_cleanup_thread()
-from src.generation.handler import (
-    perform_speech_generation,
-    request_generation_stop
-)
-from src.ui.state import (
-    check_generate_button_state,
-    calculate_character_count_display,
-    determine_clear_button_visibility,
-    update_voice_mode_visibility
-)
-from src.ui.handlers import (
-    switch_to_generating_state,
-    switch_to_idle_state,
-    perform_clear_action,
-    create_example_handler,
-    format_example_button_label
-)
-from assets.css.styles import CSS
-from assets.static.title import TITLE
-from assets.static.header import HEADER
-from assets.static.footer import FOOTER
-from assets.static.sidebar import SIDEBAR
-with gr.Blocks(css=CSS, fill_height=False, fill_width=True) as app:
-    torch.set_num_threads(ACCELERATOR_WORKER_THREADS)
-    torch.set_num_interop_threads(ACCELERATOR_WORKER_THREADS)
-    ui_state = gr.State({"generating": False})
-    with gr.Sidebar():
-        gr.HTML(SIDEBAR())
-    with gr.Column(elem_classes="header-section"):
-        gr.HTML(TITLE())
-        gr.HTML(HEADER())
-    with gr.Row():
-        with gr.Column():
-            audio_output_component = gr.Audio(
-                label="Generated Speech Output",
-                type="filepath",
-                interactive=False
-            )
-            with gr.Accordion("Voice Selection", open=True):
-                voice_mode_radio = gr.Radio(
-                    label="Voice Mode",
-                    choices=[
-                        VOICE_MODE_PRESET,
-                        VOICE_MODE_CLONE
-                    ],
-                    value=VOICE_MODE_PRESET,
-                    info="Choose between preset voices or clone a voice from uploaded audio",
-                    elem_id="voice-mode"
-                )
-                with gr.Column(visible=True) as preset_voice_container:
-                    voice_preset_dropdown = gr.Dropdown(
-                        label="Select Preset Voice",
-                        choices=AVAILABLE_VOICES,
-                        value=DEFAULT_VOICE
-                    )
-                with gr.Column(visible=False) as clone_voice_container:
-                    voice_clone_audio_input = gr.Audio(
-                        label="Upload Audio for Voice Cloning",
-                        type="filepath"
-                    )
-            with gr.Accordion("Model Parameters", open=False):
-                with gr.Row():
-                    temperature_slider = gr.Slider(
-                        label="Temperature",
-                        minimum=0.1,
-                        maximum=2.0,
-                        step=0.05,
-                        value=DEFAULT_TEMPERATURE,
-                        info="Higher values produce more expressive speech"
-                    )
-                    lsd_decode_steps_slider = gr.Slider(
-                        label="LSD Decode Steps",
-                        minimum=1,
-                        maximum=20,
-                        step=1,
-                        value=DEFAULT_LSD_DECODE_STEPS,
-                        info="More steps may improve quality but slower"
-                    )
-                with gr.Row():
-                    noise_clamp_slider = gr.Slider(
-                        label="Noise Clamp",
-                        minimum=0.0,
-                        maximum=2.0,
-                        step=0.05,
-                        value=DEFAULT_NOISE_CLAMP,
-                        info="Maximum noise sampling value (0 = disabled)"
-                    )
-                    eos_threshold_slider = gr.Slider(
-                        label="End of Sequence Threshold",
-                        minimum=-10.0,
-                        maximum=0.0,
-                        step=0.25,
-                        value=DEFAULT_EOS_THRESHOLD,
-                        info="Smaller values cause earlier completion"
-                    )
-            with gr.Accordion("Advanced Settings", open=False):
-                model_variant_textbox = gr.Textbox(
-                    label="Model Variant Identifier",
-                    value=DEFAULT_MODEL_VARIANT,
-                    info="Model signature for generation"
-                )
-                with gr.Row():
-                    enable_custom_frames_checkbox = gr.Checkbox(
-                        label="Enable Custom Frames After EOS",
-                        value=False,
-                        info="Manually control post-EOS frame generation"
-                    )
-                    frames_after_eos_slider = gr.Slider(
-                        label="Frames After EOS",
-                        minimum=0,
-                        maximum=100,
-                        step=1,
-                        value=DEFAULT_FRAMES_AFTER_EOS,
-                        info="Additional frames after end-of-sequence (80ms per frame)"
-                    )
-        with gr.Column(scale=1):
-            text_input_component = gr.Textbox(
-                label="Prompt",
-                placeholder="Enter the text you want to convert to speech...",
-                lines=2,
-                max_lines=20,
-                max_length=MAXIMUM_INPUT_LENGTH,
-                autoscroll=True
-            )
-            character_count_display = gr.HTML(
-                f"""
-                <div class="character-count">
-                    <span>0 / {MAXIMUM_INPUT_LENGTH}</span>
-                </div>
-                """,
-                visible=False
-            )
-            generate_button = gr.Button(
-                "Generate",
-                variant="primary",
-                size="lg",
-                interactive=False
-            )
-            stop_button = gr.Button(
-                "Stop",
-                variant="stop",
-                size="lg",
-                visible=False
-            )
-            clear_button = gr.Button(
-                "Clear",
-                variant="secondary",
-                size="lg",
-                visible=False
-            )
-            gr.HTML(
-                """
-                <div class="example-prompts">
-                    <h3>Example Prompts</h3>
-                    <p>Click any example to generate speech with its assigned voice</p>
-                </div>
-                """
-            )
-            example_buttons_list = []
-            num_examples = len(EXAMPLE_PROMPTS)
-            examples_per_row = 2
-            num_rows = math.ceil(num_examples / examples_per_row)
-            for row_idx in range(num_rows):
-                with gr.Row():
-                    start_idx = row_idx * examples_per_row
-                    end_idx = min(start_idx + examples_per_row, num_examples)
-                    for i in range(start_idx, end_idx):
-                        btn = gr.Button(
-                            format_example_button_label(
-                                EXAMPLE_PROMPTS[i]["text"],
-                                EXAMPLE_PROMPTS[i]["voice"]
-                            ),
-                            size="sm",
-                            variant="secondary"
-                        )
-                        example_buttons_list.append(btn)
-    gr.HTML(FOOTER())
-    generation_inputs = [
-        text_input_component,
-        voice_mode_radio,
-        voice_preset_dropdown,
-        voice_clone_audio_input,
-        model_variant_textbox,
-        lsd_decode_steps_slider,
-        temperature_slider,
-        noise_clamp_slider,
-        eos_threshold_slider,
-        frames_after_eos_slider,
-        enable_custom_frames_checkbox
-    ]
-    voice_mode_radio.change(
-        fn=update_voice_mode_visibility,
-        inputs=[voice_mode_radio],
-        outputs=[
-            preset_voice_container,
-            clone_voice_container
-        ]
-    )
-    text_input_component.change(
-        fn=calculate_character_count_display,
-        inputs=[text_input_component],
-        outputs=[character_count_display]
-    )
-    text_input_component.change(
-        fn=check_generate_button_state,
-        inputs=[
-            text_input_component,
-            ui_state
-        ],
-        outputs=[generate_button]
-    )
-    text_input_component.change(
-        fn=determine_clear_button_visibility,
-        inputs=[
-            text_input_component,
-            ui_state
-        ],
-        outputs=[clear_button]
-    )
-    generate_button.click(
-        fn=switch_to_generating_state,
-        inputs=[ui_state],
-        outputs=[
-            generate_button,
-            stop_button,
-            clear_button,
-            ui_state
-        ]
-    ).then(
-        fn=perform_speech_generation,
-        inputs=generation_inputs,
-        outputs=[audio_output_component]
-    ).then(
-        fn=switch_to_idle_state,
-        inputs=[
-            text_input_component,
-            ui_state
-        ],
-        outputs=[
-            generate_button,
-            stop_button,
-            clear_button,
-            ui_state
-        ]
-    )
-    stop_button.click(
-        fn=request_generation_stop,
-        outputs=[stop_button]
-    )
-    clear_button.click(
-        fn=perform_clear_action,
-        outputs=[
-            text_input_component,
-            audio_output_component,
-            clear_button,
-            voice_mode_radio,
-            voice_preset_dropdown,
-            voice_clone_audio_input
-        ]
-    )
-    for button_index, example_button in enumerate(example_buttons_list):
-        example_text = EXAMPLE_PROMPTS[button_index]["text"]
-        example_voice = EXAMPLE_PROMPTS[button_index]["voice"]
-        example_button.click(
-            fn=switch_to_generating_state,
-            inputs=[ui_state],
-            outputs=[
-                generate_button,
-                stop_button,
-                clear_button,
-                ui_state
-            ]
-        ).then(
-            fn=create_example_handler(example_text, example_voice),
-            outputs=[
-                text_input_component,
-                voice_mode_radio,
-                voice_preset_dropdown
-            ]
-        ).then(
-            fn=perform_speech_generation,
-            inputs=generation_inputs,
-            outputs=[audio_output_component]
-        ).then(
-            fn=switch_to_idle_state,
-            inputs=[
-                text_input_component,
-                ui_state
-            ],
-            outputs=[
-                generate_button,
-                stop_button,
-                clear_button,
-                ui_state
-            ]
-        )
-app.launch(
-    server_name="0.0.0.0",
-    max_file_size="1mb"
-)

assets/css/styles.py DELETED Viewed

@@ -1,161 +0,0 @@
-#
-# https://huggingface.co/spaces/D3vShoaib/pocket-tts
-#
-CSS = """
-footer {
-    visibility: hidden;
-}
-.gradio-container {
-    max-width: 100% !important;
-    padding: 0 !important;
-}
-@media (min-width: 768px) {
-    .gradio-container {
-        padding-left: 2% !important;
-        padding-right: 2% !important;
-    }
-}
-.header-section {
-    text-align: left;
-    margin-bottom: 1.5rem;
-}
-.main-title {
-    color: #10b981;
-    font-weight: 800;
-    font-size: 1.8rem;
-    margin: 5px 0;
-}
-@media (min-width: 768px) {
-    .main-title {
-        font-size: 2.2rem;
-    }
-}
-.logo-container {
-    display: flex;
-    justify-content: flex-start;
-    align-items: center;
-    gap: 10px;
-    margin-bottom: 0;
-}
-.logo-img {
-    height: 40px;
-    border-radius: 8px;
-}
-@media (min-width: 768px) {
-    .logo-img {
-        height: 50px;
-    }
-    .logo-container {
-        gap: 15px;
-    }
-}
-.links-row {
-    display: flex;
-    flex-wrap: wrap;
-    justify-content: flex-start;
-    gap: 8px;
-    margin: 5px 0 10px 0;
-    font-size: 0.85rem;
-}
-@media (min-width: 768px) {
-    .links-row {
-        gap: 10px;
-        font-size: 0.9rem;
-    }
-}
-.links-row a {
-    color: #10b981;
-    text-decoration: none;
-    padding: 3px 10px;
-    border: 1px solid #10b981;
-    border-radius: 15px;
-    transition: all 0.2s;
-    white-space: nowrap;
-}
-.links-row a:hover {
-    background-color: #10b981;
-    color: white;
-}
-.disclaimer {
-    text-align: center;
-    font-size: 10px;
-    line-height: 1.4;
-    color: #9ca3af;
-    margin-top: 30px;
-    padding: 15px;
-    border-top: 1px solid currentColor;
-}
-@media (min-width: 768px) {
-    .disclaimer {
-        margin-top: 40px;
-        padding: 20px;
-    }
-}
-.disclaimer-copyright {
-    opacity: 0.8;
-}
-.disclaimer-warning {
-    font-size: 8px;
-    opacity: 0.7;
-}
-.accent-link {
-    color: #10b981;
-    text-decoration: none;
-}
-#voice-mode .wrap {
-    display: flex !important;
-    flex-direction: row !important;
-    width: 100% !important;
-}
-#voice-mode .wrap label {
-    flex: 1 !important;
-    justify-content: center !important;
-    text-align: center !important;
-}
-.example-prompts {
-    padding: 16px 0 8px 0;
-}
-.example-prompts h3 {
-    margin: 0 0 8px 0;
-    font-size: 1.1em;
-}
-.example-prompts p {
-    margin: 0;
-    opacity: 0.7;
-    font-size: 0.9em;
-}
-.character-count {
-    text-align: right;
-    padding: 4px 0;
-}
-.character-count span {
-    color: var(--body-text-color-subdued);
-    font-size: 0.85em;
-}
-"""

assets/static/footer.py DELETED Viewed

@@ -1,32 +0,0 @@
-#
-# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-# SPDX-License-Identifier: Apache-2.0
-#
-from config import (
-    COPYRIGHT_NAME,
-    COPYRIGHT_URL,
-    DESIGN_BY_NAME,
-    DESIGN_BY_URL
-)
-def FOOTER():
-    return f"""
-    <div class="disclaimer">
-        <br>
-        <p class="disclaimer-copyright">
-            Copyright © 2026
-            <a href="{COPYRIGHT_URL}" target="_blank" class="accent-link">
-                {COPYRIGHT_NAME}
-            </a>,
-            design inspired by
-            <a href="{DESIGN_BY_URL}" target="_blank" class="accent-link">
-                {DESIGN_BY_NAME}
-            </a>.
-        </p>
-        <p class="disclaimer-warning">
-            ⚠️ This Space is not affiliated with Kyutai TTS and is provided for demonstration purposes only.
-        </p>
-    </div>
-    """

assets/static/header.py DELETED Viewed

@@ -1,18 +0,0 @@
-#
-# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-# SPDX-License-Identifier: Apache-2.0
-#
-from config import HEADER_LINKS
-def HEADER():
-    data = ""
-    for link in HEADER_LINKS:
-        data += f'<a href="{link["url"]}" target="_blank">{link["icon"]} {link["text"]}</a>\n'
-    return f"""
-    <div class="links-row">
-        {data}
-    </div>
-    """

assets/static/sidebar.py DELETED Viewed

@@ -1,44 +0,0 @@
-#
-# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-# SPDX-License-Identifier: Apache-2.0
-#
-def SIDEBAR():
-    return f"""
-    <h1>
-        Audio Generation Playground part of the
-        <a href="https://huggingface.co/spaces/hadadxyz/ai" target="_blank" class="accent-link">
-            Demo Playground
-        </a>,
-        and the
-        <a href="https://huggingface.co/umint" target="_blank" class="accent-link">
-            UltimaX Intelligence
-        </a>
-        project.
-    </h1><br />
-    <p>
-        This Space runs the
-        <b>
-            <a href="https://huggingface.co/kyutai/pocket-tts" target="_blank" class="accent-link">
-                Pocket TTS
-            </a>
-        </b>
-        model from <b>Kyutai</b>.<br /><br />
-        A lightweight text-to-speech (TTS) application designed to run
-        efficiently on CPUs. Forget about the hassle of using GPUs and
-        web APIs serving TTS models.<br /><br />
-        Additionally, this Space uses a custom Docker image to
-        maximize model performance and is optimized for the
-        constraints of Hugging Face Spaces.
-    </p><br />
-    <p>
-        <b>Like this project?</b> You can support me by buying a
-        <a href="https://ko-fi.com/hadad" target="_blank" class="accent-link">
-            coffee
-        </a>.
-    </p>
-    """

assets/static/title.py DELETED Viewed

@@ -1,15 +0,0 @@
-#
-# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-# SPDX-License-Identifier: Apache-2.0
-#
-from config import KYUTAI_LOGO_URL, POCKET_TTS_LOGO_URL, SPACE_INFO
-def TITLE():
-    return f"""
-    <div class="logo-container">
-        <img src="{KYUTAI_LOGO_URL}" class="logo-img" alt="Kyutai Logo">
-        <img src="{POCKET_TTS_LOGO_URL}" class="logo-img" alt="PocketTTS Logo">
-        <h1 class='main-title'>{SPACE_INFO}</h1>
-    </div>
-    """

config.py DELETED Viewed

@@ -1,126 +0,0 @@
-#
-# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-# SPDX-License-Identifier: Apache-2.0
-#
-import os
-HF_TOKEN = os.getenv("HF_TOKEN", None)
-AVAILABLE_VOICES = [
-    "alba",
-    "marius",
-    "javert",
-    "jean",
-    "fantine",
-    "cosette",
-    "eponine",
-    "azelma"
-]
-DEFAULT_VOICE = "alba"
-DEFAULT_MODEL_VARIANT = "b6369a24"
-DEFAULT_TEMPERATURE = 0.7
-DEFAULT_LSD_DECODE_STEPS = 1
-DEFAULT_EOS_THRESHOLD = -4.0
-DEFAULT_NOISE_CLAMP = 0.0
-DEFAULT_FRAMES_AFTER_EOS = 10
-VOICE_MODE_PRESET = "Preset Voices"
-VOICE_MODE_CLONE = "Voice Cloning"
-VOICE_STATE_CACHE_MAXIMUM_SIZE = 8
-VOICE_STATE_CACHE_CLEANUP_THRESHOLD = 4
-BACKGROUND_CLEANUP_INTERVAL = 300
-MAXIMUM_INPUT_LENGTH = 1000
-TEMPORARY_FILE_LIFETIME_SECONDS = 7200
-MAXIMUM_MEMORY_USAGE = 1 * 1024 * 1024 * 1024
-MEMORY_WARNING_THRESHOLD = int(0.7 * MAXIMUM_MEMORY_USAGE)
-MEMORY_CRITICAL_THRESHOLD = int(0.85 * MAXIMUM_MEMORY_USAGE)
-MEMORY_CHECK_INTERVAL = 30
-MEMORY_IDLE_TARGET = int(0.5 * MAXIMUM_MEMORY_USAGE)
-MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES = 1 * 1024 * 1024
-AUDIO_CONVERSION_QUEUE_TIMEOUT_SECONDS = 60
-MODEL_LOAD_RETRY_ATTEMPTS = 3
-SUPPORTED_AUDIO_EXTENSIONS = [
-    ".wav",
-    ".mp3",
-    ".flac",
-    ".ogg",
-    ".m4a",
-    ".aac",
-    ".wma",
-    ".aiff",
-    ".aif",
-    ".opus",
-    ".webm",
-    ".mp4",
-    ".mkv",
-    ".avi",
-    ".mov",
-    ".3gp"
-]
-AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES = {
-    "m4a": "M4A/AAC",
-    "aif": "AIFF",
-    "3gp": "3GP"
-}
-EXAMPLE_PROMPTS = [
-    {
-        "text": "The quick brown fox jumps over the lazy dog near the riverbank.",
-        "voice": "alba"
-    },
-    {
-        "text": "Welcome to the future of text to speech technology powered by artificial intelligence.",
-        "voice": "marius"
-    },
-    {
-        "text": "Technology continues to push the boundaries of what we thought was possible.",
-        "voice": "javert"
-    },
-    {
-        "text": "The weather today is absolutely beautiful and perfect for a relaxing walk outside.",
-        "voice": "fantine"
-    },
-    {
-        "text": "Science and innovation are transforming how we interact with the world around us.",
-        "voice": "jean"
-    }
-]
-KYUTAI_LOGO_URL = "https://cdn-avatars.huggingface.co/v1/production/uploads/6355a3c1805be5a8f30fea49/8xGdIOlfkopZfhbMitw_k.jpeg"
-POCKET_TTS_LOGO_URL = "https://raw.githubusercontent.com/kyutai-labs/pocket-tts/refs/heads/main/docs/logo.png"
-SPACE_INFO = "Pocket TTS"
-HEADER_LINKS = [
-    {"icon": "🔊", "text": "Demo", "url": "https://kyutai.org/tts"},
-    {"icon": "🐱‍💻", "text": "GitHub", "url": "https://github.com/kyutai-labs/pocket-tts"},
-    {"icon": "🤗", "text": "Model Card", "url": "https://huggingface.co/kyutai/pocket-tts"},
-    {"icon": "🤗", "text": "Space", "url": "https://huggingface.co/spaces/hadadxyz/pocket-tts-hf-cpu-optimized"},
-    {"icon": "📄", "text": "Paper", "url": "https://arxiv.org/abs/2509.06926"},
-    {"icon": "📚", "text": "Docs", "url": "https://github.com/kyutai-labs/pocket-tts/tree/main/docs"},
-]
-COPYRIGHT_NAME = "Hadad Darajat"
-COPYRIGHT_URL = "https://www.linkedin.com/in/hadadrjt"
-DESIGN_BY_NAME = "D3vShoaib/pocket-tts"
-DESIGN_BY_URL = f"https://huggingface.co/spaces/{DESIGN_BY_NAME}"
-ACCELERATOR_SOCKET_PATH = "/app/pocket_tts_accelerator.sock"
-ACCELERATOR_BINARY_PATH = "/app/bin/pocket_tts_accelerator"
-ACCELERATOR_WORKER_THREADS = 1
-ACCELERATOR_MEMORY_POOL_MB = 64
-ACCELERATOR_LOG_PREFIX = "[ACCELERATOR]"
-ACCELERATOR_ENABLED = True

src/accelerator/client.py DELETED Viewed

@@ -1,583 +0,0 @@
-#
-# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-# SPDX-License-Identifier: Apache-2.0
-#
-import os
-import socket
-import struct
-import subprocess
-import tempfile
-import threading
-import sys
-from typing import Optional, Tuple, Dict, Any
-from config import (
-    ACCELERATOR_SOCKET_PATH,
-    ACCELERATOR_BINARY_PATH,
-    ACCELERATOR_WORKER_THREADS,
-    ACCELERATOR_MEMORY_POOL_MB,
-    ACCELERATOR_LOG_PREFIX
-)
-from ..core.state import (
-    accelerator_log_lock,
-    accelerator_log_thread,
-    accelerator_log_stop_event
-)
-PROTOCOL_MAGIC_NUMBER = 0x50545453
-COMMAND_PING = 0
-COMMAND_PROCESS_AUDIO = 1
-COMMAND_CONVERT_TO_MONO = 2
-COMMAND_CONVERT_TO_PCM = 3
-COMMAND_RESAMPLE_AUDIO = 4
-COMMAND_GET_MEMORY_STATS = 5
-COMMAND_CLEAR_MEMORY_POOL = 6
-COMMAND_SHUTDOWN = 7
-RESPONSE_SUCCESS = 0
-RESPONSE_ERROR_INVALID_COMMAND = 1
-RESPONSE_ERROR_FILE_NOT_FOUND = 2
-RESPONSE_ERROR_PROCESSING_FAILED = 3
-RESPONSE_ERROR_MEMORY_ALLOCATION = 4
-RESPONSE_ERROR_INTERNAL = 5
-REQUEST_HEADER_FORMAT = "=IIII"
-RESPONSE_HEADER_FORMAT = "=IIII"
-REQUEST_HEADER_SIZE = struct.calcsize(REQUEST_HEADER_FORMAT)
-RESPONSE_HEADER_SIZE = struct.calcsize(RESPONSE_HEADER_FORMAT)
-PROCESS_AUDIO_REQUEST_FORMAT = "=512s512sII"
-PROCESS_AUDIO_REQUEST_SIZE = struct.calcsize(PROCESS_AUDIO_REQUEST_FORMAT)
-MEMORY_STATS_RESPONSE_FORMAT = "=QQQ"
-MEMORY_STATS_RESPONSE_SIZE = struct.calcsize(MEMORY_STATS_RESPONSE_FORMAT)
-accelerator_process_handle = None
-accelerator_process_lock = threading.Lock()
-request_id_counter = 0
-request_id_lock = threading.Lock()
-def log_accelerator_message(message: str):
-    with accelerator_log_lock:
-        print(f"{ACCELERATOR_LOG_PREFIX} {message}", flush=True)
-def stream_accelerator_output(process_handle: subprocess.Popen):
-    try:
-        while not accelerator_log_stop_event.is_set():
-            if process_handle.poll() is not None:
-                break
-            if process_handle.stdout:
-                line = process_handle.stdout.readline()
-                if line:
-                    decoded_line = line.decode("utf-8", errors="replace").rstrip()
-                    if decoded_line:
-                        log_accelerator_message(decoded_line)
-    except Exception as stream_error:
-        log_accelerator_message(f"Log stream error: {str(stream_error)}")
-def stream_accelerator_stderr(process_handle: subprocess.Popen):
-    try:
-        while not accelerator_log_stop_event.is_set():
-            if process_handle.poll() is not None:
-                break
-            if process_handle.stderr:
-                line = process_handle.stderr.readline()
-                if line:
-                    decoded_line = line.decode("utf-8", errors="replace").rstrip()
-                    if decoded_line:
-                        log_accelerator_message(f"[STDERR] {decoded_line}")
-    except Exception as stream_error:
-        log_accelerator_message(f"Stderr stream error: {str(stream_error)}")
-class AcceleratorClient:
-    def __init__(self, socket_path: str = ACCELERATOR_SOCKET_PATH):
-        self.socket_path = socket_path
-        self.connection_timeout = 5.0
-        self.read_timeout = 30.0
-    def is_connected(self) -> bool:
-        try:
-            response = self.send_ping()
-            return response is not None and response.startswith(b"PONG")
-        except Exception:
-            return False
-    def send_ping(self) -> Optional[bytes]:
-        return self._send_command(COMMAND_PING, b"")
-    def process_audio(
-        self,
-        input_file_path: str,
-        output_file_path: str,
-        target_sample_rate: int = 0,
-        options_flags: int = 0
-    ) -> Tuple[bool, str]:
-        payload = self._pack_process_audio_request(
-            input_file_path,
-            output_file_path,
-            target_sample_rate,
-            options_flags
-        )
-        log_accelerator_message(f"Processing audio: {input_file_path} -> {output_file_path}")
-        response = self._send_command(COMMAND_PROCESS_AUDIO, payload)
-        if response is None:
-            log_accelerator_message("Failed to communicate with accelerator for process_audio")
-            return False, "Failed to communicate with accelerator"
-        response_string = response.decode("utf-8", errors="ignore")
-        if response_string.startswith("SUCCESS:"):
-            log_accelerator_message(f"Audio processing succeeded: {response_string[8:]}")
-            return True, response_string[8:]
-        elif response_string.startswith("ERROR:"):
-            log_accelerator_message(f"Audio processing failed: {response_string[6:]}")
-            return False, response_string[6:]
-        else:
-            log_accelerator_message(f"Audio processing unknown response: {response_string}")
-            return False, response_string
-    def convert_to_mono(
-        self,
-        input_file_path: str,
-        output_file_path: str
-    ) -> Tuple[bool, str]:
-        payload = self._pack_process_audio_request(
-            input_file_path,
-            output_file_path,
-            0,
-            0
-        )
-        log_accelerator_message(f"Converting to mono: {input_file_path} -> {output_file_path}")
-        response = self._send_command(COMMAND_CONVERT_TO_MONO, payload)
-        if response is None:
-            log_accelerator_message("Failed to communicate with accelerator for convert_to_mono")
-            return False, "Failed to communicate with accelerator"
-        response_string = response.decode("utf-8", errors="ignore")
-        if response_string.startswith("SUCCESS:"):
-            log_accelerator_message(f"Mono conversion succeeded: {response_string[8:]}")
-            return True, response_string[8:]
-        elif response_string.startswith("ERROR:"):
-            log_accelerator_message(f"Mono conversion failed: {response_string[6:]}")
-            return False, response_string[6:]
-        else:
-            log_accelerator_message(f"Mono conversion unknown response: {response_string}")
-            return False, response_string
-    def convert_to_pcm(
-        self,
-        input_file_path: str,
-        output_file_path: str
-    ) -> Tuple[bool, str]:
-        payload = self._pack_process_audio_request(
-            input_file_path,
-            output_file_path,
-            0,
-            0
-        )
-        log_accelerator_message(f"Converting to PCM: {input_file_path} -> {output_file_path}")
-        response = self._send_command(COMMAND_CONVERT_TO_PCM, payload)
-        if response is None:
-            log_accelerator_message("Failed to communicate with accelerator for convert_to_pcm")
-            return False, "Failed to communicate with accelerator"
-        response_string = response.decode("utf-8", errors="ignore")
-        if response_string.startswith("SUCCESS:"):
-            log_accelerator_message(f"PCM conversion succeeded: {response_string[8:]}")
-            return True, response_string[8:]
-        elif response_string.startswith("ERROR:"):
-            log_accelerator_message(f"PCM conversion failed: {response_string[6:]}")
-            return False, response_string[6:]
-        else:
-            log_accelerator_message(f"PCM conversion unknown response: {response_string}")
-            return False, response_string
-    def resample_audio(
-        self,
-        input_file_path: str,
-        output_file_path: str,
-        target_sample_rate: int
-    ) -> Tuple[bool, str]:
-        """Ask the accelerator to resample an audio file.
-
-        Returns ``(True, detail)`` when the reply starts with ``SUCCESS:``,
-        ``(False, detail)`` when it starts with ``ERROR:``, and
-        ``(False, raw_reply_text)`` for any other reply. A transport failure
-        yields ``(False, "Failed to communicate with accelerator")``.
-        """
-        request_payload = self._pack_process_audio_request(
-            input_file_path,
-            output_file_path,
-            target_sample_rate,
-            0
-        )
-        log_accelerator_message(f"Resampling audio to {target_sample_rate}Hz: {input_file_path} -> {output_file_path}")
-        raw_reply = self._send_command(COMMAND_RESAMPLE_AUDIO, request_payload)
-        if raw_reply is None:
-            log_accelerator_message("Failed to communicate with accelerator for resample_audio")
-            return False, "Failed to communicate with accelerator"
-        reply_text = raw_reply.decode("utf-8", errors="ignore")
-        # Each known prefix maps to (outcome flag, word used in the log line).
-        known_prefixes = (("SUCCESS:", True, "succeeded"), ("ERROR:", False, "failed"))
-        for prefix, outcome, outcome_word in known_prefixes:
-            if reply_text.startswith(prefix):
-                detail = reply_text[len(prefix):]
-                log_accelerator_message(f"Resampling {outcome_word}: {detail}")
-                return outcome, detail
-        log_accelerator_message(f"Resampling unknown response: {reply_text}")
-        return False, reply_text
-    def get_memory_stats(self) -> Optional[Dict[str, int]]:
-        """Fetch memory-pool counters from the accelerator.
-
-        Returns a dict with ``total_allocated_bytes``, ``total_used_bytes`` and
-        ``block_count``, or ``None`` when no reply arrives or the reply is
-        shorter than ``MEMORY_STATS_RESPONSE_SIZE``.
-        """
-        raw_reply = self._send_command(COMMAND_GET_MEMORY_STATS, b"")
-        reply_is_usable = raw_reply is not None and len(raw_reply) >= MEMORY_STATS_RESPONSE_SIZE
-        if not reply_is_usable:
-            log_accelerator_message("Failed to get memory stats from accelerator")
-            return None
-        # Only the fixed-size prefix is decoded; any trailing bytes are ignored.
-        fixed_part = raw_reply[:MEMORY_STATS_RESPONSE_SIZE]
-        allocated_bytes, used_bytes, blocks = struct.unpack(MEMORY_STATS_RESPONSE_FORMAT, fixed_part)
-        log_accelerator_message(f"Memory stats: allocated={allocated_bytes}, used={used_bytes}, blocks={blocks}")
-        return {
-            "total_allocated_bytes": allocated_bytes,
-            "total_used_bytes": used_bytes,
-            "block_count": blocks
-        }
-    def clear_memory_pool(self) -> bool:
-        """Ask the accelerator to clear its memory pool.
-
-        Returns ``True`` when ``_send_command`` returns anything other than
-        ``None``, ``False`` otherwise.
-        """
-        log_accelerator_message("Clearing accelerator memory pool")
-        if self._send_command(COMMAND_CLEAR_MEMORY_POOL, b"") is None:
-            log_accelerator_message("Failed to clear memory pool")
-            return False
-        log_accelerator_message("Memory pool cleared successfully")
-        return True
-    def shutdown_accelerator(self) -> bool:
-        """Send the shutdown command; ``True`` when a reply (not ``None``) came back."""
-        log_accelerator_message("Sending shutdown command to accelerator")
-        shutdown_reply = self._send_command(COMMAND_SHUTDOWN, b"")
-        acknowledged = shutdown_reply is not None
-        return acknowledged
-    def _get_next_request_id(self) -> int:
-        # Returns the next request id from the module-level counter shared by all
-        # clients. The increment and read happen while holding request_id_lock.
-        global request_id_counter
-        with request_id_lock:
-            request_id_counter += 1
-            return request_id_counter
-    def _pack_process_audio_request(
-        self,
-        input_path: str,
-        output_path: str,
-        target_sample_rate: int,
-        options_flags: int
-    ) -> bytes:
-        """Serialize an audio-processing request with ``PROCESS_AUDIO_REQUEST_FORMAT``.
-
-        Each path is UTF-8 encoded, cut to at most 511 bytes, and NUL-padded to
-        exactly 512 bytes, so the field always ends in at least one NUL byte.
-        """
-        def as_fixed_field(path_text: str) -> bytes:
-            # 511 payload bytes at most, then NUL fill up to 512.
-            return path_text.encode("utf-8")[:511].ljust(512, b"\x00")
-
-        return struct.pack(
-            PROCESS_AUDIO_REQUEST_FORMAT,
-            as_fixed_field(input_path),
-            as_fixed_field(output_path),
-            target_sample_rate,
-            options_flags
-        )
-    def _send_command(
-        self,
-        command_type: int,
-        payload: bytes
-    ) -> Optional[bytes]:
-        """Send one framed request over the Unix socket and read the framed reply.
-
-        A new connection is opened per call. The request is a header packed
-        with ``REQUEST_HEADER_FORMAT`` (magic, command, payload length, request
-        id) followed by the payload when it is non-empty.
-
-        Returns:
-            The reply payload (possibly ``b""``) when the status code equals
-            ``RESPONSE_SUCCESS``. For any other status, the payload when it is
-            non-empty, otherwise ``None``. ``None`` is also returned on a short
-            read, bad magic number, request-id mismatch, timeout, socket error,
-            or any other exception (all of which are logged, not raised).
-        """
-        try:
-            # The context manager closes the socket on every exit path,
-            # including exceptions raised by connect/sendall/unpack.
-            with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as client_socket:
-                client_socket.settimeout(self.connection_timeout)
-                client_socket.connect(self.socket_path)
-                request_id = self._get_next_request_id()
-                request_header = struct.pack(
-                    REQUEST_HEADER_FORMAT,
-                    PROTOCOL_MAGIC_NUMBER,
-                    command_type,
-                    len(payload),
-                    request_id
-                )
-                client_socket.sendall(request_header)
-                if payload:
-                    client_socket.sendall(payload)
-                # Reads use their own timeout, separate from the connect timeout.
-                client_socket.settimeout(self.read_timeout)
-                response_header_data = self._receive_exactly(client_socket, RESPONSE_HEADER_SIZE)
-                if response_header_data is None:
-                    return None
-                magic_number, status_code, payload_size, response_request_id = struct.unpack(
-                    RESPONSE_HEADER_FORMAT,
-                    response_header_data
-                )
-                if magic_number != PROTOCOL_MAGIC_NUMBER:
-                    log_accelerator_message(f"Invalid magic number in response: {magic_number}")
-                    return None
-                if response_request_id != request_id:
-                    log_accelerator_message(f"Request ID mismatch: expected {request_id}, got {response_request_id}")
-                    return None
-                response_payload = b""
-                if payload_size > 0:
-                    response_payload = self._receive_exactly(client_socket, payload_size)
-                    if response_payload is None:
-                        return None
-            if status_code != RESPONSE_SUCCESS:
-                # A failed status still hands back its payload (the error text)
-                # when there is one; callers parse the "ERROR:" prefix.
-                return response_payload if response_payload else None
-            return response_payload
-        except socket.timeout:
-            log_accelerator_message("Socket timeout while communicating with accelerator")
-            return None
-        except socket.error as socket_err:
-            log_accelerator_message(f"Socket error: {str(socket_err)}")
-            return None
-        except Exception as general_error:
-            log_accelerator_message(f"Unexpected error: {str(general_error)}")
-            return None
-    def _receive_exactly(
-        self,
-        client_socket: socket.socket,
-        num_bytes: int
-    ) -> Optional[bytes]:
-        """Read exactly ``num_bytes`` from ``client_socket``.
-
-        Returns the collected bytes, or ``None`` if the peer closes the
-        connection early (empty ``recv``) or a timeout/socket error occurs.
-        A request for zero or fewer bytes returns ``b""`` without reading.
-        """
-        pieces = []
-        outstanding = num_bytes
-        try:
-            while outstanding > 0:
-                piece = client_socket.recv(outstanding)
-                if not piece:
-                    return None
-                pieces.append(piece)
-                outstanding -= len(piece)
-        except (socket.timeout, socket.error):
-            return None
-        return b"".join(pieces)
-def is_accelerator_available() -> bool:
-    """Report whether the accelerator socket file exists and a client can connect.
-
-    The client is only constructed when ``ACCELERATOR_SOCKET_PATH`` exists.
-    """
-    socket_file_present = os.path.exists(ACCELERATOR_SOCKET_PATH)
-    if socket_file_present:
-        return AcceleratorClient().is_connected()
-    return False
-def start_accelerator_daemon() -> bool:
-    """Launch the accelerator binary and wait until it answers on its socket.
-
-    Returns ``True`` when a live daemon is already tracked or when the new
-    process becomes reachable. Returns ``False`` when the binary is missing,
-    the process exits during startup, it never becomes reachable, or launching
-    raises.
-
-    The whole operation runs while holding ``accelerator_process_lock``,
-    including the readiness wait (at most about 5 seconds of sleeping).
-    """
-    global accelerator_process_handle
-    import time
-    from ..core import state as global_state
-    with accelerator_process_lock:
-        if accelerator_process_handle is not None:
-            # poll() is None means the previously started process is still running.
-            if accelerator_process_handle.poll() is None:
-                return True
-        if not os.path.exists(ACCELERATOR_BINARY_PATH):
-            log_accelerator_message(f"Accelerator binary not found: {ACCELERATOR_BINARY_PATH}")
-            return False
-        try:
-            log_accelerator_message("Starting accelerator daemon...")
-            global_state.accelerator_log_stop_event.clear()
-            accelerator_process_handle = subprocess.Popen(
-                [
-                    ACCELERATOR_BINARY_PATH,
-                    "--socket", ACCELERATOR_SOCKET_PATH,
-                    "--threads", str(ACCELERATOR_WORKER_THREADS),
-                    "--memory", str(ACCELERATOR_MEMORY_POOL_MB)
-                ],
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-                start_new_session=True
-            )
-            # Both pipes are drained by daemon threads.
-            stdout_thread = threading.Thread(
-                target=stream_accelerator_output,
-                args=(accelerator_process_handle,),
-                daemon=True,
-                name="AcceleratorStdoutThread"
-            )
-            stdout_thread.start()
-            stderr_thread = threading.Thread(
-                target=stream_accelerator_stderr,
-                args=(accelerator_process_handle,),
-                daemon=True,
-                name="AcceleratorStderrThread"
-            )
-            stderr_thread.start()
-            # Poll for readiness with a pause between attempts. Without the
-            # pause all 50 probes finish before the daemon can create its socket.
-            for _attempt_index in range(50):
-                if is_accelerator_available():
-                    log_accelerator_message("Accelerator daemon started and responding")
-                    return True
-                if accelerator_process_handle.poll() is not None:
-                    # The child already exited; further probing cannot succeed.
-                    log_accelerator_message(
-                        f"Accelerator daemon exited during startup with code {accelerator_process_handle.returncode}"
-                    )
-                    return False
-                time.sleep(0.1)
-            available = is_accelerator_available()
-            if available:
-                log_accelerator_message("Accelerator daemon started successfully")
-            else:
-                log_accelerator_message("Accelerator daemon started but not responding")
-            return available
-        except Exception as start_error:
-            log_accelerator_message(f"Failed to start accelerator daemon: {str(start_error)}")
-            return False
-def stop_accelerator_daemon() -> bool:
-    # Stops the accelerator: sets the log stop event, sends a shutdown command
-    # if the daemon is reachable, then terminates (and if needed kills) the
-    # tracked process. Always returns True. Runs under accelerator_process_lock.
-    global accelerator_process_handle
-    from ..core import state as global_state
-    with accelerator_process_lock:
-        global_state.accelerator_log_stop_event.set()
-        if is_accelerator_available():
-            try:
-                log_accelerator_message("Sending shutdown command to accelerator...")
-                client = AcceleratorClient()
-                client.shutdown_accelerator()
-            except Exception as shutdown_error:
-                # A failed shutdown command is only logged; the process is
-                # still terminated below.
-                log_accelerator_message(f"Error during shutdown command: {str(shutdown_error)}")
-        if accelerator_process_handle is not None:
-            # poll() is None means the process has not exited yet.
-            if accelerator_process_handle.poll() is None:
-                try:
-                    log_accelerator_message("Terminating accelerator process...")
-                    accelerator_process_handle.terminate()
-                    accelerator_process_handle.wait(timeout=5)
-                    log_accelerator_message("Accelerator process terminated")
-                except subprocess.TimeoutExpired:
-                    # Still running 5 seconds after terminate(): escalate to kill().
-                    log_accelerator_message("Accelerator process did not terminate, killing...")
-                    accelerator_process_handle.kill()
-                    accelerator_process_handle.wait()
-                    log_accelerator_message("Accelerator process killed")
-            accelerator_process_handle = None
-        return True
-def process_audio_with_accelerator(
-    input_file_path: str,
-    output_file_path: str
-) -> Tuple[bool, str]:
-    """Run ``AcceleratorClient.process_audio`` if the accelerator is reachable.
-
-    Returns ``(False, "Accelerator not available")`` otherwise.
-    """
-    if is_accelerator_available():
-        return AcceleratorClient().process_audio(input_file_path, output_file_path)
-    return False, "Accelerator not available"
-def convert_to_mono_with_accelerator(
-    input_file_path: str,
-    output_file_path: str
-) -> Tuple[bool, str]:
-    """Run ``AcceleratorClient.convert_to_mono`` if the accelerator is reachable.
-
-    Returns ``(False, "Accelerator not available")`` otherwise.
-    """
-    if is_accelerator_available():
-        return AcceleratorClient().convert_to_mono(input_file_path, output_file_path)
-    return False, "Accelerator not available"
-def convert_to_pcm_with_accelerator(
-    input_file_path: str,
-    output_file_path: str
-) -> Tuple[bool, str]:
-    """Run ``AcceleratorClient.convert_to_pcm`` if the accelerator is reachable.
-
-    Returns ``(False, "Accelerator not available")`` otherwise.
-    """
-    if is_accelerator_available():
-        return AcceleratorClient().convert_to_pcm(input_file_path, output_file_path)
-    return False, "Accelerator not available"
-def resample_audio_with_accelerator(
-    input_file_path: str,
-    output_file_path: str,
-    target_sample_rate: int
-) -> Tuple[bool, str]:
-    """Run ``AcceleratorClient.resample_audio`` if the accelerator is reachable.
-
-    Returns ``(False, "Accelerator not available")`` otherwise.
-    """
-    if is_accelerator_available():
-        return AcceleratorClient().resample_audio(input_file_path, output_file_path, target_sample_rate)
-    return False, "Accelerator not available"
-def get_accelerator_memory_stats() -> Optional[Dict[str, int]]:
-    """Return the accelerator's memory stats, or ``None`` when it is unreachable."""
-    if is_accelerator_available():
-        return AcceleratorClient().get_memory_stats()
-    return None
-def clear_accelerator_memory_pool() -> bool:
-    """Clear the accelerator memory pool; ``False`` when it is unreachable."""
-    if is_accelerator_available():
-        return AcceleratorClient().clear_memory_pool()
-    return False

src/audio/converter.py DELETED Viewed

@@ -1,344 +0,0 @@
-#
-# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-# SPDX-License-Identifier: Apache-2.0
-#
-import os
-import time
-import tempfile
-import numpy as np
-import scipy.io.wavfile
-from config import AUDIO_CONVERSION_QUEUE_TIMEOUT_SECONDS
-from ..core.state import (
-    temporary_files_registry,
-    temporary_files_lock,
-    audio_conversion_semaphore,
-    increment_audio_conversion_active,
-    decrement_audio_conversion_active,
-    increment_audio_conversion_waiting,
-    decrement_audio_conversion_waiting,
-    is_audio_conversion_queue_busy
-)
-from ..core.memory import trigger_background_cleanup_check
-from ..accelerator.client import (
-    is_accelerator_available,
-    convert_to_pcm_with_accelerator,
-    process_audio_with_accelerator,
-    log_accelerator_message
-)
-# Base class for failures to obtain an audio-conversion slot.
-class AudioConversionQueueError(Exception):
-    pass
-# Raised by acquire_audio_conversion_slot when the caller chose not to wait
-# and no slot could be taken immediately.
-class AudioConversionQueueBusyError(AudioConversionQueueError):
-    pass
-# Raised by acquire_audio_conversion_slot when waiting for a slot exceeded
-# AUDIO_CONVERSION_QUEUE_TIMEOUT_SECONDS.
-class AudioConversionQueueTimeoutError(AudioConversionQueueError):
-    pass
-def convert_audio_data_to_pcm_int16(audio_data):
-    """Convert a NumPy sample array to ``int16``.
-
-    * float32/float64: clipped to [-1.0, 1.0], scaled by 32767, truncated.
-    * int32 / int64: arithmetic right shift by 16 / 48 bits.
-    * uint8: shifted by -128 and multiplied by 256.
-    * int16: returned unchanged (same object).
-    * anything else: plain ``astype(np.int16)``.
-    """
-    source_dtype = audio_data.dtype
-    if source_dtype == np.float32 or source_dtype == np.float64:
-        return (np.clip(audio_data, -1.0, 1.0) * 32767).astype(np.int16)
-    if source_dtype == np.int32:
-        return (audio_data >> 16).astype(np.int16)
-    if source_dtype == np.uint8:
-        centered = audio_data.astype(np.int16) - 128
-        return (centered * 256).astype(np.int16)
-    if source_dtype == np.int16:
-        return audio_data
-    if source_dtype == np.int64:
-        return (audio_data >> 48).astype(np.int16)
-    return audio_data.astype(np.int16)
-def convert_stereo_to_mono(audio_data):
-    """Collapse a 2-D sample array to one channel by averaging.
-
-    Arrays that are not 2-D are returned unchanged. For 2-D input the longer
-    axis is treated as the sample axis: the array is transposed when it has
-    more rows than columns, then rows are averaged and cast back to the input
-    dtype. A single-row array yields that row.
-    """
-    if len(audio_data.shape) != 2:
-        return audio_data
-    row_count, column_count = audio_data.shape
-    channels_first = audio_data.T if row_count > column_count else audio_data
-    if channels_first.shape[0] > 1:
-        averaged = np.mean(channels_first, axis=0)
-        return averaged.astype(channels_first.dtype)
-    return channels_first[0]
-def register_temporary_file(file_path):
-    # Records file_path in temporary_files_registry with the current time as
-    # its value (under temporary_files_lock), then triggers a background
-    # cleanup check.
-    with temporary_files_lock:
-        temporary_files_registry[file_path] = time.time()
-    trigger_background_cleanup_check()
-def acquire_audio_conversion_slot(wait_for_slot=True):
-    """Take one audio-conversion slot from ``audio_conversion_semaphore``.
-
-    If the queue is not reported busy, a non-blocking acquire is tried first.
-    When no slot is obtained that way, the call either raises
-    ``AudioConversionQueueBusyError`` (``wait_for_slot`` is false) or blocks up
-    to ``AUDIO_CONVERSION_QUEUE_TIMEOUT_SECONDS`` and raises
-    ``AudioConversionQueueTimeoutError`` on timeout. The waiting counter is
-    incremented for the duration of the blocking wait; the active counter is
-    incremented once the slot is held.
-    """
-    slot_taken = False
-    if not is_audio_conversion_queue_busy():
-        slot_taken = audio_conversion_semaphore.acquire(blocking=False)
-    if not slot_taken:
-        if not wait_for_slot:
-            raise AudioConversionQueueBusyError(
-                "Audio conversion is currently in progress for another user. Please wait a moment and try again."
-            )
-        increment_audio_conversion_waiting()
-        try:
-            slot_taken = audio_conversion_semaphore.acquire(
-                blocking=True,
-                timeout=AUDIO_CONVERSION_QUEUE_TIMEOUT_SECONDS
-            )
-            if not slot_taken:
-                raise AudioConversionQueueTimeoutError(
-                    "Audio conversion queue timed out. The server is busy processing other requests. Please try again in a moment."
-                )
-        finally:
-            # The waiting counter is restored whether or not the wait succeeded.
-            decrement_audio_conversion_waiting()
-    increment_audio_conversion_active()
-def release_audio_conversion_slot():
-    # Counterpart of acquire_audio_conversion_slot: decrements the active
-    # counter, then releases the semaphore.
-    decrement_audio_conversion_active()
-    audio_conversion_semaphore.release()
-def convert_wav_file_to_pcm_format_with_accelerator(input_path):
-    """Convert ``input_path`` to PCM via the accelerator into a new temp file.
-
-    Returns ``(output_path, None)`` on success, with the file registered for
-    cleanup. On failure the temp file is removed on a best-effort basis and
-    ``(None, message)`` is returned.
-    """
-    # Reserve a path only; the handle is closed before the accelerator writes to it.
-    with tempfile.NamedTemporaryFile(suffix="_accel_pcm_converted.wav", delete=False) as placeholder:
-        output_path = placeholder.name
-    success, result_message = convert_to_pcm_with_accelerator(input_path, output_path)
-    if not success:
-        if os.path.exists(output_path):
-            try:
-                os.remove(output_path)
-            except Exception:
-                pass
-        return None, result_message
-    register_temporary_file(output_path)
-    return output_path, None
-def convert_wav_file_to_pcm_format(input_path):
-    """Convert a WAV file to mono 16-bit PCM in a new temporary file.
-
-    The accelerator is tried first when available; on failure the conversion
-    falls back to SciPy/NumPy in-process.
-
-    Returns:
-        ``(output_path, None)`` on success (the file is registered for
-        cleanup), or ``(None, error_message)`` on failure.
-    """
-    if is_accelerator_available():
-        log_accelerator_message(f"Using accelerator for PCM conversion: {input_path}")
-        accelerated_result, accelerated_error = convert_wav_file_to_pcm_format_with_accelerator(input_path)
-        if accelerated_result is not None:
-            return accelerated_result, None
-        log_accelerator_message(f"Accelerator PCM conversion failed, falling back to Python: {accelerated_error}")
-    output_path = None
-    try:
-        sample_rate, audio_data = scipy.io.wavfile.read(input_path)
-        if len(audio_data.shape) > 1:
-            audio_data = convert_stereo_to_mono(audio_data)
-        audio_data_pcm = convert_audio_data_to_pcm_int16(audio_data)
-        # Close the handle right away: only the reserved name is needed, and
-        # an open handle would leak a descriptor for the life of the process.
-        with tempfile.NamedTemporaryFile(suffix="_pcm_converted.wav", delete=False) as output_file:
-            output_path = output_file.name
-        scipy.io.wavfile.write(output_path, sample_rate, audio_data_pcm)
-        register_temporary_file(output_path)
-        return output_path, None
-    except Exception as conversion_error:
-        # Do not leave an unregistered temp file behind on failure.
-        if output_path is not None:
-            try:
-                os.remove(output_path)
-            except OSError:
-                pass
-        return None, f"Failed to convert WAV to PCM format: {str(conversion_error)}"
-def convert_audio_using_pydub(input_path, target_sample_rate=None):
-    """Convert any pydub-readable file to mono 16-bit WAV in a temp file.
-
-    Args:
-        input_path: Source audio file.
-        target_sample_rate: Optional frame rate to resample to.
-
-    Returns:
-        ``(output_path, None)`` on success. On failure ``(None, reason)`` where
-        reason is ``"pydub_library_not_available"`` if pydub cannot be
-        imported, ``"ffmpeg_not_available"`` if the error text mentions
-        ffmpeg/ffprobe, or a descriptive message otherwise.
-    """
-    output_path = None
-    try:
-        from pydub import AudioSegment
-        audio_segment = AudioSegment.from_file(input_path)
-        audio_segment = audio_segment.set_channels(1)
-        audio_segment = audio_segment.set_sample_width(2)
-        if target_sample_rate is not None:
-            audio_segment = audio_segment.set_frame_rate(target_sample_rate)
-        # Reserve the name and close the handle so no descriptor is leaked.
-        with tempfile.NamedTemporaryFile(suffix="_pydub_converted.wav", delete=False) as output_file:
-            output_path = output_file.name
-        audio_segment.export(output_path, format="wav")
-        register_temporary_file(output_path)
-        return output_path, None
-    except ImportError:
-        return None, "pydub_library_not_available"
-    except Exception as conversion_error:
-        # Remove the unregistered temp file if export failed part-way.
-        if output_path is not None:
-            try:
-                os.remove(output_path)
-            except OSError:
-                pass
-        error_message = str(conversion_error)
-        if "ffmpeg" in error_message.lower() or "ffprobe" in error_message.lower():
-            return None, "ffmpeg_not_available"
-        return None, f"Failed to convert audio using pydub: {error_message}"
-def convert_audio_using_soundfile(input_path):
-    """Convert a soundfile-readable file to mono 16-bit WAV in a temp file.
-
-    Multi-channel input is averaged across axis 1 before conversion.
-
-    Returns:
-        ``(output_path, None)`` on success, ``(None,
-        "soundfile_library_not_available")`` if soundfile cannot be imported,
-        or ``(None, message)`` on any other failure.
-    """
-    output_path = None
-    try:
-        import soundfile
-        audio_data, sample_rate = soundfile.read(input_path, dtype='float32')
-        if len(audio_data.shape) > 1:
-            audio_data = np.mean(audio_data, axis=1)
-        audio_data_pcm = convert_audio_data_to_pcm_int16(audio_data)
-        # Reserve the name and close the handle so no descriptor is leaked.
-        with tempfile.NamedTemporaryFile(suffix="_soundfile_converted.wav", delete=False) as output_file:
-            output_path = output_file.name
-        scipy.io.wavfile.write(output_path, sample_rate, audio_data_pcm)
-        register_temporary_file(output_path)
-        return output_path, None
-    except ImportError:
-        return None, "soundfile_library_not_available"
-    except Exception as conversion_error:
-        # Remove the unregistered temp file if writing failed.
-        if output_path is not None:
-            try:
-                os.remove(output_path)
-            except OSError:
-                pass
-        return None, f"Failed to convert audio using soundfile: {str(conversion_error)}"
-def convert_audio_using_librosa(input_path):
-    """Convert a librosa-readable file to mono 16-bit WAV in a temp file.
-
-    The file is loaded with ``sr=None, mono=True``.
-
-    Returns:
-        ``(output_path, None)`` on success, ``(None,
-        "librosa_library_not_available")`` if librosa cannot be imported, or
-        ``(None, message)`` on any other failure.
-    """
-    output_path = None
-    try:
-        import librosa
-        audio_data, sample_rate = librosa.load(input_path, sr=None, mono=True)
-        audio_data_pcm = convert_audio_data_to_pcm_int16(audio_data)
-        # Reserve the name and close the handle so no descriptor is leaked.
-        with tempfile.NamedTemporaryFile(suffix="_librosa_converted.wav", delete=False) as output_file:
-            output_path = output_file.name
-        scipy.io.wavfile.write(output_path, sample_rate, audio_data_pcm)
-        register_temporary_file(output_path)
-        return output_path, None
-    except ImportError:
-        return None, "librosa_library_not_available"
-    except Exception as conversion_error:
-        # Remove the unregistered temp file if writing failed.
-        if output_path is not None:
-            try:
-                os.remove(output_path)
-            except OSError:
-                pass
-        return None, f"Failed to convert audio using librosa: {str(conversion_error)}"
-def convert_non_wav_audio_to_wav(input_path):
-    # Tries pydub, then soundfile, then librosa, stopping at the first success.
-    # Returns (converted_path, None, method_name) on success, or
-    # (None, error_message, None) when every backend failed.
-    converted_path, pydub_error = convert_audio_using_pydub(input_path)
-    if converted_path is not None:
-        return converted_path, None, "pydub"
-    converted_path, soundfile_error = convert_audio_using_soundfile(input_path)
-    if converted_path is not None:
-        return converted_path, None, "soundfile"
-    converted_path, librosa_error = convert_audio_using_librosa(input_path)
-    if converted_path is not None:
-        return converted_path, None, "librosa"
-    # Separate "backend not installed" sentinels from real conversion errors.
-    # A missing ffmpeg counts as pydub being unavailable.
-    pydub_unavailable = pydub_error in ["pydub_library_not_available", "ffmpeg_not_available"]
-    soundfile_unavailable = soundfile_error == "soundfile_library_not_available"
-    librosa_unavailable = librosa_error == "librosa_library_not_available"
-    if pydub_unavailable and soundfile_unavailable and librosa_unavailable:
-        return None, "No audio conversion library is available on the server. Please upload a WAV file directly.", None
-    # Only errors from backends that were actually available are reported.
-    all_errors = []
-    if not pydub_unavailable and pydub_error:
-        all_errors.append(f"pydub: {pydub_error}")
-    if not soundfile_unavailable and soundfile_error:
-        all_errors.append(f"soundfile: {soundfile_error}")
-    if not librosa_unavailable and librosa_error:
-        all_errors.append(f"librosa: {librosa_error}")
-    if all_errors:
-        combined_error = " | ".join(all_errors)
-        return None, f"Audio conversion failed with all available methods. {combined_error}", None
-    return None, "Audio conversion failed. Please try uploading a different audio file or use WAV format.", None
-def prepare_audio_file_for_voice_cloning_internal(input_path):
-    """Validate an uploaded audio file and convert it to PCM WAV.
-
-    Returns:
-        ``(path, error, was_converted, detected_format)``:
-
-        * validation failure: ``(None, error, False, detected_format)``;
-        * WAV input: ``(pcm_path, None, False, 'wav')`` or
-          ``(None, error, False, 'wav')``;
-        * other formats: ``(path, None, True, detected_format)`` on success,
-          ``(None, error, True, detected_format)`` when no backend could
-          convert the file.
-    """
-    from .validator import perform_comprehensive_audio_validation
-    is_valid, is_wav_format, detected_format, validation_error = perform_comprehensive_audio_validation(input_path)
-    if not is_valid:
-        return None, validation_error, False, detected_format
-    if is_wav_format:
-        converted_path, conversion_error = convert_wav_file_to_pcm_format(input_path)
-        if converted_path is not None:
-            return converted_path, None, False, 'wav'
-        return None, conversion_error, False, 'wav'
-    converted_path, conversion_error, _conversion_method = convert_non_wav_audio_to_wav(input_path)
-    if converted_path is not None:
-        final_path, _pcm_error = convert_wav_file_to_pcm_format(converted_path)
-        if final_path is not None:
-            return final_path, None, True, detected_format
-        # PCM normalization failed: fall back to the intermediate WAV and
-        # deliberately discard the normalization error.
-        return converted_path, None, True, detected_format
-    return None, conversion_error, True, detected_format
-def prepare_audio_file_for_voice_cloning(input_path, wait_for_queue=True):
-    """Run the voice-cloning audio preparation while holding a conversion slot.
-
-    Returns the 4-tuple from ``prepare_audio_file_for_voice_cloning_internal``.
-    If the slot cannot be obtained (busy or timed out), returns
-    ``(None, message, False, None)``. The slot is always released once taken.
-    """
-    try:
-        acquire_audio_conversion_slot(wait_for_slot=wait_for_queue)
-    except (AudioConversionQueueBusyError, AudioConversionQueueTimeoutError) as queue_error:
-        return None, str(queue_error), False, None
-    try:
-        prepared = prepare_audio_file_for_voice_cloning_internal(input_path)
-        result_path, result_error, was_converted, detected_format = prepared
-        return result_path, result_error, was_converted, detected_format
-    finally:
-        release_audio_conversion_slot()
-def convert_audio_to_pcm_wav(input_path):
-    # Best-effort wrapper around prepare_audio_file_for_voice_cloning: returns
-    # the converted path on success. On failure it prints a warning (when an
-    # error message is available) and returns the original input_path unchanged.
-    converted_path, error, was_converted, detected_format = prepare_audio_file_for_voice_cloning(input_path)
-    if converted_path is not None:
-        return converted_path
-    if error:
-        print(f"Warning: Audio conversion failed - {error}")
-    return input_path

src/audio/validator.py DELETED Viewed

@@ -1,268 +0,0 @@
-#
-# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-# SPDX-License-Identifier: Apache-2.0
-#
-import os
-import wave
-from config import (
-    SUPPORTED_AUDIO_EXTENSIONS,
-    AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES,
-    MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES
-)
-def build_format_display_names_from_supported_extensions():
-    """Build the format-code to display-name mapping.
-
-    Each supported extension, with leading dots stripped, maps to its entry in
-    ``AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES`` when present, otherwise to the
-    upper-cased code. An ``"unknown"`` entry mapping to ``"Unknown"`` is
-    always set last.
-    """
-    format_codes = (extension.lstrip(".") for extension in SUPPORTED_AUDIO_EXTENSIONS)
-    display_names = {
-        code: (AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES[code] if code in AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES else code.upper())
-        for code in format_codes
-    }
-    display_names["unknown"] = "Unknown"
-    return display_names
-FORMAT_DISPLAY_NAMES = build_format_display_names_from_supported_extensions()
-def get_audio_file_extension(file_path):
-    """Return the lower-cased extension of ``file_path`` (with its dot).
-
-    Returns ``None`` for a falsy path and ``""`` when there is no extension.
-    """
-    if file_path:
-        return os.path.splitext(file_path)[1].lower()
-    return None
-def is_supported_audio_extension(file_path):
-    """Report whether the file's extension is in ``SUPPORTED_AUDIO_EXTENSIONS``."""
-    detected_extension = get_audio_file_extension(file_path)
-    if detected_extension is not None:
-        return detected_extension in SUPPORTED_AUDIO_EXTENSIONS
-    return False
-def format_file_size_for_display(size_bytes):
-    """Format a byte count as ``"N bytes"``, ``"N.N KB"`` or ``"N.NN MB"``.
-
-    Units are binary (1 KB = 1024 bytes); sizes of 1 MiB and above are always
-    shown in MB.
-    """
-    bytes_per_kb = 1024
-    bytes_per_mb = 1024 * 1024
-    if size_bytes < bytes_per_kb:
-        return f"{size_bytes} bytes"
-    if size_bytes < bytes_per_mb:
-        return f"{size_bytes / 1024:.1f} KB"
-    return f"{size_bytes / (1024 * 1024):.2f} MB"
-def validate_file_size_for_voice_cloning(file_path):
-    # Checks that the file is no larger than MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES.
-    # Returns (True, None) when acceptable, otherwise (False, user-facing message).
-    if not file_path:
-        return False, "No audio file provided."
-    try:
-        file_size = os.path.getsize(file_path)
-    except OSError as size_error:
-        return False, f"Cannot read file size: {str(size_error)}"
-    if file_size > MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES:
-        max_size_display = format_file_size_for_display(MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES)
-        actual_size_display = format_file_size_for_display(file_size)
-        return False, f"Audio file size ({actual_size_display}) exceeds the maximum allowed size of {max_size_display}. Please upload a smaller audio file."
-    return True, None
-def validate_file_exists_and_readable(file_path):
-    # Basic filesystem checks before any audio parsing: the path is non-empty,
-    # exists, is a regular file, is at least 44 bytes, and can be opened and read.
-    # Returns (True, None) or (False, user-facing message).
-    if not file_path:
-        return False, "No audio file provided."
-    if not os.path.exists(file_path):
-        return False, "Audio file does not exist."
-    if not os.path.isfile(file_path):
-        return False, "The provided path is not a valid file."
-    try:
-        file_size = os.path.getsize(file_path)
-    except OSError as size_error:
-        return False, f"Cannot read file size: {str(size_error)}"
-    if file_size == 0:
-        return False, "Audio file is empty (0 bytes)."
-    # NOTE(review): 44 is presumably the canonical WAV header size, but this
-    # minimum is applied to every format -- confirm that is intended.
-    if file_size < 44:
-        return False, "Audio file is too small to be a valid audio file."
-    try:
-        # Reading a single byte proves the file can actually be opened and read.
-        with open(file_path, "rb") as test_file:
-            test_file.read(1)
-    except IOError as read_error:
-        return False, f"Audio file is not readable: {str(read_error)}"
-    return True, None
-def detect_audio_format_from_header(file_path):
-    # Guesses the audio container from the first 32 bytes of the file.
-    # Returns (format_code, None) on a match; ("unknown", message) when nothing
-    # matches and the extension is unsupported; (None, message) when the file
-    # is shorter than 4 bytes or cannot be read.
-    # The checks run in order and the first match wins.
-    try:
-        with open(file_path, "rb") as audio_file:
-            header_bytes = audio_file.read(32)
-            if len(header_bytes) < 4:
-                return None, "File is too small to determine audio format."
-            # "RIFF" at offset 0 plus "WAVE" at offset 8 -> wav.
-            if len(header_bytes) >= 12:
-                if header_bytes[:4] == b"RIFF" and header_bytes[8:12] == b"WAVE":
-                    return "wav", None
-            # Leading "ID3" tag -> mp3.
-            if header_bytes[:3] == b"ID3":
-                return "mp3", None
-            if len(header_bytes) >= 2:
-                first_two_bytes = header_bytes[:2]
-                # Two-byte prefixes treated as MP3 when no ID3 tag is present.
-                mp3_sync_bytes = [
-                    b"\xff\xfb",
-                    b"\xff\xfa",
-                    b"\xff\xf3",
-                    b"\xff\xf2",
-                    b"\xff\xe0",
-                    b"\xff\xe2",
-                    b"\xff\xe3"
-                ]
-                if first_two_bytes in mp3_sync_bytes:
-                    return "mp3", None
-            if header_bytes[:4] == b"fLaC":
-                return "flac", None
-            if header_bytes[:4] == b"OggS":
-                return "ogg", None
-            # "FORM" at offset 0 plus "AIFF"/"AIFC" at offset 8 -> aiff.
-            if len(header_bytes) >= 12:
-                if header_bytes[:4] == b"FORM" and header_bytes[8:12] in [b"AIFF", b"AIFC"]:
-                    return "aiff", None
-            # Any file with "ftyp" at offset 4 is reported as m4a.
-            if len(header_bytes) >= 8:
-                if header_bytes[4:8] == b"ftyp":
-                    return "m4a", None
-            # This 4-byte magic is reported as webm.
-            if len(header_bytes) >= 4:
-                if header_bytes[:4] == b"\x1aE\xdf\xa3":
-                    return "webm", None
-            # These tags at offset 4 are also reported as m4a.
-            if len(header_bytes) >= 8:
-                if header_bytes[4:8] in [b"mdat", b"moov", b"free", b"skip", b"wide"]:
-                    return "m4a", None
-            # No signature matched: trust the extension if it is a supported one.
-            file_extension = get_audio_file_extension(file_path)
-            if file_extension and file_extension in SUPPORTED_AUDIO_EXTENSIONS:
-                return file_extension.lstrip("."), None
-            return "unknown", "Could not determine audio format from file header. The file may be corrupted or in an unsupported format."
-    except IOError as io_error:
-        return None, f"Error reading file header: {str(io_error)}"
-    except Exception as detection_error:
-        return None, f"Unexpected error detecting audio format: {str(detection_error)}"
-def validate_wav_file_structure(file_path):
-    # Opens the file with the standard-library wave module and range-checks
-    # its parameters: 1-16 channels, 1-4 byte samples, 100-384000 Hz, at least
-    # one frame, and a duration between 0.1 and 60 seconds.
-    # Returns (True, None) or (False, user-facing message).
-    try:
-        with wave.open(file_path, "rb") as wav_file:
-            number_of_channels = wav_file.getnchannels()
-            sample_width_bytes = wav_file.getsampwidth()
-            sample_rate = wav_file.getframerate()
-            number_of_frames = wav_file.getnframes()
-            if number_of_channels < 1:
-                return False, "WAV file has no audio channels."
-            if number_of_channels > 16:
-                return False, f"WAV file has too many channels ({number_of_channels}). Maximum supported is 16."
-            if sample_width_bytes < 1:
-                return False, "WAV file has invalid sample width (less than 1 byte)."
-            if sample_width_bytes > 4:
-                return False, f"WAV file has unsupported sample width ({sample_width_bytes} bytes). Maximum supported is 4 bytes (32-bit)."
-            if sample_rate < 100:
-                return False, f"WAV file has invalid sample rate ({sample_rate} Hz). Minimum supported is 100 Hz."
-            if sample_rate > 384000:
-                return False, f"WAV file has unsupported sample rate ({sample_rate} Hz). Maximum supported is 384000 Hz."
-            if number_of_frames < 1:
-                return False, "WAV file contains no audio frames."
-            # sample_rate is at least 100 here, so the division cannot be by zero.
-            audio_duration_seconds = number_of_frames / sample_rate
-            if audio_duration_seconds < 0.1:
-                return False, f"Audio is too short ({audio_duration_seconds:.2f} seconds). Minimum duration is 0.1 seconds."
-            if audio_duration_seconds > 60:
-                return False, f"Audio is too long ({audio_duration_seconds:.0f} seconds). Maximum duration is 1 minute."
-            return True, None
-    except wave.Error as wav_error:
-        # Map selected wave.Error message texts to friendlier messages.
-        error_message = str(wav_error)
-        if "file does not start with RIFF id" in error_message:
-            return False, "File has .wav extension but is not a valid WAV file. It may be a different audio format renamed to .wav."
-        if "unknown format" in error_message.lower():
-            return False, "WAV file uses an unsupported audio encoding format."
-        return False, f"Invalid WAV file structure: {error_message}"
-    except EOFError:
-        return False, "WAV file is truncated or corrupted (unexpected end of file)."
-    except Exception as validation_error:
-        return False, f"Error validating WAV file: {str(validation_error)}"
-def perform_comprehensive_audio_validation(file_path):
-    # Runs the validation steps in order: file exists and is readable, the
-    # extension is supported, the format is detected from the header, and (for
-    # WAV only) the WAV structure is valid.
-    # Returns (is_valid, is_wav_format, detected_format, error_message).
-    file_exists_valid, file_exists_error = validate_file_exists_and_readable(file_path)
-    if not file_exists_valid:
-        return False, False, None, file_exists_error
-    file_extension = get_audio_file_extension(file_path)
-    if not is_supported_audio_extension(file_path):
-        supported_formats_list = ", ".join(SUPPORTED_AUDIO_EXTENSIONS)
-        return False, False, None, f"Unsupported file format '{file_extension}'. Supported formats are: {supported_formats_list}"
-    detected_format, detection_error = detect_audio_format_from_header(file_path)
-    if detected_format is None:
-        return False, False, None, detection_error
-    # A detected format of "unknown" is not None, so it passes through as valid.
-    is_wav_format = (detected_format == "wav")
-    if is_wav_format:
-        wav_structure_valid, wav_structure_error = validate_wav_file_structure(file_path)
-        if not wav_structure_valid:
-            return False, True, "wav", wav_structure_error
-    return True, is_wav_format, detected_format, None
-def perform_voice_clone_file_validation(file_path):
-    # Applies the voice-clone size limit first, then the full audio validation.
-    # Returns the same 4-tuple shape as perform_comprehensive_audio_validation.
-    file_size_valid, file_size_error = validate_file_size_for_voice_cloning(file_path)
-    if not file_size_valid:
-        return False, False, None, file_size_error
-    return perform_comprehensive_audio_validation(file_path)
-def get_format_display_name(format_code):
-    """Return the display name for a format code.
-
-    ``None`` maps to ``"Unknown"``. Codes missing from
-    ``FORMAT_DISPLAY_NAMES`` fall back to their upper-cased form.
-    """
-    if format_code is None:
-        return "Unknown"
-    is_known_code = format_code in FORMAT_DISPLAY_NAMES
-    return FORMAT_DISPLAY_NAMES[format_code] if is_known_code else format_code.upper()

src/core/authentication.py DELETED Viewed

@@ -1,23 +0,0 @@
-#
-# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-# SPDX-License-Identifier: Apache-2.0
-#
-from config import HF_TOKEN
-from huggingface_hub import login
-def authenticate_huggingface():
-    if HF_TOKEN:
-        try:
-            login(token=HF_TOKEN, add_to_git_credential=False)
-            print("Authenticated with Hugging Face", flush=True)
-        except Exception as authentication_error:
-            print(f"Hugging Face authentication failed: {authentication_error}", flush=True)
-            print("Voice cloning may not be available", flush=True)
-    else:
-        print("Missing Hugging Face authentication required for the license agreement", flush=True)
-def get_huggingface_token():
-    """Return the HF_TOKEN value imported from config (falsy when not configured)."""
-    return HF_TOKEN

src/core/memory.py DELETED Viewed

@@ -1,394 +0,0 @@
-#
-# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-# SPDX-License-Identifier: Apache-2.0
-#
-import os
-import gc
-import time
-import atexit
-import threading
-import torch
-from config import (
-    TEMPORARY_FILE_LIFETIME_SECONDS,
-    BACKGROUND_CLEANUP_INTERVAL,
-    MEMORY_WARNING_THRESHOLD,
-    MEMORY_CRITICAL_THRESHOLD,
-    MEMORY_CHECK_INTERVAL,
-    MEMORY_IDLE_TARGET,
-    MAXIMUM_MEMORY_USAGE
-)
-from ..core.state import (
-    temporary_files_registry,
-    temporary_files_lock,
-    memory_enforcement_lock,
-    background_cleanup_thread,
-    background_cleanup_stop_event,
-    background_cleanup_trigger_event,
-    check_if_generation_is_currently_active,
-    get_text_to_speech_manager,
-    is_model_in_use
-)
-def get_current_memory_usage():
-    try:
-        with open('/proc/self/status', 'r') as status_file:
-            for line in status_file:
-                if line.startswith('VmRSS:'):
-                    memory_value_kb = int(line.split()[1])
-                    return memory_value_kb * 1024
-    except Exception:
-        pass
-    try:
-        with open('/proc/self/statm', 'r') as statm_file:
-            statm_values = statm_file.read().split()
-            resident_pages = int(statm_values[1])
-            page_size = os.sysconf('SC_PAGE_SIZE')
-            return resident_pages * page_size
-    except Exception:
-        pass
-    try:
-        import resource
-        import platform
-        memory_usage_kilobytes = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
-        if platform.system() == "Darwin":
-            return memory_usage_kilobytes
-        else:
-            return memory_usage_kilobytes * 1024
-    except Exception:
-        pass
-    return 0
-def is_memory_usage_within_limit():
-    current_memory_usage = get_current_memory_usage()
-    return current_memory_usage < MAXIMUM_MEMORY_USAGE
-def is_memory_usage_approaching_limit():
-    current_memory_usage = get_current_memory_usage()
-    return current_memory_usage >= MEMORY_WARNING_THRESHOLD
-def is_memory_usage_critical():
-    current_memory_usage = get_current_memory_usage()
-    return current_memory_usage >= MEMORY_CRITICAL_THRESHOLD
-def is_memory_above_idle_target():
-    current_memory_usage = get_current_memory_usage()
-    return current_memory_usage > MEMORY_IDLE_TARGET
-def force_garbage_collection():
-    gc.collect(0)
-    gc.collect(1)
-    gc.collect(2)
-    if torch.cuda.is_available():
-        try:
-            torch.cuda.empty_cache()
-            torch.cuda.synchronize()
-        except Exception:
-            pass
-def memory_cleanup():
-    force_garbage_collection()
-    try:
-        import ctypes
-        libc = ctypes.CDLL("libc.so.6")
-        libc.malloc_trim(0)
-    except Exception:
-        pass
-    force_garbage_collection()
-def perform_memory_cleanup():
-    force_garbage_collection()
-    tts_manager = get_text_to_speech_manager()
-    if tts_manager is not None:
-        try:
-            tts_manager.evict_least_recently_used_voice_states()
-        except Exception:
-            pass
-    memory_cleanup()
-def cleanup_expired_temporary_files():
-    current_timestamp = time.time()
-    expired_files = []
-    with temporary_files_lock:
-        for file_path, creation_timestamp in list(temporary_files_registry.items()):
-            if current_timestamp - creation_timestamp > TEMPORARY_FILE_LIFETIME_SECONDS:
-                expired_files.append(file_path)
-        for file_path in expired_files:
-            try:
-                if os.path.exists(file_path):
-                    os.remove(file_path)
-                del temporary_files_registry[file_path]
-            except Exception:
-                pass
-def cleanup_all_temporary_files_immediately():
-    with temporary_files_lock:
-        for file_path in list(temporary_files_registry.keys()):
-            try:
-                if os.path.exists(file_path):
-                    os.remove(file_path)
-                del temporary_files_registry[file_path]
-            except Exception:
-                pass
-def has_temporary_files_pending_cleanup():
-    with temporary_files_lock:
-        if len(temporary_files_registry) == 0:
-            return False
-        current_timestamp = time.time()
-        for file_path, creation_timestamp in temporary_files_registry.items():
-            if current_timestamp - creation_timestamp > TEMPORARY_FILE_LIFETIME_SECONDS:
-                return True
-        return False
-def has_any_temporary_files_registered():
-    with temporary_files_lock:
-        return len(temporary_files_registry) > 0
-def calculate_time_until_next_file_expiration():
-    with temporary_files_lock:
-        if len(temporary_files_registry) == 0:
-            return None
-        current_timestamp = time.time()
-        minimum_time_until_expiration = None
-        for file_path, creation_timestamp in temporary_files_registry.items():
-            time_since_creation = current_timestamp - creation_timestamp
-            time_until_expiration = TEMPORARY_FILE_LIFETIME_SECONDS - time_since_creation
-            if time_until_expiration <= 0:
-                return 0
-            if minimum_time_until_expiration is None or time_until_expiration < minimum_time_until_expiration:
-                minimum_time_until_expiration = time_until_expiration
-        return minimum_time_until_expiration
-def enforce_memory_limit_if_exceeded():
-    with memory_enforcement_lock:
-        generation_is_active = check_if_generation_is_currently_active()
-        model_is_in_use = is_model_in_use()
-        current_memory_usage = get_current_memory_usage()
-        if current_memory_usage < MEMORY_WARNING_THRESHOLD:
-            return True
-        force_garbage_collection()
-        current_memory_usage = get_current_memory_usage()
-        if current_memory_usage < MEMORY_WARNING_THRESHOLD:
-            return True
-        tts_manager = get_text_to_speech_manager()
-        if tts_manager is not None:
-            try:
-                tts_manager.evict_least_recently_used_voice_states()
-            except Exception:
-                pass
-        memory_cleanup()
-        current_memory_usage = get_current_memory_usage()
-        if current_memory_usage < MEMORY_CRITICAL_THRESHOLD:
-            return True
-        generation_is_active = check_if_generation_is_currently_active()
-        model_is_in_use = is_model_in_use()
-        if generation_is_active or model_is_in_use:
-            if tts_manager is not None:
-                try:
-                    tts_manager.clear_voice_state_cache_completely()
-                except Exception:
-                    pass
-            cleanup_all_temporary_files_immediately()
-            memory_cleanup()
-            return current_memory_usage < MAXIMUM_MEMORY_USAGE
-        if tts_manager is not None:
-            try:
-                tts_manager.clear_voice_state_cache_completely()
-            except Exception:
-                pass
-        cleanup_all_temporary_files_immediately()
-        memory_cleanup()
-        current_memory_usage = get_current_memory_usage()
-        return current_memory_usage < MAXIMUM_MEMORY_USAGE
-def perform_idle_memory_reduction():
-    """Bring memory usage down to MEMORY_IDLE_TARGET while nothing is running.
-
-    Does nothing if a generation is active or the model is in use. Otherwise
-    escalates under memory_enforcement_lock: garbage collection, then LRU
-    voice-state eviction plus memory_cleanup(), then a full voice-state cache
-    clear plus memory_cleanup(). After each step it returns early if the
-    target was reached or if work started in the meantime.
-    """
-    if check_if_generation_is_currently_active():
-        return
-    if is_model_in_use():
-        return
-    with memory_enforcement_lock:
-        current_memory_usage = get_current_memory_usage()
-        if current_memory_usage <= MEMORY_IDLE_TARGET:
-            return
-        force_garbage_collection()
-        current_memory_usage = get_current_memory_usage()
-        if current_memory_usage <= MEMORY_IDLE_TARGET:
-            return
-        # Re-check activity before touching the voice-state cache.
-        if check_if_generation_is_currently_active() or is_model_in_use():
-            return
-        tts_manager = get_text_to_speech_manager()
-        if tts_manager is not None:
-            try:
-                tts_manager.evict_least_recently_used_voice_states()
-            except Exception:
-                pass
-        memory_cleanup()
-        current_memory_usage = get_current_memory_usage()
-        if current_memory_usage <= MEMORY_IDLE_TARGET:
-            return
-        # Re-check again before the most aggressive step.
-        if check_if_generation_is_currently_active() or is_model_in_use():
-            return
-        if tts_manager is not None:
-            try:
-                tts_manager.clear_voice_state_cache_completely()
-            except Exception:
-                pass
-        memory_cleanup()
-def perform_background_cleanup_cycle():
-    """Body of the background cleanup thread; loops until the stop event is set.
-
-    Each iteration chooses how long to sleep, then removes expired temporary
-    files and, at most once per MEMORY_CHECK_INTERVAL and only while nothing
-    is generating, enforces the memory limit or reduces idle memory.
-    """
-    last_memory_check_timestamp = 0
-    while not background_cleanup_stop_event.is_set():
-        time_until_next_expiration = calculate_time_until_next_file_expiration()
-        current_timestamp = time.time()
-        # NOTE(review): this value is recomputed after the wait below before it is read.
-        time_since_last_memory_check = current_timestamp - last_memory_check_timestamp
-        if time_until_next_expiration is not None:
-            # Temporary files are registered: wake up shortly after the next one expires.
-            if time_until_next_expiration <= 0:
-                wait_duration = 1
-            else:
-                wait_duration = min(
-                    time_until_next_expiration + 1,
-                    MEMORY_CHECK_INTERVAL,
-                    BACKGROUND_CLEANUP_INTERVAL
-                )
-        else:
-            should_check_memory = (
-                is_memory_above_idle_target() and
-                not check_if_generation_is_currently_active() and
-                not is_model_in_use()
-            )
-            if should_check_memory:
-                wait_duration = MEMORY_CHECK_INTERVAL
-            else:
-                # Nothing to do: sleep until triggered or until the interval elapses.
-                background_cleanup_trigger_event.clear()
-                triggered = background_cleanup_trigger_event.wait(timeout=BACKGROUND_CLEANUP_INTERVAL)
-                if background_cleanup_stop_event.is_set():
-                    break
-                if triggered:
-                    # Re-evaluate from the top of the loop.
-                    continue
-                else:
-                    if not check_if_generation_is_currently_active() and not is_model_in_use():
-                        perform_idle_memory_reduction()
-                    continue
-        # Sleeping on the stop event lets shutdown interrupt the wait immediately.
-        background_cleanup_stop_event.wait(timeout=wait_duration)
-        if background_cleanup_stop_event.is_set():
-            break
-        if has_temporary_files_pending_cleanup():
-            cleanup_expired_temporary_files()
-        current_timestamp = time.time()
-        time_since_last_memory_check = current_timestamp - last_memory_check_timestamp
-        if time_since_last_memory_check >= MEMORY_CHECK_INTERVAL:
-            generation_active = check_if_generation_is_currently_active()
-            model_in_use = is_model_in_use()
-            if not generation_active and not model_in_use:
-                if is_memory_usage_critical():
-                    enforce_memory_limit_if_exceeded()
-                elif is_memory_above_idle_target():
-                    perform_idle_memory_reduction()
-            last_memory_check_timestamp = current_timestamp
-def trigger_background_cleanup_check():
-    """Set the trigger event that perform_background_cleanup_cycle waits on when idle."""
-    background_cleanup_trigger_event.set()
-def start_background_cleanup_thread():
-    global background_cleanup_thread
-    from ..core import state as global_state
-    if global_state.background_cleanup_thread is None or not global_state.background_cleanup_thread.is_alive():
-        background_cleanup_stop_event.clear()
-        background_cleanup_trigger_event.clear()
-        global_state.background_cleanup_thread = threading.Thread(
-            target=perform_background_cleanup_cycle,
-            daemon=True,
-            name="BackgroundCleanupThread"
-        )
-        global_state.background_cleanup_thread.start()
-def stop_background_cleanup_thread():
-    from ..core import state as global_state
-    background_cleanup_stop_event.set()
-    background_cleanup_trigger_event.set()
-    if global_state.background_cleanup_thread is not None and global_state.background_cleanup_thread.is_alive():
-        global_state.background_cleanup_thread.join(timeout=5)
-atexit.register(stop_background_cleanup_thread)

src/core/state.py DELETED Viewed

@@ -1,147 +0,0 @@
-#
-# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-# SPDX-License-Identifier: Apache-2.0
-#
-import threading
-# Guards is_currently_generating and stop_generation_requested.
-generation_state_lock = threading.Lock()
-is_currently_generating = False
-stop_generation_requested = False
-# Maps temporary file path -> creation timestamp; guarded by temporary_files_lock.
-temporary_files_registry = {}
-temporary_files_lock = threading.Lock()
-memory_enforcement_lock = threading.Lock()
-# Handle and control events of the background cleanup thread.
-background_cleanup_thread = None
-background_cleanup_stop_event = threading.Event()
-background_cleanup_trigger_event = threading.Event()
-# Registered through set_text_to_speech_manager().
-text_to_speech_manager = None
-# Semaphore with a single permit, so at most one holder at a time.
-audio_conversion_semaphore = threading.Semaphore(1)
-# Guards both audio conversion counters below.
-audio_conversion_queue_lock = threading.Lock()
-audio_conversion_active_count = 0
-audio_conversion_waiting_count = 0
-# NOTE(review): the accelerator_log_* objects are not used in this module -- presumably by the accelerator client; confirm.
-accelerator_log_lock = threading.Lock()
-accelerator_log_thread = None
-accelerator_log_stop_event = threading.Event()
-# Guards model_usage_count.
-model_usage_lock = threading.Lock()
-model_usage_count = 0
-# Re-entrant lock held between acquire_/release_generation_protection().
-generation_protection_lock = threading.RLock()
-generation_protection_count = 0
-def set_text_to_speech_manager(manager_instance):
-    """Store manager_instance as the module-wide TTS manager (no locking is used)."""
-    global text_to_speech_manager
-    text_to_speech_manager = manager_instance
-def get_text_to_speech_manager():
-    global text_to_speech_manager
-    return text_to_speech_manager
-def check_if_generation_is_currently_active():
-    """Return the is_currently_generating flag, read under generation_state_lock."""
-    with generation_state_lock:
-        return is_currently_generating
-def set_generation_active(is_active):
-    """Set the is_currently_generating flag under generation_state_lock."""
-    global is_currently_generating
-    with generation_state_lock:
-        is_currently_generating = is_active
-def set_stop_generation_requested(requested):
-    """Set the stop_generation_requested flag under generation_state_lock."""
-    global stop_generation_requested
-    with generation_state_lock:
-        stop_generation_requested = requested
-def get_stop_generation_requested():
-    """Return the stop_generation_requested flag, read under generation_state_lock."""
-    with generation_state_lock:
-        return stop_generation_requested
-def increment_audio_conversion_active():
-    """Increment the active-conversion counter under its lock and return the new value."""
-    global audio_conversion_active_count
-    with audio_conversion_queue_lock:
-        audio_conversion_active_count += 1
-        return audio_conversion_active_count
-def decrement_audio_conversion_active():
-    """Decrement the active-conversion counter (never below 0) and return the new value."""
-    global audio_conversion_active_count
-    with audio_conversion_queue_lock:
-        audio_conversion_active_count = max(0, audio_conversion_active_count - 1)
-        return audio_conversion_active_count
-def get_audio_conversion_active_count():
-    """Return the active-conversion counter, read under audio_conversion_queue_lock."""
-    with audio_conversion_queue_lock:
-        return audio_conversion_active_count
-def increment_audio_conversion_waiting():
-    """Increment the waiting-conversion counter under its lock and return the new value."""
-    global audio_conversion_waiting_count
-    with audio_conversion_queue_lock:
-        audio_conversion_waiting_count += 1
-        return audio_conversion_waiting_count
-def decrement_audio_conversion_waiting():
-    """Decrement the waiting-conversion counter (never below 0) and return the new value."""
-    global audio_conversion_waiting_count
-    with audio_conversion_queue_lock:
-        audio_conversion_waiting_count = max(0, audio_conversion_waiting_count - 1)
-        return audio_conversion_waiting_count
-def get_audio_conversion_waiting_count():
-    """Return the waiting-conversion counter, read under audio_conversion_queue_lock."""
-    with audio_conversion_queue_lock:
-        return audio_conversion_waiting_count
-def is_audio_conversion_queue_busy():
-    """Return True while at least one audio conversion is counted as active."""
-    with audio_conversion_queue_lock:
-        return audio_conversion_active_count > 0
-def increment_model_usage():
-    """Increment the model usage counter under model_usage_lock and return the new value."""
-    global model_usage_count
-    with model_usage_lock:
-        model_usage_count += 1
-        return model_usage_count
-def decrement_model_usage():
-    global model_usage_count
-    with model_usage_lock:
-        model_usage_count = max(0, model_usage_count - 1)
-        current_count = model_usage_count
-        return current_count
-def get_model_usage_count():
-    """Return the model usage counter, read under model_usage_lock."""
-    with model_usage_lock:
-        return model_usage_count
-def is_model_in_use():
-    """Return True while the model usage counter is greater than zero."""
-    with model_usage_lock:
-        return model_usage_count > 0
-def acquire_generation_protection():
-    """Acquire generation_protection_lock, bump the protection counter and return it.
-
-    The lock stays held when this function returns; it is released by
-    release_generation_protection().
-    """
-    global generation_protection_count
-    generation_protection_lock.acquire()
-    generation_protection_count += 1
-    return generation_protection_count
-def release_generation_protection():
-    """Decrement the protection counter (never below 0) and release generation_protection_lock.
-
-    NOTE(review): presumably must be paired with acquire_generation_protection()
-    on the same thread, since the counter is changed before the release -- confirm.
-    """
-    global generation_protection_count
-    generation_protection_count = max(0, generation_protection_count - 1)
-    generation_protection_lock.release()
-def is_generation_protected():
-    """Report whether generation protection is in effect, without blocking.
-
-    If the lock can be taken immediately, the answer is whether the
-    protection counter is above zero; if it cannot, True is returned.
-    """
-    if generation_protection_lock.acquire(blocking=False):
-        is_protected = generation_protection_count > 0
-        generation_protection_lock.release()
-        return is_protected
-    return True
-def try_acquire_generation_protection_for_cleanup(timeout_seconds=0.1):
-    """Try to take generation_protection_lock for a cleanup pass.
-
-    Waits up to timeout_seconds for the lock. Returns True with the lock
-    still held (release it with release_generation_protection_for_cleanup())
-    only when the protection counter is zero; otherwise returns False with
-    the lock not held.
-    """
-    acquired = generation_protection_lock.acquire(blocking=True, timeout=timeout_seconds)
-    if acquired:
-        if generation_protection_count > 0:
-            generation_protection_lock.release()
-            return False
-        return True
-    return False
-def release_generation_protection_for_cleanup():
-    """Release generation_protection_lock without touching the protection counter."""
-    generation_protection_lock.release()

src/generation/handler.py DELETED Viewed

@@ -1,309 +0,0 @@
-#
-# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-# SPDX-License-Identifier: Apache-2.0
-#
-import gradio as gr
-from config import VOICE_MODE_CLONE, MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES
-from ..core.state import (
-    generation_state_lock,
-    get_stop_generation_requested,
-    set_stop_generation_requested,
-    is_audio_conversion_queue_busy,
-    get_audio_conversion_waiting_count,
-    acquire_generation_protection,
-    release_generation_protection
-)
-from ..core.authentication import get_huggingface_token
-from ..core.memory import (
-    has_temporary_files_pending_cleanup,
-    cleanup_expired_temporary_files,
-    perform_memory_cleanup,
-    memory_cleanup,
-    trigger_background_cleanup_check
-)
-from ..tts.manager import text_to_speech_manager, ModelNotLoadedError, ModelLoadingError
-from ..validation.text import validate_text_input
-from ..audio.validator import (
-    perform_voice_clone_file_validation,
-    get_format_display_name,
-    format_file_size_for_display,
-    validate_file_size_for_voice_cloning
-)
-from ..audio.converter import (
-    prepare_audio_file_for_voice_cloning,
-    AudioConversionQueueBusyError,
-    AudioConversionQueueTimeoutError
-)
-def check_if_generating():
-    from ..core.state import is_currently_generating
-    with generation_state_lock:
-        return is_currently_generating
-def request_generation_stop():
-    """Flag the running generation to stop and return a Gradio update with interactive=False."""
-    set_stop_generation_requested(True)
-    return gr.update(interactive=False)
-def validate_voice_clone_file_size(voice_clone_audio_file):
-    if not voice_clone_audio_file:
-        return True, None
-    file_size_valid, file_size_error = validate_file_size_for_voice_cloning(voice_clone_audio_file)
-    if not file_size_valid:
-        return False, file_size_error
-    return True, None
-def validate_and_prepare_voice_clone_audio(voice_clone_audio_file):
-    if not voice_clone_audio_file:
-        return None, "Please upload an audio file for voice cloning.", None, None
-    file_size_valid, file_size_error = validate_file_size_for_voice_cloning(voice_clone_audio_file)
-    if not file_size_valid:
-        return None, file_size_error, None, None
-    is_valid, is_wav_format, detected_format, validation_error = perform_voice_clone_file_validation(voice_clone_audio_file)
-    if not is_valid:
-        format_display_name = get_format_display_name(detected_format) if detected_format else "Unknown"
-        if validation_error:
-            if "too short" in validation_error.lower():
-                return None, f"The uploaded audio file is too short. Please upload a longer audio sample for better voice cloning results.", None, detected_format
-            if "too long" in validation_error.lower():
-                return None, f"The uploaded audio file is too long. Please upload a shorter audio sample (maximum 1 hour).", None, detected_format
-            if "empty" in validation_error.lower() or "0 bytes" in validation_error.lower():
-                return None, "The uploaded audio file is empty. Please upload a valid audio file.", None, detected_format
-            if "corrupted" in validation_error.lower() or "truncated" in validation_error.lower():
-                return None, f"The uploaded {format_display_name} file appears to be corrupted or incomplete. Please upload a valid audio file.", None, detected_format
-            if "unsupported" in validation_error.lower():
-                return None, validation_error, None, detected_format
-            if "exceeds" in validation_error.lower() or "maximum" in validation_error.lower():
-                return None, validation_error, None, detected_format
-            return None, f"Invalid audio file: {validation_error}", None, detected_format
-        return None, "The uploaded file could not be validated as a valid audio file.", None, detected_format
-    format_display_name = get_format_display_name(detected_format)
-    if is_audio_conversion_queue_busy():
-        waiting_count = get_audio_conversion_waiting_count()
-        if waiting_count > 0:
-            gr.Warning(f"Audio conversion queue is busy. Your request is queued (position: {waiting_count + 1}). Please wait...")
-        else:
-            gr.Warning("Audio conversion is in progress for another user. Your request has been queued. Please wait...")
-    try:
-        if is_wav_format:
-            prepared_path, preparation_error, was_converted, final_format = prepare_audio_file_for_voice_cloning(
-                voice_clone_audio_file,
-                wait_for_queue=True
-            )
-            if prepared_path is None:
-                return None, f"Failed to process WAV file: {preparation_error}", None, 'wav'
-            return prepared_path, None, False, 'wav'
-        else:
-            prepared_path, preparation_error, was_converted, final_format = prepare_audio_file_for_voice_cloning(
-                voice_clone_audio_file,
-                wait_for_queue=True
-            )
-            if prepared_path is None:
-                if "no audio conversion library" in preparation_error.lower():
-                    return None, f"Cannot convert {format_display_name} format. Please upload a WAV file directly.", None, detected_format
-                if "queue" in preparation_error.lower() or "busy" in preparation_error.lower():
-                    return None, preparation_error, None, detected_format
-                return None, f"Failed to convert {format_display_name} to WAV format: {preparation_error}", None, detected_format
-            return prepared_path, None, True, detected_format
-    except AudioConversionQueueBusyError as queue_busy_error:
-        return None, str(queue_busy_error), None, detected_format
-    except AudioConversionQueueTimeoutError as queue_timeout_error:
-        return None, str(queue_timeout_error), None, detected_format
-def perform_speech_generation(
-    text_input,
-    voice_mode_selection,
-    voice_preset_selection,
-    voice_clone_audio_file,
-    model_variant,
-    lsd_decode_steps,
-    temperature,
-    noise_clamp,
-    eos_threshold,
-    frames_after_eos,
-    enable_custom_frames
-):
-    """Generate speech for text_input and return the path of the saved audio file.
-
-    Validates the text and, in clone mode, the uploaded voice sample; then
-    loads the model, obtains the voice state, generates the audio and saves it.
-
-    Returns the output file path, or None if a stop was requested between steps.
-    Raises gr.Error for every validation, loading or generation failure,
-    including when another generation is already in progress.
-    The generation flags are reset and memory is cleaned up in all cases.
-    """
-    from ..core import state as global_state
-    if has_temporary_files_pending_cleanup():
-        cleanup_expired_temporary_files()
-    # On success validation_result is later passed to generate_audio as the text;
-    # on failure it is used as the error message when non-empty.
-    is_valid, validation_result = validate_text_input(text_input)
-    if not is_valid:
-        if validation_result:
-            raise gr.Error(validation_result)
-        raise gr.Error("Please enter valid text to generate speech.")
-    prepared_audio_path = None
-    was_audio_converted = False
-    original_audio_format = None
-    if voice_mode_selection == VOICE_MODE_CLONE:
-        if not voice_clone_audio_file:
-            raise gr.Error("Please upload an audio file for voice cloning.")
-        file_size_valid, file_size_error = validate_voice_clone_file_size(voice_clone_audio_file)
-        if not file_size_valid:
-            max_size_display = format_file_size_for_display(MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES)
-            raise gr.Error(f"File size exceeds maximum limit of {max_size_display}. {file_size_error}")
-        if not get_huggingface_token():
-            raise gr.Error("Voice cloning is not configured properly at the moment. Please try again later.")
-        prepared_audio_path, audio_error, was_audio_converted, original_audio_format = validate_and_prepare_voice_clone_audio(voice_clone_audio_file)
-        if prepared_audio_path is None:
-            raise gr.Error(audio_error)
-        if was_audio_converted:
-            format_display_name = get_format_display_name(original_audio_format)
-            gr.Warning(f"Audio converted from {format_display_name} to WAV format for voice cloning.")
-    # Check and claim the "generating" flag in one locked step.
-    with generation_state_lock:
-        if global_state.is_currently_generating:
-            raise gr.Error("A generation is already in progress. Please wait.")
-        global_state.is_currently_generating = True
-        global_state.stop_generation_requested = False
-    # NOTE(review): this call sits outside the try block, so the flag set above
-    # would stay True if it raised -- confirm that cannot happen.
-    acquire_generation_protection()
-    generated_audio_tensor = None
-    cloned_voice_state_tensor = None
-    try:
-        perform_memory_cleanup()
-        loaded_model = text_to_speech_manager.load_or_get_model(
-            model_variant,
-            temperature,
-            lsd_decode_steps,
-            noise_clamp,
-            eos_threshold
-        )
-        if loaded_model is None:
-            raise gr.Error("Failed to load TTS model. Please try again.")
-        # A stop request is honoured between each major step.
-        with generation_state_lock:
-            if global_state.stop_generation_requested:
-                return None
-        if voice_mode_selection == VOICE_MODE_CLONE:
-            cloned_voice_state_tensor = text_to_speech_manager.get_voice_state_for_clone(
-                voice_clone_audio_file,
-                prepared_audio_path=prepared_audio_path
-            )
-            voice_state = cloned_voice_state_tensor
-        else:
-            voice_state = text_to_speech_manager.get_voice_state_for_preset(voice_preset_selection)
-        with generation_state_lock:
-            if global_state.stop_generation_requested:
-                return None
-        generated_audio_tensor = text_to_speech_manager.generate_audio(
-            validation_result,
-            voice_state,
-            frames_after_eos,
-            enable_custom_frames
-        )
-        with generation_state_lock:
-            if global_state.stop_generation_requested:
-                return None
-        output_file_path = text_to_speech_manager.save_audio_to_file(generated_audio_tensor)
-        return output_file_path
-    except gr.Error:
-        # Already user-facing; pass through unchanged.
-        raise
-    except ModelNotLoadedError as model_not_loaded_error:
-        raise gr.Error(str(model_not_loaded_error))
-    except ModelLoadingError as model_loading_error:
-        raise gr.Error(f"Failed to load TTS model: {str(model_loading_error)}")
-    except RuntimeError as runtime_error:
-        error_message = str(runtime_error)
-        if "not loaded" in error_message.lower():
-            # Try to reload so that a retry by the user can succeed.
-            if text_to_speech_manager.ensure_model_loaded():
-                raise gr.Error("Model was temporarily unavailable. Please try again.")
-            else:
-                raise gr.Error("TTS model could not be loaded. Please try again later.")
-        raise gr.Error(error_message)
-    except Exception as generation_error:
-        error_message = str(generation_error)
-        if "file does not start with RIFF id" in error_message:
-            raise gr.Error("The audio file format is not supported. Please upload a valid WAV file or a common audio format (MP3, FLAC, OGG, M4A).")
-        if "unknown format" in error_message.lower():
-            raise gr.Error("The audio file uses an unsupported encoding format. Please convert it to a standard format and try again.")
-        raise gr.Error(f"Speech generation failed: {error_message}")
-    finally:
-        # Runs on success, on early return and on error alike.
-        release_generation_protection()
-        with generation_state_lock:
-            global_state.is_currently_generating = False
-            global_state.stop_generation_requested = False
-        # Drop the local tensor references before the memory cleanup below.
-        if generated_audio_tensor is not None:
-            try:
-                del generated_audio_tensor
-            except Exception:
-                pass
-            generated_audio_tensor = None
-        if cloned_voice_state_tensor is not None:
-            try:
-                del cloned_voice_state_tensor
-            except Exception:
-                pass
-            cloned_voice_state_tensor = None
-        memory_cleanup()
-        trigger_background_cleanup_check()

src/tts/manager.py DELETED Viewed

@@ -1,341 +0,0 @@
-#
-# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-# SPDX-License-Identifier: Apache-2.0
-#
-import time
-import tempfile
-import threading
-import torch
-import scipy.io.wavfile
-from pocket_tts import TTSModel
-from config import (
-    AVAILABLE_VOICES,
-    DEFAULT_VOICE,
-    DEFAULT_MODEL_VARIANT,
-    DEFAULT_TEMPERATURE,
-    DEFAULT_LSD_DECODE_STEPS,
-    DEFAULT_EOS_THRESHOLD,
-    VOICE_STATE_CACHE_MAXIMUM_SIZE,
-    VOICE_STATE_CACHE_CLEANUP_THRESHOLD,
-    MODEL_LOAD_RETRY_ATTEMPTS
-)
-from ..core.state import (
-    temporary_files_registry,
-    temporary_files_lock,
-    set_text_to_speech_manager,
-    increment_model_usage,
-    decrement_model_usage,
-    is_model_in_use
-)
-from ..core.memory import (
-    force_garbage_collection,
-    memory_cleanup,
-    perform_memory_cleanup,
-    trigger_background_cleanup_check,
-    is_memory_usage_approaching_limit
-)
-class ModelNotLoadedError(RuntimeError):
-    pass
-class ModelLoadingError(RuntimeError):
-    pass
-class TextToSpeechManager:
-    def __init__(self):
-        self.loaded_model = None
-        self.current_configuration = {}
-        self.voice_state_cache = {}
-        self.voice_state_cache_access_timestamps = {}
-        self.voice_state_cache_lock = threading.Lock()
-        self.model_lock = threading.RLock()
-        self.model_loading_in_progress = False
-        self.last_successful_configuration = None
-    def is_model_loaded(self):
-        with self.model_lock:
-            return self.loaded_model is not None
-    def is_model_loading(self):
-        with self.model_lock:
-            return self.model_loading_in_progress
-    def _clear_voice_state_cache_internal(self):
-        with self.voice_state_cache_lock:
-            for voice_name in list(self.voice_state_cache.keys()):
-                voice_state_tensor = self.voice_state_cache.pop(voice_name, None)
-                if voice_state_tensor is not None:
-                    try:
-                        del voice_state_tensor
-                    except Exception:
-                        pass
-            self.voice_state_cache.clear()
-            self.voice_state_cache_access_timestamps.clear()
-    def load_or_get_model(
-        self,
-        model_variant,
-        temperature,
-        lsd_decode_steps,
-        noise_clamp,
-        eos_threshold
-    ):
-        processed_variant = str(model_variant or DEFAULT_MODEL_VARIANT).strip()
-        processed_temperature = float(temperature) if temperature is not None else DEFAULT_TEMPERATURE
-        processed_lsd_steps = int(lsd_decode_steps) if lsd_decode_steps is not None else DEFAULT_LSD_DECODE_STEPS
-        processed_noise_clamp = float(noise_clamp) if noise_clamp and float(noise_clamp) > 0 else None
-        processed_eos_threshold = float(eos_threshold) if eos_threshold is not None else DEFAULT_EOS_THRESHOLD
-        requested_configuration = {
-            "variant": processed_variant,
-            "temp": processed_temperature,
-            "lsd_decode_steps": processed_lsd_steps,
-            "noise_clamp": processed_noise_clamp,
-            "eos_threshold": processed_eos_threshold
-        }
-        with self.model_lock:
-            if self.loaded_model is not None and self.current_configuration == requested_configuration:
-                return self.loaded_model
-            return self._load_model_with_retry(requested_configuration)
-    def _load_model_with_retry(self, requested_configuration):
-        last_exception = None
-        for attempt_number in range(MODEL_LOAD_RETRY_ATTEMPTS):
-            try:
-                self.model_loading_in_progress = True
-                if self.loaded_model is not None:
-                    self._clear_voice_state_cache_internal()
-                    try:
-                        del self.loaded_model
-                    except Exception:
-                        pass
-                    self.loaded_model = None
-                    memory_cleanup()
-                perform_memory_cleanup()
-                self.loaded_model = TTSModel.load_model(**requested_configuration)
-                if self.loaded_model is None:
-                    raise ModelLoadingError("TTSModel.load_model returned None")
-                self.current_configuration = requested_configuration.copy()
-                self.last_successful_configuration = requested_configuration.copy()
-                self.voice_state_cache = {}
-                self.model_loading_in_progress = False
-                return self.loaded_model
-            except Exception as loading_exception:
-                last_exception = loading_exception
-                self.loaded_model = None
-                self.model_loading_in_progress = False
-                if attempt_number < MODEL_LOAD_RETRY_ATTEMPTS - 1:
-                    memory_cleanup()
-        self.model_loading_in_progress = False
-        raise ModelLoadingError(f"Failed to load TTS model after {MODEL_LOAD_RETRY_ATTEMPTS} attempts: {str(last_exception)}")
-    def ensure_model_loaded(self):
-        with self.model_lock:
-            if self.loaded_model is not None:
-                return True
-            if self.last_successful_configuration is not None:
-                try:
-                    self._load_model_with_retry(self.last_successful_configuration)
-                    return self.loaded_model is not None
-                except Exception:
-                    pass
-            default_configuration = {
-                "variant": DEFAULT_MODEL_VARIANT,
-                "temp": DEFAULT_TEMPERATURE,
-                "lsd_decode_steps": DEFAULT_LSD_DECODE_STEPS,
-                "noise_clamp": None,
-                "eos_threshold": DEFAULT_EOS_THRESHOLD
-            }
-            try:
-                self._load_model_with_retry(default_configuration)
-                return self.loaded_model is not None
-            except Exception:
-                return False
-    def clear_voice_state_cache_completely(self):
-        with self.model_lock:
-            self._clear_voice_state_cache_internal()
-        force_garbage_collection()
-    def evict_least_recently_used_voice_states(self):
-        with self.voice_state_cache_lock:
-            if len(self.voice_state_cache) == 0:
-                return
-            if len(self.voice_state_cache) <= VOICE_STATE_CACHE_CLEANUP_THRESHOLD:
-                sorted_voice_names_by_access_time = sorted(
-                    self.voice_state_cache_access_timestamps.keys(),
-                    key=lambda voice_name: self.voice_state_cache_access_timestamps.get(voice_name, 0)
-                )
-                number_of_entries_to_remove = max(1, len(self.voice_state_cache) // 2)
-                for index in range(min(number_of_entries_to_remove, len(sorted_voice_names_by_access_time))):
-                    voice_name_to_remove = sorted_voice_names_by_access_time[index]
-                    voice_state_tensor = self.voice_state_cache.pop(voice_name_to_remove, None)
-                    self.voice_state_cache_access_timestamps.pop(voice_name_to_remove, None)
-                    if voice_state_tensor is not None:
-                        try:
-                            del voice_state_tensor
-                        except Exception:
-                            pass
-                force_garbage_collection()
-                return
-            sorted_voice_names_by_access_time = sorted(
-                self.voice_state_cache_access_timestamps.keys(),
-                key=lambda voice_name: self.voice_state_cache_access_timestamps.get(voice_name, 0)
-            )
-            number_of_entries_to_remove = len(self.voice_state_cache) - VOICE_STATE_CACHE_CLEANUP_THRESHOLD
-            for index in range(number_of_entries_to_remove):
-                if index >= len(sorted_voice_names_by_access_time):
-                    break
-                voice_name_to_remove = sorted_voice_names_by_access_time[index]
-                voice_state_tensor = self.voice_state_cache.pop(voice_name_to_remove, None)
-                self.voice_state_cache_access_timestamps.pop(voice_name_to_remove, None)
-                if voice_state_tensor is not None:
-                    try:
-                        del voice_state_tensor
-                    except Exception:
-                        pass
-        force_garbage_collection()
-    def get_voice_state_for_preset(self, voice_name):
-        validated_voice = voice_name if voice_name in AVAILABLE_VOICES else DEFAULT_VOICE
-        with self.voice_state_cache_lock:
-            if validated_voice in self.voice_state_cache:
-                self.voice_state_cache_access_timestamps[validated_voice] = time.time()
-                cached_state = self.voice_state_cache[validated_voice]
-                return cached_state
-        if is_memory_usage_approaching_limit():
-            self.evict_least_recently_used_voice_states()
-        if len(self.voice_state_cache) >= VOICE_STATE_CACHE_MAXIMUM_SIZE:
-            self.evict_least_recently_used_voice_states()
-        increment_model_usage()
-        try:
-            with self.model_lock:
-                if self.loaded_model is None:
-                    if not self.ensure_model_loaded():
-                        raise ModelNotLoadedError("TTS model is not loaded and could not be reloaded. Please try again.")
-                computed_voice_state = self.loaded_model.get_state_for_audio_prompt(
-                    audio_conditioning=validated_voice,
-                    truncate=False
-                )
-            with self.voice_state_cache_lock:
-                self.voice_state_cache[validated_voice] = computed_voice_state
-                self.voice_state_cache_access_timestamps[validated_voice] = time.time()
-            return computed_voice_state
-        finally:
-            decrement_model_usage()
-    def get_voice_state_for_clone(self, audio_file_path, prepared_audio_path=None):
-        audio_path_to_use = prepared_audio_path if prepared_audio_path is not None else audio_file_path
-        increment_model_usage()
-        try:
-            with self.model_lock:
-                if self.loaded_model is None:
-                    if not self.ensure_model_loaded():
-                        raise ModelNotLoadedError("TTS model is not loaded and could not be reloaded. Please try again.")
-                cloned_voice_state = self.loaded_model.get_state_for_audio_prompt(
-                    audio_conditioning=audio_path_to_use,
-                    truncate=False
-                )
-            return cloned_voice_state
-        finally:
-            decrement_model_usage()
-    def generate_audio(self, text_content, voice_state, frames_after_eos, enable_custom_frames):
-        increment_model_usage()
-        try:
-            with self.model_lock:
-                if self.loaded_model is None:
-                    if not self.ensure_model_loaded():
-                        raise ModelNotLoadedError("TTS model is not loaded and could not be reloaded. Please try again.")
-                processed_frames = int(frames_after_eos) if enable_custom_frames else None
-                generated_audio = self.loaded_model.generate_audio(
-                    model_state=voice_state,
-                    text_to_generate=text_content,
-                    frames_after_eos=processed_frames,
-                    copy_state=True
-                )
-            force_garbage_collection()
-            return generated_audio
-        finally:
-            decrement_model_usage()
-    def save_audio_to_file(self, audio_tensor):
-        with self.model_lock:
-            if self.loaded_model is None:
-                raise ModelNotLoadedError("TTS model is not loaded. Cannot determine sample rate.")
-            audio_sample_rate = self.loaded_model.sample_rate
-        audio_numpy_data = audio_tensor.numpy()
-        output_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
-        scipy.io.wavfile.write(output_file.name, audio_sample_rate, audio_numpy_data)
-        with temporary_files_lock:
-            temporary_files_registry[output_file.name] = time.time()
-        trigger_background_cleanup_check()
-        return output_file.name
-text_to_speech_manager = TextToSpeechManager()
-set_text_to_speech_manager(text_to_speech_manager)

src/ui/handlers.py DELETED Viewed

@@ -1,58 +0,0 @@
-#
-# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-# SPDX-License-Identifier: Apache-2.0
-#
-import gradio as gr
-from config import VOICE_MODE_PRESET, DEFAULT_VOICE
-from ..validation.text import validate_text_input
-def switch_to_generating_state(ui_state):
-    new_state = {"generating": True}
-    return (
-        gr.update(visible=False),
-        gr.update(visible=True, interactive=True),
-        gr.update(visible=False),
-        new_state
-    )
-def switch_to_idle_state(text_content, ui_state):
-    new_state = {"generating": False}
-    has_text_content = bool(text_content and text_content.strip())
-    should_show_clear = has_text_content
-    is_valid_text, _ = validate_text_input(text_content)
-    return (
-        gr.update(visible=True, interactive=is_valid_text),
-        gr.update(visible=False),
-        gr.update(visible=should_show_clear),
-        new_state
-    )
-def perform_clear_action():
-    return (
-        "",
-        None,
-        gr.update(visible=False),
-        VOICE_MODE_PRESET,
-        DEFAULT_VOICE,
-        None
-    )
-def create_example_handler(example_text, example_voice):
-    def set_example_values():
-        return example_text, VOICE_MODE_PRESET, example_voice
-    return set_example_values
-def format_example_button_label(example_text, example_voice, max_text_length=40):
-    truncated_text = (
-        example_text[:max_text_length] + "..."
-        if len(example_text) > max_text_length
-        else example_text
-    )
-    return f"[{example_voice}] {truncated_text}"

src/ui/state.py DELETED Viewed

@@ -1,43 +0,0 @@
-#
-# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-# SPDX-License-Identifier: Apache-2.0
-#
-import gradio as gr
-from config import MAXIMUM_INPUT_LENGTH, VOICE_MODE_CLONE
-from ..validation.text import validate_text_input
-def check_generate_button_state(text_content, ui_state):
-    if ui_state.get("generating", False):
-        return gr.update(interactive=False)
-    is_valid, _ = validate_text_input(text_content)
-    return gr.update(interactive=is_valid)
-def calculate_character_count_display(text_content):
-    character_count = len(text_content) if text_content else 0
-    display_color = (
-        "var(--error-text-color)"
-        if character_count > MAXIMUM_INPUT_LENGTH
-        else "var(--body-text-color-subdued)"
-    )
-    return f"<div style='text-align: right; padding: 4px 0;'><span style='color: {display_color}; font-size: 0.85em;'>{character_count} / {MAXIMUM_INPUT_LENGTH}</span></div>"
-def determine_clear_button_visibility(text_content, ui_state):
-    if ui_state.get("generating", False):
-        return gr.update(visible=False)
-    has_text_content = bool(text_content and text_content.strip())
-    should_show_clear = has_text_content
-    return gr.update(visible=should_show_clear)
-def update_voice_mode_visibility(voice_mode_value):
-    if voice_mode_value == VOICE_MODE_CLONE:
-        return gr.update(visible=False), gr.update(visible=True)
-    else:
-        return gr.update(visible=True), gr.update(visible=False)

src/validation/text.py DELETED Viewed

@@ -1,20 +0,0 @@
-#
-# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-# SPDX-License-Identifier: Apache-2.0
-#
-from config import MAXIMUM_INPUT_LENGTH
-def validate_text_input(text_content):
-    if not text_content or not isinstance(text_content, str):
-        return False, ""
-    cleaned_text = text_content.strip()
-    if not cleaned_text:
-        return False, ""
-    if len(cleaned_text) > MAXIMUM_INPUT_LENGTH:
-        return False, f"Input exceeds maximum length of {MAXIMUM_INPUT_LENGTH} characters."
-    return True, cleaned_text