tts

Sleeping

App Files Files Community

hadadrjt committed on Jan 15

Commit

d24817e

1 Parent(s): 4388640

Pocket TTS: Enforce resource management.

Browse files

Files changed (1) hide show

app.py +408 -30

app.py CHANGED Viewed

@@ -164,25 +164,330 @@ import gc
 import atexit
 BACKGROUND_CLEANUP_INTERVAL = 300
-VOICE_STATE_CACHE_MAXIMUM_SIZE = 16
-VOICE_STATE_CACHE_CLEANUP_THRESHOLD = 12
 background_cleanup_thread = None
 background_cleanup_stop_event = threading.Event()
 def perform_background_cleanup_cycle():
     while not background_cleanup_stop_event.is_set():
-        background_cleanup_stop_event.wait(timeout=BACKGROUND_CLEANUP_INTERVAL)
-        if not background_cleanup_stop_event.is_set():
             cleanup_expired_temporary_files()
-            force_garbage_collection()
 def start_background_cleanup_thread():
     global background_cleanup_thread
     if background_cleanup_thread is None or not background_cleanup_thread.is_alive():
         background_cleanup_stop_event.clear()
         background_cleanup_thread = threading.Thread(
             target=perform_background_cleanup_cycle,
@@ -194,16 +499,11 @@ def start_background_cleanup_thread():
 def stop_background_cleanup_thread():
     background_cleanup_stop_event.set()
     if background_cleanup_thread is not None and background_cleanup_thread.is_alive():
         background_cleanup_thread.join(timeout=5)
-def force_garbage_collection():
-    gc.collect()
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
 atexit.register(stop_background_cleanup_thread)
 # =============================================================================
@@ -243,6 +543,8 @@ def convert_audio_to_pcm_wav(input_path):
         with temporary_files_lock:
             temporary_files_registry[output_file.name] = time.time()
         return output_file.name
     except Exception as conversion_error:
@@ -285,6 +587,23 @@ class TextToSpeechManager:
         self.voice_state_cache_access_timestamps = {}
         self.voice_state_cache_lock = threading.Lock()
     def load_or_get_model(
         self,
@@ -311,6 +630,8 @@ class TextToSpeechManager:
         Returns:
             TTSModel: Loaded and configured TTS model instance
         """
         # Process and validate input parameters with defaults
         processed_variant = str(model_variant or DEFAULT_MODEL_VARIANT).strip()
         processed_temperature = float(temperature) if temperature is not None else DEFAULT_TEMPERATURE
@@ -327,15 +648,20 @@ class TextToSpeechManager:
             "eos_threshold": processed_eos_threshold
         }
-        # Load new model if configuration changed or no model loaded
-        if self.loaded_model is None or self.current_configuration != requested_configuration:
-            self.clear_voice_state_cache_completely()
-            self.loaded_model = TTSModel.load_model(**requested_configuration)
-            self.current_configuration = requested_configuration
-            self.voice_state_cache = {}  # Clear cache on model change
-        return self.loaded_model
     def clear_voice_state_cache_completely(self):
         with self.voice_state_cache_lock:
@@ -355,6 +681,23 @@ class TextToSpeechManager:
         with self.voice_state_cache_lock:
             if len(self.voice_state_cache) <= VOICE_STATE_CACHE_CLEANUP_THRESHOLD:
                 return
             sorted_voice_names_by_access_time = sorted(
@@ -395,9 +738,16 @@ class TextToSpeechManager:
                 self.voice_state_cache_access_timestamps[validated_voice] = time.time()
                 return self.voice_state_cache[validated_voice]
         if len(self.voice_state_cache) >= VOICE_STATE_CACHE_MAXIMUM_SIZE:
             self.evict_least_recently_used_voice_states()
         # Compute and cache voice state if not already cached
         if validated_voice not in self.voice_state_cache:
@@ -426,6 +776,9 @@ class TextToSpeechManager:
         Returns:
             Voice state tensor extracted from the audio file
         """
         converted_audio_path = convert_audio_to_pcm_wav(audio_file_path)
@@ -447,15 +800,23 @@ class TextToSpeechManager:
         Returns:
             torch.Tensor: Generated audio waveform
         """
-        # Apply custom frames setting if enabled
-        processed_frames = int(frames_after_eos) if enable_custom_frames else None
-        return self.loaded_model.generate_audio(
-            model_state=voice_state,
-            text_to_generate=text_content,
-            frames_after_eos=processed_frames,
-            copy_state=True
-        )
     def save_audio_to_file(self, audio_tensor):
         """
@@ -470,9 +831,14 @@ class TextToSpeechManager:
         Returns:
             str: Path to the saved temporary WAV file
         """
         # Convert tensor to numpy array for scipy
         audio_numpy_data = audio_tensor.numpy()
-        audio_sample_rate = self.loaded_model.sample_rate
         # Create temporary file and write audio data
         output_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
@@ -482,6 +848,8 @@ class TextToSpeechManager:
         with temporary_files_lock:
             temporary_files_registry[output_file.name] = time.time()
         return output_file.name
@@ -625,7 +993,10 @@ def perform_speech_generation(
     global is_currently_generating, stop_generation_requested
     # Run cleanup before starting new generation
-    cleanup_expired_temporary_files()
     # Validate text input
     is_valid, validation_result = validate_text_input(text_input)
@@ -701,6 +1072,9 @@ def perform_speech_generation(
     except gr.Error:
         raise
     except Exception as generation_error:
         raise gr.Error(f"Speech generation failed: {str(generation_error)}")
@@ -712,11 +1086,15 @@ def perform_speech_generation(
         if generated_audio_tensor is not None:
             del generated_audio_tensor
         if cloned_voice_state_tensor is not None:
             del cloned_voice_state_tensor
-        force_garbage_collection()
 # =============================================================================

 import atexit
 BACKGROUND_CLEANUP_INTERVAL = 300
+# Voice-state cache sizing: get-or-compute evicts once the cache holds at
+# least MAXIMUM_SIZE entries; the eviction routine compares the cache size
+# against CLEANUP_THRESHOLD.
+VOICE_STATE_CACHE_MAXIMUM_SIZE = 8
+VOICE_STATE_CACHE_CLEANUP_THRESHOLD = 4
+# Memory budget, compared against get_current_memory_usage() (bytes): 1 GiB.
+MAXIMUM_MEMORY_USAGE = 1 * 1024 * 1024 * 1024
+# Escalation thresholds derived from the budget: 70 % and 85 %.
+MEMORY_WARNING_THRESHOLD = int(0.7 * MAXIMUM_MEMORY_USAGE)
+MEMORY_CRITICAL_THRESHOLD = int(0.85 * MAXIMUM_MEMORY_USAGE)
+# Used by the background loop as a wait timeout and as the minimum spacing
+# between memory checks (compared against time.time() differences).
+MEMORY_CHECK_INTERVAL = 30
+# Usage level the idle reduction tries to get down to: 50 % of the budget.
+MEMORY_IDLE_TARGET = int(0.5 * MAXIMUM_MEMORY_USAGE)
 background_cleanup_thread = None
 background_cleanup_stop_event = threading.Event()
+background_cleanup_trigger_event = threading.Event()
+memory_enforcement_lock = threading.Lock()
+text_to_speech_manager = None
+def get_current_memory_usage():
+    """
+    Report the resident memory size of this process in bytes.
+
+    Three sources are tried in order, each one best-effort: the ``VmRSS``
+    line of ``/proc/self/status``, the second field of ``/proc/self/statm``
+    multiplied by the page size, and finally ``resource.getrusage``.
+
+    Returns:
+        int: Memory usage in bytes, or 0 when every source fails
+    """
+    # Source 1: the VmRSS value is scaled by 1024 (kB to bytes)
+    try:
+        with open('/proc/self/status', 'r') as process_status:
+            for status_line in process_status:
+                if not status_line.startswith('VmRSS:'):
+                    continue
+                return int(status_line.split()[1]) * 1024
+    except Exception:
+        pass
+    # Source 2: resident page count times the system page size
+    try:
+        with open('/proc/self/statm', 'r') as process_statm:
+            resident_page_count = int(process_statm.read().split()[1])
+        return resident_page_count * os.sysconf('SC_PAGE_SIZE')
+    except Exception:
+        pass
+    # Source 3: ru_maxrss is returned unscaled on Darwin and multiplied by
+    # 1024 elsewhere.
+    # NOTE(review): ru_maxrss is documented as a maximum (high-water mark),
+    # so this fallback may over-report current usage - confirm acceptable.
+    try:
+        import resource
+        import platform
+        resident_size = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+        if platform.system() != "Darwin":
+            resident_size = resident_size * 1024
+        return resident_size
+    except Exception:
+        pass
+    return 0
+def check_if_generation_is_currently_active():
+    """
+    Read the module-level is_currently_generating flag while holding
+    generation_state_lock.
+
+    Returns:
+        The value of is_currently_generating at the time of the read
+    """
+    with generation_state_lock:
+        generation_flag = is_currently_generating
+    return generation_flag
+def is_memory_usage_within_limit():
+    """
+    Returns:
+        bool: True when current usage is strictly below MAXIMUM_MEMORY_USAGE
+    """
+    return get_current_memory_usage() < MAXIMUM_MEMORY_USAGE
+def is_memory_usage_approaching_limit():
+    """
+    Returns:
+        bool: True when current usage has reached MEMORY_WARNING_THRESHOLD
+    """
+    return get_current_memory_usage() >= MEMORY_WARNING_THRESHOLD
+def is_memory_usage_critical():
+    """
+    Returns:
+        bool: True when current usage has reached MEMORY_CRITICAL_THRESHOLD
+    """
+    return get_current_memory_usage() >= MEMORY_CRITICAL_THRESHOLD
+def is_memory_above_idle_target():
+    """
+    Returns:
+        bool: True when current usage is strictly above MEMORY_IDLE_TARGET
+    """
+    return get_current_memory_usage() > MEMORY_IDLE_TARGET
+def force_garbage_collection():
+    """
+    Run the garbage collector on generations 0, 1 and 2 in turn, then, when
+    CUDA is available, empty the CUDA cache and synchronize.
+    """
+    for generation_index in (0, 1, 2):
+        gc.collect(generation_index)
+    if not torch.cuda.is_available():
+        return
+    torch.cuda.empty_cache()
+    torch.cuda.synchronize()
+def memory_cleanup():
+    """
+    Collect garbage, call malloc_trim(0) from libc.so.6, then collect
+    garbage again.
+
+    The malloc_trim call is best-effort: any failure (for example when
+    libc.so.6 cannot be loaded) is ignored.
+    """
+    force_garbage_collection()
+    try:
+        import ctypes
+        ctypes.CDLL("libc.so.6").malloc_trim(0)
+    except Exception:
+        pass
+    force_garbage_collection()
+def perform_memory_cleanup():
+    """
+    Collect garbage, evict least-recently-used voice states when a manager
+    instance exists, then run memory_cleanup().
+    """
+    force_garbage_collection()
+    active_manager = text_to_speech_manager
+    if active_manager is not None:
+        active_manager.evict_least_recently_used_voice_states()
+    memory_cleanup()
+def enforce_memory_limit_if_exceeded():
+    """
+    Escalate through cleanup stages until memory usage drops far enough.
+
+    Runs under memory_enforcement_lock. Usage is re-measured after each
+    stage and the function returns True as soon as the stage's target is met:
+
+      1. nothing / garbage collection  -> below MEMORY_WARNING_THRESHOLD
+      2. LRU voice-state eviction      -> below MEMORY_CRITICAL_THRESHOLD
+      3. full voice-state cache clear
+         and temporary file removal    -> below MAXIMUM_MEMORY_USAGE
+      4. model unload (skipped while a generation is running)
+
+    Returns:
+        bool: True when a stage brought usage below its target, False when
+        usage is still at or above MAXIMUM_MEMORY_USAGE
+    """
+    with memory_enforcement_lock:
+        generation_was_active = check_if_generation_is_currently_active()
+        if get_current_memory_usage() < MEMORY_WARNING_THRESHOLD:
+            return True
+        force_garbage_collection()
+        if get_current_memory_usage() < MEMORY_WARNING_THRESHOLD:
+            return True
+        if text_to_speech_manager is not None:
+            text_to_speech_manager.evict_least_recently_used_voice_states()
+        memory_cleanup()
+        if get_current_memory_usage() < MEMORY_CRITICAL_THRESHOLD:
+            return True
+        if text_to_speech_manager is not None:
+            text_to_speech_manager.clear_voice_state_cache_completely()
+        cleanup_all_temporary_files_immediately()
+        memory_cleanup()
+        if get_current_memory_usage() < MAXIMUM_MEMORY_USAGE:
+            return True
+        # Usage is known to be at or above the maximum here. The earlier
+        # stages take time, so the generation state is sampled again right
+        # before the unload instead of trusting only the initial snapshot;
+        # the model is never unloaded while a generation is running.
+        if generation_was_active or check_if_generation_is_currently_active():
+            return False
+        if text_to_speech_manager is not None:
+            text_to_speech_manager.unload_model_completely()
+        memory_cleanup()
+        return get_current_memory_usage() < MAXIMUM_MEMORY_USAGE
+def perform_idle_memory_reduction():
+    """
+    Shrink memory usage toward MEMORY_IDLE_TARGET while no generation runs.
+
+    After an initial garbage collection the escalation is: LRU voice-state
+    eviction, full voice-state cache clear, model unload. Before every step
+    the usage is measured again and the generation state re-checked; the
+    function returns as soon as usage is at or below the target or a
+    generation is active. Each step is followed by memory_cleanup().
+    """
+    if check_if_generation_is_currently_active():
+        return
+    # Manager methods to invoke, mildest first
+    escalation_steps = (
+        "evict_least_recently_used_voice_states",
+        "clear_voice_state_cache_completely",
+        "unload_model_completely",
+    )
+    with memory_enforcement_lock:
+        if get_current_memory_usage() <= MEMORY_IDLE_TARGET:
+            return
+        force_garbage_collection()
+        for manager_method_name in escalation_steps:
+            if get_current_memory_usage() <= MEMORY_IDLE_TARGET:
+                return
+            if check_if_generation_is_currently_active():
+                return
+            if text_to_speech_manager is not None:
+                getattr(text_to_speech_manager, manager_method_name)()
+            memory_cleanup()
+def cleanup_all_temporary_files_immediately():
+    """
+    Delete every registered temporary file and drop it from the registry.
+
+    Runs under temporary_files_lock. Errors are ignored per file; an entry
+    whose removal raised stays in the registry.
+    """
+    with temporary_files_lock:
+        # Snapshot the keys so the registry can be mutated inside the loop
+        registered_paths = tuple(temporary_files_registry)
+        for registered_path in registered_paths:
+            try:
+                if os.path.exists(registered_path):
+                    os.remove(registered_path)
+                del temporary_files_registry[registered_path]
+            except Exception:
+                pass
+def has_temporary_files_pending_cleanup():
+    """
+    Check whether any registered temporary file is older than
+    TEMPORARY_FILE_LIFETIME_SECONDS.
+
+    Returns:
+        bool: True when at least one registered file has expired
+    """
+    with temporary_files_lock:
+        if not temporary_files_registry:
+            return False
+        reference_time = time.time()
+        return any(
+            reference_time - registered_at > TEMPORARY_FILE_LIFETIME_SECONDS
+            for registered_at in temporary_files_registry.values()
+        )
+def has_any_temporary_files_registered():
+    """
+    Returns:
+        bool: True when the temporary file registry is not empty
+    """
+    with temporary_files_lock:
+        return bool(temporary_files_registry)
+def calculate_time_until_next_file_expiration():
+    """
+    Compute how long until the next registered temporary file expires.
+
+    Returns:
+        None when nothing is registered, 0 when at least one file has
+        already expired, otherwise the smallest remaining lifetime in the
+        same unit as TEMPORARY_FILE_LIFETIME_SECONDS
+    """
+    with temporary_files_lock:
+        if not temporary_files_registry:
+            return None
+        reference_time = time.time()
+        remaining_lifetimes = [
+            TEMPORARY_FILE_LIFETIME_SECONDS - (reference_time - registered_at)
+            for registered_at in temporary_files_registry.values()
+        ]
+        if any(remaining <= 0 for remaining in remaining_lifetimes):
+            return 0
+        return min(remaining_lifetimes)
 def perform_background_cleanup_cycle():
+    """
+    Body of the background cleanup thread.
+
+    Each iteration picks a wait strategy:
+      - temporary files registered: sleep until shortly after the next
+        expiration (capped by MEMORY_CHECK_INTERVAL and
+        BACKGROUND_CLEANUP_INTERVAL), or 1 second when one already expired
+      - no files, memory above the idle target and no generation running:
+        sleep MEMORY_CHECK_INTERVAL
+      - otherwise: block on background_cleanup_trigger_event for up to
+        BACKGROUND_CLEANUP_INTERVAL; on timeout run the idle memory reduction
+
+    After a timed sleep, expired files are cleaned up and, at most once per
+    MEMORY_CHECK_INTERVAL, memory limits are enforced while no generation is
+    running. The loop ends when background_cleanup_stop_event is set.
+    """
+    last_memory_check_timestamp = 0
     while not background_cleanup_stop_event.is_set():
+        # Reset the trigger BEFORE inspecting the registry. Clearing it after
+        # the inspection would discard a trigger set in between, leaving a
+        # freshly registered file unnoticed for a full
+        # BACKGROUND_CLEANUP_INTERVAL.
+        background_cleanup_trigger_event.clear()
+        # The stop routine sets the stop event first and the trigger second;
+        # re-checking here covers a stop whose trigger was just cleared.
+        if background_cleanup_stop_event.is_set():
+            break
+        time_until_next_expiration = calculate_time_until_next_file_expiration()
+        if time_until_next_expiration is not None:
+            if time_until_next_expiration <= 0:
+                wait_duration = 1
+            else:
+                wait_duration = min(
+                    time_until_next_expiration + 1,
+                    MEMORY_CHECK_INTERVAL,
+                    BACKGROUND_CLEANUP_INTERVAL
+                )
+        else:
+            if is_memory_above_idle_target() and not check_if_generation_is_currently_active():
+                wait_duration = MEMORY_CHECK_INTERVAL
+            else:
+                triggered = background_cleanup_trigger_event.wait(timeout=BACKGROUND_CLEANUP_INTERVAL)
+                if background_cleanup_stop_event.is_set():
+                    break
+                if triggered:
+                    # Something changed: re-evaluate from the top
+                    continue
+                else:
+                    if not check_if_generation_is_currently_active():
+                        perform_idle_memory_reduction()
+                    continue
+        background_cleanup_stop_event.wait(timeout=wait_duration)
+        if background_cleanup_stop_event.is_set():
+            break
+        if has_temporary_files_pending_cleanup():
             cleanup_expired_temporary_files()
+        current_timestamp = time.time()
+        time_since_last_memory_check = current_timestamp - last_memory_check_timestamp
+        if time_since_last_memory_check >= MEMORY_CHECK_INTERVAL:
+            if not check_if_generation_is_currently_active():
+                if is_memory_usage_critical():
+                    enforce_memory_limit_if_exceeded()
+                elif is_memory_above_idle_target():
+                    perform_idle_memory_reduction()
+            last_memory_check_timestamp = current_timestamp
+def trigger_background_cleanup_check():
+    """
+    Set background_cleanup_trigger_event, the event that
+    perform_background_cleanup_cycle blocks on while it has nothing to do.
+    """
+    background_cleanup_trigger_event.set()
 def start_background_cleanup_thread():
     global background_cleanup_thread
     if background_cleanup_thread is None or not background_cleanup_thread.is_alive():
         background_cleanup_stop_event.clear()
+        background_cleanup_trigger_event.clear()
         background_cleanup_thread = threading.Thread(
             target=perform_background_cleanup_cycle,
 def stop_background_cleanup_thread():
+    """
+    Signal the background cleanup thread to exit and wait up to 5 seconds
+    for it to finish.
+    """
     background_cleanup_stop_event.set()
+    # The worker may be blocked on the trigger event rather than the stop
+    # event; setting it too wakes that wait so the stop flag gets checked.
+    background_cleanup_trigger_event.set()
     if background_cleanup_thread is not None and background_cleanup_thread.is_alive():
         background_cleanup_thread.join(timeout=5)
 atexit.register(stop_background_cleanup_thread)
 # =============================================================================
         with temporary_files_lock:
             temporary_files_registry[output_file.name] = time.time()
+        trigger_background_cleanup_check()
         return output_file.name
     except Exception as conversion_error:
         self.voice_state_cache_access_timestamps = {}
         self.voice_state_cache_lock = threading.Lock()
+        self.model_lock = threading.Lock()
+    def is_model_loaded(self):
+        """
+        Check, under model_lock, whether a model instance is currently held.
+
+        Returns:
+            bool: True when self.loaded_model is not None
+        """
+        with self.model_lock:
+            model_is_present = self.loaded_model is not None
+        return model_is_present
+    def unload_model_completely(self):
+        """
+        Drop the loaded model together with all cached voice states.
+
+        Under model_lock: clears the voice-state cache, releases the model
+        reference when one is held, and resets current_configuration to an
+        empty dict. memory_cleanup() runs after the lock is released.
+        """
+        with self.model_lock:
+            self.clear_voice_state_cache_completely()
+            model_is_present = self.loaded_model is not None
+            if model_is_present:
+                del self.loaded_model
+                self.loaded_model = None
+            self.current_configuration = {}
+        memory_cleanup()
     def load_or_get_model(
         self,
         Returns:
             TTSModel: Loaded and configured TTS model instance
         """
+        perform_memory_cleanup()
         # Process and validate input parameters with defaults
         processed_variant = str(model_variant or DEFAULT_MODEL_VARIANT).strip()
         processed_temperature = float(temperature) if temperature is not None else DEFAULT_TEMPERATURE
             "eos_threshold": processed_eos_threshold
         }
+        with self.model_lock:
+            # Load new model if configuration changed or no model loaded
+            if self.loaded_model is None or self.current_configuration != requested_configuration:
+                if self.loaded_model is not None:
+                    self.clear_voice_state_cache_completely()
+                    del self.loaded_model
+                    self.loaded_model = None
+                    memory_cleanup()
+                self.loaded_model = TTSModel.load_model(**requested_configuration)
+                self.current_configuration = requested_configuration
+                self.voice_state_cache = {}  # Clear cache on model change
+            return self.loaded_model
     def clear_voice_state_cache_completely(self):
         with self.voice_state_cache_lock:
         with self.voice_state_cache_lock:
             if len(self.voice_state_cache) <= VOICE_STATE_CACHE_CLEANUP_THRESHOLD:
+                if len(self.voice_state_cache) > 0:
+                    sorted_voice_names_by_access_time = sorted(
+                        self.voice_state_cache_access_timestamps.keys(),
+                        key=lambda voice_name: self.voice_state_cache_access_timestamps[voice_name]
+                    )
+                    number_of_entries_to_remove = max(1, len(self.voice_state_cache) // 2)
+                    for index in range(min(number_of_entries_to_remove, len(sorted_voice_names_by_access_time))):
+                        voice_name_to_remove = sorted_voice_names_by_access_time[index]
+                        voice_state_tensor = self.voice_state_cache.pop(voice_name_to_remove, None)
+                        self.voice_state_cache_access_timestamps.pop(voice_name_to_remove, None)
+                        if voice_state_tensor is not None:
+                            del voice_state_tensor
+                force_garbage_collection()
                 return
             sorted_voice_names_by_access_time = sorted(
                 self.voice_state_cache_access_timestamps[validated_voice] = time.time()
                 return self.voice_state_cache[validated_voice]
+        if is_memory_usage_approaching_limit():
+            self.evict_least_recently_used_voice_states()
         if len(self.voice_state_cache) >= VOICE_STATE_CACHE_MAXIMUM_SIZE:
             self.evict_least_recently_used_voice_states()
+        with self.model_lock:
+            if self.loaded_model is None:
+                raise RuntimeError("TTS model is not loaded. Please try again.")
         # Compute and cache voice state if not already cached
         if validated_voice not in self.voice_state_cache:
         Returns:
             Voice state tensor extracted from the audio file
         """
+        with self.model_lock:
+            if self.loaded_model is None:
+                raise RuntimeError("TTS model is not loaded. Please try again.")
         converted_audio_path = convert_audio_to_pcm_wav(audio_file_path)
         Returns:
             torch.Tensor: Generated audio waveform
         """
+        with self.model_lock:
+            if self.loaded_model is None:
+                raise RuntimeError("TTS model is not loaded. Please try again.")
+            # Apply custom frames setting if enabled
+            processed_frames = int(frames_after_eos) if enable_custom_frames else None
+            generated_audio = self.loaded_model.generate_audio(
+                model_state=voice_state,
+                text_to_generate=text_content,
+                frames_after_eos=processed_frames,
+                copy_state=True
+            )
+        force_garbage_collection()
+        return generated_audio
     def save_audio_to_file(self, audio_tensor):
         """
         Returns:
             str: Path to the saved temporary WAV file
         """
+        with self.model_lock:
+            if self.loaded_model is None:
+                raise RuntimeError("TTS model is not loaded. Cannot save audio.")
+            audio_sample_rate = self.loaded_model.sample_rate
         # Convert tensor to numpy array for scipy
         audio_numpy_data = audio_tensor.numpy()
         # Create temporary file and write audio data
         output_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
         with temporary_files_lock:
             temporary_files_registry[output_file.name] = time.time()
+        trigger_background_cleanup_check()
         return output_file.name
     global is_currently_generating, stop_generation_requested
     # Run cleanup before starting new generation
+    if has_temporary_files_pending_cleanup():
+        cleanup_expired_temporary_files()
+    perform_memory_cleanup()
     # Validate text input
     is_valid, validation_result = validate_text_input(text_input)
     except gr.Error:
         raise
+    except RuntimeError as runtime_error:
+        raise gr.Error(str(runtime_error))
     except Exception as generation_error:
         raise gr.Error(f"Speech generation failed: {str(generation_error)}")
         if generated_audio_tensor is not None:
             del generated_audio_tensor
+            generated_audio_tensor = None
         if cloned_voice_state_tensor is not None:
             del cloned_voice_state_tensor
+            cloned_voice_state_tensor = None
+        memory_cleanup()
+        trigger_background_cleanup_check()
 # =============================================================================