MoTTS

Running on Zero

App Files Files Community

Mo2294 committed on Dec 16, 2025

Commit

9d5ea3d

verified ·

1 Parent(s): 24b3a19

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -100

app.py CHANGED Viewed

@@ -20,6 +20,7 @@ import threading
 import time
 from pathlib import Path
 import tempfile
 # Set environment variables for HF Spaces
 os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
@@ -40,6 +41,24 @@ MAX_COMBINED_DURATION = 30 * 60  # 30 minutes in seconds
 PAUSE_DURATION = 3.0  # 3 seconds pause between audios
 def download_models():
     """Download models if they don't exist"""
     checkpoints_dir = "./checkpoints"
@@ -150,16 +169,15 @@ def create_combined_audios(audio_files_info):
             # 1.5 Sekunden Intro-Stille vor der ersten Audio
             silence_intro = np.zeros(int(sr * 1.5), dtype=np.int16)
             audio_out.append(silence_intro)
             for i, fp in enumerate(current_files):
-                data, _ = sf.read(fp, dtype='int16')
                 audio_out.append(data)
                 # Zwischen Affirmationen 3 Sekunden Pause
                 if i < len(current_files) - 1:
                     audio_out.append(silence_3s)
             final_audio = np.concatenate(audio_out)
             sf.write(combined_name, final_audio, sr, subtype="PCM_16")
@@ -194,16 +212,15 @@ def create_combined_audios(audio_files_info):
         # 1.5 Sekunden Intro-Stille vor der ersten Audio
         silence_intro = np.zeros(int(sr * 1.5), dtype=np.int16)
         audio_out.append(silence_intro)
         for i, fp in enumerate(current_files):
-            data, _ = sf.read(fp, dtype='int16')
             audio_out.append(data)
             # Zwischen Affirmationen 3 Sekunden Pause
             if i < len(current_files) - 1:
                 audio_out.append(silence_3s)
         final_audio = np.concatenate(audio_out)
         sf.write(combined_name, final_audio, sr, subtype="PCM_16")
@@ -308,37 +325,48 @@ def auto_process_dataset():
                     current_status = f"No sentences found in {txt_name}"
                     continue
-                current_status = (
-                    f"Found {len(sentences)} sentences in {txt_name}"
-                )
                 print(f"Processing sentences from {txt_name}:")
                 temp_files = []
-                audio_files_info = []  # Store (filepath, duration) tuples
                 commit_operations = []
                 # Process each sentence
                 for idx, sentence in enumerate(sentences):
                     if not auto_process_running:
                         break
                     current_status = (
-                        f"Processing {txt_name}: sentence "
-                        f"{idx + 1}/{len(sentences)}"
                     )
                     try:
-                        if not sentence:  # Skip empty sentences
                             continue
                         # Add a period at the end if missing (helps with TTS prosody)
-                        if sentence[-1] not in ".!?":
-                            sentence = sentence + "."
-                        print(f"  Sentence {idx+1}: '{sentence}'")
                         # Generate audio using IndexTTS2
-                        output_filename = f"temp_{txt_name}_{idx+1:03d}.wav"
                         # Capture stdout to get audio duration
                         import io
@@ -348,9 +376,9 @@ def auto_process_dataset():
                         with redirect_stdout(buf):
                             tts_model.infer(
                                 spk_audio_prompt=reference_voice_path,
-                                text=sentence,
                                 output_path=output_filename,
-                                verbose=True,  # Enable verbose to get duration
                             )
                         # Parse duration from output
@@ -363,21 +391,16 @@ def auto_process_dataset():
                                 break
                         if duration is None:
-                            # Fallback: read the file to get duration
                             audio_data, sr = sf.read(output_filename)
                             duration = len(audio_data) / sr
                         print(f"    Generated audio: {duration:.2f} seconds")
-                        # Store file info for combined audio
                         audio_files_info.append((output_filename, duration))
                         temp_files.append(output_filename)
-                        # Prepare upload operation for individual file
-                        output_path = (
-                            f"Affirmations/{txt_name}/"
-                            f"{txt_name}_{idx+1:03d}.wav"
-                        )
                         commit_operations.append(
                             CommitOperationAdd(
                                 path_in_repo=output_path,
@@ -386,87 +409,35 @@ def auto_process_dataset():
                         )
                     except Exception as e:
-                        current_status = (
-                            f"Error generating audio for sentence {idx+1}: {e}"
-                        )
                         print(f"Generation error: {e}")
                         continue
-                # Create combined audio file(s)
-                if audio_files_info and auto_process_running:
-                    current_status = (
-                        f"Creating combined audio(s) for {txt_name}..."
-                    )
-                    combined_files = create_combined_audios(audio_files_info)
-                    # Add combined files to upload operations
-                    for i, (combined_file, duration) in enumerate(
-                        combined_files
-                    ):
-                        if len(combined_files) == 1:
-                            combined_path = (
-                                f"Affirmations/{txt_name}/"
-                                f"{txt_name}_combined.wav"
-                            )
-                        else:
-                            combined_path = (
-                                f"Affirmations/{txt_name}/"
-                                f"{txt_name}_combined_{i+1:03d}.wav"
-                            )
-                        commit_operations.append(
-                            CommitOperationAdd(
-                                path_in_repo=combined_path,
-                                path_or_fileobj=combined_file,
-                            )
-                        )
-                        temp_files.append(combined_file)
-                        duration_min = int(duration // 60)
-                        duration_sec = int(duration % 60)
-                        print(
-                            f"  Combined file {i+1}: "
-                            f"{duration_min}:{duration_sec:02d}"
-                        )
                 # Upload all generated files
                 if commit_operations and auto_process_running:
-                    total_individual = len(audio_files_info)
-                    total_combined = (
-                        len(combined_files) if audio_files_info else 0
-                    )
-                    current_status = (
-                        f"Uploading {total_individual} individual + "
-                        f"{total_combined} combined files for {txt_name}..."
-                    )
                     try:
                         api.create_commit(
                             repo_id=output_dataset_id,
                             repo_type="dataset",
                             operations=commit_operations,
-                            commit_message=(
-                                f"Add audio files for {txt_name} - "
-                                f"{total_individual} individual + "
-                                f"{total_combined} combined"
-                            ),
                             token=token,
                         )
-                        current_status = (
-                            f"Successfully uploaded files for {txt_name}"
-                        )
                         # Move TXT file to /done folder
-                        current_status = (
-                            f"Moving {txt_name}.txt to /done folder..."
-                        )
-                        # Read file content
                         with open(txt_path, "rb") as f:
                             file_content = f.read()
-                        # Create operations to move file
                         move_operations = [
                             CommitOperationAdd(
                                 path_in_repo=f"done/{txt_file}",
@@ -479,22 +450,14 @@ def auto_process_dataset():
                             repo_id=input_dataset_id,
                             repo_type="dataset",
                             operations=move_operations,
-                            commit_message=(
-                                f"Move {txt_name}.txt to /done after processing"
-                            ),
                             token=token,
                         )
-                        current_status = (
-                            f"✅ Completed {txt_name}: "
-                            f"{total_individual} individual + "
-                            f"{total_combined} combined audio files"
-                        )
                     except Exception as e:
-                        current_status = (
-                            f"Upload/Move error for {txt_name}: {e}"
-                        )
                         print(f"Error: {e}")
                 # Cleanup temporary files
@@ -505,7 +468,7 @@ def auto_process_dataset():
                     except Exception:
                         pass
-                time.sleep(2)  # Small delay between files
             except Exception as e:
                 current_status = f"Error processing {txt_name}: {e}"

 import time
 from pathlib import Path
 import tempfile
+import re
 # Set environment variables for HF Spaces
 os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
 PAUSE_DURATION = 3.0  # 3 seconds pause between audios
def sanitize_filename(text: str, max_len: int = 120) -> str:
    """Build a filesystem-safe base name from affirmation text.

    The text is trimmed and case-folded, accented letters are reduced to
    their ASCII base letter, each run of whitespace becomes a single
    underscore, and every character outside ``[a-z0-9_]`` is dropped.
    Runs of underscores are collapsed and leading/trailing underscores are
    removed, both before and after the length limit is applied.

    Args:
        text: Raw affirmation sentence.
        max_len: Maximum number of characters to keep in the result.

    Returns:
        The sanitized name, or a prefix of ``"affirmation"`` when no usable
        character remains in ``text``.
    """
    # Local import: the visible file header imports ``re`` but not
    # ``unicodedata``.
    import unicodedata

    # casefold() behaves like lower() for ASCII and additionally expands
    # letters such as "ß" to "ss" instead of losing them in the filter below.
    folded = unicodedata.normalize("NFKD", text.strip().casefold())
    # After NFKD the accents are separate combining marks; dropping them
    # keeps the base letter ("ü" -> "u") rather than deleting the character.
    ascii_like = "".join(ch for ch in folded if not unicodedata.combining(ch))

    # Any whitespace (not only the plain space) separates words.
    name = re.sub(r"\s+", "_", ascii_like)
    name = re.sub(r"[^a-z0-9_]", "", name)
    name = re.sub(r"_+", "_", name).strip("_")
    if not name:
        name = "affirmation"
    # Cutting at max_len can land right after a word separator; strip it so
    # the result never ends in an underscore.
    return name[:max_len].rstrip("_")
 def download_models():
     """Download models if they don't exist"""
     checkpoints_dir = "./checkpoints"
             # 1.5 Sekunden Intro-Stille vor der ersten Audio
             silence_intro = np.zeros(int(sr * 1.5), dtype=np.int16)
             audio_out.append(silence_intro)
             for i, fp in enumerate(current_files):
+                data, _ = sf.read(fp, dtype="int16")
                 audio_out.append(data)
                 # Zwischen Affirmationen 3 Sekunden Pause
                 if i < len(current_files) - 1:
                     audio_out.append(silence_3s)
             final_audio = np.concatenate(audio_out)
             sf.write(combined_name, final_audio, sr, subtype="PCM_16")
         # 1.5 Sekunden Intro-Stille vor der ersten Audio
         silence_intro = np.zeros(int(sr * 1.5), dtype=np.int16)
         audio_out.append(silence_intro)
         for i, fp in enumerate(current_files):
+            data, _ = sf.read(fp, dtype="int16")
             audio_out.append(data)
             # Zwischen Affirmationen 3 Sekunden Pause
             if i < len(current_files) - 1:
                 audio_out.append(silence_3s)
         final_audio = np.concatenate(audio_out)
         sf.write(combined_name, final_audio, sr, subtype="PCM_16")
                     current_status = f"No sentences found in {txt_name}"
                     continue
+                current_status = f"Found {len(sentences)} sentences in {txt_name}"
                 print(f"Processing sentences from {txt_name}:")
                 temp_files = []
+                audio_files_info = []  # still used for durations/logging, not for combining
                 commit_operations = []
+                # Track used filenames to avoid duplicates within same TXT
+                used_names = set()
                 # Process each sentence
                 for idx, sentence in enumerate(sentences):
                     if not auto_process_running:
                         break
                     current_status = (
+                        f"Processing {txt_name}: sentence {idx + 1}/{len(sentences)}"
                     )
                     try:
+                        if not sentence:
                             continue
+                        # Filename should be the affirmation text (before adding punctuation)
+                        base_name = sanitize_filename(sentence)
+                        if base_name in used_names:
+                            # avoid overwriting if identical sentence appears multiple times
+                            suffix = 2
+                            while f"{base_name}_{suffix}" in used_names:
+                                suffix += 1
+                            base_name = f"{base_name}_{suffix}"
+                        used_names.add(base_name)
                         # Add a period at the end if missing (helps with TTS prosody)
+                        tts_sentence = sentence
+                        if tts_sentence[-1] not in ".!?":
+                            tts_sentence = tts_sentence + "."
+                        print(f"  Sentence {idx+1}: '{tts_sentence}'")
                         # Generate audio using IndexTTS2
+                        output_filename = f"temp_{base_name}.wav"
                         # Capture stdout to get audio duration
                         import io
                         with redirect_stdout(buf):
                             tts_model.infer(
                                 spk_audio_prompt=reference_voice_path,
+                                text=tts_sentence,
                                 output_path=output_filename,
+                                verbose=True,
                             )
                         # Parse duration from output
                                 break
                         if duration is None:
                             audio_data, sr = sf.read(output_filename)
                             duration = len(audio_data) / sr
                         print(f"    Generated audio: {duration:.2f} seconds")
                         audio_files_info.append((output_filename, duration))
                         temp_files.append(output_filename)
+                        # Upload path: use affirmation name, no numbering
+                        output_path = f"Affirmations/{txt_name}/{base_name}.wav"
                         commit_operations.append(
                             CommitOperationAdd(
                                 path_in_repo=output_path,
                         )
                     except Exception as e:
+                        current_status = f"Error generating audio for sentence {idx+1}: {e}"
                         print(f"Generation error: {e}")
                         continue
+                # ✅ NO MORE COMBINED AUDIO CREATION HERE
+                # (combined generation removed/disabled as requested)
                 # Upload all generated files
                 if commit_operations and auto_process_running:
+                    total_individual = len(commit_operations)
+                    current_status = f"Uploading {total_individual} audio files for {txt_name}..."
                     try:
                         api.create_commit(
                             repo_id=output_dataset_id,
                             repo_type="dataset",
                             operations=commit_operations,
+                            commit_message=f"Add {total_individual} audio files for {txt_name}",
                             token=token,
                         )
+                        current_status = f"Successfully uploaded files for {txt_name}"
                         # Move TXT file to /done folder
+                        current_status = f"Moving {txt_name}.txt to /done folder..."
                         with open(txt_path, "rb") as f:
                             file_content = f.read()
                         move_operations = [
                             CommitOperationAdd(
                                 path_in_repo=f"done/{txt_file}",
                             repo_id=input_dataset_id,
                             repo_type="dataset",
                             operations=move_operations,
+                            commit_message=f"Move {txt_name}.txt to /done after processing",
                             token=token,
                         )
+                        current_status = f"✅ Completed {txt_name}: {total_individual} audio files"
                     except Exception as e:
+                        current_status = f"Upload/Move error for {txt_name}: {e}"
                         print(f"Error: {e}")
                 # Cleanup temporary files
                     except Exception:
                         pass
+                time.sleep(2)
             except Exception as e:
                 current_status = f"Error processing {txt_name}: {e}"