Spaces:

Aid3445
/

Good.KTTS

Paused

App Files Files Community

Aid3445 committed on Sep 8

Commit

acbd624

verified ·

1 Parent(s): 22c49fc

Create app.py

Browse files

Files changed (1) hide show

app.py +330 -0

app.py ADDED Viewed

	@@ -0,0 +1,330 @@

+import gradio as gr
+import os
+import tempfile
+import soundfile as sf
+from kittentts import KittenTTS
+import numpy as np
+import re
+import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
+import gc
+# Fix for OpenMP duplicate library error: tell the OpenMP runtime to tolerate
+# more than one copy of itself being loaded into the process.
+# NOTE(review): this is assigned after the third-party imports above; if any of
+# them initializes OpenMP at import time the variable may be read too late —
+# confirm, and move the assignment ahead of those imports if so.
+os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
+class KittenTTSGradio:
+    def __init__(self):
+        """Initialize the KittenTTS model and settings"""
+        self.model = None
+        self.available_voices = [
+            'expr-voice-2-m', 'expr-voice-2-f', 'expr-voice-3-m', 'expr-voice-3-f',
+            'expr-voice-4-m', 'expr-voice-4-f', 'expr-voice-5-m', 'expr-voice-5-f'
+        ]
+        self.max_workers = max(1, os.cpu_count() - 1) if os.cpu_count() else 2
+        self.load_model()
+    def load_model(self):
+        """Load the TTS model"""
+        try:
+            self.model = KittenTTS("KittenML/kitten-tts-mini-0.1")
+            print("Model loaded successfully")
+        except Exception as e:
+            print(f"Error loading model: {e}")
+            raise e
+    def split_into_sentences(self, text):
+        """Split text into sentences"""
+        # Clean the text
+        text = re.sub(r'\s+', ' ', text)
+        text = text.strip()
+        # Split by common sentence terminators
+        sentences = re.split(r'(?<=[.!?])\s+', text)
+        # Process each sentence
+        processed_sentences = []
+        for sentence in sentences:
+            sentence = sentence.strip()
+            if sentence:
+                # Ensure proper punctuation
+                if not sentence.endswith(('.', '!', '?')):
+                    sentence += '.'
+                processed_sentences.append(sentence)
+        return processed_sentences
+    def clean_text_for_model(self, text):
+        """Clean text for the TTS model"""
+        if not text:
+            return "Hello."
+        # Remove problematic characters
+        text = re.sub(r'[^\w\s\.\,\!\?\;\:\-\'\"]', '', text)
+        # Normalize whitespace
+        text = re.sub(r'\s+', ' ', text)
+        text = text.strip()
+        # Ensure minimum length
+        if len(text) < 5:
+            text = "Hello."
+        return text
+    def safe_generate_audio(self, text, voice, speed):
+        """Generate audio with fallback strategies"""
+        # Try original text
+        try:
+            audio = self.model.generate(text, voice=voice, speed=speed)
+            return audio
+        except Exception as e:
+            print(f"Original attempt failed: {e}")
+        # Try cleaned text
+        try:
+            cleaned_text = self.clean_text_for_model(text)
+            audio = self.model.generate(cleaned_text, voice=voice, speed=speed)
+            return audio
+        except Exception as e:
+            print(f"Cleaned attempt failed: {e}")
+        # Try basic fallback
+        try:
+            words = text.split()[:5]
+            basic_text = ' '.join(words)
+            if not basic_text.endswith(('.', '!', '?')):
+                basic_text += '.'
+            audio = self.model.generate(basic_text or "Hello.", voice=voice, speed=speed)
+            return audio
+        except Exception as e:
+            print(f"Basic attempt failed: {e}")
+            raise Exception("All audio generation attempts failed")
+    def process_single_sentence(self, sentence, voice, speed):
+        """Process a single sentence"""
+        cleaned_sentence = self.clean_text_for_model(sentence)
+        audio = self.safe_generate_audio(cleaned_sentence, voice=voice, speed=speed)
+        return audio
+    def convert_text_to_speech(self, text, voice, speed, use_multithreading, progress=gr.Progress()):
+        """Main conversion function for Gradio"""
+        if not self.model:
+            raise gr.Error("Model not loaded. Please refresh the page.")
+        if not text or not text.strip():
+            raise gr.Error("Please enter some text to convert.")
+        try:
+            # Split into sentences
+            sentences = self.split_into_sentences(text)
+            if not sentences:
+                raise gr.Error("No valid sentences found in the text.")
+            total_sentences = len(sentences)
+            progress(0, desc=f"Processing {total_sentences} sentences...")
+            # Process sentences
+            audio_chunks = []
+            if use_multithreading and total_sentences > 1:
+                # Multithreaded processing
+                with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+                    # Submit all sentences
+                    futures = {
+                        executor.submit(self.process_single_sentence, sentence, voice, speed): i
+                        for i, sentence in enumerate(sentences)
+                    }
+                    # Collect results in order
+                    results = {}
+                    completed = 0
+                    for future in as_completed(futures):
+                        try:
+                            idx = futures[future]
+                            audio = future.result()
+                            results[idx] = audio
+                            completed += 1
+                            progress(completed / total_sentences,
+                                   desc=f"Processed {completed}/{total_sentences} sentences")
+                        except Exception as e:
+                            print(f"Error processing sentence: {e}")
+                            continue
+                    # Sort by index
+                    for i in sorted(results.keys()):
+                        audio_chunks.append(results[i])
+            else:
+                # Sequential processing
+                for i, sentence in enumerate(sentences):
+                    try:
+                        audio = self.process_single_sentence(sentence, voice, speed)
+                        audio_chunks.append(audio)
+                        progress((i + 1) / total_sentences,
+                               desc=f"Processed {i + 1}/{total_sentences} sentences")
+                    except Exception as e:
+                        print(f"Error processing sentence: {e}")
+                        continue
+            if not audio_chunks:
+                raise gr.Error("Failed to generate any audio.")
+            # Concatenate audio chunks
+            progress(0.9, desc="Concatenating audio...")
+            if len(audio_chunks) == 1:
+                final_audio = audio_chunks[0]
+            else:
+                final_audio = np.concatenate(audio_chunks)
+            # Create temporary file for output
+            output_file = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
+            sf.write(output_file.name, final_audio, 24000)
+            output_file.close()
+            progress(1.0, desc="Complete!")
+            # Clean up memory
+            gc.collect()
+            processing_method = "multithreading" if use_multithreading else "sequential"
+            status_message = f"✅ Successfully converted {total_sentences} sentences using {processing_method} processing!"
+            return output_file.name, status_message
+        except Exception as e:
+            raise gr.Error(f"Conversion failed: {str(e)}")
+# Initialize the app once at import time. The constructor calls load_model(),
+# so a model-loading failure propagates from this line.
+app = KittenTTSGradio()
+# Create Gradio interface
+def create_interface():
+    with gr.Blocks(title="KittenTTS - Text to Speech") as demo:
+        gr.Markdown("""
+        # 🎙️ KittenTTS Text-to-Speech Converter
+        Convert text to natural-sounding speech using KittenTTS. This app processes text sentence by sentence
+        for better quality and supports multithreading for faster processing.
+        """)
+        with gr.Row():
+            with gr.Column(scale=2):
+                text_input = gr.Textbox(
+                    label="Text to Convert",
+                    placeholder="Enter your text here or upload a file...",
+                    lines=10,
+                    max_lines=20
+                )
+                with gr.Row():
+                    file_upload = gr.File(
+                        label="Or Upload Text File",
+                        file_types=[".txt"],
+                        type="filepath"
+                    )
+                # File upload handler
+                def load_file(file_path):
+                    if file_path:
+                        try:
+                            with open(file_path, 'r', encoding='utf-8') as f:
+                                content = f.read()
+                                # Limit display for very large files
+                                if len(content) > 50000:
+                                    display_text = content[:50000] + "\n\n... (truncated for display)"
+                                else:
+                                    display_text = content
+                                return display_text
+                        except Exception as e:
+                            return f"Error loading file: {str(e)}"
+                    return ""
+                file_upload.change(
+                    fn=load_file,
+                    inputs=[file_upload],
+                    outputs=[text_input]
+                )
+            with gr.Column(scale=1):
+                voice_dropdown = gr.Dropdown(
+                    choices=app.available_voices,
+                    value=app.available_voices[0],
+                    label="Voice Selection",
+                    info="Choose the voice for speech synthesis"
+                )
+                speed_slider = gr.Slider(
+                    minimum=0.5,
+                    maximum=2.0,
+                    value=1.0,
+                    step=0.1,
+                    label="Speech Speed",
+                    info="Adjust the speed of speech (1.0 = normal)"
+                )
+                multithread_checkbox = gr.Checkbox(
+                    value=True,
+                    label=f"Enable Multithreading ({app.max_workers} workers)",
+                    info="Process multiple sentences in parallel for faster conversion"
+                )
+                convert_btn = gr.Button(
+                    "🎤 Convert to Speech",
+                    variant="primary",
+                    size="lg"
+                )
+        with gr.Row():
+            audio_output = gr.Audio(
+                label="Generated Audio",
+                type="filepath",
+                autoplay=False
+            )
+        with gr.Row():
+            status_output = gr.Markdown(
+                value="Ready to convert text to speech.",
+                label="Status"
+            )
+        # Examples
+        gr.Examples(
+            examples=[
+                ["Hello! This is a test of the KittenTTS system. It can convert text to natural sounding speech."],
+                ["The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet."],
+                ["Welcome to our presentation. Today we'll discuss artificial intelligence. Let's begin with the basics."]
+            ],
+            inputs=text_input,
+            label="Example Texts"
+        )
+        # Connect the conversion function
+        convert_btn.click(
+            fn=app.convert_text_to_speech,
+            inputs=[text_input, voice_dropdown, speed_slider, multithread_checkbox],
+            outputs=[audio_output, status_output]
+        )
+        gr.Markdown("""
+        ---
+        ### 📝 Notes:
+        - The app processes text sentence by sentence for better quality
+        - Longer texts will take more time to process
+        - Enable multithreading for faster processing of long texts
+        - Maximum recommended text length: ~5000 words for optimal performance
+        """)
+    return demo
+# Create and launch the interface
+if __name__ == "__main__":
+    demo = create_interface()
+    demo.queue(max_size=5)
+    demo.launch(
+        share=False,
+        show_error=True,
+        server_name="0.0.0.0",
+        server_port=7860
+    )