gds

Sleeping

App Files Files Community

hivecorp committed on May 24, 2025

Commit

9a83649

verified ·

1 Parent(s): bcbb7e7

Update app.py

Browse files

Files changed (1) hide show

app.py +411 -179

app.py CHANGED Viewed

@@ -12,9 +12,6 @@ from typing import List, Tuple, Optional, Dict, Any
 import math
 from dataclasses import dataclass
-# No changes to these classes and helper functions
-# (TimingManager, Segment, TextProcessor, TTSError, etc.)
-# ...
 class TimingManager:
     def __init__(self):
         self.current_time = 0
@@ -44,115 +41,179 @@ class Segment:
     end_time: int = 0
     duration: int = 0
     audio: Optional[AudioSegment] = None
-    lines: List[str] = None
 class TextProcessor:
     def __init__(self, words_per_line: int, lines_per_segment: int):
         self.words_per_line = words_per_line
         self.lines_per_segment = lines_per_segment
         self.min_segment_words = 3
-        self.max_segment_words = words_per_line * lines_per_segment * 1.5
         self.punctuation_weights = {
-            '.': 1.0, '!': 1.0, '?': 1.0, ';': 0.8, ':': 0.7,
-            ',': 0.5, '-': 0.3, '(': 0.2, ')': 0.2
         }
     def analyze_sentence_complexity(self, text: str) -> float:
         words = text.split()
-        if not words: return 1.0
         complexity = 1.0
         if len(words) > self.words_per_line * 2:
             complexity *= 1.2
         punct_count = sum(text.count(p) for p in self.punctuation_weights.keys())
         complexity *= (1 + (punct_count / len(words)) * 0.5)
         return complexity
     def find_natural_breaks(self, text: str) -> List[Tuple[int, float]]:
         breaks = []
         words = text.split()
         for i, word in enumerate(words):
             weight = 0
             for punct, punct_weight in self.punctuation_weights.items():
                 if word.endswith(punct):
                     weight = max(weight, punct_weight)
             phrase_starters = {'however', 'therefore', 'moreover', 'furthermore', 'meanwhile', 'although', 'because'}
             if i < len(words) - 1 and words[i+1].lower() in phrase_starters:
                 weight = max(weight, 0.6)
             if i > self.min_segment_words:
                 conjunctions = {'and', 'but', 'or', 'nor', 'for', 'yet', 'so'}
                 if word.lower() in conjunctions:
                     weight = max(weight, 0.4)
             if weight > 0:
                 breaks.append((i, weight))
         return breaks
     def split_into_segments(self, text: str) -> List[Segment]:
         text = re.sub(r'\s+', ' ', text.strip())
         text = re.sub(r'([.!?,;:])\s*', r'\1 ', text)
         text = re.sub(r'\s+([.!?,;:])', r'\1', text)
         segments = []
         words = text.split()
         i = 0
         while i < len(words):
-            chunk_end = i + int(self.max_segment_words)
-            chunk_text = ' '.join(words[i:chunk_end])
-            complexity = self.analyze_sentence_complexity(chunk_text)
-            breaks = self.find_natural_breaks(chunk_text)
-            best_break = -1
-            best_weight = -1
-            ideal_length = self.words_per_line * self.lines_per_segment
             for break_idx, weight in breaks:
-                distance_penalty = 1 - (abs(break_idx - ideal_length) / ideal_length) * 0.5
-                score = weight * distance_penalty
-                if score > best_weight:
-                    best_break = break_idx
-                    best_weight = score
-            if best_break == -1:
-                best_break = min(ideal_length, len(words) - 1 - i)
-            segment_words = words[i : i + best_break + 1]
             segment_text = ' '.join(segment_words)
             lines = self.split_into_lines(segment_text)
             final_segment_text = '\n'.join(lines)
-            segments.append(Segment(id=len(segments) + 1, text=final_segment_text))
             i += best_break + 1
         return segments
     def split_into_lines(self, text: str) -> List[str]:
         words = text.split()
         lines = []
         current_line = []
         word_count = 0
         for word in words:
             current_line.append(word)
             word_count += 1
-            is_break = (word_count >= self.words_per_line or
-                        any(word.endswith(p) for p in '.!?') or
-                        (word_count >= self.words_per_line * 0.7 and
-                         any(word.endswith(p) for p in ',;:')))
-            if is_break and len(words) > word_count:
                 lines.append(' '.join(current_line))
                 current_line = []
                 word_count = 0
         if current_line:
             lines.append(' '.join(current_line))
         return lines
 class TTSError(Exception):
     pass
 async def process_segment_with_timing(segment: Segment, voice: str, rate: str, pitch: str) -> Segment:
-    temp_dir = tempfile.gettempdir()
-    audio_file = os.path.join(temp_dir, f"temp_segment_{segment.id}_{uuid.uuid4()}.wav")
     try:
         segment_text = ' '.join(segment.text.split('\n'))
         tts = edge_tts.Communicate(segment_text, voice, rate=rate, pitch=pitch)
-        await tts.save(audio_file)
         if not os.path.exists(audio_file) or os.path.getsize(audio_file) == 0:
             raise TTSError(f"Generated audio file is empty or missing for segment {segment.id}")
-        segment.audio = AudioSegment.from_file(audio_file)
-        silence = AudioSegment.silent(duration=30)
-        segment.audio = silence + segment.audio + silence
-        segment.duration = len(segment.audio)
         return segment
     except Exception as e:
         if not isinstance(e, TTSError):
@@ -163,210 +224,328 @@ async def process_segment_with_timing(segment: Segment, voice: str, rate: str, p
             try:
                 os.remove(audio_file)
             except Exception:
-                pass
 class FileManager:
     def __init__(self):
         self.temp_dir = tempfile.mkdtemp(prefix="tts_app_")
         self.output_files = []
-        self.max_files_to_keep = 5
     def create_output_paths(self):
         unique_id = str(uuid.uuid4())
         audio_path = os.path.join(self.temp_dir, f"final_audio_{unique_id}.mp3")
         srt_path = os.path.join(self.temp_dir, f"final_subtitles_{unique_id}.srt")
         self.output_files.append((srt_path, audio_path))
         self.cleanup_old_files()
         return srt_path, audio_path
     def cleanup_old_files(self):
         if len(self.output_files) > self.max_files_to_keep:
-            old_files_to_remove = self.output_files[:-self.max_files_to_keep]
-            for srt_path, audio_path in old_files_to_remove:
                 try:
-                    if os.path.exists(srt_path): os.remove(srt_path)
-                    if os.path.exists(audio_path): os.remove(audio_path)
                 except Exception:
-                    pass
             self.output_files = self.output_files[-self.max_files_to_keep:]
     def cleanup_all(self):
         for srt_path, audio_path in self.output_files:
             try:
-                if os.path.exists(srt_path): os.remove(srt_path)
-                if os.path.exists(audio_path): os.remove(audio_path)
             except Exception:
-                pass
         try:
-            if os.path.exists(self.temp_dir): os.rmdir(self.temp_dir)
         except Exception:
-            pass
 file_manager = FileManager()
 async def generate_accurate_srt(
-    text: str, voice: str, rate: str, pitch: str,
-    words_per_line: int, lines_per_segment: int,
-    progress_callback=None, parallel: bool = True, max_workers: int = 4
 ) -> Tuple[str, str]:
     processor = TextProcessor(words_per_line, lines_per_segment)
     segments = processor.split_into_segments(text)
     total_segments = len(segments)
     if progress_callback:
         progress_callback(0.1, "Text segmentation complete")
-    processed_segments = []
     if parallel and total_segments > 1:
-        semaphore = asyncio.Semaphore(max_workers)
         processed_count = 0
         async def process_with_semaphore(segment):
             async with semaphore:
                 nonlocal processed_count
-                result = await process_segment_with_timing(segment, voice, rate, pitch)
-                processed_count += 1
-                if progress_callback:
-                    progress = 0.1 + (0.8 * processed_count / total_segments)
-                    progress_callback(progress, f"Processed {processed_count}/{total_segments} segments")
-                return result
-        tasks = [process_with_semaphore(s) for s in segments]
-        results = await asyncio.gather(*tasks, return_exceptions=True)
-        for res in results:
-            if isinstance(res, Exception):
-                raise TTSError(f"A task failed during parallel processing: {res}")
-            processed_segments.append(res)
     else:
         for i, segment in enumerate(segments):
-            processed_segment = await process_segment_with_timing(segment, voice, rate, pitch)
-            processed_segments.append(processed_segment)
-            if progress_callback:
-                progress = 0.1 + (0.8 * (i + 1) / total_segments)
-                progress_callback(progress, f"Processed {i + 1}/{total_segments} segments")
     processed_segments.sort(key=lambda s: s.id)
     if progress_callback:
         progress_callback(0.9, "Finalizing audio and subtitles")
     current_time = 0
     final_audio = AudioSegment.empty()
     srt_content = ""
     for segment in processed_segments:
         segment.start_time = current_time
         segment.end_time = current_time + segment.duration
-        srt_content += f"{segment.id}\n{format_time_ms(segment.start_time)} --> {format_time_ms(segment.end_time)}\n{segment.text}\n\n"
         final_audio = final_audio.append(segment.audio, crossfade=0)
         current_time = segment.end_time
     srt_path, audio_path = file_manager.create_output_paths()
-    export_params = {'format': 'mp3', 'bitrate': '192k', 'parameters': ['-ar', '44100', '-ac', '2', '-qscale:a', '2']}
-    final_audio.export(audio_path, **export_params)
-    with open(srt_path, "w", encoding='utf-8') as f: f.write(srt_content)
     if progress_callback:
         progress_callback(1.0, "Complete!")
     return srt_path, audio_path
-### MODIFICATION START ###
-# This new function creates the HTML for the download buttons using the JavaScript strategy.
-def create_download_links_html(srt_path: str, audio_path: str) -> str:
-    """Generates an HTML string with JS-powered download links."""
-    if not srt_path or not audio_path:
-        return ""
-    srt_filename = os.path.basename(srt_path)
-    audio_filename = os.path.basename(audio_path)
-    # This JavaScript function handles the download without navigating the page.
-    js_download_logic = """
-    event.preventDefault();
-    fetch(this.href).then(resp => resp.blob()).then(blob => {
-        const url = window.URL.createObjectURL(blob);
-        const a = document.createElement('a');
-        a.style.display = 'none';
-        a.href = url;
-        a.download = this.getAttribute('download');
-        document.body.appendChild(a);
-        a.click();
-        window.URL.revokeObjectURL(url);
-        document.body.removeChild(a);
-    });
-    """
-    # Use the /file= relative path which Gradio provides for serving files.
-    srt_url = f"/file={srt_path}"
-    audio_url = f"/file={audio_path}"
-    # Combine both links into a single HTML string.
-    html = f"""
-    <div style="text-align: center; padding: 10px 0;">
-        <a href="{srt_url}" download="{srt_filename}" onclick="{js_download_logic}"
-           style="display: inline-block; padding: 8px 15px; background-color: #0b5ed7; color: white; text-decoration: none; border-radius: 5px; font-weight: 600; margin-right: 15px; cursor: pointer;">
-            📥 Download SRT
-        </a>
-        <a href="{audio_url}" download="{audio_filename}" onclick="{js_download_logic}"
-           style="display: inline-block; padding: 8px 15px; background-color: #0b5ed7; color: white; text-decoration: none; border-radius: 5px; font-weight: 600; cursor: pointer;">
-            📥 Download Audio
-        </a>
-    </div>
-    """
-    return html
-# This main processing function is now simplified.
 async def process_text_with_progress(
-    text, pitch, rate, voice, words_per_line,
-    lines_per_segment, parallel_processing,
     progress=gr.Progress()
 ):
-    """
-    Processes text, returns an audio path for the preview and an HTML string
-    that contains either the download links or an error message.
-    """
-    # On validation failure, return None for the audio preview and an error HTML.
     if not text or text.strip() == "":
-        return None, "<p style='color:red; text-align:center;'>Please enter some text to convert.</p>"
-    pitch_str = f"{pitch:+d}Hz"
-    rate_str = f"{rate:+d}%"
     try:
         progress(0, "Preparing text...")
         def update_progress(value, status):
             progress(value, status)
         srt_path, audio_path = await generate_accurate_srt(
-            text, voice_options[voice], rate_str, pitch_str,
-            words_per_line, lines_per_segment,
             progress_callback=update_progress,
             parallel=parallel_processing
         )
-        # Get the JS-powered download links HTML.
-        download_html = create_download_links_html(srt_path, audio_path)
-        # Return the audio path for the player and the HTML for the download/status area.
-        return audio_path, download_html
     except Exception as e:
-        # On processing error, return None for audio and an error HTML.
-        error_message = f"An error occurred: {str(e)}"
-        return None, f"<p style='color:red; text-align:center;'>{error_message}</p>"
-### MODIFICATION END ###
 voice_options = {
-    "Andrew Male": "en-US-AndrewNeural", "Jenny Female": "en-US-JennyNeural", "Guy Male": "en-US-GuyNeural",
-    "Ana Female": "en-US-AnaNeural", "Aria Female": "en-US-AriaNeural", "Brian Male": "en-US-BrianNeural",
-    "Christopher Male": "en-US-ChristopherNeural", "Eric Male": "en-US-EricNeural", "Michelle Male": "en-US-MichelleNeural",
-    "Roger Male": "en-US-RogerNeural", "Natasha Female": "en-AU-NatashaNeural", "William Male": "en-AU-WilliamNeural",
-    "Clara Female": "en-CA-ClaraNeural", "Liam Female ": "en-CA-LiamNeural", "Libby Female": "en-GB-LibbyNeural",
-    "Maisie": "en-GB-MaisieNeural", "Ryan": "en-GB-RyanNeural", "Sonia": "en-GB-SoniaNeural",
-    "Thomas": "en-GB-ThomasNeural", "Sam": "en-HK-SamNeural", "Yan": "en-HK-YanNeural",
-    "Connor": "en-IE-ConnorNeural", "Emily": "en-IE-EmilyNeural", "Neerja": "en-IN-NeerjaNeural",
-    "Prabhat": "en-IN-PrabhatNeural", "Asilia": "en-KE-AsiliaNeural", "Chilemba": "en-KE-ChilembaNeural",
-    "Abeo": "en-NG-AbeoNeural", "Ezinne": "en-NG-EzinneNeural", "Mitchell": "en-NZ-MitchellNeural",
-    "James": "en-PH-JamesNeural", "Rosa": "en-PH-RosaNeural", "Luna": "en-SG-LunaNeural",
-    "Wayne": "en-SG-WayneNeural", "Elimu": "en-TZ-ElimuNeural", "Imani": "en-TZ-ImaniNeural",
-    "Leah": "en-ZA-LeahNeural", "Luke": "en-ZA-LukeNeural"
 }
 import atexit
 atexit.register(file_manager.cleanup_all)
 with gr.Blocks(title="Advanced TTS with Configurable SRT Generation") as app:
     gr.Markdown("# Advanced TTS with Configurable SRT Generation")
     gr.Markdown("Generate perfectly synchronized audio and subtitles with natural speech patterns.")
@@ -374,45 +553,98 @@ with gr.Blocks(title="Advanced TTS with Configurable SRT Generation") as app:
     with gr.Row():
         with gr.Column(scale=3):
             text_input = gr.Textbox(label="Enter Text", lines=10, placeholder="Enter your text here...")
         with gr.Column(scale=2):
-            voice_dropdown = gr.Dropdown(label="Select Voice", choices=list(voice_options.keys()), value="Jenny Female")
-            pitch_slider = gr.Slider(label="Pitch Adjustment (Hz)", minimum=-10, maximum=10, value=0, step=1)
-            rate_slider = gr.Slider(label="Rate Adjustment (%)", minimum=-25, maximum=25, value=0, step=1)
     with gr.Row():
         with gr.Column():
-            words_per_line = gr.Slider(label="Words per Line", minimum=3, maximum=12, value=6, step=1, info="Words per subtitle line.")
         with gr.Column():
-            lines_per_segment = gr.Slider(label="Lines per Segment", minimum=1, maximum=4, value=2, step=1, info="Lines per subtitle block.")
         with gr.Column():
-            parallel_processing = gr.Checkbox(label="Enable Parallel Processing", value=True, info="Faster conversion for longer texts.")
-    submit_btn = gr.Button("Generate Audio & Subtitles", variant="primary")
-    ### MODIFICATION START ###
-    # The output area is simplified.
     with gr.Row():
-        with gr.Column(scale=2):
-            # This component is for the audio player preview.
-            audio_preview = gr.Audio(label="Preview Audio")
-        with gr.Column(scale=1):
-            # This single HTML component will hold EITHER the download links OR an error message.
-            status_and_download_output = gr.HTML(label="Status & Downloads")
-    # The .click() event is now simpler and more robust.
     submit_btn.click(
         fn=process_text_with_progress,
         inputs=[
-            text_input, pitch_slider, rate_slider, voice_dropdown,
-            words_per_line, lines_per_segment, parallel_processing
         ],
         outputs=[
-            audio_preview,
-            status_and_download_output
         ],
         api_name="generate"
     )
-    ### MODIFICATION END ###
 if __name__ == "__main__":
-    app.launch()

 import math
 from dataclasses import dataclass
 class TimingManager:
     def __init__(self):
         self.current_time = 0
     end_time: int = 0
     duration: int = 0
     audio: Optional[AudioSegment] = None
+    lines: List[str] = None  # Add lines field for display purposes only
 class TextProcessor:
     def __init__(self, words_per_line: int, lines_per_segment: int):
         self.words_per_line = words_per_line
         self.lines_per_segment = lines_per_segment
         self.min_segment_words = 3
+        self.max_segment_words = words_per_line * lines_per_segment * 1.5  # Allow 50% more for natural breaks
         self.punctuation_weights = {
+            '.': 1.0,  # Strong break
+            '!': 1.0,
+            '?': 1.0,
+            ';': 0.8,  # Medium-strong break
+            ':': 0.7,
+            ',': 0.5,  # Medium break
+            '-': 0.3,  # Weak break
+            '(': 0.2,
+            ')': 0.2
         }
     def analyze_sentence_complexity(self, text: str) -> float:
+        """Analyze sentence complexity to determine optimal segment length"""
         words = text.split()
         complexity = 1.0
+        # Adjust for sentence length
         if len(words) > self.words_per_line * 2:
             complexity *= 1.2
+        # Adjust for punctuation density
         punct_count = sum(text.count(p) for p in self.punctuation_weights.keys())
         complexity *= (1 + (punct_count / len(words)) * 0.5)
         return complexity
     def find_natural_breaks(self, text: str) -> List[Tuple[int, float]]:
+        """Find natural break points with their weights"""
         breaks = []
         words = text.split()
         for i, word in enumerate(words):
             weight = 0
+            # Check for punctuation
             for punct, punct_weight in self.punctuation_weights.items():
                 if word.endswith(punct):
                     weight = max(weight, punct_weight)
+            # Check for natural phrase boundaries
             phrase_starters = {'however', 'therefore', 'moreover', 'furthermore', 'meanwhile', 'although', 'because'}
             if i < len(words) - 1 and words[i+1].lower() in phrase_starters:
                 weight = max(weight, 0.6)
+            # Check for conjunctions at natural points
             if i > self.min_segment_words:
                 conjunctions = {'and', 'but', 'or', 'nor', 'for', 'yet', 'so'}
                 if word.lower() in conjunctions:
                     weight = max(weight, 0.4)
             if weight > 0:
                 breaks.append((i, weight))
         return breaks
     def split_into_segments(self, text: str) -> List[Segment]:
+        # Normalize text and add proper spacing around punctuation
         text = re.sub(r'\s+', ' ', text.strip())
         text = re.sub(r'([.!?,;:])\s*', r'\1 ', text)
         text = re.sub(r'\s+([.!?,;:])', r'\1', text)
+        # First, split into major segments by strong punctuation
         segments = []
+        current_segment = []
+        current_text = ""
         words = text.split()
         i = 0
         while i < len(words):
+            complexity = self.analyze_sentence_complexity(' '.join(words[i:i + self.words_per_line * 2]))
+            breaks = self.find_natural_breaks(' '.join(words[i:i + int(self.max_segment_words * complexity)]))
+            # Find best break point
+            best_break = None
+            best_weight = 0
             for break_idx, weight in breaks:
+                actual_idx = i + break_idx
+                if (actual_idx - i >= self.min_segment_words and
+                    actual_idx - i <= self.max_segment_words):
+                    if weight > best_weight:
+                        best_break = break_idx
+                        best_weight = weight
+            if best_break is None:
+                # If no good break found, use maximum length
+                best_break = min(self.words_per_line * self.lines_per_segment, len(words) - i)
+            # Create segment
+            segment_words = words[i:i + best_break + 1]
             segment_text = ' '.join(segment_words)
+            # Split segment into lines
             lines = self.split_into_lines(segment_text)
             final_segment_text = '\n'.join(lines)
+            segments.append(Segment(
+                id=len(segments) + 1,
+                text=final_segment_text
+            ))
             i += best_break + 1
         return segments
     def split_into_lines(self, text: str) -> List[str]:
+        """Split segment text into natural lines"""
         words = text.split()
         lines = []
         current_line = []
         word_count = 0
         for word in words:
             current_line.append(word)
             word_count += 1
+            # Check for natural line breaks
+            is_break = (
+                word_count >= self.words_per_line or
+                any(word.endswith(p) for p in '.!?') or
+                (word_count >= self.words_per_line * 0.7 and
+                 any(word.endswith(p) for p in ',;:'))
+            )
+            if is_break:
                 lines.append(' '.join(current_line))
                 current_line = []
                 word_count = 0
         if current_line:
             lines.append(' '.join(current_line))
         return lines
+# IMPROVEMENT 1: Enhanced Error Handling
 class TTSError(Exception):
+    """Custom exception for TTS processing errors"""
     pass
 async def process_segment_with_timing(segment: Segment, voice: str, rate: str, pitch: str) -> Segment:
+    """Process a complete segment as a single TTS unit with improved error handling"""
+    audio_file = os.path.join(tempfile.gettempdir(), f"temp_segment_{segment.id}_{uuid.uuid4()}.wav")
     try:
+        # Process the entire segment text as one unit, replacing newlines with spaces
         segment_text = ' '.join(segment.text.split('\n'))
         tts = edge_tts.Communicate(segment_text, voice, rate=rate, pitch=pitch)
+        try:
+            await tts.save(audio_file)
+        except Exception as e:
+            raise TTSError(f"Failed to generate audio for segment {segment.id}: {str(e)}")
         if not os.path.exists(audio_file) or os.path.getsize(audio_file) == 0:
             raise TTSError(f"Generated audio file is empty or missing for segment {segment.id}")
+        try:
+            segment.audio = AudioSegment.from_file(audio_file)
+            # Reduced silence to 30ms for more natural flow
+            silence = AudioSegment.silent(duration=30)
+            segment.audio = silence + segment.audio + silence
+            segment.duration = len(segment.audio)
+        except Exception as e:
+            raise TTSError(f"Failed to process audio file for segment {segment.id}: {str(e)}")
         return segment
     except Exception as e:
         if not isinstance(e, TTSError):
             try:
                 os.remove(audio_file)
             except Exception:
+                pass  # Ignore deletion errors
+# IMPROVEMENT 2: Better File Management with cleanup
 class FileManager:
+    """Manages temporary and output files with cleanup capabilities"""
     def __init__(self):
         self.temp_dir = tempfile.mkdtemp(prefix="tts_app_")
         self.output_files = []
+        self.max_files_to_keep = 5  # Keep only the 5 most recent output pairs
+    def get_temp_path(self, prefix):
+        """Get a path for a temporary file"""
+        return os.path.join(self.temp_dir, f"{prefix}_{uuid.uuid4()}")
     def create_output_paths(self):
+        """Create paths for output files"""
         unique_id = str(uuid.uuid4())
         audio_path = os.path.join(self.temp_dir, f"final_audio_{unique_id}.mp3")
         srt_path = os.path.join(self.temp_dir, f"final_subtitles_{unique_id}.srt")
         self.output_files.append((srt_path, audio_path))
         self.cleanup_old_files()
         return srt_path, audio_path
     def cleanup_old_files(self):
+        """Clean up old output files, keeping only the most recent ones"""
         if len(self.output_files) > self.max_files_to_keep:
+            old_files = self.output_files[:-self.max_files_to_keep]
+            for srt_path, audio_path in old_files:
                 try:
+                    if os.path.exists(srt_path):
+                        os.remove(srt_path)
+                    if os.path.exists(audio_path):
+                        os.remove(audio_path)
                 except Exception:
+                    pass  # Ignore deletion errors
+            # Update the list to only include files we're keeping
             self.output_files = self.output_files[-self.max_files_to_keep:]
     def cleanup_all(self):
+        """Clean up all managed files"""
         for srt_path, audio_path in self.output_files:
             try:
+                if os.path.exists(srt_path):
+                    os.remove(srt_path)
+                if os.path.exists(audio_path):
+                    os.remove(audio_path)
             except Exception:
+                pass  # Ignore deletion errors
         try:
+            os.rmdir(self.temp_dir)
         except Exception:
+            pass  # Ignore if directory isn't empty or can't be removed
+# Create global file manager
 file_manager = FileManager()
def create_download_link(audio_path, base_url="aman18811-wfr-01.hf.space"):
    """Return an HTML anchor that downloads *audio_path* from the Gradio file route.

    The click handler cancels the default navigation, fetches the file and
    saves it through a temporary object URL, so the page is not left.
    ``target="_blank"`` is kept on the anchor but the handler's
    ``preventDefault()`` means it is not used on a normal click.

    Args:
        audio_path: Server-side path of the file, or ``None``.
        base_url: Host name of the deployment serving ``/gradio_api/file=``.
            The default is the host that was previously hard-coded here;
            pass your own Space's host for other deployments.

    Returns:
        The HTML snippet, or ``None`` when *audio_path* is ``None``.
    """
    import html

    if audio_path is None:
        return None

    # Values go into double-quoted HTML attributes, so they are escaped; the
    # script reads the file name back from the ``download`` attribute instead
    # of embedding a second copy inside a JavaScript string literal.
    filename = html.escape(Path(audio_path).name, quote=True)
    file_url = html.escape(
        f"https://{base_url}/gradio_api/file={audio_path}", quote=True
    )

    return f"""
    <a href="{file_url}"
        download="{filename}"
        target="_blank"
        rel="noopener noreferrer"
        style="display: inline-block; padding: 10px 20px; background: linear-gradient(135deg, #4776E6, #8E54E9); color: white; text-decoration: none; border-radius: 8px; font-weight: 600; transition: all 0.3s ease;"
        onmouseover="this.style.transform='translateY(-2px)'; this.style.boxShadow='0 5px 15px rgba(71, 118, 230, 0.3)';"
        onmouseout="this.style.transform='translateY(0)'; this.style.boxShadow='none';"
        onclick="event.preventDefault(); fetch(this.href).then(resp => resp.blob()).then(blob => {{
            const url = window.URL.createObjectURL(blob);
            const a = document.createElement('a');
            a.style.display = 'none';
            a.href = url;
            a.download = this.getAttribute('download');
            document.body.appendChild(a);
            a.click();
            window.URL.revokeObjectURL(url);
            document.body.removeChild(a);
        }});">
        Download Audio File
    </a>
    """
+# IMPROVEMENT 3: Parallel Processing for Segments
 async def generate_accurate_srt(
+    text: str,
+    voice: str,
+    rate: str,
+    pitch: str,
+    words_per_line: int,
+    lines_per_segment: int,
+    progress_callback=None,
+    parallel: bool = True,
+    max_workers: int = 4
 ) -> Tuple[str, str]:
+    """Generate accurate SRT with parallel processing option"""
     processor = TextProcessor(words_per_line, lines_per_segment)
     segments = processor.split_into_segments(text)
     total_segments = len(segments)
+    processed_segments = []
+    # Update progress to show segmentation is complete
     if progress_callback:
         progress_callback(0.1, "Text segmentation complete")
     if parallel and total_segments > 1:
+        # Process segments in parallel
         processed_count = 0
+        segment_tasks = []
+        # Create a semaphore to limit concurrent tasks
+        semaphore = asyncio.Semaphore(max_workers)
         async def process_with_semaphore(segment):
             async with semaphore:
                 nonlocal processed_count
+                try:
+                    result = await process_segment_with_timing(segment, voice, rate, pitch)
+                    processed_count += 1
+                    if progress_callback:
+                        progress = 0.1 + (0.8 * processed_count / total_segments)
+                        progress_callback(progress, f"Processed {processed_count}/{total_segments} segments")
+                    return result
+                except Exception as e:
+                    # Handle errors in individual segments
+                    processed_count += 1
+                    if progress_callback:
+                        progress = 0.1 + (0.8 * processed_count / total_segments)
+                        progress_callback(progress, f"Error in segment {segment.id}: {str(e)}")
+                    raise
+        # Create tasks for all segments
+        for segment in segments:
+            segment_tasks.append(process_with_semaphore(segment))
+        # Run all tasks and collect results
+        try:
+            processed_segments = await asyncio.gather(*segment_tasks)
+        except Exception as e:
+            if progress_callback:
+                progress_callback(0.9, f"Error during parallel processing: {str(e)}")
+            raise TTSError(f"Failed during parallel processing: {str(e)}")
     else:
+        # Process segments sequentially (original method)
         for i, segment in enumerate(segments):
+            try:
+                processed_segment = await process_segment_with_timing(segment, voice, rate, pitch)
+                processed_segments.append(processed_segment)
+                if progress_callback:
+                    progress = 0.1 + (0.8 * (i + 1) / total_segments)
+                    progress_callback(progress, f"Processed {i + 1}/{total_segments} segments")
+            except Exception as e:
+                if progress_callback:
+                    progress_callback(0.9, f"Error processing segment {segment.id}: {str(e)}")
+                raise TTSError(f"Failed to process segment {segment.id}: {str(e)}")
+    # Sort segments by ID to ensure correct order
     processed_segments.sort(key=lambda s: s.id)
     if progress_callback:
         progress_callback(0.9, "Finalizing audio and subtitles")
+    # Now combine the segments in the correct order
     current_time = 0
     final_audio = AudioSegment.empty()
     srt_content = ""
     for segment in processed_segments:
+        # Calculate precise timing
         segment.start_time = current_time
         segment.end_time = current_time + segment.duration
+        # Add to SRT with precise timing
+        srt_content += (
+            f"{segment.id}\n"
+            f"{format_time_ms(segment.start_time)} --> {format_time_ms(segment.end_time)}\n"
+            f"{segment.text}\n\n"
+        )
+        # Add to final audio with precise positioning
         final_audio = final_audio.append(segment.audio, crossfade=0)
+        # Update timing with precise gap
         current_time = segment.end_time
+    # Export with high precision
     srt_path, audio_path = file_manager.create_output_paths()
+    try:
+        # Export with optimized quality settings and compression
+        export_params = {
+            'format': 'mp3',
+            'bitrate': '192k',  # Reduced from 320k but still high quality
+            'parameters': [
+                '-ar', '44100',  # Standard sample rate
+                '-ac', '2',      # Stereo
+                '-compression_level', '0',  # Best compression
+                '-qscale:a', '2'  # High quality VBR encoding
+            ]
+        }
+        final_audio.export(audio_path, **export_params)
+        with open(srt_path, "w", encoding='utf-8') as f:
+            f.write(srt_content)
+    except Exception as e:
+        if progress_callback:
+            progress_callback(1.0, f"Error exporting final files: {str(e)}")
+        raise TTSError(f"Failed to export final files: {str(e)}")
     if progress_callback:
         progress_callback(1.0, "Complete!")
     return srt_path, audio_path
+# IMPROVEMENT 4: Progress Reporting with proper error handling for older Gradio versions
 async def process_text_with_progress(
     text,
     pitch,
     rate,
     voice,
     words_per_line,
     lines_per_segment,
     parallel_processing,
     progress=gr.Progress()
 ):
     """Gradio handler: convert ``text`` to speech plus an SRT file, reporting progress.

     Args:
         text: Text to synthesize; empty or whitespace-only input is rejected.
         pitch: Pitch offset in Hz (int, or a float holding a whole number).
         rate: Speaking-rate offset in percent (int, or a whole-number float).
         voice: A key of ``voice_options`` (the label chosen in the dropdown).
         words_per_line: Forwarded unchanged to ``generate_accurate_srt``.
         lines_per_segment: Forwarded unchanged to ``generate_accurate_srt``.
         parallel_processing: Forwarded as ``parallel=`` to ``generate_accurate_srt``.
         progress: Progress tracker; called as ``progress(fraction, status)``.

     Returns:
         A 5-tuple ``(srt_path, audio_path, audio_path, has_error, message)``.
         On success the paths are set, ``has_error`` is ``False`` and
         ``message`` is ``""``. On any failure the three paths are ``None``,
         ``has_error`` is ``True`` and ``message`` describes the problem.
         Failures inside the ``try`` are reported this way rather than raised.
     """
     # Input validation
     if not text or text.strip() == "":
         return None, None, None, True, "Please enter some text to convert to speech."

     # The "d" format code raises ValueError for floats, and slider/API values
     # can arrive as e.g. 5.0, so coerce to int before formatting. Zero still
     # renders as "+0Hz" / "+0%", so no special case is needed.
     pitch_str = f"{int(round(pitch)):+d}Hz"
     rate_str = f"{int(round(rate)):+d}%"

     try:
         # Start progress tracking
         progress(0, "Preparing text...")

         # Kept as a plain function: generate_accurate_srt truth-tests its
         # callback (``if progress_callback:``) and a function is always truthy.
         # NOTE(review): confirm gr.Progress is safe to truth-test before
         # passing it directly instead.
         def update_progress(value, status):
             progress(value, status)

         srt_path, audio_path = await generate_accurate_srt(
             text,
             voice_options[voice],
             rate_str,
             pitch_str,
             words_per_line,
             lines_per_segment,
             progress_callback=update_progress,
             parallel=parallel_processing
         )
         # Success: same audio path twice (download + preview), no error.
         return srt_path, audio_path, audio_path, False, ""
     except TTSError as e:
         # Failure raised by the TTS pipeline itself.
         return None, None, None, True, f"TTS Error: {str(e)}"
     except Exception as e:
         # Anything else (e.g. an unknown voice label -> KeyError).
         return None, None, None, True, f"Unexpected error: {str(e)}"
+# Voice options dictionary
 # Maps the label shown in the voice dropdown to the voice identifier that is
 # passed to the TTS backend. Labels must not carry stray whitespace: they are
 # used verbatim as dropdown choices and as lookup keys.
 voice_options = {
     "Andrew Male": "en-US-AndrewNeural",
     "Jenny Female": "en-US-JennyNeural",
     "Guy Male": "en-US-GuyNeural",
     "Ana Female": "en-US-AnaNeural",
     "Aria Female": "en-US-AriaNeural",
     "Brian Male": "en-US-BrianNeural",
     "Christopher Male": "en-US-ChristopherNeural",
     "Eric Male": "en-US-EricNeural",
     "Michelle Female": "en-US-MichelleNeural",  # was mislabelled "Male"
     "Roger Male": "en-US-RogerNeural",
     "Natasha Female": "en-AU-NatashaNeural",
     "William Male": "en-AU-WilliamNeural",
     "Clara Female": "en-CA-ClaraNeural",
     "Liam Male": "en-CA-LiamNeural",  # was "Liam Female " (wrong gender, trailing space)
     "Libby Female": "en-GB-LibbyNeural",
     "Maisie": "en-GB-MaisieNeural",
     "Ryan": "en-GB-RyanNeural",
     "Sonia": "en-GB-SoniaNeural",
     "Thomas": "en-GB-ThomasNeural",
     "Sam": "en-HK-SamNeural",
     "Yan": "en-HK-YanNeural",
     "Connor": "en-IE-ConnorNeural",
     "Emily": "en-IE-EmilyNeural",
     "Neerja": "en-IN-NeerjaNeural",
     "Prabhat": "en-IN-PrabhatNeural",
     "Asilia": "en-KE-AsiliaNeural",
     "Chilemba": "en-KE-ChilembaNeural",
     "Abeo": "en-NG-AbeoNeural",
     "Ezinne": "en-NG-EzinneNeural",
     "Mitchell": "en-NZ-MitchellNeural",
     "James": "en-PH-JamesNeural",
     "Rosa": "en-PH-RosaNeural",
     "Luna": "en-SG-LunaNeural",
     "Wayne": "en-SG-WayneNeural",
     "Elimu": "en-TZ-ElimuNeural",
     "Imani": "en-TZ-ImaniNeural",
     "Leah": "en-ZA-LeahNeural",
     "Luke": "en-ZA-LukeNeural",
     # Add other voices as needed
 }
+# Register cleanup on exit
 # atexit runs registered callables at normal interpreter shutdown.
 # file_manager.cleanup_all is defined elsewhere in this file; presumably it
 # removes the generated audio/SRT files (confirm against FileManager).
 import atexit
 atexit.register(file_manager.cleanup_all)
+# Create Gradio interface
 # Gradio UI: text + voice/pitch/rate inputs, subtitle layout controls, and
 # outputs for the preview audio, downloadable files and a status message.
 with gr.Blocks(title="Advanced TTS with Configurable SRT Generation") as app:
     gr.Markdown("# Advanced TTS with Configurable SRT Generation")
     gr.Markdown("Generate perfectly synchronized audio and subtitles with natural speech patterns.")
     with gr.Row():
         with gr.Column(scale=3):
             text_input = gr.Textbox(label="Enter Text", lines=10, placeholder="Enter your text here...")
         with gr.Column(scale=2):
             voice_dropdown = gr.Dropdown(
                 label="Select Voice",
                 choices=list(voice_options.keys()),
                 value="Jenny Female"
             )
             pitch_slider = gr.Slider(
                 label="Pitch Adjustment (Hz)",
                 minimum=-10,
                 maximum=10,
                 value=0,
                 step=1
             )
             rate_slider = gr.Slider(
                 label="Rate Adjustment (%)",
                 minimum=-25,
                 maximum=25,
                 value=0,
                 step=1
             )
     with gr.Row():
         with gr.Column():
             words_per_line = gr.Slider(
                 label="Words per Line",
                 minimum=3,
                 maximum=12,
                 value=6,
                 step=1,
                 info="Controls how many words appear on each line of the subtitle"
             )
         with gr.Column():
             lines_per_segment = gr.Slider(
                 label="Lines per Segment",
                 minimum=1,
                 maximum=4,
                 value=2,
                 step=1,
                 info="Controls how many lines appear in each subtitle segment"
             )
         with gr.Column():
             parallel_processing = gr.Checkbox(
                 label="Enable Parallel Processing",
                 value=True,
                 info="Process multiple segments simultaneously for faster conversion (recommended for longer texts)"
             )
     submit_btn = gr.Button("Generate Audio & Subtitles")
     # Status box: hidden until a run reports an error.
     error_output = gr.Textbox(label="Status", visible=False)
     with gr.Row():
         with gr.Column():
             audio_output = gr.Audio(label="Preview Audio")
         with gr.Column():
             srt_file = gr.File(label="Download SRT")
             # Placeholder for custom download markup. Nothing in this block
             # generates a link for it; it is cleared on every run.
             download_link = gr.HTML(elem_classes="download-btn")
             audio_file = gr.File(label="Download Audio (Direct)")

     async def _handle_submit(
         text,
         pitch,
         rate,
         voice,
         words,
         lines,
         parallel,
         progress=gr.Progress()
     ):
         """Adapt process_text_with_progress's result to the output components.

         The handler returns ``(srt, audio, audio, has_error, message)``.
         Passing that tuple straight to the outputs would put the boolean
         into the status textbox's value and the message into the HTML
         component. Instead the flag drives the status box's visibility and
         the message becomes its text.
         """
         srt_path, audio_path, preview_path, has_error, message = (
             await process_text_with_progress(
                 text, pitch, rate, voice, words, lines, parallel,
                 progress=progress,
             )
         )
         status_update = gr.update(value=message, visible=bool(has_error))
         return srt_path, audio_path, preview_path, status_update, ""

     # Manual error handling: failures come back as data (flag + message)
     # rather than exceptions, and all updates happen on the current page.
     submit_btn.click(
         fn=_handle_submit,
         inputs=[
             text_input,
             pitch_slider,
             rate_slider,
             voice_dropdown,
             words_per_line,
             lines_per_segment,
             parallel_processing
         ],
         outputs=[
             srt_file,
             audio_file,
             audio_output,
             error_output,
             download_link
         ],
         api_name="generate"
     )
 # Start the Gradio app only when this file is run as a script, not on import.
 if __name__ == "__main__":
+    app.launch()