|
|
import asyncio
import html
import math
import os
import re
import shutil
import tempfile
import time
import uuid
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from pathlib import Path
from typing import List, Tuple, Optional, Dict, Any

import edge_tts
import gradio as gr
from pydub import AudioSegment
|
|
|
|
|
class TimingManager:
    """Hand out back-to-back (start, end) time slots separated by a fixed gap."""

    def __init__(self):
        # Pause inserted after every slot, in the same unit as the durations
        # passed to get_timing (presumably milliseconds -- confirm with callers).
        self.segment_gap = 100
        # Position at which the next slot will begin.
        self.current_time = 0

    def get_timing(self, duration):
        """Reserve the next slot of length ``duration``.

        Returns:
            A ``(start, end)`` tuple. The internal cursor is moved to
            ``end + segment_gap`` so the following slot starts after the gap.
        """
        begin = self.current_time
        finish = begin + duration
        self.current_time = finish + self.segment_gap
        return (begin, finish)
|
|
|
|
|
def get_audio_length(audio_file):
    """Return the length of ``audio_file`` as ``len(decoded_audio) / 1000``.

    The file is decoded with pydub's ``AudioSegment.from_file``. pydub
    documents ``len()`` of a segment as milliseconds, so the result is
    a (possibly fractional) number of seconds.
    """
    clip = AudioSegment.from_file(audio_file)
    length_ms = len(clip)
    return length_ms / 1000
|
|
|
|
|
def format_time_ms(milliseconds):
    """Format a millisecond offset as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        milliseconds: Offset from the start of the audio. Fractions are
            truncated; negative values are clamped to zero because SRT has
            no notion of a negative timestamp.

    Returns:
        The timestamp string, e.g. ``"00:01:01,001"`` for ``61001``.
    """
    total_ms = max(0, int(milliseconds))
    total_seconds, ms = divmod(total_ms, 1000)
    # The minutes must be derived from the whole-second count; the previous
    # code divided a not-yet-assigned variable and raised on every call.
    mins, secs = divmod(total_seconds, 60)
    hrs, mins = divmod(mins, 60)
    return f"{hrs:02}:{mins:02}:{secs:02},{ms:03}"
|
|
|
|
|
@dataclass
class Segment:
    """One subtitle cue together with the audio synthesized for it."""

    # Sequence number; used as the cue number in the SRT output and as the
    # sort key when segments are re-ordered after parallel synthesis.
    id: int
    # Cue text; individual subtitle lines are separated by "\n".
    text: str
    # Cue boundaries; they are rendered through format_time_ms, i.e. they are
    # treated as millisecond offsets from the start of the final audio.
    start_time: int = 0
    end_time: int = 0
    # Set to len(audio) once the segment has been synthesized.
    duration: int = 0
    # Synthesized speech for this cue, or None before synthesis.
    audio: Optional[AudioSegment] = None
    # Defaults to None (not an empty list); nothing in the code visible here
    # populates it, so the annotation is Optional to match the real default.
    lines: Optional[List[str]] = None
|
|
|
|
|
class TextProcessor:
    """Split free text into subtitle segments and lines at natural break points.

    A segment is a run of words that becomes one subtitle cue; each segment is
    further wrapped into lines of roughly ``words_per_line`` words.
    """

    # Words that usually open a new clause: a break is preferred right before them.
    _PHRASE_STARTERS = frozenset({
        'however', 'therefore', 'moreover', 'furthermore',
        'meanwhile', 'although', 'because',
    })
    _CONJUNCTIONS = frozenset({'and', 'but', 'or', 'nor', 'for', 'yet', 'so'})
    # Punctuation that should be followed by exactly one space. A mark that
    # sits between two digits ("3.14", "1,000", "10:30") matches neither
    # alternative, so numbers are left intact.
    _PUNCT_SPACING = re.compile(r'(?<!\d)([.!?,;:])\s*|([.!?,;:])(?!\d)\s*')

    def __init__(self, words_per_line: int, lines_per_segment: int):
        """Store the layout limits and the break-point weighting table.

        Args:
            words_per_line: Target number of words on one subtitle line.
            lines_per_segment: Target number of lines in one subtitle cue.
        """
        self.words_per_line = words_per_line
        self.lines_per_segment = lines_per_segment
        # Breaks closer than this many words to the segment start are ignored.
        self.min_segment_words = 3
        # Upper bound (a float) on the index at which a natural break is accepted.
        self.max_segment_words = words_per_line * lines_per_segment * 1.5
        # Relative strength of a break after a word ending in the given mark.
        self.punctuation_weights = {
            '.': 1.0,
            '!': 1.0,
            '?': 1.0,
            ';': 0.8,
            ':': 0.7,
            ',': 0.5,
            '-': 0.3,
            '(': 0.2,
            ')': 0.2,
        }

    def analyze_sentence_complexity(self, text: str) -> float:
        """Return a multiplier >= 1.0 that grows with length and punctuation density.

        Text containing no words yields the neutral value ``1.0`` instead of
        dividing by zero.
        """
        words = text.split()
        if not words:
            return 1.0

        complexity = 1.0
        if len(words) > self.words_per_line * 2:
            complexity *= 1.2

        punct_count = sum(text.count(p) for p in self.punctuation_weights)
        complexity *= (1 + (punct_count / len(words)) * 0.5)
        return complexity

    def find_natural_breaks(self, text: str) -> List[Tuple[int, float]]:
        """Return ``(word_index, weight)`` for every word after which a break is plausible.

        The index refers to the last word that stays in the current segment.
        """
        breaks = []
        words = text.split()
        last_index = len(words) - 1

        for i, word in enumerate(words):
            # Every key of punctuation_weights is a single character, so the
            # final character of the word selects the weight directly.
            weight = self.punctuation_weights.get(word[-1:], 0)

            # Break before a clause-opening word.
            if i < last_index and words[i + 1].lower() in self._PHRASE_STARTERS:
                weight = max(weight, 0.6)

            # NOTE(review): this breaks *after* the conjunction, so a segment can
            # end in "and"/"but"; breaking before it may read better -- confirm intent.
            if i > self.min_segment_words and word.lower() in self._CONJUNCTIONS:
                weight = max(weight, 0.4)

            if weight > 0:
                breaks.append((i, weight))

        return breaks

    def split_into_segments(self, text: str) -> "List[Segment]":
        """Cut ``text`` into consecutively numbered ``Segment`` objects.

        Whitespace is collapsed and punctuation spacing normalised first. For
        each segment the strongest natural break inside the allowed window is
        used; when there is none, the segment falls back to exactly
        ``words_per_line * lines_per_segment`` words.
        """
        text = re.sub(r'\s+', ' ', text.strip())
        text = self._PUNCT_SPACING.sub(lambda m: (m.group(1) or m.group(2)) + ' ', text)
        text = re.sub(r'\s+([.!?,;:])', r'\1', text)

        segments = []
        words = text.split()
        fallback_size = self.words_per_line * self.lines_per_segment

        i = 0
        while i < len(words):
            complexity = self.analyze_sentence_complexity(
                ' '.join(words[i:i + self.words_per_line * 2])
            )
            window = words[i:i + int(self.max_segment_words * complexity)]
            breaks = self.find_natural_breaks(' '.join(window))

            # Pick the first break with the highest weight inside the size limits.
            best_break = None
            best_weight = 0
            for break_idx, weight in breaks:
                within_limits = self.min_segment_words <= break_idx <= self.max_segment_words
                if within_limits and weight > best_weight:
                    best_break = break_idx
                    best_weight = weight

            if best_break is None:
                # best_break is the inclusive index of the segment's last word,
                # so a word *count* has to be reduced by one. max(1, ...) keeps
                # the loop advancing even for degenerate settings.
                best_break = max(1, min(fallback_size, len(words) - i)) - 1

            segment_words = words[i:i + best_break + 1]
            lines = self.split_into_lines(' '.join(segment_words))

            segments.append(Segment(
                id=len(segments) + 1,
                text='\n'.join(lines),
            ))

            i += best_break + 1

        return segments

    def split_into_lines(self, text: str) -> List[str]:
        """Wrap segment text into lines.

        A line ends when it reaches ``words_per_line`` words, when a word ends
        a sentence (``.``, ``!``, ``?``), or when it is at least 70% full and
        the word ends in ``,``, ``;`` or ``:``.
        """
        lines = []
        current_line = []

        for word in text.split():
            current_line.append(word)
            filled = len(current_line)

            is_break = (
                filled >= self.words_per_line
                or word.endswith(('.', '!', '?'))
                or (filled >= self.words_per_line * 0.7
                    and word.endswith((',', ';', ':')))
            )

            if is_break:
                lines.append(' '.join(current_line))
                current_line = []

        if current_line:
            lines.append(' '.join(current_line))

        return lines
|
|
|
|
|
|
|
|
class TTSError(Exception):
    """Raised when any step of the text-to-speech pipeline fails."""
|
|
|
|
|
async def process_segment_with_timing(segment: Segment, voice: str, rate: str, pitch: str) -> Segment:
    """Synthesize one segment as a single TTS request and attach the audio to it.

    Args:
        segment: Segment whose ``text`` is spoken; its ``audio`` and
            ``duration`` fields are filled in place.
        voice: edge-tts voice identifier.
        rate: Rate adjustment string passed through to edge-tts.
        pitch: Pitch adjustment string passed through to edge-tts.

    Returns:
        The same ``segment`` object, with 30 ms of silence padded on both
        sides of the speech and ``duration`` set to ``len(segment.audio)``.

    Raises:
        TTSError: If synthesis, the sanity check on the produced file, or
            decoding fails. The original exception is chained as ``__cause__``.
    """
    # edge-tts writes MP3 data, so the scratch file gets a matching extension;
    # a ".wav" name makes pydub attempt (and fail) a WAV parse before it falls
    # back to ffmpeg.
    audio_file = os.path.join(tempfile.gettempdir(), f"temp_segment_{segment.id}_{uuid.uuid4()}.mp3")
    try:
        # Line breaks only matter for subtitle layout; speak the cue as one sentence.
        segment_text = ' '.join(segment.text.split('\n'))
        tts = edge_tts.Communicate(segment_text, voice, rate=rate, pitch=pitch)

        try:
            await tts.save(audio_file)
        except Exception as e:
            raise TTSError(f"Failed to generate audio for segment {segment.id}: {str(e)}") from e

        if not os.path.exists(audio_file) or os.path.getsize(audio_file) == 0:
            raise TTSError(f"Generated audio file is empty or missing for segment {segment.id}")

        try:
            speech = AudioSegment.from_file(audio_file)
            silence = AudioSegment.silent(duration=30)
            segment.audio = silence + speech + silence
            segment.duration = len(segment.audio)
        except Exception as e:
            raise TTSError(f"Failed to process audio file for segment {segment.id}: {str(e)}") from e

        return segment
    except TTSError:
        raise
    except Exception as e:
        raise TTSError(f"Unexpected error processing segment {segment.id}: {str(e)}") from e
    finally:
        # Best-effort cleanup: the file may never have been created, and a
        # failed delete must not mask the real outcome of the synthesis.
        try:
            os.remove(audio_file)
        except OSError:
            pass
|
|
|
|
|
|
|
|
class FileManager:
    """Own a private temp directory and the generated (srt, audio) output pairs in it."""

    def __init__(self):
        """Create the temp directory and start with no tracked outputs."""
        self.temp_dir = tempfile.mkdtemp(prefix="tts_app_")
        # (srt_path, audio_path) tuples, oldest first.
        self.output_files = []
        self.max_files_to_keep = 5

    def get_temp_path(self, prefix):
        """Return a unique path inside the temp directory (the file is not created)."""
        return os.path.join(self.temp_dir, f"{prefix}_{uuid.uuid4()}")

    def create_output_paths(self):
        """Reserve a new ``(srt_path, audio_path)`` pair and prune old outputs.

        The paths share one UUID. They are registered before anything is
        written, so a pair may refer to files that do not exist yet.
        """
        unique_id = str(uuid.uuid4())
        audio_path = os.path.join(self.temp_dir, f"final_audio_{unique_id}.mp3")
        srt_path = os.path.join(self.temp_dir, f"final_subtitles_{unique_id}.srt")

        self.output_files.append((srt_path, audio_path))
        self.cleanup_old_files()

        return srt_path, audio_path

    @staticmethod
    def _remove_files(paths):
        """Delete each path, ignoring files that are missing or cannot be removed."""
        for path in paths:
            try:
                os.remove(path)
            except OSError:
                # Best effort: a leftover temp file must never break a request.
                pass

    def cleanup_old_files(self):
        """Delete all but the ``max_files_to_keep`` most recent output pairs."""
        # An explicit cut index avoids the ``[-0:]`` slicing pitfall, which
        # would keep everything when max_files_to_keep is 0.
        cut = max(0, len(self.output_files) - self.max_files_to_keep)
        stale, self.output_files = self.output_files[:cut], self.output_files[cut:]
        for pair in stale:
            self._remove_files(pair)

    def cleanup_all(self):
        """Delete every tracked output and the temp directory itself."""
        for pair in self.output_files:
            self._remove_files(pair)
        self.output_files = []
        # rmtree rather than rmdir: the directory can still hold files that were
        # never tracked here (e.g. paths handed out by get_temp_path), and rmdir
        # refuses to remove a non-empty directory.
        shutil.rmtree(self.temp_dir, ignore_errors=True)
|
|
|
|
|
|
|
|
# Module-wide FileManager instance; creating it makes the temp directory as a
# side effect of importing this module.
file_manager = FileManager()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_download_link(audio_path, base_url="aman18811-wfr-01.hf.space"):
    """Build the HTML for a styled "Download Audio File" anchor.

    Args:
        audio_path: Server-side path of the generated audio file. ``None`` or
            an empty value yields an empty string.
        base_url: Host name under which the app is served; the file is
            addressed as ``https://<base_url>/gradio_api/file=<audio_path>``.
            Defaults to the previously hard-coded host.

    Returns:
        An HTML snippet containing the anchor, or ``""`` when there is no file.
    """
    if not audio_path:
        return ""

    # Both values end up inside double-quoted HTML attributes, so they are
    # escaped; for the usual UUID-based temp paths the output is unchanged.
    filename = html.escape(Path(audio_path).name, quote=True)
    file_url = html.escape(f"https://{base_url}/gradio_api/file={audio_path}", quote=True)

    return f"""
    <a href="{file_url}"
       download="{filename}"
       target="_blank"
       rel="noopener noreferrer"
       style="display: inline-block; padding: 10px 20px; background: linear-gradient(135deg, #4776E6, #8E54E9); color: white; text-decoration: none; border-radius: 8px; font-weight: 600; transition: all 0.3s ease;"
       onmouseover="this.style.transform='translateY(-2px)'; this.style.boxShadow='0 5px 15px rgba(71, 118, 230, 0.3)';"
       onmouseout="this.style.transform='translateY(0)'; this.style.boxShadow='none';"
       onclick="event.preventDefault(); fetch(this.href).then(resp => resp.blob()).then(blob => {{
           const url = window.URL.createObjectURL(blob);
           const a = document.createElement('a');
           a.style.display = 'none';
           a.href = url;
           a.download = '{filename}';
           document.body.appendChild(a);
           a.click();
           window.URL.revokeObjectURL(url);
           document.body.removeChild(a);
       }});">
        Download Audio File
    </a>
    """
|
|
|
|
|
|
|
|
async def generate_accurate_srt(
    text: str,
    voice: str,
    rate: str,
    pitch: str,
    words_per_line: int,
    lines_per_segment: int,
    progress_callback=None,
    parallel: bool = True,
    max_workers: int = 4
) -> Tuple[str, str]:
    """Synthesize ``text`` and write a matching MP3 and SRT file.

    The text is split into segments, every segment is synthesized separately
    (concurrently when ``parallel`` is true), and the pieces are concatenated
    in segment order. Each cue's timestamps are the running sum of the
    preceding segment durations.

    Args:
        text: Text to speak.
        voice: edge-tts voice identifier.
        rate: Rate adjustment string for edge-tts.
        pitch: Pitch adjustment string for edge-tts.
        words_per_line: Target words per subtitle line.
        lines_per_segment: Target lines per subtitle cue.
        progress_callback: Optional ``callback(fraction, message)``.
        parallel: Synthesize segments concurrently when there is more than one.
        max_workers: Maximum number of concurrent synthesis requests; values
            below 1 are treated as 1.

    Returns:
        ``(srt_path, audio_path)`` of the files that were written.

    Raises:
        TTSError: If the text yields no segments, a segment cannot be
            synthesized, or the final files cannot be written.
    """

    def report(fraction, message):
        # Single place for the "callback is optional" check.
        if progress_callback:
            progress_callback(fraction, message)

    processor = TextProcessor(words_per_line, lines_per_segment)
    segments = processor.split_into_segments(text)
    total_segments = len(segments)
    if not segments:
        # Nothing to speak: fail with a clear message before any output path
        # is reserved or an empty audio export is attempted.
        raise TTSError("No text segments to synthesize")

    report(0.1, "Text segmentation complete")

    if parallel and total_segments > 1:
        processed_count = 0
        # Semaphore(0) would block every worker forever, hence the lower bound.
        semaphore = asyncio.Semaphore(max(1, max_workers))

        async def process_with_semaphore(segment):
            nonlocal processed_count
            async with semaphore:
                try:
                    result = await process_segment_with_timing(segment, voice, rate, pitch)
                except Exception as e:
                    processed_count += 1
                    report(
                        0.1 + (0.8 * processed_count / total_segments),
                        f"Error in segment {segment.id}: {str(e)}",
                    )
                    raise
                processed_count += 1
                report(
                    0.1 + (0.8 * processed_count / total_segments),
                    f"Processed {processed_count}/{total_segments} segments",
                )
                return result

        segment_tasks = [asyncio.ensure_future(process_with_semaphore(s)) for s in segments]
        try:
            processed_segments = list(await asyncio.gather(*segment_tasks))
        except Exception as e:
            # gather() propagates the first failure but leaves the remaining
            # tasks running; cancel them and wait so no request outlives us.
            for task in segment_tasks:
                task.cancel()
            await asyncio.gather(*segment_tasks, return_exceptions=True)
            report(0.9, f"Error during parallel processing: {str(e)}")
            raise TTSError(f"Failed during parallel processing: {str(e)}") from e
    else:
        processed_segments = []
        for position, segment in enumerate(segments, start=1):
            try:
                processed_segments.append(
                    await process_segment_with_timing(segment, voice, rate, pitch)
                )
            except Exception as e:
                report(0.9, f"Error processing segment {segment.id}: {str(e)}")
                raise TTSError(f"Failed to process segment {segment.id}: {str(e)}") from e
            report(
                0.1 + (0.8 * position / total_segments),
                f"Processed {position}/{total_segments} segments",
            )

    processed_segments.sort(key=lambda s: s.id)
    report(0.9, "Finalizing audio and subtitles")

    # Lay the segments end to end; a cue starts where the previous one ended.
    current_time = 0
    final_audio = AudioSegment.empty()
    cues = []

    for segment in processed_segments:
        segment.start_time = current_time
        segment.end_time = current_time + segment.duration

        cues.append(
            f"{segment.id}\n"
            f"{format_time_ms(segment.start_time)} --> {format_time_ms(segment.end_time)}\n"
            f"{segment.text}\n\n"
        )

        final_audio = final_audio.append(segment.audio, crossfade=0)
        current_time = segment.end_time

    srt_content = "".join(cues)
    srt_path, audio_path = file_manager.create_output_paths()

    try:
        export_params = {
            'format': 'mp3',
            'bitrate': '192k',
            'parameters': [
                '-ar', '44100',
                '-ac', '2',
                '-compression_level', '0',
                '-qscale:a', '2'
            ]
        }
        final_audio.export(audio_path, **export_params)

        with open(srt_path, "w", encoding='utf-8') as f:
            f.write(srt_content)
    except Exception as e:
        report(1.0, f"Error exporting final files: {str(e)}")
        raise TTSError(f"Failed to export final files: {str(e)}") from e

    report(1.0, "Complete!")
    return srt_path, audio_path
|
|
|
|
|
|
|
|
async def process_text_with_progress(
    text,
    pitch,
    rate,
    voice,
    words_per_line,
    lines_per_segment,
    parallel_processing,
    progress=gr.Progress()
):
    """Gradio click handler: synthesize ``text`` and return values for the five outputs.

    Args:
        text: Text to convert.
        pitch: Pitch offset in Hz (slider value; int or float).
        rate: Rate offset in percent (slider value; int or float).
        voice: Display name; must be a key of ``voice_options``.
        words_per_line: Words per subtitle line (slider value).
        lines_per_segment: Lines per subtitle cue (slider value).
        parallel_processing: Whether segments are synthesized concurrently.
        progress: Gradio progress tracker, called as ``progress(fraction, message)``.

    Returns:
        ``(srt_path, audio_path, audio_path, status_update, link_update)``.
        On failure the three paths are ``None``, the status update carries the
        message and the download link is hidden. Errors are reported this way
        rather than raised.
    """

    def failure(message):
        # Shape of every unsuccessful result: no files, visible status, hidden link.
        return None, None, None, gr.update(value=message, visible=True), gr.update(value="", visible=False)

    if not text or text.strip() == "":
        return failure("Please enter some text to convert to speech.")

    try:
        # Slider values can arrive as floats, which the ``+d`` format spec
        # rejects; round to the nearest integer step first. ``+d`` already
        # renders zero as "+0".
        pitch_str = f"{int(round(pitch)):+d}Hz"
        rate_str = f"{int(round(rate)):+d}%"

        voice_id = voice_options.get(voice)
        if voice_id is None:
            return failure(f"Unknown voice: {voice}")

        progress(0, "Preparing text...")

        srt_path, audio_path = await generate_accurate_srt(
            text,
            voice_id,
            rate_str,
            pitch_str,
            # Used for slicing and multiplication downstream, so they must be ints.
            int(words_per_line),
            int(lines_per_segment),
            progress_callback=progress,
            parallel=parallel_processing
        )

        return (
            srt_path,
            audio_path,
            audio_path,
            gr.update(value="", visible=False),
            gr.update(value=create_download_link(audio_path), visible=True),
        )
    except TTSError as e:
        return failure(f"TTS Error: {str(e)}")
    except Exception as e:
        # UI boundary: any other failure is shown to the user instead of raised.
        return failure(f"Unexpected error: {str(e)}")
|
|
|
|
|
|
|
|
# Display name shown in the voice dropdown -> edge-tts voice identifier.
# "Michelle" and "Liam" previously carried the wrong gender label, and the
# "Liam" key had a trailing space.
voice_options = {
    "Andrew Male": "en-US-AndrewNeural",
    "Jenny Female": "en-US-JennyNeural",
    "Guy Male": "en-US-GuyNeural",
    "Ana Female": "en-US-AnaNeural",
    "Aria Female": "en-US-AriaNeural",
    "Brian Male": "en-US-BrianNeural",
    "Christopher Male": "en-US-ChristopherNeural",
    "Eric Male": "en-US-EricNeural",
    "Michelle Female": "en-US-MichelleNeural",
    "Roger Male": "en-US-RogerNeural",
    "Natasha Female": "en-AU-NatashaNeural",
    "William Male": "en-AU-WilliamNeural",
    "Clara Female": "en-CA-ClaraNeural",
    "Liam Male": "en-CA-LiamNeural",
    "Libby Female": "en-GB-LibbyNeural",
    "Maisie": "en-GB-MaisieNeural",
    "Ryan": "en-GB-RyanNeural",
    "Sonia": "en-GB-SoniaNeural",
    "Thomas": "en-GB-ThomasNeural",
    "Sam": "en-HK-SamNeural",
    "Yan": "en-HK-YanNeural",
    "Connor": "en-IE-ConnorNeural",
    "Emily": "en-IE-EmilyNeural",
    "Neerja": "en-IN-NeerjaNeural",
    "Prabhat": "en-IN-PrabhatNeural",
    "Asilia": "en-KE-AsiliaNeural",
    "Chilemba": "en-KE-ChilembaNeural",
    "Abeo": "en-NG-AbeoNeural",
    "Ezinne": "en-NG-EzinneNeural",
    "Mitchell": "en-NZ-MitchellNeural",
    "James": "en-PH-JamesNeural",
    "Rosa": "en-PH-RosaNeural",
    "Luna": "en-SG-LunaNeural",
    "Wayne": "en-SG-WayneNeural",
    "Elimu": "en-TZ-ElimuNeural",
    "Imani": "en-TZ-ImaniNeural",
    "Leah": "en-ZA-LeahNeural",
    "Luke": "en-ZA-LukeNeural",
}
|
|
|
|
|
|
|
|
# Remove the generated files and the temp directory when the interpreter exits.
import atexit
atexit.register(file_manager.cleanup_all)
|
|
|
|
|
|
|
|
# Gradio UI definition.
# NOTE(review): the nesting below is reconstructed from a flattened source; in
# particular, whether `download_link` and `audio_file` sit at the top level of
# the Blocks or inside the last column could not be determined -- confirm
# against the intended layout.
with gr.Blocks(title="Advanced TTS with Configurable SRT Generation") as app:
    gr.Markdown("# Advanced TTS with Configurable SRT Generation")
    gr.Markdown("Generate perfectly synchronized audio and subtitles with natural speech patterns.")

    # Text on the left, voice settings on the right.
    with gr.Row():
        with gr.Column(scale=3):
            text_input = gr.Textbox(label="Enter Text", lines=10, placeholder="Enter your text here...")

        with gr.Column(scale=2):
            voice_dropdown = gr.Dropdown(
                label="Select Voice",
                choices=list(voice_options.keys()),
                value="Jenny Female"
            )
            pitch_slider = gr.Slider(
                label="Pitch Adjustment (Hz)",
                minimum=-10,
                maximum=10,
                value=0,
                step=1
            )
            rate_slider = gr.Slider(
                label="Rate Adjustment (%)",
                minimum=-25,
                maximum=25,
                value=0,
                step=1
            )

    # Subtitle layout and processing options.
    with gr.Row():
        with gr.Column():
            words_per_line = gr.Slider(
                label="Words per Line",
                minimum=3,
                maximum=12,
                value=6,
                step=1,
                info="Controls how many words appear on each line of the subtitle"
            )
        with gr.Column():
            lines_per_segment = gr.Slider(
                label="Lines per Segment",
                minimum=1,
                maximum=4,
                value=2,
                step=1,
                info="Controls how many lines appear in each subtitle segment"
            )
        with gr.Column():
            parallel_processing = gr.Checkbox(
                label="Enable Parallel Processing",
                value=True,
                info="Process multiple segments simultaneously for faster conversion (recommended for longer texts)"
            )

    submit_btn = gr.Button("Generate Audio & Subtitles")

    # Hidden until the handler returns a message to display.
    error_output = gr.Textbox(label="Status", interactive=False, visible=False)

    with gr.Row():
        with gr.Column():
            audio_output = gr.Audio(label="Preview Audio")
        with gr.Column():
            srt_file = gr.File(label="Download SRT")

    # Styled HTML download anchor; made visible by the handler on success.
    download_link = gr.HTML(elem_classes="download-btn", visible=False)

    audio_file = gr.File(label="Download Audio (Direct)")

    # The order of `outputs` must match the tuple returned by
    # process_text_with_progress: srt path, audio path (file), audio path
    # (preview), status update, download-link update.
    submit_btn.click(
        fn=process_text_with_progress,
        inputs=[
            text_input,
            pitch_slider,
            rate_slider,
            voice_dropdown,
            words_per_line,
            lines_per_segment,
            parallel_processing
        ],
        outputs=[
            srt_file,
            audio_file,
            audio_output,
            error_output,
            download_link
        ],
        api_name="generate"
    )
|
|
|
|
|
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()
|
|
|