hivecorp committed on
Commit
bccb8c6
·
verified ·
1 Parent(s): 6c892fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -20
app.py CHANGED
@@ -42,12 +42,12 @@ def split_text_into_segments(text):
42
 
43
  return segments
44
 
45
- # Function to generate SRT with accurate timing per batch
46
- async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate):
47
  audio_file = f"batch_{batch_num}_audio.wav"
48
 
49
  # Generate the audio using edge-tts with pitch and rate adjustment
50
- tts = edge_tts.Communicate(batch_text, "en-US-AndrewNeural", rate=f"{rate}%", pitch=f"{pitch}Hz")
51
  await tts.save(audio_file)
52
 
53
  actual_length = get_audio_length(audio_file)
@@ -58,6 +58,7 @@ async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate
58
  srt_content = ""
59
  for index, segment in enumerate(segments):
60
  end_time = start_time + segment_duration
 
61
  if end_time > start_offset + actual_length:
62
  end_time = start_offset + actual_length
63
 
@@ -69,14 +70,15 @@ async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate
69
 
70
  return srt_content, audio_file, start_time
71
 
72
- async def batch_process_srt_and_audio(script_text, pitch, rate, progress=gr.Progress()):
 
73
  batches = [script_text[i:i+500] for i in range(0, len(script_text), 500)]
74
  all_srt_content = ""
75
  combined_audio = AudioSegment.empty()
76
  start_offset = 0.0
77
 
78
  for batch_num, batch_text in enumerate(batches):
79
- srt_content, audio_file, end_offset = await generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate)
80
  all_srt_content += srt_content
81
 
82
  batch_audio = AudioSegment.from_file(audio_file)
@@ -99,33 +101,40 @@ async def batch_process_srt_and_audio(script_text, pitch, rate, progress=gr.Prog
99
  validated_srt_content += line + "\n"
100
 
101
  unique_id = uuid.uuid4()
102
- final_audio_path = f"final_audio_{unique_id}.mp3" # Export as mp3
103
  final_srt_path = f"final_subtitles_{unique_id}.srt"
104
 
105
- combined_audio.export(final_audio_path, format="mp3", bitrate="320k") # Export as MP3 at 320kbps
 
106
  with open(final_srt_path, "w") as srt_file:
107
  srt_file.write(validated_srt_content)
108
 
109
  return final_srt_path, final_audio_path
110
 
111
- async def process_script(script_text, pitch, rate):
112
- srt_path, audio_path = await batch_process_srt_and_audio(script_text, pitch, rate)
 
113
  return srt_path, audio_path, audio_path
114
 
115
- # Function to get the available US English voices
116
- async def get_available_voices():
117
- voices = await edge_tts.list_voices()
118
- return [voice for voice in voices if 'en-US' in voice['name']]
119
-
120
- # Main execution to fetch voices
121
- available_voices = asyncio.run(get_available_voices())
122
-
123
- # Gradio interface setup
 
 
 
 
 
124
  app = gr.Interface(
125
  fn=process_script,
126
  inputs=[
127
  gr.Textbox(label="Enter Script Text", lines=10),
128
- gr.Dropdown(label="Select Voice", choices=[voice['name'] for voice in available_voices], value=available_voices[0]['name']),
129
  gr.Slider(label="Speech Rate Adjustment (%)", minimum=0, maximum=2, step=0.1, value=1),
130
  gr.Slider(label="Pitch Adjustment (Hz)", minimum=-100, maximum=100, step=1, value=1)
131
  ],
@@ -134,7 +143,7 @@ app = gr.Interface(
134
  gr.File(label="Download Audio File"),
135
  gr.Audio(label="Play Audio")
136
  ],
137
- description="HIVEcorp TTS Generator with adjustable speech rate and pitch."
138
  )
139
 
140
  app.launch()
 
42
 
43
  return segments
44
 
45
+ # Function to generate SRT with accurate timing per batch and cross-check timing
46
+ async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, voice, rate):
47
  audio_file = f"batch_{batch_num}_audio.wav"
48
 
49
  # Generate the audio using edge-tts with pitch and rate adjustment
50
+ tts = edge_tts.Communicate(batch_text, voice, rate=f"{rate}%", pitch=f"{pitch}Hz")
51
  await tts.save(audio_file)
52
 
53
  actual_length = get_audio_length(audio_file)
 
58
  srt_content = ""
59
  for index, segment in enumerate(segments):
60
  end_time = start_time + segment_duration
61
+
62
  if end_time > start_offset + actual_length:
63
  end_time = start_offset + actual_length
64
 
 
70
 
71
  return srt_content, audio_file, start_time
72
 
73
+ # Batch processing function with cumulative timing, progress indicator, and final SRT validation
74
+ async def batch_process_srt_and_audio(script_text, pitch, voice, rate, progress=gr.Progress()):
75
  batches = [script_text[i:i+500] for i in range(0, len(script_text), 500)]
76
  all_srt_content = ""
77
  combined_audio = AudioSegment.empty()
78
  start_offset = 0.0
79
 
80
  for batch_num, batch_text in enumerate(batches):
81
+ srt_content, audio_file, end_offset = await generate_accurate_srt(batch_text, batch_num, start_offset, pitch, voice, rate)
82
  all_srt_content += srt_content
83
 
84
  batch_audio = AudioSegment.from_file(audio_file)
 
101
  validated_srt_content += line + "\n"
102
 
103
  unique_id = uuid.uuid4()
104
+ final_audio_path = f"final_audio_{unique_id}.mp3"
105
  final_srt_path = f"final_subtitles_{unique_id}.srt"
106
 
107
+ combined_audio.export(final_audio_path, format="mp3", bitrate="320k")
108
+
109
  with open(final_srt_path, "w") as srt_file:
110
  srt_file.write(validated_srt_content)
111
 
112
  return final_srt_path, final_audio_path
113
 
114
+ # Gradio interface function
115
+ async def process_script(script_text, pitch, voice, rate):
116
+ srt_path, audio_path = await batch_process_srt_and_audio(script_text, pitch, voice, rate)
117
  return srt_path, audio_path, audio_path
118
 
119
+ # List of available voices
120
+ voices = {
121
+ "Jenny": "en-US-JennyNeural",
122
+ "Guy": "en-US-GuyNeural",
123
+ "Ana": "en-US-AnaNeural",
124
+ "Aria": "en-US-AriaNeural",
125
+ "Brian": "en-US-BrianNeural",
126
+ "Christopher": "en-US-ChristopherNeural",
127
+ "Eric": "en-US-EricNeural",
128
+ "Michelle": "en-US-MichelleNeural",
129
+ "Roger": "en-US-RogerNeural",
130
+ }
131
+
132
+ # Gradio interface setup with voice selection and speech rate adjustment
133
  app = gr.Interface(
134
  fn=process_script,
135
  inputs=[
136
  gr.Textbox(label="Enter Script Text", lines=10),
137
+ gr.Dropdown(label="Select Voice", choices=list(voices.keys()), value="Jenny"),
138
  gr.Slider(label="Speech Rate Adjustment (%)", minimum=0, maximum=2, step=0.1, value=1),
139
  gr.Slider(label="Pitch Adjustment (Hz)", minimum=-100, maximum=100, step=1, value=1)
140
  ],
 
143
  gr.File(label="Download Audio File"),
144
  gr.Audio(label="Play Audio")
145
  ],
146
+ description="HIVEcorp TTS Generator with customizable voice, speech rate, and pitch adjustments."
147
  )
148
 
149
  app.launch()