sreepathi-ravikumar committed on
Commit
a329cf2
·
verified ·
1 Parent(s): faaec01

Update video2.py

Browse files
Files changed (1) hide show
  1. video2.py +22 -26
video2.py CHANGED
@@ -87,15 +87,16 @@ def detect_language(word):
87
 
88
def calculate_pause(text_chunk):
    """Return a natural pause duration, in milliseconds, for a text chunk.

    NATURAL PAUSE DURATIONS (reduced for speed/flow):
    the pause is chosen from the chunk's trailing punctuation after
    stripping surrounding whitespace.

    Args:
        text_chunk: The text segment whose trailing punctuation decides
            the pause length.

    Returns:
        int: Pause length in ms — 100 for ','/';' (quick breath, was 250),
        350 for '.' (was 650), 400 for '?', 350 for '!', 0 otherwise.
    """
    t = text_chunk.strip()
    # endswith accepts a tuple, so one call covers both comma and semicolon.
    if t.endswith((',', ';')):
        return 100
    if t.endswith('.'):
        return 350
    if t.endswith('?'):
        return 400
    if t.endswith('!'):
        return 350
    return 0
100
 
101
  def analyze_and_segment(text):
@@ -152,14 +153,14 @@ async def generate_chunk_with_retry(segment_data, semaphore):
152
 
153
  voice = VOICES["Tamil"] if lang_type == 'indic' else VOICES["English"]
154
 
155
- # Keeping English slightly slower helps it blend with Tamil speed
156
- rate = "-5%" if lang_type == 'english' else "+0%"
157
  pitch = "+0Hz"
158
 
159
  for attempt in range(MAX_RETRIES):
160
  async with semaphore:
161
  try:
162
- await asyncio.sleep(random.uniform(0.1, 0.4)) # Jitter
163
 
164
  fd, path = tempfile.mkstemp(suffix=f"_{idx}.mp3")
165
  os.close(fd)
@@ -187,6 +188,9 @@ def process_and_stitch(results):
187
 
188
  final_audio = AudioSegment.empty()
189
 
 
 
 
190
  for i, item in enumerate(results):
191
  try:
192
  path = item['path']
@@ -201,29 +205,22 @@ def process_and_stitch(results):
201
  else:
202
  prev_item = results[i-1]
203
 
204
- # --- FLOW LOGIC ---
205
  if prev_item['pause'] > 0:
206
- # If there was a comma/period, use the calculated pause
207
  silence = AudioSegment.silent(duration=prev_item['pause'])
208
  final_audio += silence + segment_audio
209
  else:
210
- # If switching languages (Voltage -> nu), use CROSSFADE
211
- # Crossfade blends the end of English with start of Tamil.
212
- # 20ms is fast enough to sound connected but not overlapped.
213
- try:
214
- final_audio = final_audio.append(segment_audio, crossfade=20)
215
- except:
216
- # Fallback for tiny segments
217
- final_audio += segment_audio
218
 
219
  except Exception as e:
220
- print(f"Error stitching segment {i}: {e}")
221
  continue
222
 
223
  return final_audio
224
 
225
  async def natural_tts_engine(full_text, output_file, native_lang_code):
226
- print("Analyzing...")
227
  segments = analyze_and_segment(full_text)
228
 
229
  tasks = []
@@ -234,13 +231,11 @@ async def natural_tts_engine(full_text, output_file, native_lang_code):
234
 
235
  raw_results = await asyncio.gather(*tasks)
236
 
237
- print("Stitching for Natural Flow...")
238
  final_audio = process_and_stitch(raw_results)
239
 
240
  if not final_audio: return None
241
 
242
- print("Mastering...")
243
- # Compression ratio 2.5 makes speech punchy and clear
244
  final_audio = compress_dynamic_range(
245
  final_audio,
246
  threshold=-15.0,
@@ -251,7 +246,6 @@ async def natural_tts_engine(full_text, output_file, native_lang_code):
251
  final_audio = normalize(final_audio)
252
 
253
  final_audio.export(output_file, format="mp3", bitrate="320k")
254
- print(f"✅ Saved: {output_file}")
255
  return output_file
256
 
257
  async def generate_tts(id, lines, lang_input):
@@ -272,6 +266,8 @@ async def generate_tts(id, lines, lang_input):
272
 
273
 
274
 
 
 
275
  def audio_func(id, lines, lang):
276
  loop = asyncio.new_event_loop()
277
  asyncio.set_event_loop(loop)
 
87
 
88
def calculate_pause(text_chunk):
    """
    MAX EFFICIENCY PAUSE DURATIONS
    Only add a brief pause for meaningful punctuation.

    The pause (in ms) is decided by the final character of the chunk
    after stripping whitespace: micro-breath (70ms) for comma/semicolon,
    quick sentence stop (250ms) for '.'/'!', 300ms for '?', else 0.
    """
    # Lookup table keyed on the trailing punctuation mark.
    pause_by_mark = {',': 70, ';': 70, '.': 250, '?': 300, '!': 250}
    stripped = text_chunk.strip()
    # stripped[-1:] is '' for an empty string, which falls through to 0.
    return pause_by_mark.get(stripped[-1:], 0)
101
 
102
  def analyze_and_segment(text):
 
153
 
154
  voice = VOICES["Tamil"] if lang_type == 'indic' else VOICES["English"]
155
 
156
+ # Max efficiency: Neutral rate (+0%) for all.
157
+ rate = "+0%"
158
  pitch = "+0Hz"
159
 
160
  for attempt in range(MAX_RETRIES):
161
  async with semaphore:
162
  try:
163
+ await asyncio.sleep(random.uniform(0.1, 0.4))
164
 
165
  fd, path = tempfile.mkstemp(suffix=f"_{idx}.mp3")
166
  os.close(fd)
 
188
 
189
  final_audio = AudioSegment.empty()
190
 
191
+ # 50ms silence pad to the start of the entire output to prevent clipping the first word
192
+ final_audio += AudioSegment.silent(duration=50)
193
+
194
  for i, item in enumerate(results):
195
  try:
196
  path = item['path']
 
205
  else:
206
  prev_item = results[i-1]
207
 
208
+ # --- ZERO-GAP FLOW LOGIC ---
209
  if prev_item['pause'] > 0:
210
+ # If there was punctuation, insert the micro-silence.
211
  silence = AudioSegment.silent(duration=prev_item['pause'])
212
  final_audio += silence + segment_audio
213
  else:
214
+ # If continuous speech (same language or language switch without punctuation),
215
+ # use direct append for 0ms gap.
216
+ final_audio += segment_audio
 
 
 
 
 
217
 
218
  except Exception as e:
 
219
  continue
220
 
221
  return final_audio
222
 
223
  async def natural_tts_engine(full_text, output_file, native_lang_code):
 
224
  segments = analyze_and_segment(full_text)
225
 
226
  tasks = []
 
231
 
232
  raw_results = await asyncio.gather(*tasks)
233
 
 
234
  final_audio = process_and_stitch(raw_results)
235
 
236
  if not final_audio: return None
237
 
238
+ # Final Mastering: Ensures volume is consistent and clear
 
239
  final_audio = compress_dynamic_range(
240
  final_audio,
241
  threshold=-15.0,
 
246
  final_audio = normalize(final_audio)
247
 
248
  final_audio.export(output_file, format="mp3", bitrate="320k")
 
249
  return output_file
250
 
251
  async def generate_tts(id, lines, lang_input):
 
266
 
267
 
268
 
269
+
270
+
271
  def audio_func(id, lines, lang):
272
  loop = asyncio.new_event_loop()
273
  asyncio.set_event_loop(loop)