sreepathi-ravikumar committed on
Commit
faaec01
·
verified ·
1 Parent(s): 73b2a26

Update video2.py

Browse files
Files changed (1) hide show
  1. video2.py +29 -30
video2.py CHANGED
@@ -58,7 +58,7 @@ from mutagen.mp3 import MP3
58
  AUDIO_DIR = "output_audio"
59
  os.makedirs(AUDIO_DIR, exist_ok=True)
60
 
61
- # Max concurrent requests (Safe zone for Edge TTS)
62
  MAX_CONCURRENT_REQUESTS = 3
63
  MAX_RETRIES = 5
64
  BASE_DELAY = 2.0
@@ -76,7 +76,6 @@ def clean_text(text):
76
  if not text: return ""
77
  text = html.unescape(str(text))
78
  text = re.sub(r'https?://\S+', '', text)
79
- # Remove special chars but KEEP punctuation
80
  text = re.sub(r'[\*\#\<\>\[\]\{\}]', '', text)
81
  text = re.sub(r'\s+', ' ', text).strip()
82
  return text
@@ -88,14 +87,16 @@ def detect_language(word):
88
 
89
  def calculate_pause(text_chunk):
90
  """
91
- INCREASED GAP DURATIONS as requested.
92
  """
93
  t = text_chunk.strip()
94
- if t.endswith('.'): return 650 # Long pause for full stop
95
- elif t.endswith('?'): return 700 # Question pause
96
- elif t.endswith('!'): return 600
97
- elif t.endswith(',') or t.endswith(';'): return 250 # Clear breath
98
- return 0 # Default gap logic handles the rest
 
 
99
 
100
  def analyze_and_segment(text):
101
  text = clean_text(text)
@@ -151,14 +152,14 @@ async def generate_chunk_with_retry(segment_data, semaphore):
151
 
152
  voice = VOICES["Tamil"] if lang_type == 'indic' else VOICES["English"]
153
 
154
- # Slight speed adjustment remains for naturalness
155
- rate = "-10%" if lang_type == 'english' else "+0%"
156
  pitch = "+0Hz"
157
 
158
  for attempt in range(MAX_RETRIES):
159
  async with semaphore:
160
  try:
161
- await asyncio.sleep(random.uniform(0.1, 0.5)) # Jitter
162
 
163
  fd, path = tempfile.mkstemp(suffix=f"_{idx}.mp3")
164
  os.close(fd)
@@ -175,7 +176,6 @@ async def generate_chunk_with_retry(segment_data, semaphore):
175
 
176
  except Exception as e:
177
  delay = BASE_DELAY * (2 ** attempt) + random.uniform(0, 1)
178
- print(f"⚠️ Retry Chunk {idx} in {delay:.1f}s... ({e})")
179
  try: os.remove(path)
180
  except: pass
181
  if attempt == MAX_RETRIES - 1: return None
@@ -187,14 +187,9 @@ def process_and_stitch(results):
187
 
188
  final_audio = AudioSegment.empty()
189
 
190
- # Default gap between switched words (e.g. Voltage [GAP] nu)
191
- # 100ms is noticeable but not awkward.
192
- DEFAULT_SWITCH_GAP = 120
193
-
194
  for i, item in enumerate(results):
195
  try:
196
  path = item['path']
197
-
198
  segment_audio = AudioSegment.from_mp3(path)
199
  try: os.remove(path)
200
  except: pass
@@ -206,17 +201,20 @@ def process_and_stitch(results):
206
  else:
207
  prev_item = results[i-1]
208
 
209
- # LOGIC CHANGE: Always add silence. No crossfades.
210
-
211
  if prev_item['pause'] > 0:
212
- # Punctuation Gap (Big)
213
- gap_duration = prev_item['pause']
 
214
  else:
215
- # Language Switch Gap (Small but clear)
216
- gap_duration = DEFAULT_SWITCH_GAP
217
-
218
- silence = AudioSegment.silent(duration=gap_duration)
219
- final_audio += silence + segment_audio
 
 
 
220
 
221
  except Exception as e:
222
  print(f"Error stitching segment {i}: {e}")
@@ -236,17 +234,17 @@ async def natural_tts_engine(full_text, output_file, native_lang_code):
236
 
237
  raw_results = await asyncio.gather(*tasks)
238
 
239
- print("Stitching with gaps...")
240
  final_audio = process_and_stitch(raw_results)
241
 
242
  if not final_audio: return None
243
 
244
  print("Mastering...")
245
- # Compression ensures the gaps are quiet and words are punchy
246
  final_audio = compress_dynamic_range(
247
  final_audio,
248
- threshold=-18.0,
249
- ratio=2.0,
250
  attack=5.0,
251
  release=50.0
252
  )
@@ -273,6 +271,7 @@ async def generate_tts(id, lines, lang_input):
273
  return 0, None
274
 
275
 
 
276
  def audio_func(id, lines, lang):
277
  loop = asyncio.new_event_loop()
278
  asyncio.set_event_loop(loop)
 
58
  AUDIO_DIR = "output_audio"
59
  os.makedirs(AUDIO_DIR, exist_ok=True)
60
 
61
+ # Rate Limit Protection
62
  MAX_CONCURRENT_REQUESTS = 3
63
  MAX_RETRIES = 5
64
  BASE_DELAY = 2.0
 
76
  if not text: return ""
77
  text = html.unescape(str(text))
78
  text = re.sub(r'https?://\S+', '', text)
 
79
  text = re.sub(r'[\*\#\<\>\[\]\{\}]', '', text)
80
  text = re.sub(r'\s+', ' ', text).strip()
81
  return text
 
87
 
88
  def calculate_pause(text_chunk):
89
  """
90
+ NATURAL PAUSE DURATIONS (Reduced for speed/flow)
91
  """
92
  t = text_chunk.strip()
93
+ # Quick breath for comma (was 250, now 100)
94
+ if t.endswith(',') or t.endswith(';'): return 100
95
+ # Standard sentence end (was 650, now 350)
96
+ elif t.endswith('.'): return 350
97
+ elif t.endswith('?'): return 400
98
+ elif t.endswith('!'): return 350
99
+ return 0
100
 
101
  def analyze_and_segment(text):
102
  text = clean_text(text)
 
152
 
153
  voice = VOICES["Tamil"] if lang_type == 'indic' else VOICES["English"]
154
 
155
+ # Keeping English slightly slower helps it blend with Tamil speed
156
+ rate = "-5%" if lang_type == 'english' else "+0%"
157
  pitch = "+0Hz"
158
 
159
  for attempt in range(MAX_RETRIES):
160
  async with semaphore:
161
  try:
162
+ await asyncio.sleep(random.uniform(0.1, 0.4)) # Jitter
163
 
164
  fd, path = tempfile.mkstemp(suffix=f"_{idx}.mp3")
165
  os.close(fd)
 
176
 
177
  except Exception as e:
178
  delay = BASE_DELAY * (2 ** attempt) + random.uniform(0, 1)
 
179
  try: os.remove(path)
180
  except: pass
181
  if attempt == MAX_RETRIES - 1: return None
 
187
 
188
  final_audio = AudioSegment.empty()
189
 
 
 
 
 
190
  for i, item in enumerate(results):
191
  try:
192
  path = item['path']
 
193
  segment_audio = AudioSegment.from_mp3(path)
194
  try: os.remove(path)
195
  except: pass
 
201
  else:
202
  prev_item = results[i-1]
203
 
204
+ # --- FLOW LOGIC ---
 
205
  if prev_item['pause'] > 0:
206
+ # If there was a comma/period, use the calculated pause
207
+ silence = AudioSegment.silent(duration=prev_item['pause'])
208
+ final_audio += silence + segment_audio
209
  else:
210
+ # If switching languages (Voltage -> nu), use CROSSFADE
211
+ # Crossfade blends the end of English with start of Tamil.
212
+ # 20ms is fast enough to sound connected but not overlapped.
213
+ try:
214
+ final_audio = final_audio.append(segment_audio, crossfade=20)
215
+ except:
216
+ # Fallback for tiny segments
217
+ final_audio += segment_audio
218
 
219
  except Exception as e:
220
  print(f"Error stitching segment {i}: {e}")
 
234
 
235
  raw_results = await asyncio.gather(*tasks)
236
 
237
+ print("Stitching for Natural Flow...")
238
  final_audio = process_and_stitch(raw_results)
239
 
240
  if not final_audio: return None
241
 
242
  print("Mastering...")
243
+ # Compression ratio 2.5 makes speech punchy and clear
244
  final_audio = compress_dynamic_range(
245
  final_audio,
246
+ threshold=-15.0,
247
+ ratio=2.5,
248
  attack=5.0,
249
  release=50.0
250
  )
 
271
  return 0, None
272
 
273
 
274
+
275
  def audio_func(id, lines, lang):
276
  loop = asyncio.new_event_loop()
277
  asyncio.set_event_loop(loop)