Update video2.py
Browse files
video2.py
CHANGED
|
@@ -87,15 +87,16 @@ def detect_language(word):
|
|
| 87 |
|
| 88 |
def calculate_pause(text_chunk):
|
| 89 |
"""
|
| 90 |
-
|
|
|
|
| 91 |
"""
|
| 92 |
t = text_chunk.strip()
|
| 93 |
-
#
|
| 94 |
-
if t.endswith(',') or t.endswith(';'): return
|
| 95 |
-
#
|
| 96 |
-
elif t.endswith('.'): return
|
| 97 |
-
elif t.endswith('?'): return
|
| 98 |
-
elif t.endswith('!'): return
|
| 99 |
return 0
|
| 100 |
|
| 101 |
def analyze_and_segment(text):
|
|
@@ -152,14 +153,14 @@ async def generate_chunk_with_retry(segment_data, semaphore):
|
|
| 152 |
|
| 153 |
voice = VOICES["Tamil"] if lang_type == 'indic' else VOICES["English"]
|
| 154 |
|
| 155 |
-
#
|
| 156 |
-
rate = "
|
| 157 |
pitch = "+0Hz"
|
| 158 |
|
| 159 |
for attempt in range(MAX_RETRIES):
|
| 160 |
async with semaphore:
|
| 161 |
try:
|
| 162 |
-
await asyncio.sleep(random.uniform(0.1, 0.4))
|
| 163 |
|
| 164 |
fd, path = tempfile.mkstemp(suffix=f"_{idx}.mp3")
|
| 165 |
os.close(fd)
|
|
@@ -187,6 +188,9 @@ def process_and_stitch(results):
|
|
| 187 |
|
| 188 |
final_audio = AudioSegment.empty()
|
| 189 |
|
|
|
|
|
|
|
|
|
|
| 190 |
for i, item in enumerate(results):
|
| 191 |
try:
|
| 192 |
path = item['path']
|
|
@@ -201,29 +205,22 @@ def process_and_stitch(results):
|
|
| 201 |
else:
|
| 202 |
prev_item = results[i-1]
|
| 203 |
|
| 204 |
-
# --- FLOW LOGIC ---
|
| 205 |
if prev_item['pause'] > 0:
|
| 206 |
-
# If there was
|
| 207 |
silence = AudioSegment.silent(duration=prev_item['pause'])
|
| 208 |
final_audio += silence + segment_audio
|
| 209 |
else:
|
| 210 |
-
# If
|
| 211 |
-
#
|
| 212 |
-
|
| 213 |
-
try:
|
| 214 |
-
final_audio = final_audio.append(segment_audio, crossfade=20)
|
| 215 |
-
except:
|
| 216 |
-
# Fallback for tiny segments
|
| 217 |
-
final_audio += segment_audio
|
| 218 |
|
| 219 |
except Exception as e:
|
| 220 |
-
print(f"Error stitching segment {i}: {e}")
|
| 221 |
continue
|
| 222 |
|
| 223 |
return final_audio
|
| 224 |
|
| 225 |
async def natural_tts_engine(full_text, output_file, native_lang_code):
|
| 226 |
-
print("Analyzing...")
|
| 227 |
segments = analyze_and_segment(full_text)
|
| 228 |
|
| 229 |
tasks = []
|
|
@@ -234,13 +231,11 @@ async def natural_tts_engine(full_text, output_file, native_lang_code):
|
|
| 234 |
|
| 235 |
raw_results = await asyncio.gather(*tasks)
|
| 236 |
|
| 237 |
-
print("Stitching for Natural Flow...")
|
| 238 |
final_audio = process_and_stitch(raw_results)
|
| 239 |
|
| 240 |
if not final_audio: return None
|
| 241 |
|
| 242 |
-
|
| 243 |
-
# Compression ratio 2.5 makes speech punchy and clear
|
| 244 |
final_audio = compress_dynamic_range(
|
| 245 |
final_audio,
|
| 246 |
threshold=-15.0,
|
|
@@ -251,7 +246,6 @@ async def natural_tts_engine(full_text, output_file, native_lang_code):
|
|
| 251 |
final_audio = normalize(final_audio)
|
| 252 |
|
| 253 |
final_audio.export(output_file, format="mp3", bitrate="320k")
|
| 254 |
-
print(f"✅ Saved: {output_file}")
|
| 255 |
return output_file
|
| 256 |
|
| 257 |
async def generate_tts(id, lines, lang_input):
|
|
@@ -272,6 +266,8 @@ async def generate_tts(id, lines, lang_input):
|
|
| 272 |
|
| 273 |
|
| 274 |
|
|
|
|
|
|
|
| 275 |
def audio_func(id, lines, lang):
|
| 276 |
loop = asyncio.new_event_loop()
|
| 277 |
asyncio.set_event_loop(loop)
|
|
|
|
| 87 |
|
| 88 |
def calculate_pause(text_chunk):
    """Return the pause, in milliseconds, to insert after a text chunk.

    The pause is chosen from the last character of the chunk once
    surrounding whitespace has been stripped, so only meaningful
    punctuation produces a gap:

    * ``,`` or ``;`` -> 70 (micro-breath)
    * ``.`` or ``!`` -> 250 (quick sentence stop)
    * ``?``          -> 300
    * anything else  -> 0 (continuous speech, no gap)

    Args:
        text_chunk: The text of one segment. ``None`` or an empty string
            is treated as having no trailing punctuation.

    Returns:
        The pause length in milliseconds as an ``int``.
    """
    # Guard against None/empty input instead of failing on .strip().
    if not text_chunk:
        return 0

    t = text_chunk.strip()

    # Micro-breath (70ms) for comma/semicolon.
    if t.endswith((',', ';')):
        return 70
    # Quick sentence stop (250ms) for a full stop or exclamation mark.
    if t.endswith(('.', '!')):
        return 250
    # Questions get a slightly longer stop (300ms).
    if t.endswith('?'):
        return 300
    return 0
|
| 101 |
|
| 102 |
def analyze_and_segment(text):
|
|
|
|
| 153 |
|
| 154 |
voice = VOICES["Tamil"] if lang_type == 'indic' else VOICES["English"]
|
| 155 |
|
| 156 |
+
# Max efficiency: Neutral rate (+0%) for all.
|
| 157 |
+
rate = "+0%"
|
| 158 |
pitch = "+0Hz"
|
| 159 |
|
| 160 |
for attempt in range(MAX_RETRIES):
|
| 161 |
async with semaphore:
|
| 162 |
try:
|
| 163 |
+
await asyncio.sleep(random.uniform(0.1, 0.4))
|
| 164 |
|
| 165 |
fd, path = tempfile.mkstemp(suffix=f"_{idx}.mp3")
|
| 166 |
os.close(fd)
|
|
|
|
| 188 |
|
| 189 |
final_audio = AudioSegment.empty()
|
| 190 |
|
| 191 |
+
# 50ms silence pad to the start of the entire output to prevent clipping the first word
|
| 192 |
+
final_audio += AudioSegment.silent(duration=50)
|
| 193 |
+
|
| 194 |
for i, item in enumerate(results):
|
| 195 |
try:
|
| 196 |
path = item['path']
|
|
|
|
| 205 |
else:
|
| 206 |
prev_item = results[i-1]
|
| 207 |
|
| 208 |
+
# --- ZERO-GAP FLOW LOGIC ---
|
| 209 |
if prev_item['pause'] > 0:
|
| 210 |
+
# If there was punctuation, insert the micro-silence.
|
| 211 |
silence = AudioSegment.silent(duration=prev_item['pause'])
|
| 212 |
final_audio += silence + segment_audio
|
| 213 |
else:
|
| 214 |
+
# If continuous speech (same language or language switch without punctuation),
|
| 215 |
+
# use direct append for 0ms gap.
|
| 216 |
+
final_audio += segment_audio
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
|
| 218 |
except Exception as e:
|
|
|
|
| 219 |
continue
|
| 220 |
|
| 221 |
return final_audio
|
| 222 |
|
| 223 |
async def natural_tts_engine(full_text, output_file, native_lang_code):
|
|
|
|
| 224 |
segments = analyze_and_segment(full_text)
|
| 225 |
|
| 226 |
tasks = []
|
|
|
|
| 231 |
|
| 232 |
raw_results = await asyncio.gather(*tasks)
|
| 233 |
|
|
|
|
| 234 |
final_audio = process_and_stitch(raw_results)
|
| 235 |
|
| 236 |
if not final_audio: return None
|
| 237 |
|
| 238 |
+
# Final Mastering: Ensures volume is consistent and clear
|
|
|
|
| 239 |
final_audio = compress_dynamic_range(
|
| 240 |
final_audio,
|
| 241 |
threshold=-15.0,
|
|
|
|
| 246 |
final_audio = normalize(final_audio)
|
| 247 |
|
| 248 |
final_audio.export(output_file, format="mp3", bitrate="320k")
|
|
|
|
| 249 |
return output_file
|
| 250 |
|
| 251 |
async def generate_tts(id, lines, lang_input):
|
|
|
|
| 266 |
|
| 267 |
|
| 268 |
|
| 269 |
+
|
| 270 |
+
|
| 271 |
def audio_func(id, lines, lang):
|
| 272 |
loop = asyncio.new_event_loop()
|
| 273 |
asyncio.set_event_loop(loop)
|