Update video2.py
Browse files
video2.py
CHANGED
|
@@ -58,7 +58,7 @@ from mutagen.mp3 import MP3
|
|
| 58 |
AUDIO_DIR = "output_audio"
|
| 59 |
os.makedirs(AUDIO_DIR, exist_ok=True)
|
| 60 |
|
| 61 |
-
#
|
| 62 |
MAX_CONCURRENT_REQUESTS = 3
|
| 63 |
MAX_RETRIES = 5
|
| 64 |
BASE_DELAY = 2.0
|
|
@@ -76,7 +76,6 @@ def clean_text(text):
|
|
| 76 |
if not text: return ""
|
| 77 |
text = html.unescape(str(text))
|
| 78 |
text = re.sub(r'https?://\S+', '', text)
|
| 79 |
-
# Remove special chars but KEEP punctuation
|
| 80 |
text = re.sub(r'[\*\#\<\>\[\]\{\}]', '', text)
|
| 81 |
text = re.sub(r'\s+', ' ', text).strip()
|
| 82 |
return text
|
|
@@ -88,14 +87,16 @@ def detect_language(word):
|
|
| 88 |
|
| 89 |
def calculate_pause(text_chunk):
|
| 90 |
"""
|
| 91 |
-
|
| 92 |
"""
|
| 93 |
t = text_chunk.strip()
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
elif t.endswith('
|
| 98 |
-
|
|
|
|
|
|
|
| 99 |
|
| 100 |
def analyze_and_segment(text):
|
| 101 |
text = clean_text(text)
|
|
@@ -151,14 +152,14 @@ async def generate_chunk_with_retry(segment_data, semaphore):
|
|
| 151 |
|
| 152 |
voice = VOICES["Tamil"] if lang_type == 'indic' else VOICES["English"]
|
| 153 |
|
| 154 |
-
#
|
| 155 |
-
rate = "-
|
| 156 |
pitch = "+0Hz"
|
| 157 |
|
| 158 |
for attempt in range(MAX_RETRIES):
|
| 159 |
async with semaphore:
|
| 160 |
try:
|
| 161 |
-
await asyncio.sleep(random.uniform(0.1, 0.
|
| 162 |
|
| 163 |
fd, path = tempfile.mkstemp(suffix=f"_{idx}.mp3")
|
| 164 |
os.close(fd)
|
|
@@ -175,7 +176,6 @@ async def generate_chunk_with_retry(segment_data, semaphore):
|
|
| 175 |
|
| 176 |
except Exception as e:
|
| 177 |
delay = BASE_DELAY * (2 ** attempt) + random.uniform(0, 1)
|
| 178 |
-
print(f"⚠️ Retry Chunk {idx} in {delay:.1f}s... ({e})")
|
| 179 |
try: os.remove(path)
|
| 180 |
except: pass
|
| 181 |
if attempt == MAX_RETRIES - 1: return None
|
|
@@ -187,14 +187,9 @@ def process_and_stitch(results):
|
|
| 187 |
|
| 188 |
final_audio = AudioSegment.empty()
|
| 189 |
|
| 190 |
-
# Default gap between switched words (e.g. Voltage [GAP] nu)
|
| 191 |
-
# 100ms is noticeable but not awkward.
|
| 192 |
-
DEFAULT_SWITCH_GAP = 120
|
| 193 |
-
|
| 194 |
for i, item in enumerate(results):
|
| 195 |
try:
|
| 196 |
path = item['path']
|
| 197 |
-
|
| 198 |
segment_audio = AudioSegment.from_mp3(path)
|
| 199 |
try: os.remove(path)
|
| 200 |
except: pass
|
|
@@ -206,17 +201,20 @@ def process_and_stitch(results):
|
|
| 206 |
else:
|
| 207 |
prev_item = results[i-1]
|
| 208 |
|
| 209 |
-
#
|
| 210 |
-
|
| 211 |
if prev_item['pause'] > 0:
|
| 212 |
-
#
|
| 213 |
-
|
|
|
|
| 214 |
else:
|
| 215 |
-
#
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
|
|
|
|
|
|
|
|
|
| 220 |
|
| 221 |
except Exception as e:
|
| 222 |
print(f"Error stitching segment {i}: {e}")
|
|
@@ -236,17 +234,17 @@ async def natural_tts_engine(full_text, output_file, native_lang_code):
|
|
| 236 |
|
| 237 |
raw_results = await asyncio.gather(*tasks)
|
| 238 |
|
| 239 |
-
print("Stitching
|
| 240 |
final_audio = process_and_stitch(raw_results)
|
| 241 |
|
| 242 |
if not final_audio: return None
|
| 243 |
|
| 244 |
print("Mastering...")
|
| 245 |
-
# Compression
|
| 246 |
final_audio = compress_dynamic_range(
|
| 247 |
final_audio,
|
| 248 |
-
threshold=-
|
| 249 |
-
ratio=2.
|
| 250 |
attack=5.0,
|
| 251 |
release=50.0
|
| 252 |
)
|
|
@@ -273,6 +271,7 @@ async def generate_tts(id, lines, lang_input):
|
|
| 273 |
return 0, None
|
| 274 |
|
| 275 |
|
|
|
|
| 276 |
def audio_func(id, lines, lang):
|
| 277 |
loop = asyncio.new_event_loop()
|
| 278 |
asyncio.set_event_loop(loop)
|
|
|
|
| 58 |
AUDIO_DIR = "output_audio"
|
| 59 |
os.makedirs(AUDIO_DIR, exist_ok=True)
|
| 60 |
|
| 61 |
+
# Rate Limit Protection
|
| 62 |
MAX_CONCURRENT_REQUESTS = 3
|
| 63 |
MAX_RETRIES = 5
|
| 64 |
BASE_DELAY = 2.0
|
|
|
|
| 76 |
if not text: return ""
|
| 77 |
text = html.unescape(str(text))
|
| 78 |
text = re.sub(r'https?://\S+', '', text)
|
|
|
|
| 79 |
text = re.sub(r'[\*\#\<\>\[\]\{\}]', '', text)
|
| 80 |
text = re.sub(r'\s+', ' ', text).strip()
|
| 81 |
return text
|
|
|
|
| 87 |
|
| 88 |
def calculate_pause(text_chunk):
|
| 89 |
"""
|
| 90 |
+
NATURAL PAUSE DURATIONS (Reduced for speed/flow)
|
| 91 |
"""
|
| 92 |
t = text_chunk.strip()
|
| 93 |
+
# Quick breath for comma (was 250, now 100)
|
| 94 |
+
if t.endswith(',') or t.endswith(';'): return 100
|
| 95 |
+
# Standard sentence end (was 650, now 350)
|
| 96 |
+
elif t.endswith('.'): return 350
|
| 97 |
+
elif t.endswith('?'): return 400
|
| 98 |
+
elif t.endswith('!'): return 350
|
| 99 |
+
return 0
|
| 100 |
|
| 101 |
def analyze_and_segment(text):
|
| 102 |
text = clean_text(text)
|
|
|
|
| 152 |
|
| 153 |
voice = VOICES["Tamil"] if lang_type == 'indic' else VOICES["English"]
|
| 154 |
|
| 155 |
+
# Keeping English slightly slower helps it blend with Tamil speed
|
| 156 |
+
rate = "-5%" if lang_type == 'english' else "+0%"
|
| 157 |
pitch = "+0Hz"
|
| 158 |
|
| 159 |
for attempt in range(MAX_RETRIES):
|
| 160 |
async with semaphore:
|
| 161 |
try:
|
| 162 |
+
await asyncio.sleep(random.uniform(0.1, 0.4)) # Jitter
|
| 163 |
|
| 164 |
fd, path = tempfile.mkstemp(suffix=f"_{idx}.mp3")
|
| 165 |
os.close(fd)
|
|
|
|
| 176 |
|
| 177 |
except Exception as e:
|
| 178 |
delay = BASE_DELAY * (2 ** attempt) + random.uniform(0, 1)
|
|
|
|
| 179 |
try: os.remove(path)
|
| 180 |
except: pass
|
| 181 |
if attempt == MAX_RETRIES - 1: return None
|
|
|
|
| 187 |
|
| 188 |
final_audio = AudioSegment.empty()
|
| 189 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
for i, item in enumerate(results):
|
| 191 |
try:
|
| 192 |
path = item['path']
|
|
|
|
| 193 |
segment_audio = AudioSegment.from_mp3(path)
|
| 194 |
try: os.remove(path)
|
| 195 |
except: pass
|
|
|
|
| 201 |
else:
|
| 202 |
prev_item = results[i-1]
|
| 203 |
|
| 204 |
+
# --- FLOW LOGIC ---
|
|
|
|
| 205 |
if prev_item['pause'] > 0:
|
| 206 |
+
# If there was a comma/period, use the calculated pause
|
| 207 |
+
silence = AudioSegment.silent(duration=prev_item['pause'])
|
| 208 |
+
final_audio += silence + segment_audio
|
| 209 |
else:
|
| 210 |
+
# If switching languages (Voltage -> nu), use CROSSFADE
|
| 211 |
+
# Crossfade blends the end of English with start of Tamil.
|
| 212 |
+
# 20ms is fast enough to sound connected but not overlapped.
|
| 213 |
+
try:
|
| 214 |
+
final_audio = final_audio.append(segment_audio, crossfade=20)
|
| 215 |
+
except:
|
| 216 |
+
# Fallback for tiny segments
|
| 217 |
+
final_audio += segment_audio
|
| 218 |
|
| 219 |
except Exception as e:
|
| 220 |
print(f"Error stitching segment {i}: {e}")
|
|
|
|
| 234 |
|
| 235 |
raw_results = await asyncio.gather(*tasks)
|
| 236 |
|
| 237 |
+
print("Stitching for Natural Flow...")
|
| 238 |
final_audio = process_and_stitch(raw_results)
|
| 239 |
|
| 240 |
if not final_audio: return None
|
| 241 |
|
| 242 |
print("Mastering...")
|
| 243 |
+
# Compression ratio 2.5 makes speech punchy and clear
|
| 244 |
final_audio = compress_dynamic_range(
|
| 245 |
final_audio,
|
| 246 |
+
threshold=-15.0,
|
| 247 |
+
ratio=2.5,
|
| 248 |
attack=5.0,
|
| 249 |
release=50.0
|
| 250 |
)
|
|
|
|
| 271 |
return 0, None
|
| 272 |
|
| 273 |
|
| 274 |
+
|
| 275 |
def audio_func(id, lines, lang):
|
| 276 |
loop = asyncio.new_event_loop()
|
| 277 |
asyncio.set_event_loop(loop)
|