# NOTE: removed extraction artifacts (file-size line, git-blame hashes, and a
# line-number dump) that were not Python source and would break parsing.
from moviepy.editor import *
from moviepy.video.fx.all import speedx
from PIL import Image
import pytesseract
import numpy as np
import edge_tts
from mutagen.mp3 import MP3
import uuid
import os
from pathlib import Path
import rust_highlight
import rust_combiner
import shutil
import asyncio
import cv2
import numpy as np
import subprocess, shlex, os, time
import asyncio
import nest_asyncio
import edge_tts
import re
import html
import unicodedata
from pydub import AudioSegment
from pydub.effects import normalize
import tempfile
import os
import warnings
# from IPython.display import Video, display, HTML # Commented out for Hugging Face Spaces compatibility
import math
# --- Runtime layout & global setup ---------------------------------------
# Use /app/data which we created with proper permissions
BASE_DIR = "/app/data"    # persistent root for all generated assets
IMAGE_DIR = "/tmp/images"  # scratch space for intermediate images
os.makedirs(IMAGE_DIR, exist_ok=True)
AUDIO_DIR = os.path.join(BASE_DIR, "sound")  # narration MP3s land here
CLIPS_DIR = os.path.join(BASE_DIR, "video")  # rendered clips land here
# Create directories (no chmod needed)
for path in [BASE_DIR, AUDIO_DIR, CLIPS_DIR]:
    Path(path).mkdir(parents=True, exist_ok=True)
warnings.filterwarnings('ignore')
# Allow asyncio.run() inside environments that already run an event loop
# (e.g. notebooks / Gradio on Spaces).
nest_asyncio.apply()
import re
import html
import unicodedata
import tempfile
import os
import asyncio
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
from functools import lru_cache
import edge_tts
from pydub import AudioSegment
from pydub.effects import normalize
from mutagen.mp3 import MP3
VOICE_EN = "en-IN-NeerjaNeural"  # default English voice for edge-tts

# Pre-compiled regex patterns for speed (compiled once, reused many times)
URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
SPECIAL_CHAR_PATTERN = re.compile(r'[#@$%^&*_+=|\\`~]')
WHITESPACE_PATTERN = re.compile(r'\s+')
SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
# SSML-related keywords are stripped as WHOLE words only, case-insensitively.
# The previous per-substring str.replace() mangled ordinary words
# ("invoice" -> "in", "speaker" -> "er") and missed mixed case ("Voice").
KEYWORD_PATTERN = re.compile(r'\b(?:voice|speak|prosody|ssml|xmlns)\b',
                             re.IGNORECASE)


@lru_cache(maxsize=1024)  # Cache cleaned text to avoid re-processing
def clean_text_for_tts(text):
    """Clean *text* so it is safe to hand to the TTS engine.

    Strips URLs, markup tags, brackets, shell-ish special characters and
    standalone SSML keywords, unescapes HTML entities, normalizes Unicode
    (NFKD) and collapses whitespace.

    Returns the cleaned string ('' for falsy input).
    """
    if not text:
        return ""
    text = str(text).strip()
    text = html.unescape(text)
    # Use pre-compiled patterns (much faster)
    text = URL_PATTERN.sub('', text)
    text = TAG_PATTERN.sub('', text)
    text = BRACKET_PATTERN.sub('', text)
    text = SPECIAL_CHAR_PATTERN.sub('', text)
    # These are literal two-character escape sequences that survive
    # serialization, not real control characters.
    text = text.replace('\\n', ' ').replace('\\t', ' ').replace('\\r', ' ')
    # FIX: remove SSML keywords only when they occur as standalone words,
    # in any letter case — one regex pass instead of ten replace() calls.
    text = KEYWORD_PATTERN.sub('', text)
    text = unicodedata.normalize('NFKD', text)
    text = WHITESPACE_PATTERN.sub(' ', text)
    return text.strip()
async def generate_safe_audio(text, voice, semaphore):
    """Synthesize one chunk of text to a temporary MP3, throttled by *semaphore*.

    Returns the temp-file path on success, or ``None`` when the cleaned text
    is empty or synthesis fails (the temp file is deleted on failure).
    """
    async with semaphore:  # cap the number of in-flight TTS requests
        spoken = clean_text_for_tts(text)
        if not spoken:
            return None
        handle = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
        out_path = handle.name
        handle.close()
        try:
            await edge_tts.Communicate(spoken, voice=voice).save(out_path)
        except Exception as e:
            print(f"Error generating audio: {e}")
            if os.path.exists(out_path):
                os.unlink(out_path)
            return None
        return out_path
@lru_cache(maxsize=256)
def smart_text_chunking(text, max_chars=80):
    """Split *text* into TTS-sized chunks of at most *max_chars* characters.

    Splits on sentence boundaries first, then on sub-clause punctuation,
    and finally word-wraps anything still too long. Results are cached and
    returned as a tuple (lru_cache requires a hashable value).
    """
    text = clean_text_for_tts(text)
    if not text:
        return tuple()

    pieces = []

    def _wrap_words(fragment):
        # Greedy word wrap: pack words until adding one would overflow.
        buf = ""
        for word in fragment.split():
            candidate = f"{buf} {word}" if buf else word
            if len(candidate) <= max_chars:
                buf = candidate
            else:
                if buf:
                    pieces.append(buf.strip())
                buf = word
        if buf:
            pieces.append(buf.strip())

    for raw_sentence in SENTENCE_PATTERN.split(text):
        sentence = raw_sentence.strip()
        if not sentence:
            continue
        if len(sentence) <= max_chars:
            pieces.append(sentence)
            continue
        for raw_part in SUB_PATTERN.split(sentence):
            part = raw_part.strip()
            if not part:
                continue
            if len(part) <= max_chars:
                pieces.append(part)
            else:
                _wrap_words(part)

    return tuple(p for p in pieces if p.strip())
def process_audio_segment_fast(audio_file):
    """Load, normalize, and lightly trim one temporary MP3; delete the file.

    Runs in a worker thread. Returns a pydub ``AudioSegment`` on success or
    ``None`` when decoding/processing fails. The input temp file is always
    removed (best effort) regardless of outcome.
    """
    try:
        segment = AudioSegment.from_file(audio_file)
        segment = normalize(segment)
        # Only strip silence for longer segments
        if len(segment) > 200:
            try:
                segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
            # FIX: was a bare `except:` which also swallowed SystemExit /
            # KeyboardInterrupt; keep the best-effort intent but narrow it.
            except Exception:
                pass  # keep the un-trimmed segment
        return segment
    except Exception as e:
        print(f"Warning: Error processing audio segment: {e}")
        return None
    finally:
        # Cleanup temp file immediately
        try:
            if os.path.exists(audio_file):
                os.unlink(audio_file)
        # FIX: was bare `except:`; only filesystem errors are expected here.
        except OSError:
            pass  # deletion is best-effort; never mask the real result
async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None, max_concurrent=10):
    """Ultra-optimized bilingual TTS with parallel processing.

    Pipeline: chunk *text* -> synthesize all chunks concurrently (edge-tts,
    throttled to *max_concurrent* requests) -> normalize/trim each chunk in
    a thread pool -> concatenate with 200 ms pauses -> compress + normalize
    -> export a single MP3 to *output_file*.

    ``VOICE_TA`` is the voice to use for all chunks; when it is a Tamil
    ("ta-IN") voice, Tamil-script chunks get it while other chunks still use
    it too (falling back to ``VOICE_EN`` only when ``VOICE_TA`` is None).

    Returns *output_file* on success, or ``None`` on any failure.
    """
    print("Starting optimized bilingual TTS processing...")
    try:
        chunks = smart_text_chunking(text)
        if not chunks:
            print("Error: No valid text chunks after cleaning")
            return None
        print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")
        # Bilingual handling is only activated for a Tamil secondary voice.
        is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA
        # Semaphore to limit concurrent TTS requests (prevents rate limiting)
        semaphore = asyncio.Semaphore(max_concurrent)
        # Prepare all tasks
        tasks = []
        for i, chunk in enumerate(chunks):
            # Detect Tamil chunks via the Tamil Unicode block U+0B80..U+0BFF.
            is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
            voice = VOICE_TA if (is_bilingual_tamil and is_tamil) else (VOICE_TA or VOICE_EN)
            tasks.append(generate_safe_audio(chunk, voice, semaphore))
        # Generate all audio files concurrently
        audio_files = await asyncio.gather(*tasks, return_exceptions=True)
        # Filter successful files (drop exceptions and None results)
        processed_audio_files = [f for f in audio_files if isinstance(f, str) and f]
        if not processed_audio_files:
            print("Error: No audio was successfully generated")
            return None
        print(f"Successfully generated {len(processed_audio_files)} audio segments")
        # Process audio segments in parallel using ThreadPoolExecutor
        # (each worker also deletes its temp file when done).
        with ThreadPoolExecutor(max_workers=min(len(processed_audio_files), 8)) as executor:
            audio_segments = list(executor.map(process_audio_segment_fast, processed_audio_files))
        # Filter out None segments
        audio_segments = [seg for seg in audio_segments if seg is not None]
        if not audio_segments:
            print("Error: No audio segments were successfully processed")
            return None
        # Merge audio segments (fast concatenation)
        print("Merging audio segments...")
        merged_audio = audio_segments[0]
        pause = AudioSegment.silent(duration=200)  # 200 ms gap between chunks
        for segment in audio_segments[1:]:
            merged_audio += pause + segment
        # Apply final processing (compression and normalization)
        print("Applying final audio processing...")
        merged_audio = merged_audio.compress_dynamic_range(
            threshold=-20.0,
            ratio=4.0,
            attack=5.0,
            release=50.0
        )
        merged_audio = normalize(merged_audio)
        # Export with high quality
        merged_audio.export(output_file, format="mp3", bitrate="192k")
        print(f"✅ Audio successfully generated: {output_file}")
        return output_file
    except Exception as main_error:
        # Broad catch is deliberate: this is the pipeline's top-level
        # boundary and callers expect None rather than an exception.
        print(f"Main error in bilingual TTS: {main_error}")
        return None
async def generate_tts_optimized(id, lines, lang):
    """Optimized TTS generation function.

    Picks an edge-tts neural voice for *lang* (or for an inline override
    encoded in *lang* as "<text>&&&<Language name>"), synthesizes the text
    to ``AUDIO_DIR/audio{id}.mp3`` and returns ``(duration_seconds, path)``,
    or ``(None, None)`` on failure.
    """
    # Language name -> edge-tts neural voice table.
    voice = {
        "English": "en-US-JennyNeural",
        "Tamil": "ta-IN-PallaviNeural",
        "Hindi": "hi-IN-SwaraNeural",
        "Malayalam": "ml-IN-SobhanaNeural",
        "Kannada": "kn-IN-SapnaNeural",
        "Telugu": "te-IN-ShrutiNeural",
        "Bengali": "bn-IN-TanishaaNeural",
        "Marathi": "mr-IN-AarohiNeural",
        "Gujarati": "gu-IN-DhwaniNeural",
        "Punjabi": "pa-IN-VaaniNeural",
        "Urdu": "ur-IN-GulNeural",
        "French": "fr-FR-DeniseNeural",
        "German": "de-DE-KatjaNeural",
        "Spanish": "es-ES-ElviraNeural",
        "Italian": "it-IT-IsabellaNeural",
        "Russian": "ru-RU-SvetlanaNeural",
        "Japanese": "ja-JP-NanamiNeural",
        "Korean": "ko-KR-SunHiNeural",
        "Chinese": "zh-CN-XiaoxiaoNeural",
        "Arabic": "ar-SA-ZariyahNeural",
        "Portuguese": "pt-BR-FranciscaNeural",
        "Dutch": "nl-NL-FennaNeural",
        "Greek": "el-GR-AthinaNeural",
        "Hebrew": "he-IL-HilaNeural",
        "Turkish": "tr-TR-EmelNeural",
        "Polish": "pl-PL-AgnieszkaNeural",
        "Thai": "th-TH-AcharaNeural",
        "Vietnamese": "vi-VN-HoaiMyNeural",
        "Swedish": "sv-SE-SofieNeural",
        "Finnish": "fi-FI-NooraNeural",
        "Czech": "cs-CZ-VlastaNeural",
        "Hungarian": "hu-HU-NoemiNeural"
    }
    audio_name = f"audio{id}.mp3"
    audio_path = os.path.join(AUDIO_DIR, audio_name)
    if "&&&" in lang:
        # Inline override: *lang* carries both the text to speak and the
        # language name, separated by "&&&"; *lines* is ignored here.
        listf = lang.split("&&&")
        text = listf[0].strip()
        lang_name = listf[1].strip()
        voice_to_use = voice.get(lang_name, VOICE_EN)
    else:
        text = lines[id]
        voice_to_use = voice.get(lang, VOICE_EN)
    # Increase max_concurrent for more speed (adjust based on your system)
    output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=15)
    if output and os.path.exists(audio_path):
        # Read the real duration back from the encoded MP3 header.
        audio = MP3(audio_path)
        duration = audio.info.length
        return duration, audio_path
    return None, None
def audio_func(id, lines, lang):
    """Blocking facade: drive the async TTS pipeline to completion."""
    coroutine = generate_tts_optimized(id, lines, lang)
    return asyncio.run(coroutine)
#-----------------------------
#---------------------------------
import os
import subprocess
import shlex
import time
import math
import numpy as np
import cv2
from moviepy.editor import VideoFileClip, AudioFileClip
from moviepy.video.fx.speedx import speedx
# video.py
def video_func(id, lines, lang):
    """Generate narration audio for ``lines[id]`` and render its video clip.

    Returns the rendered clip's path, or ``None`` when either the audio
    stage or the video stage fails.
    """
    duration, audio_path = audio_func(id, lines, lang)
    if not (duration and audio_path):
        print("Failed to generate audio.")
        return None

    narration_text = lines[id]
    print("-----------------------------------------------------------------------------")
    print(narration_text)

    # CREATE CLIPS DIRECTORY IF IT DOESN'T EXIST
    os.makedirs(CLIPS_DIR, exist_ok=True)

    # Hand rendering off to the Rust extension.
    clip_path = rust_highlight.generate_video_clip(
        id, narration_text, audio_path, duration, CLIPS_DIR
    )
    if not clip_path:
        print("Video generation failed.")
        return None
    print(f"Final video saved at: {clip_path}")
    return clip_path