File size: 12,113 Bytes
3a89103
c4b79fe
c9030d6
 
 
b0b4114
c9030d6
fad83e4
c9030d6
a9a000b
6643aa1
5d8bb2b
a15157a
cb8ee6b
eb0f122
 
 
97fa939
 
 
 
 
 
 
 
 
 
 
c4b79fe
eb0f122
a9a000b
 
7fee81b
 
40ff02c
 
a9a000b
7fee81b
a9a000b
97fa939
 
a0f5f50
70d5824
b6cac61
cb6e92f
b6cac61
 
cecdb1a
cb6e92f
b6cac61
cecdb1a
b6cac61
cb6e92f
3724d2b
b6cac61
cb6e92f
a52313b
cb6e92f
 
 
 
 
 
 
 
1a2bb4e
cb6e92f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b6cac61
cb6e92f
 
 
a0f5f50
cb6e92f
 
 
 
 
 
67e7115
cb6e92f
 
 
 
 
 
 
 
 
 
 
 
 
67e7115
cb6e92f
 
 
 
 
 
 
 
 
 
 
 
 
 
67e7115
cb6e92f
 
9222ac5
cb6e92f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a2bb4e
cb6e92f
9222ac5
cb6e92f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a2bb4e
cb6e92f
 
b6cac61
cb6e92f
 
 
 
67e7115
cb6e92f
 
 
 
a52313b
cb6e92f
 
 
 
a52313b
cb6e92f
a52313b
cb6e92f
a52313b
cb6e92f
 
a52313b
cb6e92f
 
 
 
 
 
a52313b
cb6e92f
 
 
 
 
a52313b
cb6e92f
 
 
a52313b
cb6e92f
a52313b
cb6e92f
 
 
1a2bb4e
cb6e92f
 
a52313b
cb6e92f
 
 
67e7115
cb6e92f
 
 
 
a52313b
cb6e92f
 
67e7115
cb6e92f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67e7115
1a2bb4e
cb6e92f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9222ac5
cb6e92f
 
9222ac5
cb6e92f
 
 
 
 
46401d5
67e7115
cb6e92f
b6cac61
cb6e92f
 
 
 
 
 
 
 
 
a52313b
966de65
cb6e92f
 
 
73b2a26
bf7b22d
df79249
916cab7
97fa939
82f6e63
505aba1
 
 
 
 
 
 
 
 
 
 
cf471fe
a47e779
82f6e63
 
 
505aba1
cf471fe
82f6e63
 
505aba1
 
 
 
 
 
 
 
 
 
fe47739
505aba1
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
from moviepy.editor import *
from moviepy.video.fx.all import speedx
from PIL import Image
import pytesseract
import numpy as np
import edge_tts
from mutagen.mp3 import MP3
import uuid
import os
from pathlib import Path
import rust_highlight
import rust_combiner
import shutil
import asyncio
import cv2
import numpy as np
import subprocess, shlex, os, time
import asyncio
import nest_asyncio
import edge_tts
import re
import html
import unicodedata
from pydub import AudioSegment
from pydub.effects import normalize
import tempfile
import os
import warnings
# from IPython.display import Video, display, HTML # Commented out for Hugging Face Spaces compatibility
import math
# Use /app/data which we created with proper permissions
BASE_DIR = "/app/data"
# Images live in /tmp so they do not persist in the app data volume.
IMAGE_DIR = "/tmp/images"
os.makedirs(IMAGE_DIR, exist_ok=True)
AUDIO_DIR = os.path.join(BASE_DIR, "sound")   # generated TTS mp3 files
CLIPS_DIR = os.path.join(BASE_DIR, "video")   # rendered video clips
# Create directories (no chmod needed)
for path in [BASE_DIR, AUDIO_DIR, CLIPS_DIR]:
    Path(path).mkdir(parents=True, exist_ok=True)
warnings.filterwarnings('ignore')
# Patch asyncio so asyncio.run() works inside an already-running event loop
# (needed when this module runs under a notebook / Gradio-style host).
nest_asyncio.apply()

import re
import html
import unicodedata
import tempfile
import os
import asyncio
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
from functools import lru_cache
import edge_tts
from pydub import AudioSegment
from pydub.effects import normalize
from mutagen.mp3 import MP3

VOICE_EN = "en-IN-NeerjaNeural"

# Pre-compiled regex patterns for speed (compiled once, reused many times)
URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
SPECIAL_CHAR_PATTERN = re.compile(r'[#@$%^&*_+=|\\`~]')
WHITESPACE_PATTERN = re.compile(r'\s+')
SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
# SSML/markup keywords to strip before synthesis.  \b word boundaries are
# essential: plain substring replacement would mangle ordinary words that
# merely contain a keyword (e.g. "invoice" -> "in", "speaker" -> "er").
KEYWORD_PATTERN = re.compile(
    r'\b(?:voice|speak|prosody|ssml|xmlns|VOICE|SPEAK|PROSODY|SSML|XMLNS)\b'
)

@lru_cache(maxsize=1024)  # Cache cleaned text to avoid re-processing
def clean_text_for_tts(text):
    """Clean *text* for TTS: drop URLs, markup, SSML keywords, odd chars.

    Args:
        text: raw text (any object; coerced with str()). Falsy input
            yields "".

    Returns:
        A whitespace-normalized, NFKD-normalized string safe to feed to
        edge-tts.
    """
    if not text:
        return ""
    text = str(text).strip()
    text = html.unescape(text)

    # Use pre-compiled patterns (much faster than re.sub with string patterns)
    text = URL_PATTERN.sub('', text)
    text = TAG_PATTERN.sub('', text)
    text = BRACKET_PATTERN.sub('', text)
    text = SPECIAL_CHAR_PATTERN.sub('', text)
    # These target *escaped* sequences (literal backslash-n etc.) left over
    # from serialized text; real newlines are collapsed by WHITESPACE_PATTERN.
    text = text.replace('\\n', ' ').replace('\\t', ' ').replace('\\r', ' ')

    # Remove whole SSML keywords only (word-boundary match, not substrings).
    text = KEYWORD_PATTERN.sub('', text)

    text = unicodedata.normalize('NFKD', text)
    text = WHITESPACE_PATTERN.sub(' ', text)
    return text.strip()

async def generate_safe_audio(text, voice, semaphore):
    """Synthesize one text chunk to a temp MP3, throttled by *semaphore*.

    Returns the temp file path on success, or None when the cleaned text is
    empty or synthesis fails (the temp file is removed on failure).
    """
    async with semaphore:  # cap concurrent edge-tts requests
        cleaned = clean_text_for_tts(text)
        if not cleaned:
            return None

        # Reserve a unique temp path; edge-tts writes the MP3 itself.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as tmp:
            out_path = tmp.name

        try:
            await edge_tts.Communicate(cleaned, voice=voice).save(out_path)
        except Exception as e:
            print(f"Error generating audio: {e}")
            if os.path.exists(out_path):
                os.unlink(out_path)
            return None
        return out_path

@lru_cache(maxsize=256)
def smart_text_chunking(text, max_chars=80):
    """Split *text* into TTS-sized chunks (cached for repeated inputs).

    Splits on sentence punctuation first, then clause punctuation, then
    greedily packs words. Returns a tuple (hashable, as lru_cache requires).
    """
    text = clean_text_for_tts(text)
    if not text:
        return tuple()

    chunks = []

    def pack_words(fragment):
        # Greedy word packing for fragments still longer than max_chars.
        buf = ""
        for word in fragment.split():
            candidate = f"{buf} {word}" if buf else word
            if len(candidate) <= max_chars:
                buf = candidate
            else:
                if buf:
                    chunks.append(buf.strip())
                buf = word
        if buf:
            chunks.append(buf.strip())

    for sentence in SENTENCE_PATTERN.split(text):
        sentence = sentence.strip()
        if not sentence:
            continue
        if len(sentence) <= max_chars:
            chunks.append(sentence)
            continue
        for clause in SUB_PATTERN.split(sentence):
            clause = clause.strip()
            if not clause:
                continue
            if len(clause) <= max_chars:
                chunks.append(clause)
            else:
                pack_words(clause)

    return tuple(c for c in chunks if c.strip())

def process_audio_segment_fast(audio_file):
    """Load and normalize one temp MP3, then delete the temp file.

    Args:
        audio_file: path to a temp audio file produced by generate_safe_audio.

    Returns:
        A normalized pydub AudioSegment, or None if loading/processing fails.
        The input file is always removed, even on failure.
    """
    try:
        segment = AudioSegment.from_file(audio_file)
        segment = normalize(segment)

        # Only strip silence for longer segments; trimming a very short clip
        # could remove all of it.
        if len(segment) > 200:
            try:
                segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
            except Exception:
                pass  # best-effort: keep the un-trimmed segment

        return segment
    except Exception as e:
        print(f"Warning: Error processing audio segment: {e}")
        return None
    finally:
        # Cleanup temp file immediately to bound disk usage.
        try:
            if os.path.exists(audio_file):
                os.unlink(audio_file)
        except OSError:
            pass

async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None, max_concurrent=10):
    """Chunk *text*, synthesize all chunks concurrently, merge into one MP3.

    Args:
        text: input text; chunked via smart_text_chunking.
        output_file: destination MP3 path.
        VOICE_TA: optional override voice. If it contains "ta-IN", chunks
            with Tamil codepoints use it while other chunks fall back to
            VOICE_TA (or VOICE_EN when VOICE_TA is None).
        max_concurrent: cap on simultaneous edge-tts requests.

    Returns:
        output_file on success, None on any failure (errors are printed,
        not raised).
    """
    print("Starting optimized bilingual TTS processing...")
    
    try:
        chunks = smart_text_chunking(text)
        if not chunks:
            print("Error: No valid text chunks after cleaning")
            return None
        
        print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")
        
        is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA
        
        # Semaphore to limit concurrent TTS requests (prevents rate limiting)
        semaphore = asyncio.Semaphore(max_concurrent)
        
        # Prepare all tasks: pick a voice per chunk by script detection
        # (U+0B80–U+0BFF is the Tamil Unicode block).
        tasks = []
        for i, chunk in enumerate(chunks):
            is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
            voice = VOICE_TA if (is_bilingual_tamil and is_tamil) else (VOICE_TA or VOICE_EN)
            tasks.append(generate_safe_audio(chunk, voice, semaphore))
        
        # Generate all audio files concurrently; exceptions are returned
        # in-place rather than raised, then dropped by the filter below.
        audio_files = await asyncio.gather(*tasks, return_exceptions=True)
        
        # Filter successful files (generate_safe_audio returns a path or None)
        processed_audio_files = [f for f in audio_files if isinstance(f, str) and f]
        
        if not processed_audio_files:
            print("Error: No audio was successfully generated")
            return None
        
        print(f"Successfully generated {len(processed_audio_files)} audio segments")
        
        # Process audio segments in parallel using ThreadPoolExecutor
        # (pydub decoding is blocking I/O/CPU work, so threads help here).
        with ThreadPoolExecutor(max_workers=min(len(processed_audio_files), 8)) as executor:
            audio_segments = list(executor.map(process_audio_segment_fast, processed_audio_files))
        
        # Filter out None segments (segments whose processing failed)
        audio_segments = [seg for seg in audio_segments if seg is not None]
        
        if not audio_segments:
            print("Error: No audio segments were successfully processed")
            return None
        
        # Merge audio segments with a 200 ms pause between chunks
        print("Merging audio segments...")
        merged_audio = audio_segments[0]
        pause = AudioSegment.silent(duration=200)
        
        for segment in audio_segments[1:]:
            merged_audio += pause + segment
        
        # Apply final processing (compression and normalization)
        print("Applying final audio processing...")
        merged_audio = merged_audio.compress_dynamic_range(
            threshold=-20.0, 
            ratio=4.0, 
            attack=5.0, 
            release=50.0
        )
        merged_audio = normalize(merged_audio)
        
        # Export with high quality
        merged_audio.export(output_file, format="mp3", bitrate="192k")
        print(f"✅ Audio successfully generated: {output_file}")
        
        return output_file
        
    except Exception as main_error:
        print(f"Main error in bilingual TTS: {main_error}")
        return None

# Language name -> edge-tts neural voice. Built once at module level so the
# mapping is not reconstructed on every TTS call.
VOICE_BY_LANGUAGE = {
    "English": "en-US-JennyNeural",
    "Tamil": "ta-IN-PallaviNeural",
    "Hindi": "hi-IN-SwaraNeural",
    "Malayalam": "ml-IN-SobhanaNeural",
    "Kannada": "kn-IN-SapnaNeural",
    "Telugu": "te-IN-ShrutiNeural",
    "Bengali": "bn-IN-TanishaaNeural",
    "Marathi": "mr-IN-AarohiNeural",
    "Gujarati": "gu-IN-DhwaniNeural",
    "Punjabi": "pa-IN-VaaniNeural",
    "Urdu": "ur-IN-GulNeural",
    "French": "fr-FR-DeniseNeural",
    "German": "de-DE-KatjaNeural",
    "Spanish": "es-ES-ElviraNeural",
    "Italian": "it-IT-IsabellaNeural",
    "Russian": "ru-RU-SvetlanaNeural",
    "Japanese": "ja-JP-NanamiNeural",
    "Korean": "ko-KR-SunHiNeural",
    "Chinese": "zh-CN-XiaoxiaoNeural",
    "Arabic": "ar-SA-ZariyahNeural",
    "Portuguese": "pt-BR-FranciscaNeural",
    "Dutch": "nl-NL-FennaNeural",
    "Greek": "el-GR-AthinaNeural",
    "Hebrew": "he-IL-HilaNeural",
    "Turkish": "tr-TR-EmelNeural",
    "Polish": "pl-PL-AgnieszkaNeural",
    "Thai": "th-TH-AcharaNeural",
    "Vietnamese": "vi-VN-HoaiMyNeural",
    "Swedish": "sv-SE-SofieNeural",
    "Finnish": "fi-FI-NooraNeural",
    "Czech": "cs-CZ-VlastaNeural",
    "Hungarian": "hu-HU-NoemiNeural"
}

async def generate_tts_optimized(id, lines, lang):
    """Generate TTS audio for one line and return (duration_sec, path).

    Args:
        id: index into *lines*; also used to name the output file.
        lines: sequence of text segments.
        lang: either a language name (a VOICE_BY_LANGUAGE key) or a
            "text&&&Language" pair, in which case the text part overrides
            lines[id].
            # NOTE(review): the "&&&" overload is part of the caller
            # protocol — confirm before simplifying.

    Returns:
        (duration_in_seconds, audio_path) on success, (None, None) on failure.
    """
    audio_name = f"audio{id}.mp3"
    audio_path = os.path.join(AUDIO_DIR, audio_name)

    if "&&&" in lang:
        parts = lang.split("&&&")
        text = parts[0].strip()
        lang_name = parts[1].strip()
        voice_to_use = VOICE_BY_LANGUAGE.get(lang_name, VOICE_EN)
    else:
        text = lines[id]
        voice_to_use = VOICE_BY_LANGUAGE.get(lang, VOICE_EN)

    # Increase max_concurrent for more speed (adjust based on your system)
    output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=15)

    if output and os.path.exists(audio_path):
        audio = MP3(audio_path)
        duration = audio.info.length
        return duration, audio_path

    return None, None

def audio_func(id, lines, lang):
    """Synchronous wrapper for audio generation.

    Returns (duration_seconds, audio_path) or (None, None) on failure.
    asyncio.run is safe here even under a live event loop because
    nest_asyncio.apply() is called at module import.
    """
    return asyncio.run(generate_tts_optimized(id, lines, lang))


     


#-----------------------------
#---------------------------------
import os
import subprocess
import shlex
import time
import math
import numpy as np
import cv2
from moviepy.editor import VideoFileClip, AudioFileClip
from moviepy.video.fx.speedx import speedx

# video.py
def video_func(id, lines, lang):
    """Build the video clip for lines[id]: synthesize the narration audio,
    then delegate frame rendering to the rust_highlight extension.

    Returns the final clip path, or None if audio or video generation fails.
    """
    duration, audio_path = audio_func(id, lines, lang)
    if not (duration and audio_path):
        print("Failed to generate audio.")
        return None

    narration = lines[id]
    print("-----------------------------------------------------------------------------")
    print(narration)

    # Make sure the output directory exists before the Rust side writes to it.
    os.makedirs(CLIPS_DIR, exist_ok=True)

    # All clip rendering/highlighting happens in the Rust extension.
    clip_path = rust_highlight.generate_video_clip(id, narration, audio_path, duration, CLIPS_DIR)

    if not clip_path:
        print("Video generation failed.")
        return None

    print(f"Final video saved at: {clip_path}")
    return clip_path