sreepathi-ravikumar committed on
Commit
4345f75
·
verified ·
1 Parent(s): e59d451

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -433
app.py CHANGED
@@ -1,442 +1,21 @@
1
- import os
2
- import re
3
- import html
4
- import uuid
5
- import asyncio
6
- import tempfile
7
- import unicodedata
8
- from datetime import datetime, timedelta
9
- from functools import lru_cache
10
- from concurrent.futures import ThreadPoolExecutor
11
- from typing import List, Tuple
12
-
13
- from flask import Flask, request, jsonify, send_file
14
  from flask_cors import CORS
15
  import edge_tts
16
- from pydub import AudioSegment
17
- from pydub.effects import normalize, compress_dynamic_range
18
- from mutagen.mp3 import MP3
19
 
20
- # Initialize Flask app
21
  app = Flask(__name__)
22
  CORS(app)
23
 
24
- # Configuration
25
- AUDIO_OUTPUT_DIR = 'audio_output'
26
- MAX_CONCURRENT_TTS = 15
27
- MAX_CHUNK_LENGTH = 80
28
- THREAD_POOL_SIZE = 8
29
- AUDIO_FILE_RETENTION_HOURS = 1
30
-
31
- # Pre-compiled regex patterns for performance
32
- URL_PATTERN = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
33
- TAG_PATTERN = re.compile(r'<[^>]+>')
34
- BRACKET_PATTERN = re.compile(r'[\[\]{}()]')
35
- SPECIAL_CHAR_PATTERN = re.compile(r'[^\w\s\u0B80-\u0BFF.,!?;:\-\'"।॥]')
36
- WHITESPACE_PATTERN = re.compile(r'\s+')
37
- SENTENCE_PATTERN = re.compile(r'[.!?]+')
38
- SUB_PATTERN = re.compile(r'[,;]+')
39
-
40
- # Tamil Unicode range for bilingual detection
41
- TAMIL_RANGE = range(0x0B80, 0x0C00)
42
-
43
- # Voice mappings for 30+ languages
44
- VOICE_MAPPINGS = {
45
- 'en': 'en-US-JennyNeural',
46
- 'ta': 'ta-IN-PallaviNeural',
47
- 'hi': 'hi-IN-SwaraNeural',
48
- 'ml': 'ml-IN-SobhanaNeural',
49
- 'kn': 'kn-IN-SapnaNeural',
50
- 'te': 'te-IN-ShrutiNeural',
51
- 'bn': 'bn-IN-TanishaaNeural',
52
- 'mr': 'mr-IN-AarohiNeural',
53
- 'gu': 'gu-IN-DhwaniNeural',
54
- 'pa': 'pa-IN-SandeepNeural',
55
- 'ur': 'ur-IN-GulNeural',
56
- 'fr': 'fr-FR-DeniseNeural',
57
- 'de': 'de-DE-KatjaNeural',
58
- 'es': 'es-ES-ElviraNeural',
59
- 'it': 'it-IT-ElsaNeural',
60
- 'ru': 'ru-RU-SvetlanaNeural',
61
- 'ja': 'ja-JP-NanamiNeural',
62
- 'ko': 'ko-KR-SunHiNeural',
63
- 'zh': 'zh-CN-XiaoxiaoNeural',
64
- 'ar': 'ar-SA-ZariyahNeural',
65
- 'pt': 'pt-BR-FranciscaNeural',
66
- 'nl': 'nl-NL-ColetteNeural',
67
- 'el': 'el-GR-AthinaNeural',
68
- 'he': 'he-IL-HilaNeural',
69
- 'tr': 'tr-TR-EmelNeural',
70
- 'pl': 'pl-PL-ZofiaNeural',
71
- 'th': 'th-TH-PremwadeeNeural',
72
- 'vi': 'vi-VN-HoaiMyNeural',
73
- 'sv': 'sv-SE-SofieNeural',
74
- 'fi': 'fi-FI-NooraNeural',
75
- 'cs': 'cs-CZ-VlastaNeural',
76
- 'hu': 'hu-HU-NoemiNeural'
77
- }
78
-
79
- # Create audio output directory
80
- os.makedirs(AUDIO_OUTPUT_DIR, exist_ok=True)
81
-
82
-
83
- @lru_cache(maxsize=1024)
84
- def clean_text(text: str) -> str:
85
- """
86
- Clean and normalize text for TTS processing.
87
- Cached for performance with repeated text.
88
- """
89
- # Remove URLs
90
- text = URL_PATTERN.sub('', text)
91
-
92
- # Remove HTML tags
93
- text = TAG_PATTERN.sub('', text)
94
-
95
- # Unescape HTML entities
96
- text = html.unescape(text)
97
-
98
- # Remove brackets
99
- text = BRACKET_PATTERN.sub('', text)
100
-
101
- # Normalize Unicode (NFKD)
102
- text = unicodedata.normalize('NFKD', text)
103
-
104
- # Remove special characters (keeping Tamil and basic punctuation)
105
- text = SPECIAL_CHAR_PATTERN.sub('', text)
106
-
107
- # Normalize whitespace
108
- text = WHITESPACE_PATTERN.sub(' ', text)
109
-
110
- return text.strip()
111
-
112
-
113
- @lru_cache(maxsize=512)
114
- def smart_chunk_text(text: str) -> Tuple[str, ...]:
115
- """
116
- Split text into manageable chunks at natural boundaries.
117
- Returns tuple for caching compatibility.
118
- """
119
- chunks = []
120
-
121
- # First, split by sentences
122
- sentences = SENTENCE_PATTERN.split(text)
123
-
124
- for sentence in sentences:
125
- sentence = sentence.strip()
126
- if not sentence:
127
- continue
128
-
129
- # If sentence is short enough, add it directly
130
- if len(sentence) <= MAX_CHUNK_LENGTH:
131
- chunks.append(sentence)
132
- else:
133
- # Split by commas/semicolons
134
- sub_parts = SUB_PATTERN.split(sentence)
135
-
136
- current_chunk = ""
137
- for part in sub_parts:
138
- part = part.strip()
139
- if not part:
140
- continue
141
-
142
- if len(current_chunk) + len(part) + 2 <= MAX_CHUNK_LENGTH:
143
- current_chunk += (", " if current_chunk else "") + part
144
- else:
145
- if current_chunk:
146
- chunks.append(current_chunk)
147
-
148
- # If single part is still too long, split by words
149
- if len(part) > MAX_CHUNK_LENGTH:
150
- words = part.split()
151
- current_chunk = ""
152
- for word in words:
153
- if len(current_chunk) + len(word) + 1 <= MAX_CHUNK_LENGTH:
154
- current_chunk += (" " if current_chunk else "") + word
155
- else:
156
- if current_chunk:
157
- chunks.append(current_chunk)
158
- current_chunk = word
159
- else:
160
- current_chunk = part
161
-
162
- if current_chunk:
163
- chunks.append(current_chunk)
164
-
165
- return tuple(chunks)
166
-
167
-
168
- def detect_tamil_content(text: str) -> bool:
169
- """Check if text contains Tamil Unicode characters."""
170
- return any(ord(char) in TAMIL_RANGE for char in text)
171
-
172
-
173
- def get_voice_for_text(text: str, language: str = None, voice: str = None) -> str:
174
- """
175
- Determine the appropriate voice based on text content and parameters.
176
- Supports bilingual Tamil-English detection.
177
- """
178
- if voice:
179
- return voice
180
-
181
- # Auto-detect Tamil content
182
- if detect_tamil_content(text):
183
- return VOICE_MAPPINGS['ta']
184
-
185
- # Use specified language or default to English
186
- lang_code = language.lower() if language else 'en'
187
- return VOICE_MAPPINGS.get(lang_code, VOICE_MAPPINGS['en'])
188
-
189
-
190
- async def generate_audio_chunk(text: str, voice: str, semaphore: asyncio.Semaphore) -> bytes:
191
- """
192
- Generate audio for a single text chunk using edge-tts.
193
- Rate-limited by semaphore.
194
- """
195
- async with semaphore:
196
- communicate = edge_tts.Communicate(text, voice)
197
- audio_data = b""
198
-
199
- async for chunk in communicate.stream():
200
- if chunk["type"] == "audio":
201
- audio_data += chunk["data"]
202
-
203
- return audio_data
204
-
205
-
206
- async def generate_all_chunks(chunks: List[str], voice: str) -> List[bytes]:
207
- """
208
- Generate audio for all chunks concurrently with rate limiting.
209
- """
210
- semaphore = asyncio.Semaphore(MAX_CONCURRENT_TTS)
211
-
212
- tasks = [
213
- generate_audio_chunk(chunk, voice, semaphore)
214
- for chunk in chunks
215
- ]
216
 
217
- return await asyncio.gather(*tasks)
218
-
219
-
220
- def process_audio_segment(audio_data: bytes) -> AudioSegment:
221
- """
222
- Process a single audio segment: normalize and strip silence.
223
- Designed for parallel execution.
224
- """
225
- with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as temp_file:
226
- temp_path = temp_file.name
227
- temp_file.write(audio_data)
228
-
229
- try:
230
- segment = AudioSegment.from_mp3(temp_path)
231
-
232
- # Normalize audio
233
- segment = normalize(segment)
234
-
235
- # Strip silence
236
- segment = segment.strip_silence(
237
- silence_thresh=-40,
238
- silence_len=50
239
- )
240
-
241
- return segment
242
- finally:
243
- os.unlink(temp_path)
244
-
245
-
246
- def combine_audio_segments(audio_chunks: List[bytes]) -> str:
247
- """
248
- Process and combine all audio chunks into a single MP3 file.
249
- Uses parallel processing for audio segment handling.
250
- """
251
- print(f"Processing {len(audio_chunks)} audio chunks...")
252
-
253
- # Process segments in parallel
254
- with ThreadPoolExecutor(max_workers=THREAD_POOL_SIZE) as executor:
255
- segments = list(executor.map(process_audio_segment, audio_chunks))
256
-
257
- print(f"Processed {len(segments)} segments, combining...")
258
-
259
- # Add 200ms pause between segments
260
- pause = AudioSegment.silent(duration=200)
261
-
262
- combined = AudioSegment.empty()
263
- for i, segment in enumerate(segments):
264
- combined += segment
265
- if i < len(segments) - 1: # Don't add pause after last segment
266
- combined += pause
267
-
268
- # Apply dynamic range compression
269
- combined = compress_dynamic_range(
270
- combined,
271
- threshold=-20.0,
272
- ratio=4.0,
273
- attack=5.0,
274
- release=50.0
275
- )
276
-
277
- # Export as MP3
278
- output_filename = f"{uuid.uuid4()}.mp3"
279
- output_path = os.path.join(AUDIO_OUTPUT_DIR, output_filename)
280
-
281
- combined.export(
282
- output_path,
283
- format='mp3',
284
- bitrate='192k',
285
- parameters=["-q:a", "0"]
286
- )
287
-
288
- print(f"Audio saved to {output_path}")
289
-
290
- # Get duration
291
- audio_info = MP3(output_path)
292
- duration = audio_info.info.length
293
- print(f"Audio duration: {duration:.2f} seconds")
294
-
295
- return output_path
296
-
297
-
298
- def cleanup_old_files():
299
- """Remove audio files older than retention period."""
300
- cutoff_time = datetime.now() - timedelta(hours=AUDIO_FILE_RETENTION_HOURS)
301
-
302
- if not os.path.exists(AUDIO_OUTPUT_DIR):
303
- return
304
-
305
- removed_count = 0
306
- for filename in os.listdir(AUDIO_OUTPUT_DIR):
307
- filepath = os.path.join(AUDIO_OUTPUT_DIR, filename)
308
-
309
- if os.path.isfile(filepath):
310
- file_time = datetime.fromtimestamp(os.path.getmtime(filepath))
311
-
312
- if file_time < cutoff_time:
313
- try:
314
- os.unlink(filepath)
315
- removed_count += 1
316
- except Exception as e:
317
- print(f"Error removing {filepath}: {e}")
318
-
319
- if removed_count > 0:
320
- print(f"Cleaned up {removed_count} old audio files")
321
-
322
-
323
- @app.route('/', methods=['GET'])
324
- def index():
325
- """Root endpoint with service information."""
326
- return jsonify({
327
- 'status': 'online',
328
- 'service': 'Multilingual TTS API',
329
- 'version': '1.0.0',
330
- 'supported_languages': list(VOICE_MAPPINGS.keys()),
331
- 'endpoints': {
332
- 'generate': '/generate-tts (POST)',
333
- 'voices': '/voices (GET)',
334
- 'health': '/health (GET)'
335
- }
336
- })
337
-
338
-
339
- @app.route('/voices', methods=['GET'])
340
- def get_voices():
341
- """Return all available voice mappings."""
342
- return jsonify({
343
- 'voices': VOICE_MAPPINGS,
344
- 'count': len(VOICE_MAPPINGS)
345
- })
346
-
347
-
348
- @app.route('/health', methods=['GET'])
349
- def health_check():
350
- """Health check endpoint."""
351
- audio_files = len([f for f in os.listdir(AUDIO_OUTPUT_DIR) if f.endswith('.mp3')])
352
-
353
- return jsonify({
354
- 'status': 'healthy',
355
- 'timestamp': datetime.now().isoformat(),
356
- 'audio_directory': AUDIO_OUTPUT_DIR,
357
- 'cached_audio_files': audio_files
358
- })
359
-
360
-
361
- @app.route('/generate-tts', methods=['POST'])
362
- def generate_tts():
363
- """
364
- Generate TTS audio from text.
365
-
366
- Request JSON:
367
- {
368
- "text": "Text to convert to speech",
369
- "language": "en" (optional),
370
- "voice": "en-US-JennyNeural" (optional)
371
- }
372
- """
373
- try:
374
- data = request.get_json()
375
-
376
- if not data or 'text' not in data:
377
- return jsonify({'error': 'Missing required field: text'}), 400
378
-
379
- text = data['text'].strip()
380
- if not text:
381
- return jsonify({'error': 'Text cannot be empty'}), 400
382
-
383
- language = data.get('language')
384
- voice = data.get('voice')
385
-
386
- print(f"Received TTS request - Length: {len(text)} chars")
387
-
388
- # Clean text
389
- cleaned_text = clean_text(text)
390
- print(f"Cleaned text - Length: {len(cleaned_text)} chars")
391
-
392
- # Determine voice
393
- selected_voice = get_voice_for_text(cleaned_text, language, voice)
394
- print(f"Selected voice: {selected_voice}")
395
-
396
- # Chunk text
397
- chunks = smart_chunk_text(cleaned_text)
398
- print(f"Split into {len(chunks)} chunks")
399
-
400
- # Generate audio chunks concurrently
401
- loop = asyncio.new_event_loop()
402
- asyncio.set_event_loop(loop)
403
-
404
- try:
405
- audio_chunks = loop.run_until_complete(
406
- generate_all_chunks(list(chunks), selected_voice)
407
- )
408
- finally:
409
- loop.close()
410
-
411
- print(f"Generated {len(audio_chunks)} audio chunks")
412
-
413
- # Combine audio segments
414
- output_path = combine_audio_segments(audio_chunks)
415
-
416
- # Send file
417
- return send_file(
418
- output_path,
419
- mimetype='audio/mpeg',
420
- as_attachment=True,
421
- download_name=f'tts_{uuid.uuid4().hex[:8]}.mp3'
422
- )
423
-
424
- except Exception as e:
425
- print(f"Error generating TTS: {e}")
426
- import traceback
427
- traceback.print_exc()
428
- return jsonify({'error': f'Audio generation failed: {str(e)}'}), 500
429
-
430
 
431
  if __name__ == '__main__':
432
- print("Starting Multilingual TTS API...")
433
- print(f"Audio output directory: {AUDIO_OUTPUT_DIR}")
434
- print(f"Supported languages: {len(VOICE_MAPPINGS)}")
435
-
436
- # Cleanup old files on startup
437
- cleanup_old_files()
438
-
439
- # Run Flask app
440
- port = int(os.environ.get('PORT', 7860))
441
- app.run(host='0.0.0.0', port=port, debug=False)
442
-
 
1
+ from flask import Flask, request, send_file
 
 
 
 
 
 
 
 
 
 
 
 
2
  from flask_cors import CORS
3
  import edge_tts
4
+ import asyncio
5
+ import uuid
 
6
 
 
7
  app = Flask(__name__)
8
  CORS(app)
9
 
10
+ @app.route('/tts', methods=['POST'])
11
+ def tts():
12
+ text = request.json.get('text', '').strip()
13
+ if not text:
14
+ return {'error': 'No text'}, 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
+ filename = f"{uuid.uuid4()}.mp3"
17
+ asyncio.run(edge_tts.Communicate(text, 'en-US-JennyNeural').save(filename))
18
+ return send_file(filename, mimetype='audio/mpeg')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  if __name__ == '__main__':
21
+ app.run(host='0.0.0.0', port=7860)