thecodeworm committed on
Commit
dbe2f4e
·
verified ·
1 Parent(s): 30857bb

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +631 -0
app.py ADDED
@@ -0,0 +1,631 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FastAPI Backend for Hugging Face Spaces
3
+ Provides REST API endpoints for audio processing + Text-to-Speech
4
+ """
5
+
6
+ from fastapi import FastAPI, File, UploadFile, HTTPException, Form, Request
7
+ from fastapi.responses import JSONResponse, FileResponse
8
+ from fastapi.middleware.cors import CORSMiddleware
9
+ from pydantic import BaseModel
10
+ import soundfile as sf
11
+ import tempfile
12
+ import os
13
+ from pathlib import Path
14
+ import logging
15
+ from typing import Optional
16
+ import time
17
+ from collections import defaultdict
18
+ from datetime import datetime, timedelta
19
+ import asyncio
20
+ from huggingface_hub import hf_hub_download
21
+
22
+ # Direct import (no 'backend.' prefix for HF Spaces)
23
+ from inference_pipeline import EnhancementPipeline
24
+
25
# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Directory that contains this module.
BASE_DIR = Path(__file__).parent.resolve()

# Security: Allowed file types
ALLOWED_EXTENSIONS = {'.wav', '.mp3', '.m4a', '.ogg', '.flac', '.webm'}
ALLOWED_MIMETYPES = {
    'audio/wav', 'audio/wave', 'audio/x-wav',
    'audio/mpeg', 'audio/mp3',
    'audio/mp4', 'audio/m4a', 'audio/x-m4a',
    'audio/ogg', 'audio/flac', 'audio/webm'
}

# Initialize FastAPI app
app = FastAPI(
    title="ClearSpeech API",
    description="Speech Enhancement, Transcription & Text-to-Speech",
    version="2.1.0",
    docs_url="/docs",
    redoc_url="/redoc"
)

# CORS middleware
# Wildcard origins/methods/headers must not be combined with credentialed
# requests, so credentials are disabled. None of the endpoints in this module
# read cookies or Authorization headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global pipeline instance (assigned by the startup handler)
pipeline = None
# Maps a generated download filename to the absolute path of the file on disk.
temp_files = {}
64
+
65
+
66
+ # ============================================================================
67
+ # SECURITY: Rate Limiting & File Validation
68
+ # ============================================================================
69
+
70
class SimpleRateLimiter:
    """Sliding-window, in-memory request limiter keyed by client IP.

    Each IP may make at most ``max_requests`` calls within the trailing
    ``window_minutes``. State lives in this process only.
    """

    def __init__(self, max_requests: int = 20, window_minutes: int = 60):
        self.max_requests = max_requests
        self.window = timedelta(minutes=window_minutes)
        # ip -> timestamps of requests that were accepted
        self.requests = defaultdict(list)
        self.lock = asyncio.Lock()

    async def check_rate_limit(self, client_ip: str) -> bool:
        """Record a request for ``client_ip``; return False when it is over the limit."""
        async with self.lock:
            current = datetime.now()

            # Drop timestamps that have aged out of the window.
            recent = []
            for stamp in self.requests[client_ip]:
                if current - stamp < self.window:
                    recent.append(stamp)
            self.requests[client_ip] = recent

            allowed = len(recent) < self.max_requests
            if allowed:
                recent.append(current)
            return allowed

    async def cleanup(self):
        """Run forever, pruning expired timestamps and empty IP entries hourly."""
        while True:
            await asyncio.sleep(3600)
            async with self.lock:
                current = datetime.now()
                # Snapshot the keys because entries are deleted while looping.
                for ip in tuple(self.requests):
                    surviving = [
                        stamp for stamp in self.requests[ip]
                        if current - stamp < self.window
                    ]
                    if surviving:
                        self.requests[ip] = surviving
                    else:
                        del self.requests[ip]


rate_limiter = SimpleRateLimiter(max_requests=20, window_minutes=60)
104
+
105
+
106
def get_client_ip(request: Request) -> str:
    """Resolve the caller's IP address from the request.

    Lookup order: the first entry of ``X-Forwarded-For``, then ``X-Real-IP``,
    then the connection's peer host. Returns ``"unknown"`` when the request
    carries no client information.
    """
    headers = request.headers

    forwarded_chain = headers.get("X-Forwarded-For")
    if forwarded_chain:
        # The header is a comma-separated list; only the first entry is used.
        first_hop, _, _ = forwarded_chain.partition(",")
        return first_hop.strip()

    real_ip = headers.get("X-Real-IP")
    if real_ip:
        return real_ip

    if request.client:
        return request.client.host
    return "unknown"
115
+
116
+
117
def validate_audio_file(file: UploadFile) -> None:
    """Validate that an upload looks like a safe audio file.

    Checks, in order: a filename is present, its extension is in
    ``ALLOWED_EXTENSIONS``, the declared content type (if any) is in
    ``ALLOWED_MIMETYPES``, and the filename has no path components.

    Raises:
        HTTPException: status 400 when any check fails.
    """
    filename = file.filename
    # A missing or empty filename would otherwise crash Path() with a TypeError
    # and surface as a 500 instead of a client error.
    if not filename:
        raise HTTPException(status_code=400, detail="Invalid filename")

    file_ext = Path(filename).suffix.lower()
    if file_ext not in ALLOWED_EXTENSIONS:
        # sorted() keeps the message stable; set iteration order is not.
        raise HTTPException(
            status_code=400,
            detail=f"Invalid file type '{file_ext}'. Allowed: {', '.join(sorted(ALLOWED_EXTENSIONS))}"
        )

    if file.content_type:
        # Compare only the media type, ignoring parameters such as
        # ";codecs=opus" and letter case.
        media_type = file.content_type.split(";", 1)[0].strip().lower()
        if media_type not in ALLOWED_MIMETYPES:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid content type: {file.content_type}"
            )

    # Reject anything that could escape the temp directory once the name is
    # embedded in an output path.
    if '..' in filename or '/' in filename or '\\' in filename:
        raise HTTPException(status_code=400, detail="Invalid filename")
134
+
135
+
136
+ # Configuration
137
class Config:
    """Static runtime settings plus the one-time checkpoint bootstrap."""

    # Hugging Face Hub Configuration
    HF_REPO_ID = os.getenv("HF_REPO_ID", "thecodeworm/clearspeech-unet")
    HF_CHECKPOINT_FILENAME = "best_model_fixed.pt"

    # Local paths
    CHECKPOINT_DIR = Path(tempfile.gettempdir()) / "clearspeech_models"
    CNN_CHECKPOINT = CHECKPOINT_DIR / HF_CHECKPOINT_FILENAME

    # Model configuration
    WHISPER_MODEL = os.getenv("WHISPER_MODEL", "base")
    DEVICE = os.getenv("DEVICE", "cpu")
    USE_FP16 = False

    # Limits
    MAX_FILE_SIZE = int(os.getenv("MAX_FILE_SIZE", 50 * 1024 * 1024))
    TEMP_DIR = Path(tempfile.gettempdir()) / "clearspeech"

    @classmethod
    def setup(cls):
        """Create working directories and fetch the checkpoint if it is missing.

        When the checkpoint file is not present it is downloaded from the
        configured Hugging Face repository, and ``CNN_CHECKPOINT`` is rebound
        to the path returned by the download. Download errors are logged and
        re-raised.
        """
        for directory in (cls.TEMP_DIR, cls.CHECKPOINT_DIR):
            directory.mkdir(parents=True, exist_ok=True)

        # Fast path: a previous run already left the file in place.
        if cls.CNN_CHECKPOINT.exists():
            logger.info(f"✅ Using cached checkpoint: {cls.CNN_CHECKPOINT}")
            return

        banner = "=" * 70
        logger.info(banner)
        logger.info("📥 Downloading model checkpoint from Hugging Face Hub")
        logger.info(banner)
        logger.info(f"Repository: {cls.HF_REPO_ID}")
        logger.info(f"Filename: {cls.HF_CHECKPOINT_FILENAME}")

        try:
            downloaded_path = hf_hub_download(
                repo_id=cls.HF_REPO_ID,
                filename=cls.HF_CHECKPOINT_FILENAME,
                cache_dir=str(cls.CHECKPOINT_DIR.parent),
                local_dir=str(cls.CHECKPOINT_DIR),
                local_dir_use_symlinks=False
            )

            cls.CNN_CHECKPOINT = Path(downloaded_path)
            logger.info("✅ Checkpoint downloaded successfully!")
            logger.info(f" Saved to: {cls.CNN_CHECKPOINT}")
            logger.info(banner)

        except Exception as e:
            logger.error(banner)
            logger.error("❌ Failed to download checkpoint")
            logger.error(banner)
            logger.error(f"Error: {e}")
            logger.error(f"Please verify HF_REPO_ID: {cls.HF_REPO_ID}")
            raise
192
+
193
+
194
+ # Response models
195
class ProcessResponse(BaseModel):
    """Response schema declared for ``POST /process``."""

    success: bool
    transcript: str
    duration: float
    language: str
    # Relative download URL of the enhanced audio.
    enhanced_audio_url: str
    # Relative download URL of the synthesized speech; None when not generated.
    tts_audio_url: Optional[str] = None
    segments: list = []
    # Handler wall-clock time in seconds.
    processing_time: float
204
+
205
+
206
class EnhanceResponse(BaseModel):
    """Response schema declared for ``POST /enhance``."""

    success: bool
    # Relative download URL of the enhanced audio.
    enhanced_audio_url: str
    duration: float
    # Handler wall-clock time in seconds.
    processing_time: float
211
+
212
+
213
class TranscribeResponse(BaseModel):
    """Response schema declared for ``POST /transcribe``."""

    success: bool
    transcript: str
    duration: float
    language: str
    segments: list = []
    # Handler wall-clock time in seconds.
    processing_time: float
220
+
221
+
222
class TTSRequest(BaseModel):
    """JSON request body accepted by ``POST /tts``."""

    # Text to synthesize.
    text: str
    # Language code handed to the TTS backend.
    language: str = "en"
    voice: str = "default"
226
+
227
+
228
class HealthResponse(BaseModel):
    """Response schema declared for ``GET /health``."""

    status: str
    models_loaded: bool
    cnn_checkpoint: str
    whisper_model: str
    device: str
    tts_available: bool
235
+
236
+
237
@app.on_event("startup")
async def startup_event():
    """Load models on server startup.

    Ensures the checkpoint is available, builds the global
    ``EnhancementPipeline``, logs whether gTTS can be imported, and starts the
    rate limiter's background cleanup task.

    Raises:
        Exception: any failure during setup is logged and re-raised.
    """
    global pipeline
    logger.info("🚀 Starting ClearSpeech API Server on Hugging Face Spaces...")

    try:
        Config.setup()

        if not Config.CNN_CHECKPOINT.exists():
            raise FileNotFoundError(f"Checkpoint not found: {Config.CNN_CHECKPOINT}")

        pipeline = EnhancementPipeline(
            cnn_checkpoint_path=str(Config.CNN_CHECKPOINT),
            whisper_model_name=Config.WHISPER_MODEL,
            device=Config.DEVICE,
            use_fp16=Config.USE_FP16
        )

        logger.info("✅ Models loaded successfully!")
        logger.info(f"📍 CNN Checkpoint: {Config.CNN_CHECKPOINT}")
        logger.info(f"📍 Whisper Model: {Config.WHISPER_MODEL}")
        logger.info(f"📍 Device: {Config.DEVICE}")

        # Check TTS (the import is only an availability probe)
        try:
            import gtts  # noqa: F401
            logger.info("✅ TTS (gtts) available")
        except ImportError:
            logger.warning("⚠️ TTS not available")

        logger.info("="*70)
        logger.info("Server ready! Visit /docs for API documentation")
        logger.info("="*70)

        # Start rate limiter cleanup. The event loop holds only a weak
        # reference to tasks, so keep a strong one on the app to stop the task
        # being garbage-collected mid-run.
        app.state.rate_limiter_cleanup_task = asyncio.create_task(
            rate_limiter.cleanup()
        )

    except Exception as e:
        logger.error(f"❌ Failed to load models: {e}")
        raise
278
+
279
+
280
@app.on_event("shutdown")
async def shutdown_event():
    """Delete every tracked temporary file when the server stops.

    Failures are logged as warnings and do not stop the remaining deletions.
    """
    logger.info("Shutting down server...")

    for filepath in temp_files.values():
        try:
            target = Path(filepath)
            if target.exists():
                os.remove(filepath)
        except Exception as e:
            logger.warning(f"Failed to cleanup {filepath}: {e}")

    temp_files.clear()
293
+
294
+
295
+ # ============================================================================
296
+ # TTS FUNCTIONS
297
+ # ============================================================================
298
+
299
def generate_tts_gtts(text: str, output_path: str, language: str = "en"):
    """Synthesize ``text`` with gTTS and save it to ``output_path``.

    Returns True on success. Any exception (including gTTS not being
    installed) is logged and reported as False.
    """
    try:
        # Imported lazily so this module loads even without gTTS installed.
        from gtts import gTTS
        gTTS(text=text, lang=language, slow=False).save(output_path)
    except Exception as e:
        logger.error(f"gTTS failed: {e}")
        return False
    return True
309
+
310
+
311
def generate_tts(text: str, output_path: str, language: str = "en"):
    """Synthesize speech to ``output_path``; returns the backend's success flag."""
    succeeded = generate_tts_gtts(text, output_path, language)
    return succeeded
314
+
315
+
316
+ # ============================================================================
317
+ # API ENDPOINTS
318
+ # ============================================================================
319
+
320
@app.get("/")
async def root():
    """Landing endpoint: reports that the API is online and lists its routes."""
    endpoints = {
        "docs": "/docs",
        "health": "/health",
        "process": "/process (POST)",
        "enhance": "/enhance (POST)",
        "transcribe": "/transcribe (POST)",
        "tts": "/tts (POST)",
        "download": "/download/{filename} (GET)"
    }
    return {
        "status": "online",
        "message": "ClearSpeech API - Speech Enhancement, Transcription & TTS",
        "version": "2.1.0",
        "platform": "Hugging Face Spaces",
        "endpoints": endpoints
    }
338
+
339
+
340
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Report pipeline readiness, model configuration and TTS availability."""
    # TTS counts as available when the gtts package can be imported.
    try:
        import gtts
    except ImportError:
        tts_available = False
    else:
        tts_available = True

    models_loaded = pipeline is not None
    status = "healthy" if models_loaded else "unhealthy"

    return {
        "status": status,
        "models_loaded": models_loaded,
        "cnn_checkpoint": str(Config.CNN_CHECKPOINT),
        "whisper_model": Config.WHISPER_MODEL,
        "device": Config.DEVICE,
        "tts_available": tts_available
    }
358
+
359
+
360
@app.post("/process", response_model=ProcessResponse)
async def process_audio(
    request: Request,
    file: UploadFile = File(...),
    language: Optional[str] = Form(default="en"),
    skip_enhancement: Optional[str] = Form(default="false"),
    generate_tts_param: Optional[str] = Form(default="false", alias="generate_tts")
):
    """Complete pipeline: enhance + transcribe + optional TTS.

    Args:
        request: incoming request, used to identify the client for rate limiting.
        file: uploaded audio file.
        language: language code forwarded to the pipeline and to TTS.
        skip_enhancement: "true"/"1"/"yes" (case-insensitive) skips enhancement.
        generate_tts_param: form field ``generate_tts``; "true"/"1"/"yes"
            additionally synthesizes the transcript.

    Returns:
        JSONResponse with the transcript, timing data and download URLs.

    Raises:
        HTTPException: 429 when rate limited, 503 when models are not loaded,
            400 on invalid upload, 413 when the upload exceeds
            ``Config.MAX_FILE_SIZE``, 500 on any processing failure.
    """
    import uuid  # local import: only needed for collision-free output names

    # Rate limiting
    client_ip = get_client_ip(request)
    if not await rate_limiter.check_rate_limit(client_ip):
        raise HTTPException(
            status_code=429,
            detail="Rate limit exceeded. Max 20 requests per hour."
        )

    if pipeline is None:
        raise HTTPException(status_code=503, detail="Models not loaded")

    # File validation
    validate_audio_file(file)

    # Convert string parameters to boolean. The fields are Optional, so guard
    # against None before calling .lower().
    truthy = ('true', '1', 'yes')
    skip_enhancement_bool = (skip_enhancement or "").lower() in truthy
    generate_tts_bool = (generate_tts_param or "").lower() in truthy

    start_time = time.time()

    try:
        contents = await file.read()

        if len(contents) > Config.MAX_FILE_SIZE:
            raise HTTPException(
                status_code=413,
                detail=f"File too large. Max: {Config.MAX_FILE_SIZE / 1024 / 1024}MB"
            )

        logger.info(f"📥 Processing: {file.filename} ({len(contents)/1024:.1f} KB)")

        # Process audio
        result = pipeline.process(
            contents,
            language=language,
            skip_enhancement=skip_enhancement_bool
        )

        # A second-resolution timestamp alone collides when the same filename
        # is uploaded twice within one second, and the later request would
        # overwrite the earlier output. A random suffix makes each name unique.
        unique_tag = f"{int(time.time())}_{uuid.uuid4().hex[:8]}"
        stem = Path(file.filename).stem

        # Save enhanced audio (always written as .wav)
        temp_filename = f"enhanced_{unique_tag}_{stem}.wav"
        temp_path = Config.TEMP_DIR / temp_filename
        sf.write(temp_path, result['enhanced_audio'], result['sample_rate'])
        temp_files[temp_filename] = str(temp_path)

        enhanced_audio_url = f"/download/{temp_filename}"

        # Generate TTS if requested
        tts_audio_url = None
        if generate_tts_bool and result['transcript']:
            # NOTE(review): the file is named .wav, but the TTS backend may
            # emit a different container (gTTS presumably writes MP3) — confirm.
            tts_filename = f"tts_{unique_tag}_{stem}.wav"
            tts_path = Config.TEMP_DIR / tts_filename

            if generate_tts(result['transcript'], str(tts_path), language):
                temp_files[tts_filename] = str(tts_path)
                tts_audio_url = f"/download/{tts_filename}"
                logger.info("✅ Generated TTS")
            else:
                logger.warning("⚠️ TTS generation failed")

        processing_time = time.time() - start_time

        response = {
            "success": True,
            "transcript": result['transcript'],
            "duration": result['duration'],
            "language": result['language'],
            "enhanced_audio_url": enhanced_audio_url,
            "tts_audio_url": tts_audio_url,
            "segments": result.get('segments', []),
            "processing_time": round(processing_time, 2)
        }

        logger.info(f"✅ Processed in {processing_time:.2f}s")
        return JSONResponse(content=response)

    except HTTPException:
        # Pass deliberate HTTP errors (e.g. 413) through unchanged.
        raise
    except Exception as e:
        logger.error(f"❌ Error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Processing failed: {str(e)}")
455
+
456
+
457
@app.post("/enhance", response_model=EnhanceResponse)
async def enhance_only(
    request: Request,
    file: UploadFile = File(...)
):
    """Enhancement only (no transcription).

    Args:
        request: incoming request, used to identify the client for rate limiting.
        file: uploaded audio file.

    Returns:
        dict with the download URL of the enhanced audio, its duration in
        seconds and the processing time.

    Raises:
        HTTPException: 429 when rate limited, 503 when models are not loaded,
            400 on invalid upload, 413 when the upload exceeds
            ``Config.MAX_FILE_SIZE``, 500 on any processing failure.
    """
    import uuid  # local import: only needed for collision-free output names

    # Rate limiting
    client_ip = get_client_ip(request)
    if not await rate_limiter.check_rate_limit(client_ip):
        raise HTTPException(status_code=429, detail="Rate limit exceeded")

    if pipeline is None:
        raise HTTPException(status_code=503, detail="Models not loaded")

    # File validation
    validate_audio_file(file)

    start_time = time.time()

    try:
        contents = await file.read()

        # Enforce the same upload limit as /process.
        if len(contents) > Config.MAX_FILE_SIZE:
            raise HTTPException(
                status_code=413,
                detail=f"File too large. Max: {Config.MAX_FILE_SIZE / 1024 / 1024}MB"
            )

        # Load and enhance
        audio = pipeline.audio_processor.load_audio(contents)
        enhanced_audio = pipeline.enhance_audio(audio)
        sample_rate = pipeline.audio_processor.sample_rate

        # Save under a unique name. The timestamp alone collides for same-second
        # uploads of the same filename.
        stem = Path(file.filename).stem
        temp_filename = f"enhanced_{int(time.time())}_{uuid.uuid4().hex[:8]}_{stem}.wav"
        temp_path = Config.TEMP_DIR / temp_filename
        sf.write(temp_path, enhanced_audio, sample_rate)
        temp_files[temp_filename] = str(temp_path)

        duration = len(enhanced_audio) / sample_rate
        processing_time = time.time() - start_time

        return {
            "success": True,
            "enhanced_audio_url": f"/download/{temp_filename}",
            "duration": duration,
            "processing_time": round(processing_time, 2)
        }

    except HTTPException:
        # Keep deliberate HTTP errors (e.g. 413) from being rewrapped as 500.
        raise
    except Exception as e:
        logger.error(f"❌ Enhancement error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
505
+
506
+
507
@app.post("/transcribe", response_model=TranscribeResponse)
async def transcribe_only(
    request: Request,
    file: UploadFile = File(...),
    language: Optional[str] = Form(default="en"),
    enhance_first: Optional[str] = Form(default="true")
):
    """Transcription with optional enhancement.

    Args:
        request: incoming request, used to identify the client for rate limiting.
        file: uploaded audio file.
        language: language code passed to the transcriber.
        enhance_first: "true"/"1"/"yes" (case-insensitive) enhances the audio
            before transcription.

    Returns:
        dict with the transcript, duration in seconds, language, segments and
        processing time.

    Raises:
        HTTPException: 429 when rate limited, 503 when models are not loaded,
            400 on invalid upload, 413 when the upload exceeds
            ``Config.MAX_FILE_SIZE``, 500 on any processing failure.
    """
    # Rate limiting
    client_ip = get_client_ip(request)
    if not await rate_limiter.check_rate_limit(client_ip):
        raise HTTPException(status_code=429, detail="Rate limit exceeded")

    if pipeline is None:
        raise HTTPException(status_code=503, detail="Models not loaded")

    # File validation
    validate_audio_file(file)

    # The field is Optional, so guard against None before calling .lower().
    enhance_bool = (enhance_first or "").lower() in ['true', '1', 'yes']
    start_time = time.time()

    try:
        contents = await file.read()

        # Enforce the same upload limit as /process.
        if len(contents) > Config.MAX_FILE_SIZE:
            raise HTTPException(
                status_code=413,
                detail=f"File too large. Max: {Config.MAX_FILE_SIZE / 1024 / 1024}MB"
            )

        # Load audio
        audio = pipeline.audio_processor.load_audio(contents)

        # Optionally enhance
        if enhance_bool:
            audio = pipeline.enhance_audio(audio)

        # Transcribe
        result = pipeline.transcribe_audio(audio, language)

        duration = len(audio) / pipeline.audio_processor.sample_rate
        processing_time = time.time() - start_time

        return {
            "success": True,
            "transcript": result['text'].strip(),
            "duration": duration,
            "language": result.get('language', language),
            "segments": result.get('segments', []),
            "processing_time": round(processing_time, 2)
        }

    except HTTPException:
        # Keep deliberate HTTP errors (e.g. 413) from being rewrapped as 500.
        raise
    except Exception as e:
        logger.error(f"❌ Transcription error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
557
+
558
+
559
@app.post("/tts")
async def text_to_speech(request: TTSRequest):
    """Convert text to speech and return the audio file.

    Args:
        request: JSON body carrying the text and language code.

    Returns:
        FileResponse streaming the generated audio.

    Raises:
        HTTPException: 400 when the text is empty or whitespace only, 500 when
            synthesis fails.
    """
    import uuid  # local import: only needed for collision-free output names

    # Reject whitespace-only text up front rather than letting the TTS backend
    # fail and report a misleading 500.
    if not request.text or not request.text.strip():
        raise HTTPException(status_code=400, detail="No text provided")

    try:
        # The timestamp alone collides for same-second requests; add a random
        # suffix so concurrent callers never share an output file.
        # NOTE(review): the file is named and served as WAV, but the TTS
        # backend may emit a different container (gTTS presumably writes
        # MP3) — confirm.
        temp_filename = f"tts_{int(time.time())}_{uuid.uuid4().hex[:8]}.wav"
        temp_path = Config.TEMP_DIR / temp_filename

        if not generate_tts(request.text, str(temp_path), request.language):
            raise HTTPException(
                status_code=500,
                detail="TTS failed. Install gtts."
            )

        # Track the file so the shutdown handler (and /cleanup) can remove it;
        # otherwise it is never deleted.
        temp_files[temp_filename] = str(temp_path)

        return FileResponse(
            temp_path,
            media_type="audio/wav",
            filename=temp_filename
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ TTS error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
586
+
587
+
588
@app.get("/download/{filename}")
async def download_file(filename: str):
    """Serve a previously generated audio file by its registered name.

    Raises:
        HTTPException: 404 when the name is not registered or the file is no
            longer on disk.
    """
    # Only names registered in temp_files are served; the path comes from the
    # registry, not from the request.
    try:
        file_path = Path(temp_files[filename])
    except KeyError:
        raise HTTPException(status_code=404, detail="File not found or expired")

    if not file_path.exists():
        raise HTTPException(status_code=404, detail="File not found")

    return FileResponse(
        file_path,
        media_type="audio/wav",
        filename=filename
    )
604
+
605
+
606
@app.delete("/cleanup/{filename}")
async def cleanup_file(filename: str):
    """Delete one registered temporary file and drop it from the registry.

    Raises:
        HTTPException: 404 when the name is not registered, 500 when removal
            fails.
    """
    if filename not in temp_files:
        raise HTTPException(status_code=404, detail="File not found")

    try:
        target = Path(temp_files[filename])
        if target.exists():
            os.remove(target)
        # The entry is dropped only after removal did not raise.
        del temp_files[filename]
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

    return {"success": True, "message": "File deleted"}
620
+
621
+
622
if __name__ == "__main__":
    import uvicorn

    # HF Spaces uses port 7860
    server_options = {
        "host": "0.0.0.0",
        "port": 7860,
        "log_level": "info",
    }
    uvicorn.run(app, **server_options)