Spaces:

Rajhuggingface4253
/

neu

Paused

App Files Files Community

Rajhuggingface4253 committed on Oct 16

Commit

d512c0d

verified ·

1 Parent(s): b3fe36f

Update app.py

Browse files

Files changed (1) hide show

app.py +256 -103

app.py CHANGED Viewed

@@ -1,161 +1,314 @@
 import os
-import io
-import base64
-import json
-import asyncio
 import logging
-from concurrent.futures import ThreadPoolExecutor
 from contextlib import asynccontextmanager
-from fastapi import FastAPI, HTTPException, UploadFile, File, Form
-from fastapi.responses import Response, JSONResponse
-import soundfile as sf
-from neutts_wrapper import NeuTTSWrapper
-# --- Configuration & Global Objects ---
 logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-# Read device from environment variable, defaulting to 'cpu'
-DEVICE = os.getenv("MODEL_DEVICE", "cpu")
-# Use a ThreadPoolExecutor to run blocking ML code in a separate thread
-tts_executor = ThreadPoolExecutor(max_workers=1)
-# --- Lifespan Management (Model Loading) ---
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    """
-    Manages the model's lifecycle. It's loaded at startup and resources are
-    cleaned up at shutdown.
-    """
-    logger.info("Application startup...")
     try:
-        # Load the model wrapper into the application state
-        app.state.tts_wrapper = NeuTTSWrapper(device=DEVICE)
     except Exception as e:
-        logger.error(f"FATAL: Model could not be loaded. {e}")
-        app.state.tts_wrapper = None
-    yield # The application is now running
-    logger.info("Application shutdown...")
-    tts_executor.shutdown(wait=True)
-# --- FastAPI App Initialization ---
 app = FastAPI(
     title="NeuTTS Air Production API",
     description="Production-ready Text-to-Speech with Voice Cloning",
     version="2.0.0",
     lifespan=lifespan
 )
-# --- Helper function for running blocking code ---
-async def run_in_executor(func, *args):
-    """Runs a blocking function in the thread pool to avoid blocking the server."""
     loop = asyncio.get_event_loop()
-    return await loop.run_in_executor(tts_executor, func, *args)
-# --- API Endpoints ---
 @app.get("/")
 async def root():
-    return {"status": "online", "service": "NeuTTS Air API v2"}
 @app.get("/health")
 async def health_check():
-    model_status = "loaded" if app.state.tts_wrapper else "degraded"
-    return {"status": "healthy", "model_status": model_status, "device": DEVICE}
 @app.post("/api/v1/synthesize")
 async def synthesize_speech(
-    ref_text: str = Form(...),
-    gen_text: str = Form(...),
-    ref_audio: UploadFile = File(...)
 ):
-    if not app.state.tts_wrapper:
-        raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
     try:
-        ref_audio_bytes = await ref_audio.read()
-        # Run blocking ML code in the thread pool
-        ref_codes = await run_in_executor(app.state.tts_wrapper.encode_reference, ref_audio_bytes)
-        wav_data = await run_in_executor(app.state.tts_wrapper.infer, gen_text, ref_codes, ref_text)
-        # Process audio in-memory
-        buffer = io.BytesIO()
-        sf.write(buffer, wav_data, 24000, format='WAV')
-        buffer.seek(0)
-        return Response(content=buffer.read(), media_type="audio/wav")
     except Exception as e:
-        logger.error(f"Synthesis failed: {e}")
-        raise HTTPException(status_code=500, detail=f"Synthesis failed: {str(e)}")
 @app.post("/api/v1/synthesize/b64")
 async def synthesize_speech_base64(
     ref_text: str = Form(...),
-    gen_text: str = Form(...),
-    ref_audio: UploadFile = File(...)
 ):
-    if not app.state.tts_wrapper:
-        raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
     try:
-        ref_audio_bytes = await ref_audio.read()
-        # Run blocking ML code in the thread pool
-        ref_codes = await run_in_executor(app.state.tts_wrapper.encode_reference, ref_audio_bytes)
-        wav_data = await run_in_executor(app.state.tts_wrapper.infer, gen_text, ref_codes, ref_text)
-        # Process audio in-memory
         buffer = io.BytesIO()
-        sf.write(buffer, wav_data, 24000, format='WAV')
         buffer.seek(0)
         audio_b64 = base64.b64encode(buffer.read()).decode('utf-8')
-        return JSONResponse({"audio_data": audio_b64, "format": "wav"})
     except Exception as e:
-        logger.error(f"Base64 synthesis failed: {e}")
-        raise HTTPException(status_code=500, detail=f"Base64 synthesis failed: {str(e)}")
-@app.post("/api/v1/batch-synthesize")
-async def batch_synthesize(
     ref_text: str = Form(...),
     ref_audio: UploadFile = File(...),
-    texts: str = Form(...)
 ):
-    if not app.state.tts_wrapper:
-        raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
     try:
-        text_list = json.loads(texts)
-        if not isinstance(text_list, list):
-            raise ValueError("Texts must be a JSON array of strings.")
-        ref_audio_bytes = await ref_audio.read()
-        # Encode reference once, in the thread pool
-        ref_codes = await run_in_executor(app.state.tts_wrapper.encode_reference, ref_audio_bytes)
-        results = []
-        for text in text_list:
-            # Infer for each text
-            wav_data = await run_in_executor(app.state.tts_wrapper.infer, text, ref_codes, ref_text)
-            buffer = io.BytesIO()
-            sf.write(buffer, wav_data, 24000, format='WAV')
-            buffer.seek(0)
-            audio_b64 = base64.b64encode(buffer.read()).decode('utf-8')
-            results.append({"text": text, "audio_data": audio_b64})
-        return JSONResponse({"generated_clips": results})
-    except json.JSONDecodeError:
-        raise HTTPException(status_code=400, detail="Invalid JSON in 'texts' field.")
     except Exception as e:
-        logger.error(f"Batch synthesis failed: {e}")
-        raise HTTPException(status_code=500, detail=f"Batch synthesis failed: {str(e)}")

+# [file name]: app.py
 import os
+import sys
 import logging
+from typing import Optional
 from contextlib import asynccontextmanager
+from concurrent.futures import ThreadPoolExecutor
+# CRITICAL: Set the cache-related environment variables BEFORE the third-party
+# imports further down (presumably those libraries read them at import
+# time — confirm).
+os.environ['NUMBA_CACHE_DIR'] = '/tmp/numba_cache'
+os.environ['HF_HOME'] = '/app/cache'
+os.environ['HUGGINGFACE_HUB_CACHE'] = '/app/cache'
+os.environ['HF_HUB_DISABLE_LOCKING'] = '1'
+# Add neutts-air to the front of the Python path. The directory is resolved
+# against the current working directory, not this file's location.
+neutts_path = os.path.join(os.getcwd(), "neutts-air")
+sys.path.insert(0, neutts_path)
+# Create the cache directories named above; exist_ok makes this safe to repeat
+os.makedirs('/app/cache', exist_ok=True)
+os.makedirs('/tmp/numba_cache', exist_ok=True)
 logging.basicConfig(level=logging.INFO)
+# Module-wide logger used by the handlers in this file
+logger = logging.getLogger("neutts-production-api")
+try:
+    import numpy as np
+    from fastapi import FastAPI, HTTPException, UploadFile, File, Form
+    from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
+    from fastapi.middleware.cors import CORSMiddleware
+    import soundfile as sf
+    import io
+    import asyncio
+    import uuid
+    from neutts_wrapper import NeuTTSWrapper, TTSRequest
+    logger.info("✅ All imports successful")
+except ImportError as e:
+    logger.error(f"❌ Import failed: {e}")
+    raise
+# Device detection and resource management
+def get_best_device():
+    """Return "cuda" when PyTorch reports a usable CUDA device, else "cpu".
+    torch is imported inside the function because the module-level imports
+    above do not include it; without this import the call below raised
+    NameError while the module was still loading.
+    """
+    import torch
+    return "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = get_best_device()
+# One worker thread on CPU, two when running on CUDA
+MAX_WORKERS = 1 if DEVICE == "cpu" else 2
+tts_executor = ThreadPoolExecutor(max_workers=MAX_WORKERS)
 @asynccontextmanager
 async def lifespan(app: FastAPI):
+    """Load the TTS model at startup and release resources at shutdown.
+    The wrapper is stored on app.state.neutts_wrapper. A load failure is
+    logged and re-raised, so the application does not start without a model.
+    The shutdown steps sit in a finally block so they also run when an
+    exception is thrown into the generator at the yield point.
+    """
     try:
+        app.state.neutts_wrapper = NeuTTSWrapper(device=DEVICE)
+        logger.info(f"✅ Model loaded on {DEVICE}")
     except Exception as e:
+        logger.error(f"❌ Model loading failed: {e}")
+        raise
+    try:
+        yield
+    finally:
+        # Cleanup: stop accepting executor work, then remove wrapper temp files
+        tts_executor.shutdown(wait=False)
+        if hasattr(app.state, 'neutts_wrapper'):
+            app.state.neutts_wrapper._cleanup_temp_files()
+# Application object; startup and shutdown work is delegated to lifespan()
 app = FastAPI(
     title="NeuTTS Air Production API",
     description="Production-ready Text-to-Speech with Voice Cloning",
     version="2.0.0",
+    docs_url="/docs",
     lifespan=lifespan
 )
+# CORS middleware: every origin, method and header is allowed.
+# NOTE(review): the wildcard policy is very permissive — confirm it is
+# intended for a production deployment.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+async def run_tts_async(tts_request: TTSRequest) -> np.ndarray:
+    """Offload the blocking TTS call to the thread pool and await its result.
+    Args:
+        tts_request: Request object handed unchanged to
+            app.state.neutts_wrapper.generate_speech.
+    Returns:
+        Whatever generate_speech returns (annotated here as np.ndarray).
+    """
+    # get_running_loop() is the documented way to reach the loop from inside
+    # a coroutine; get_event_loop() is discouraged in that position.
+    loop = asyncio.get_running_loop()
+    return await loop.run_in_executor(
+        tts_executor,
+        app.state.neutts_wrapper.generate_speech,
+        tts_request
+    )
 @app.get("/")
 async def root():
+    """Report service identity, device and whether the model is attached."""
+    model_attached = hasattr(app.state, 'neutts_wrapper')
+    payload = {
+        "status": "online",
+        "service": "NeuTTS Air Production API",
+        "version": "2.0.0",
+        "device": DEVICE,
+        "model_loaded": model_attached
+    }
+    return payload
 @app.get("/health")
 async def health_check():
+    """Health probe: 503 when the model is missing or memory stats fail."""
+    model_attached = hasattr(app.state, 'neutts_wrapper')
+    if not model_attached:
+        raise HTTPException(status_code=503, detail="Service unavailable")
+    # Static route listing returned with every healthy response
+    endpoint_map = {
+        "synthesize": "/api/v1/synthesize",
+        "synthesize_b64": "/api/v1/synthesize/b64",
+        "synthesize_stream": "/api/v1/synthesize/stream",
+        "system_info": "/api/v1/system"
+    }
+    try:
+        memory_info = app.state.neutts_wrapper.get_memory_usage()
+    except Exception as e:
+        logger.error(f"Health check failed: {e}")
+        raise HTTPException(status_code=503, detail="Service degraded")
+    return {
+        "status": "healthy",
+        "model_loaded": True,
+        "device": DEVICE,
+        "memory_usage": memory_info,
+        "endpoints": endpoint_map
+    }
+@app.get("/api/v1/system")
+async def system_info():
+    """Return device, worker count, memory usage and cache locations."""
+    model_attached = hasattr(app.state, 'neutts_wrapper')
+    if not model_attached:
+        raise HTTPException(status_code=503, detail="Service unavailable")
+    usage = app.state.neutts_wrapper.get_memory_usage()
+    # Cache paths are read back from the environment; None when unset
+    cache_paths = {
+        "hf_cache": os.environ.get('HF_HOME'),
+        "numba_cache": os.environ.get('NUMBA_CACHE_DIR')
+    }
+    report = {
+        "device": DEVICE,
+        "max_workers": MAX_WORKERS,
+        "memory_usage": usage,
+        "cache_info": cache_paths
+    }
+    return report
 @app.post("/api/v1/synthesize")
 async def synthesize_speech(
+    ref_text: str = Form(..., description="Reference audio transcript", max_length=1000),
+    gen_text: str = Form(..., description="Text to synthesize", max_length=5000),
+    ref_audio: UploadFile = File(..., description="Reference audio file (WAV, max 10MB)"),
+    use_gpu: bool = Form(True, description="Use GPU if available")
 ):
+    """Production-grade speech synthesis with voice cloning.
+    Saves the uploaded reference WAV to a temp file, runs synthesis on the
+    thread pool, writes the result as a 24000 Hz WAV and returns it as a file
+    download; the output file is removed by a background task after the
+    response is sent.
+    Raises:
+        HTTPException: 503 when no model is attached, 400 for a non-WAV
+            upload or a ValueError from synthesis, 413 for an upload larger
+            than 10MB, 500 for any other failure.
+    """
+    # Imported here because the module-level imports do not provide it
+    from starlette.background import BackgroundTask
+    if not hasattr(app.state, 'neutts_wrapper'):
+        raise HTTPException(status_code=503, detail="Service unavailable")
+    max_upload_bytes = 10 * 1024 * 1024
+    temp_file_path = None
     try:
+        # Validate file type
+        if not ref_audio.filename or not ref_audio.filename.lower().endswith('.wav'):
+            raise HTTPException(400, "Only WAV files are supported as reference audio")
+        # Read the upload and enforce the 10MB limit promised in the field description
+        file_content = await ref_audio.read()
+        if len(file_content) > max_upload_bytes:
+            raise HTTPException(413, "Reference audio exceeds the 10MB limit")
+        # Save uploaded file to temp location
+        temp_file_path = app.state.neutts_wrapper.save_uploaded_file(file_content)
+        # Create TTS request; DEVICE is "cuda" exactly when CUDA was detected at startup
+        tts_request = TTSRequest(
+            ref_text=ref_text.strip(),
+            gen_text=gen_text.strip(),
+            ref_audio_path=temp_file_path,
+            use_gpu=use_gpu and DEVICE == "cuda"
+        )
+        # Generate speech
+        audio_data = await run_tts_async(tts_request)
+        # Create output file
+        output_filename = f"synthesized_{uuid.uuid4()}.wav"
+        output_path = os.path.join(app.state.neutts_wrapper.temp_dir, output_filename)
+        sf.write(output_path, audio_data, 24000)
+        # Return file response with cleanup
+        return FileResponse(
+            output_path,
+            media_type="audio/wav",
+            filename=output_filename,
+            background=BackgroundTask(app.state.neutts_wrapper.cleanup_file, output_path)
+        )
+    except HTTPException:
+        # Keep deliberate 4xx responses; the generic handler below would turn them into 500
+        raise
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except RuntimeError as e:
+        raise HTTPException(status_code=500, detail=str(e))
     except Exception as e:
+        logger.error(f"Synthesis error: {str(e)}")
+        raise HTTPException(status_code=500, detail="Internal server error")
+    finally:
+        # Cleanup uploaded temp file
+        if temp_file_path:
+            app.state.neutts_wrapper.cleanup_file(temp_file_path)
 @app.post("/api/v1/synthesize/b64")
 async def synthesize_speech_base64(
     ref_text: str = Form(...),
+    gen_text: str = Form(...),
+    ref_audio: UploadFile = File(...),
+    use_gpu: bool = Form(True)
 ):
+    """Synthesize speech and return as base64 encoded audio.
+    The result is encoded in memory as a 24000 Hz WAV and returned in a JSON
+    body under "audio_data".
+    Raises:
+        HTTPException: 503 when no model is attached, 400 for a missing or
+            non-WAV filename, 500 for any other failure.
+    """
+    import base64
+    if not hasattr(app.state, 'neutts_wrapper'):
+        raise HTTPException(status_code=503, detail="Service unavailable")
+    temp_file_path = None
     try:
+        # Validate and save uploaded file; filename can be absent, so guard before .lower()
+        if not ref_audio.filename or not ref_audio.filename.lower().endswith('.wav'):
+            raise HTTPException(400, "Only WAV files are supported")
+        file_content = await ref_audio.read()
+        temp_file_path = app.state.neutts_wrapper.save_uploaded_file(file_content)
+        # Create TTS request; DEVICE is "cuda" exactly when CUDA was detected at startup
+        tts_request = TTSRequest(
+            ref_text=ref_text.strip(),
+            gen_text=gen_text.strip(),
+            ref_audio_path=temp_file_path,
+            use_gpu=use_gpu and DEVICE == "cuda"
+        )
+        # Generate speech
+        audio_data = await run_tts_async(tts_request)
+        # Convert to base64
         buffer = io.BytesIO()
+        sf.write(buffer, audio_data, 24000, format='WAV')
         buffer.seek(0)
         audio_b64 = base64.b64encode(buffer.read()).decode('utf-8')
+        return JSONResponse({
+            "audio_data": audio_b64,
+            "sample_rate": 24000,
+            "format": "wav",
+            "message": "Synthesis completed successfully"
+        })
+    except HTTPException:
+        # Keep deliberate 4xx responses; the generic handler below would turn them into 500
+        raise
     except Exception as e:
+        logger.error(f"Base64 synthesis error: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Synthesis failed: {str(e)}")
+    finally:
+        if temp_file_path:
+            app.state.neutts_wrapper.cleanup_file(temp_file_path)
+@app.post("/api/v1/synthesize/stream")
+async def synthesize_speech_stream(
     ref_text: str = Form(...),
+    gen_text: str = Form(...),
     ref_audio: UploadFile = File(...),
+    use_gpu: bool = Form(True)
 ):
+    """Synthesize speech and send it back as a chunked MP3 response.
+    Synthesis finishes before the first byte is sent; the encoded audio is
+    then emitted in fixed-size chunks rather than as one large piece.
+    Raises:
+        HTTPException: 503 when no model is attached, 400 for a missing or
+            non-WAV filename, 500 for any other failure.
+    """
+    if not hasattr(app.state, 'neutts_wrapper'):
+        raise HTTPException(status_code=503, detail="Service unavailable")
+    temp_file_path = None
     try:
+        # Validate and save uploaded file (same WAV check as the other synthesis endpoints)
+        if not ref_audio.filename or not ref_audio.filename.lower().endswith('.wav'):
+            raise HTTPException(400, "Only WAV files are supported")
+        file_content = await ref_audio.read()
+        temp_file_path = app.state.neutts_wrapper.save_uploaded_file(file_content)
+        # Create TTS request; DEVICE is "cuda" exactly when CUDA was detected at startup
+        tts_request = TTSRequest(
+            ref_text=ref_text.strip(),
+            gen_text=gen_text.strip(),
+            ref_audio_path=temp_file_path,
+            use_gpu=use_gpu and DEVICE == "cuda"
+        )
+        # Generate speech
+        audio_data = await run_tts_async(tts_request)
+        # Create streaming response
+        buffer = io.BytesIO()
+        sf.write(buffer, audio_data, 24000, format='MP3')
+        buffer.seek(0)
+        def generate(chunk_size=64 * 1024):
+            # read() returns b"" at end of buffer, which stops the iterator
+            for chunk in iter(lambda: buffer.read(chunk_size), b""):
+                yield chunk
+        return StreamingResponse(
+            generate(),
+            media_type="audio/mpeg",
+            headers={
+                "Content-Disposition": "attachment; filename=streamed_speech.mp3",
+                "Cache-Control": "no-cache"
+            }
+        )
+    except HTTPException:
+        # Keep deliberate 4xx responses; the generic handler below would turn them into 500
+        raise
     except Exception as e:
+        logger.error(f"Streaming synthesis error: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Synthesis failed: {str(e)}")
+    finally:
+        if temp_file_path:
+            app.state.neutts_wrapper.cleanup_file(temp_file_path)
+if __name__ == "__main__":
+    # Direct execution: serve the app on all interfaces, port 7860, one worker
+    from uvicorn import run as serve
+    serve(app, host="0.0.0.0", port=7860, workers=1)