Spaces:

Rajhuggingface4253
/

neu

Paused

App Files Files Community

Rajhuggingface4253 committed on Oct 16

Commit

8a6294a

verified ·

1 Parent(s): 7123400

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -291

app.py CHANGED Viewed

@@ -1,314 +1,70 @@
-# [file name]: app.py
 import os
 import sys
 import logging
-from typing import Optional
-from contextlib import asynccontextmanager
-from concurrent.futures import ThreadPoolExecutor
-# CRITICAL: Set environment variables BEFORE any imports
-os.environ['NUMBA_CACHE_DIR'] = '/tmp/numba_cache'
-os.environ['HF_HOME'] = '/app/cache'
-os.environ['HUGGINGFACE_HUB_CACHE'] = '/app/cache'
-os.environ['HF_HUB_DISABLE_LOCKING'] = '1'
-# Add neutts-air to Python path
-neutts_path = os.path.join(os.getcwd(), "neutts-air")
-sys.path.insert(0, neutts_path)
-# Create cache directories
-os.makedirs('/app/cache', exist_ok=True)
-os.makedirs('/tmp/numba_cache', exist_ok=True)
 logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("neutts-production-api")
-try:
-    import numpy as np
-    from fastapi import FastAPI, HTTPException, UploadFile, File, Form
-    from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
-    from fastapi.middleware.cors import CORSMiddleware
-    import soundfile as sf
-    import io
-    import asyncio
-    import uuid
-    from neutts_wrapper import NeuTTSWrapper, TTSRequest
-    logger.info("✅ All imports successful")
-except ImportError as e:
-    logger.error(f"❌ Import failed: {e}")
-    raise
-# Device detection and resource management
-def get_best_device():
-    return "cuda" if torch.cuda.is_available() else "cpu"
-DEVICE = get_best_device()
-MAX_WORKERS = 1 if DEVICE == "cpu" else 2
-tts_executor = ThreadPoolExecutor(max_workers=MAX_WORKERS)
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    """Modern lifespan management with proper cleanup"""
-    try:
-        app.state.neutts_wrapper = NeuTTSWrapper(device=DEVICE)
-        logger.info(f"✅ Model loaded on {DEVICE}")
-    except Exception as e:
-        logger.error(f"❌ Model loading failed: {e}")
-        raise
-    yield
-    # Cleanup
-    tts_executor.shutdown(wait=False)
-    if hasattr(app.state, 'neutts_wrapper'):
-        app.state.neutts_wrapper._cleanup_temp_files()
-app = FastAPI(
-    title="NeuTTS Air Production API",
-    description="Production-ready Text-to-Speech with Voice Cloning",
-    version="2.0.0",
-    docs_url="/docs",
-    lifespan=lifespan
-)
-# CORS middleware
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_methods=["*"],
-    allow_headers=["*"],
 )
-async def run_tts_async(tts_request: TTSRequest) -> np.ndarray:
-    """Offload blocking TTS call to thread pool"""
-    loop = asyncio.get_event_loop()
-    return await loop.run_in_executor(
-        tts_executor,
-        app.state.neutts_wrapper.generate_speech,
-        tts_request
-    )
-@app.get("/")
-async def root():
-    return {
-        "status": "online",
-        "service": "NeuTTS Air Production API",
-        "version": "2.0.0",
-        "device": DEVICE,
-        "model_loaded": hasattr(app.state, 'neutts_wrapper')
-    }
-@app.get("/health")
-async def health_check():
-    """Comprehensive health check with memory monitoring"""
-    if not hasattr(app.state, 'neutts_wrapper'):
-        raise HTTPException(status_code=503, detail="Service unavailable")
     try:
-        memory_info = app.state.neutts_wrapper.get_memory_usage()
-        return {
-            "status": "healthy",
-            "model_loaded": True,
-            "device": DEVICE,
-            "memory_usage": memory_info,
-            "endpoints": {
-                "synthesize": "/api/v1/synthesize",
-                "synthesize_b64": "/api/v1/synthesize/b64",
-                "synthesize_stream": "/api/v1/synthesize/stream",
-                "system_info": "/api/v1/system"
-            }
-        }
-    except Exception as e:
-        logger.error(f"Health check failed: {e}")
-        raise HTTPException(status_code=503, detail="Service degraded")
-@app.get("/api/v1/system")
-async def system_info():
-    """System information and resource monitoring"""
-    if not hasattr(app.state, 'neutts_wrapper'):
-        raise HTTPException(status_code=503, detail="Service unavailable")
-    memory_info = app.state.neutts_wrapper.get_memory_usage()
-    return {
-        "device": DEVICE,
-        "max_workers": MAX_WORKERS,
-        "memory_usage": memory_info,
-        "cache_info": {
-            "hf_cache": os.environ.get('HF_HOME'),
-            "numba_cache": os.environ.get('NUMBA_CACHE_DIR')
-        }
-    }
-@app.post("/api/v1/synthesize")
 async def synthesize_speech(
-    ref_text: str = Form(..., description="Reference audio transcript", max_length=1000),
-    gen_text: str = Form(..., description="Text to synthesize", max_length=5000),
-    ref_audio: UploadFile = File(..., description="Reference audio file (WAV, max 10MB)"),
-    use_gpu: bool = Form(True, description="Use GPU if available")
-):
-    """Production-grade speech synthesis with voice cloning"""
-    if not hasattr(app.state, 'neutts_wrapper'):
-        raise HTTPException(status_code=503, detail="Service unavailable")
-    temp_file_path = None
-    try:
-        # Validate file type
-        if not ref_audio.filename or not ref_audio.filename.lower().endswith('.wav'):
-            raise HTTPException(400, "Only WAV files are supported as reference audio")
-        # Read and validate file content
-        file_content = await ref_audio.read()
-        # Save uploaded file to temp location
-        temp_file_path = app.state.neutts_wrapper.save_uploaded_file(file_content)
-        # Create TTS request
-        tts_request = TTSRequest(
-            ref_text=ref_text.strip(),
-            gen_text=gen_text.strip(),
-            ref_audio_path=temp_file_path,
-            use_gpu=use_gpu and torch.cuda.is_available()
-        )
-        # Generate speech
-        audio_data = await run_tts_async(tts_request)
-        # Create output file
-        output_filename = f"synthesized_{uuid.uuid4()}.wav"
-        output_path = os.path.join(app.state.neutts_wrapper.temp_dir, output_filename)
-        sf.write(output_path, audio_data, 24000)
-        # Return file response with cleanup
-        return FileResponse(
-            output_path,
-            media_type="audio/wav",
-            filename=output_filename,
-            background=BackgroundTask(app.state.neutts_wrapper.cleanup_file, output_path)
-        )
-    except ValueError as e:
-        raise HTTPException(status_code=400, detail=str(e))
-    except RuntimeError as e:
-        raise HTTPException(status_code=500, detail=str(e))
-    except Exception as e:
-        logger.error(f"Synthesis error: {str(e)}")
-        raise HTTPException(status_code=500, detail="Internal server error")
-    finally:
-        # Cleanup uploaded temp file
-        if temp_file_path:
-            app.state.neutts_wrapper.cleanup_file(temp_file_path)
-@app.post("/api/v1/synthesize/b64")
-async def synthesize_speech_base64(
     ref_text: str = Form(...),
     gen_text: str = Form(...),
     ref_audio: UploadFile = File(...),
-    use_gpu: bool = Form(True)
 ):
-    """Synthesize speech and return as base64 encoded audio"""
-    if not hasattr(app.state, 'neutts_wrapper'):
-        raise HTTPException(status_code=503, detail="Service unavailable")
-    temp_file_path = None
     try:
-        # Validate and save uploaded file
-        if not ref_audio.filename.lower().endswith('.wav'):
-            raise HTTPException(400, "Only WAV files are supported")
-        file_content = await ref_audio.read()
-        temp_file_path = app.state.neutts_wrapper.save_uploaded_file(file_content)
-        # Create TTS request
-        tts_request = TTSRequest(
-            ref_text=ref_text.strip(),
-            gen_text=gen_text.strip(),
-            ref_audio_path=temp_file_path,
-            use_gpu=use_gpu and torch.cuda.is_available()
-        )
-        # Generate speech
-        audio_data = await run_tts_async(tts_request)
-        # Convert to base64
-        buffer = io.BytesIO()
-        sf.write(buffer, audio_data, 24000, format='WAV')
-        buffer.seek(0)
-        import base64
-        audio_b64 = base64.b64encode(buffer.read()).decode('utf-8')
-        return JSONResponse({
-            "audio_data": audio_b64,
-            "sample_rate": 24000,
-            "format": "wav",
-            "message": "Synthesis completed successfully"
-        })
     except Exception as e:
-        logger.error(f"Base64 synthesis error: {str(e)}")
-        raise HTTPException(status_code=500, detail=f"Synthesis failed: {str(e)}")
-    finally:
-        if temp_file_path:
-            app.state.neutts_wrapper.cleanup_file(temp_file_path)
-@app.post("/api/v1/synthesize/stream")
-async def synthesize_speech_stream(
-    ref_text: str = Form(...),
-    gen_text: str = Form(...),
-    ref_audio: UploadFile = File(...),
-    use_gpu: bool = Form(True)
-):
-    """Stream synthesized speech for immediate playback"""
-    if not hasattr(app.state, 'neutts_wrapper'):
-        raise HTTPException(status_code=503, detail="Service unavailable")
-    temp_file_path = None
-    try:
-        # Validate and save uploaded file
-        file_content = await ref_audio.read()
-        temp_file_path = app.state.neutts_wrapper.save_uploaded_file(file_content)
-        # Create TTS request
-        tts_request = TTSRequest(
-            ref_text=ref_text.strip(),
-            gen_text=gen_text.strip(),
-            ref_audio_path=temp_file_path,
-            use_gpu=use_gpu and torch.cuda.is_available()
-        )
-        # Generate speech
-        audio_data = await run_tts_async(tts_request)
-        # Create streaming response
-        buffer = io.BytesIO()
-        sf.write(buffer, audio_data, 24000, format='MP3')
-        buffer.seek(0)
-        def generate():
-            yield buffer.read()
-        return StreamingResponse(
-            generate(),
-            media_type="audio/mpeg",
-            headers={
-                "Content-Disposition": "attachment; filename=streamed_speech.mp3",
-                "Cache-Control": "no-cache"
-            }
-        )
-    except Exception as e:
-        logger.error(f"Streaming synthesis error: {str(e)}")
-        raise HTTPException(status_code=500, detail=f"Synthesis failed: {str(e)}")
-    finally:
-        if temp_file_path:
-            app.state.neutts_wrapper.cleanup_file(temp_file_path)
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860, workers=1)

 import os
 import sys
+sys.path.insert(0, os.path.join(os.getcwd(), "neutts-air"))
+from fastapi import FastAPI, HTTPException, UploadFile, File, Form, BackgroundTasks
+from fastapi.responses import FileResponse, JSONResponse
+from fastapi.middleware.cors import CORSMiddleware
+import numpy as np
+import soundfile as sf
+import io
+import uuid
 import logging
+from neuttsair.neutts import NeuTTSAir
 logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Initialize model
+tts = NeuTTSAir(
+    backbone_repo="neuphonic/neutts-air",
+    backbone_device="cpu",  # Explicit CPU
+    codec_repo="neuphonic/neucodec",
+    codec_device="cpu"      # Explicit CPU
 )
+app = FastAPI(title="NeuTTS Air API")
+app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
+def cleanup_file(file_path: str):
     try:
+        if os.path.exists(file_path):
+            os.remove(file_path)
+    except:
+        pass
+@app.post("/synthesize")
 async def synthesize_speech(
     ref_text: str = Form(...),
     gen_text: str = Form(...),
     ref_audio: UploadFile = File(...),
+    background_tasks: BackgroundTasks = None
 ):
+    temp_path = f"/tmp/{uuid.uuid4()}.wav"
     try:
+        # Save uploaded file
+        with open(temp_path, "wb") as f:
+            f.write(await ref_audio.read())
+        # Core NeuTTS logic (same as working Gradio app)
+        ref_codes = tts.encode_reference(temp_path)
+        wav = tts.infer(gen_text, ref_codes, ref_text)
+        # Return audio
+        output_path = f"/tmp/{uuid.uuid4()}.wav"
+        sf.write(output_path, wav, 24000)
+        if background_tasks:
+            background_tasks.add_task(cleanup_file, temp_path)
+            background_tasks.add_task(cleanup_file, output_path)
+        return FileResponse(output_path, media_type="audio/wav")
     except Exception as e:
+        cleanup_file(temp_path)
+        raise HTTPException(500, f"Synthesis failed: {str(e)}")
+@app.get("/health")
+async def health_check():
+    return {"status": "healthy", "model_loaded": True}