Spaces:

Rajhuggingface4253
/

neu

Paused

App Files Files Community

Rajhuggingface4253 committed on Oct 16

Commit

58b0f90

verified ·

1 Parent(s): 3e11369

Update app.py

Browse files

Files changed (1) hide show

app.py +167 -47

app.py CHANGED Viewed

@@ -1,70 +1,190 @@
-import os
-import sys
-sys.path.insert(0, os.path.join(os.getcwd(), "neutts-air"))
-from fastapi import FastAPI, HTTPException, UploadFile, File, Form, BackgroundTasks
-from fastapi.responses import FileResponse, JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
-import numpy as np
-import soundfile as sf
-import io
-import uuid
 import logging
 from neuttsair.neutts import NeuTTSAir
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-# Initialize model
-tts = NeuTTSAir(
-    backbone_repo="neuphonic/neutts-air",
-    backbone_device="cpu",  # Explicit CPU
-    codec_repo="neuphonic/neucodec",
-    codec_device="cpu"      # Explicit CPU
 )
-app = FastAPI(title="NeuTTS Air API")
-app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
-def cleanup_file(file_path: str):
     try:
-        if os.path.exists(file_path):
-            os.remove(file_path)
-    except:
-        pass
 @app.post("/synthesize")
 async def synthesize_speech(
-    ref_text: str = Form(...),
-    gen_text: str = Form(...),
-    ref_audio: UploadFile = File(...),
-    background_tasks: BackgroundTasks = None
 ):
-    temp_path = f"/tmp/{uuid.uuid4()}.wav"
     try:
-        # Save uploaded file
-        with open(temp_path, "wb") as f:
-            f.write(await ref_audio.read())
-        # Core NeuTTS logic (same as working Gradio app)
-        ref_codes = tts.encode_reference(temp_path)
-        wav = tts.infer(gen_text, ref_codes, ref_text)
-        # Return audio
-        output_path = f"/tmp/{uuid.uuid4()}.wav"
-        sf.write(output_path, wav, 24000)
-        if background_tasks:
-            background_tasks.add_task(cleanup_file, temp_path)
-            background_tasks.add_task(cleanup_file, output_path)
-        return FileResponse(output_path, media_type="audio/wav")
     except Exception as e:
-        cleanup_file(temp_path)
-        raise HTTPException(500, f"Synthesis failed: {str(e)}")
-@app.get("/health")
-async def health_check():
-    return {"status": "healthy", "model_loaded": True}

+from fastapi import FastAPI, UploadFile, File, Form, HTTPException
+from fastapi.responses import FileResponse
 from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from typing import Optional
+import uvicorn
+import tempfile
+import os
+import time
 import logging
+from pathlib import Path
+# NeuTTS Air imports
 from neuttsair.neutts import NeuTTSAir
+import soundfile as sf
+# Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+# Application object; interactive documentation is served at /docs and /redoc.
+app = FastAPI(
+    title="NeuTTS Air API",
+    description="Professional Text-to-Speech with Instant Voice Cloning",
+    version="1.0.0",
+    docs_url="/docs",
+    redoc_url="/redoc"
+)
+# CORS middleware
+# Wildcard origins must not be combined with credentialed requests, so
+# credentials stay disabled.  No handler visible in this file reads cookies
+# or authorization headers; list explicit origins before re-enabling them.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=False,
+    allow_methods=["*"],
+    allow_headers=["*"],
 )
+# Pydantic models for request validation
+class TTSRequest(BaseModel):
+    """JSON body accepted by the /synthesize-from-sample endpoint."""
+    # Text to be spoken (required).
+    text: str
+    # Optional transcript string, empty by default.
+    # NOTE(review): not read by any handler visible in this diff.
+    ref_text: Optional[str] = ""
+    # Optional language tag, "en" by default.
+    # NOTE(review): not read by any handler visible in this diff.
+    language: Optional[str] = "en"
+class HealthResponse(BaseModel):
+    """Response schema of the /health endpoint."""
+    # Status label reported by the service.
+    status: str
+    # Whether the module-level TTS model object has been set.
+    model_loaded: bool
+    # Time of the check, rendered as a string.
+    timestamp: str
+# Global model instance
+tts_model = None
+@app.on_event("startup")
+async def startup_event():
+    """Load the NeuTTS Air model into the module-level ``tts_model``.
+
+    Registered as a startup handler.  A loading failure is logged and then
+    re-raised to the caller.
+    """
+    global tts_model
+    # Both the backbone and the codec are placed on the CPU.
+    model_options = {
+        "backbone_repo": "neuphonic/neutts-air-q4-gguf",
+        "backbone_device": "cpu",
+        "codec_repo": "neuphonic/neucodec",
+        "codec_device": "cpu",
+    }
     try:
+        logger.info("Loading NeuTTS Air model...")
+        tts_model = NeuTTSAir(**model_options)
+        logger.info("✅ NeuTTS Air model loaded successfully")
+    except Exception as load_error:
+        logger.error(f"❌ Failed to load NeuTTS Air model: {load_error}")
+        raise
+@app.get("/", include_in_schema=False)
+async def root():
+    """Return a small status banner; the route is excluded from the schema."""
+    banner = {"message": "NeuTTS Air API", "status": "running"}
+    return banner
+@app.get("/health", response_model=HealthResponse)
+async def health_check():
+    """Report the service status, whether the model is set, and the current time."""
+    checked_at = time.strftime("%Y-%m-%d %H:%M:%S")
+    is_loaded = tts_model is not None
+    return HealthResponse(status="healthy", model_loaded=is_loaded, timestamp=checked_at)
 @app.post("/synthesize")
 async def synthesize_speech(
+    text: str = Form(..., description="Text to synthesize"),
+    ref_audio: UploadFile = File(..., description="Reference audio file (3-15 seconds)"),
+    ref_text: str = Form("", description="Transcript of reference audio")
 ):
+    """
+    Synthesize speech from text using a reference audio for voice cloning.
+
+    The upload is written to a temporary file and encoded into reference
+    codes, which are passed to the model together with ``text`` and
+    ``ref_text``.  The generated audio is written with a sample rate of
+    24000 and returned as a WAV file response.
+
+    Raises:
+        HTTPException(503): the model has not been loaded.
+        HTTPException(400): the upload does not declare an ``audio/*`` type.
+        HTTPException(500): any failure while encoding, generating or writing.
+    """
+    # BackgroundTask is not imported at module level; import it locally so the
+    # response can delete its file after it has been sent.
+    from starlette.background import BackgroundTask
+    if tts_model is None:
+        raise HTTPException(status_code=503, detail="TTS model not loaded")
+    # Validate audio file (content_type is None when the client omits it)
+    if not (ref_audio.content_type or "").startswith('audio/'):
+        raise HTTPException(status_code=400, detail="Invalid audio file format")
+    # Tracked outside the try block so both paths can be cleaned up on failure
+    ref_audio_path = None
+    output_path = None
     try:
+        # Save uploaded audio to temporary file
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_ref:
+            ref_audio_path = temp_ref.name
+            temp_ref.write(await ref_audio.read())
+        # Generate speech
+        logger.info(f"Synthesizing: '{text}'")
+        ref_codes = tts_model.encode_reference(ref_audio_path)
+        audio_data = tts_model.infer(text, ref_codes, ref_text)
+        # Reserve an output path, then write once the handle is closed
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_output:
+            output_path = temp_output.name
+        sf.write(output_path, audio_data, 24000)
+        # Return audio file; it is deleted after the response has been sent
+        return FileResponse(
+            output_path,
+            media_type='audio/wav',
+            filename=f"generated_speech_{int(time.time())}.wav",
+            background=BackgroundTask(os.unlink, output_path)
+        )
     except Exception as e:
+        logger.error(f"Synthesis error: {e}")
+        # No response will be sent for this file, so remove it right away
+        if output_path is not None and os.path.exists(output_path):
+            os.unlink(output_path)
+        raise HTTPException(status_code=500, detail=f"Synthesis failed: {str(e)}")
+    finally:
+        # Cleanup input file on success and on failure alike
+        if ref_audio_path is not None and os.path.exists(ref_audio_path):
+            os.unlink(ref_audio_path)
+@app.post("/synthesize-from-sample")
+async def synthesize_from_sample(request: TTSRequest):
+    """
+    Synthesize speech using the built-in sample voice.
+
+    Only ``request.text`` is read.  The reference audio is the bundled
+    ``samples/dave.wav`` with its fixed transcript.  The generated audio is
+    written with a sample rate of 24000 and returned as a WAV file response.
+
+    Raises:
+        HTTPException(503): the model has not been loaded.
+        HTTPException(500): the sample file is missing, or synthesis failed.
+    """
+    # BackgroundTask is not imported at module level; import it locally so the
+    # response can delete its file after it has been sent.
+    from starlette.background import BackgroundTask
+    if tts_model is None:
+        raise HTTPException(status_code=503, detail="TTS model not loaded")
+    # Use built-in sample (Dave)
+    sample_path = "samples/dave.wav"
+    # Checked before the try block so this detail is not re-wrapped below
+    if not os.path.exists(sample_path):
+        raise HTTPException(status_code=500, detail="Sample audio not found")
+    output_path = None
+    try:
+        ref_codes = tts_model.encode_reference(sample_path)
+        audio_data = tts_model.infer(request.text, ref_codes, "My name is Dave and I'm from London.")
+        # Reserve an output path, then write once the handle is closed
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_output:
+            output_path = temp_output.name
+        sf.write(output_path, audio_data, 24000)
+        # The file is deleted after the response has been sent
+        return FileResponse(
+            output_path,
+            media_type='audio/wav',
+            filename=f"sample_speech_{int(time.time())}.wav",
+            background=BackgroundTask(os.unlink, output_path)
+        )
+    except Exception as e:
+        logger.error(f"Sample synthesis error: {e}")
+        # No response will be sent for this file, so remove it right away
+        if output_path is not None and os.path.exists(output_path):
+            os.unlink(output_path)
+        raise HTTPException(status_code=500, detail=f"Sample synthesis failed: {str(e)}")
+@app.get("/voices/samples")
+async def get_sample_voices():
+    """List the ``*.wav`` files in the ``samples`` directory.
+
+    Each entry carries the file stem, its path as a string, and its size in
+    bytes.  The list is empty when the directory does not exist.
+    """
+    voices_root = Path("samples")
+    if not voices_root.exists():
+        return {"samples": []}
+    return {
+        "samples": [
+            {
+                "name": wav_file.stem,
+                "path": str(wav_file),
+                "size": wav_file.stat().st_size,
+            }
+            for wav_file in voices_root.glob("*.wav")
+        ]
+    }
+if __name__ == "__main__":
+    # Script entry point: serve the application with uvicorn.
+    server_options = {
+        "host": "0.0.0.0",
+        "port": 7860,
+        "reload": False,     # Disable reload in production
+        "workers": 1,        # Single worker for CPU optimization
+        "access_log": True,
+    }
+    uvicorn.run("app:app", **server_options)