Spaces:

Rajhuggingface4253
/

neu

Paused

App Files Community

Rajhuggingface4253 committed on Oct 16

Commit

ff7d020

verified ·

1 Parent(s): 8c1e9c7

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -172

app.py CHANGED Viewed

@@ -1,190 +1,57 @@
-from fastapi import FastAPI, UploadFile, File, Form, HTTPException
 from fastapi.responses import FileResponse
-from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
-from typing import Optional
-import uvicorn
-import tempfile
-import os
-import time
-import logging
-from pathlib import Path
-# NeuTTS Air imports
 from neuttsair.neutts import NeuTTSAir
-import soundfile as sf
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-app = FastAPI(
-    title="NeuTTS Air API",
-    description="Professional Text-to-Speech with Instant Voice Cloning",
-    version="1.0.0",
-    docs_url="/docs",
-    redoc_url="/redoc"
-)
-# CORS middleware
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-# Pydantic models for request validation
 class TTSRequest(BaseModel):
     text: str
-    ref_text: Optional[str] = ""
-    language: Optional[str] = "en"
-class HealthResponse(BaseModel):
-    status: str
-    model_loaded: bool
-    timestamp: str
-# Global model instance
-tts_model = None
-@app.on_event("startup")
-async def startup_event():
-    """Initialize the TTS model on startup"""
-    global tts_model
-    try:
-        logger.info("Loading NeuTTS Air model...")
-        tts_model = NeuTTSAir(
-            backbone_repo="neuphonic/neutts-air-q4-gguf",
-            backbone_device="cpu",
-            codec_repo="neuphonic/neucodec",
-            codec_device="cpu"
-        )
-        logger.info("✅ NeuTTS Air model loaded successfully")
-    except Exception as e:
-        logger.error(f"❌ Failed to load NeuTTS Air model: {e}")
-        raise
-@app.get("/", include_in_schema=False)
-async def root():
-    return {"message": "NeuTTS Air API", "status": "running"}
-@app.get("/health", response_model=HealthResponse)
-async def health_check():
-    """Health check endpoint"""
-    return HealthResponse(
-        status="healthy",
-        model_loaded=tts_model is not None,
-        timestamp=time.strftime("%Y-%m-%d %H:%M:%S")
-    )
-@app.post("/synthesize")
-async def synthesize_speech(
-    text: str = Form(..., description="Text to synthesize"),
-    ref_audio: UploadFile = File(..., description="Reference audio file (3-15 seconds)"),
-    ref_text: str = Form("", description="Transcript of reference audio")
-):
     """
-    Synthesize speech from text using a reference audio for voice cloning
     """
-    if tts_model is None:
-        raise HTTPException(status_code=503, detail="TTS model not loaded")
-    # Validate audio file
-    if not ref_audio.content_type.startswith('audio/'):
-        raise HTTPException(status_code=400, detail="Invalid audio file format")
-    try:
-        # Save uploaded audio to temporary file
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_ref:
-            content = await ref_audio.read()
-            temp_ref.write(content)
-            ref_audio_path = temp_ref.name
-        # Generate speech
-        logger.info(f"Synthesizing: '{text}'")
-        ref_codes = tts_model.encode_reference(ref_audio_path)
-        audio_data = tts_model.infer(text, ref_codes, ref_text)
-        # Save output to temporary file
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_output:
-            sf.write(temp_output.name, audio_data, 24000)
-            output_path = temp_output.name
-        # Cleanup input file
-        os.unlink(ref_audio_path)
-        # Return audio file
-        return FileResponse(
-            output_path,
-            media_type='audio/wav',
-            filename=f"generated_speech_{int(time.time())}.wav",
-            background=BackgroundTask(lambda: os.unlink(output_path))
-        )
-    except Exception as e:
-        logger.error(f"Synthesis error: {e}")
-        raise HTTPException(status_code=500, detail=f"Synthesis failed: {str(e)}")
-@app.post("/synthesize-from-sample")
-async def synthesize_from_sample(request: TTSRequest):
-    """
-    Synthesize speech using built-in sample voices
-    """
-    if tts_model is None:
-        raise HTTPException(status_code=503, detail="TTS model not loaded")
     try:
-        # Use built-in sample (Dave)
-        sample_path = "samples/dave.wav"
-        if not os.path.exists(sample_path):
-            raise HTTPException(status_code=500, detail="Sample audio not found")
-        ref_codes = tts_model.encode_reference(sample_path)
-        audio_data = tts_model.infer(request.text, ref_codes, "My name is Dave and I'm from London.")
-        # Save output to temporary file
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_output:
-            sf.write(temp_output.name, audio_data, 24000)
-            output_path = temp_output.name
-        return FileResponse(
-            output_path,
-            media_type='audio/wav',
-            filename=f"sample_speech_{int(time.time())}.wav",
-            background=BackgroundTask(lambda: os.unlink(output_path))
-        )
-    except Exception as e:
-        logger.error(f"Sample synthesis error: {e}")
-        raise HTTPException(status_code=500, detail=f"Sample synthesis failed: {str(e)}")
-@app.get("/voices/samples")
-async def get_sample_voices():
-    """Get available sample voices"""
-    samples_dir = Path("samples")
-    samples = []
-    if samples_dir.exists():
-        for file in samples_dir.glob("*.wav"):
-            samples.append({
-                "name": file.stem,
-                "path": str(file),
-                "size": file.stat().st_size
-            })
-    return {"samples": samples}
-if __name__ == "__main__":
-    uvicorn.run(
-        "app:app",
-        host="0.0.0.0",
-        port=7860,
-        reload=False,  # Disable reload in production
-        workers=1,     # Single worker for CPU optimization
-        access_log=True
-    )

+import tempfile
+import soundfile as sf
+from fastapi import FastAPI, HTTPException
 from fastapi.responses import FileResponse
 from pydantic import BaseModel
 from neuttsair.neutts import NeuTTSAir
+# Initialize FastAPI app
+app = FastAPI(title="NeuTTS-Air API", description="A FastAPI service for the NeuTTS-Air model.")
+# Load the NeuTTS-Air model
+# The path is relative to the working directory in the Docker container
+MODEL_PATH = "neutts-air-q4-gguf"
+try:
+    tts = NeuTTSAir(backbone_repo=MODEL_PATH, backbone_device="cpu")
+except Exception as e:
+    print(f"Error loading model: {e}")
+    tts = None
+# Pydantic model for the request body
 class TTSRequest(BaseModel):
     text: str
+    ref_audio_path: str
+    ref_text: str
+@app.get("/")
+def read_root():
+    """Simple health check endpoint."""
+    return {"message": "NeuTTS-Air FastAPI is running."}
+@app.post("/tts", summary="Generate speech from text")
+async def tts_endpoint(request: TTSRequest):
     """
+    Generates a WAV audio file from text using a reference audio and transcript.
     """
+    if tts is None:
+        raise HTTPException(status_code=503, detail="Model is not loaded.")
     try:
+        # Load the reference audio
+        # Note: You must provide a valid path to an audio file
+        # The user will need to upload their own reference audios or use pre-uploaded ones
+        ref_codes = tts.encode_reference(request.ref_audio_path)
+        # Perform inference
+        wav_audio = tts.infer(request.text, ref_codes, request.ref_text)
+        # Save the audio to a temporary file
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
+            sf.write(tmp.name, wav_audio, tts.codec.sampling_rate)
+            filepath = tmp.name
+        # Return the audio file
+        return FileResponse(filepath, media_type="audio/wav", filename="generated_speech.wav")
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Internal Server Error: {e}")