Rajhuggingface4253 committed on
Commit
3b32b80
·
verified ·
1 Parent(s): ff87d75

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +166 -0
app.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import uuid
4
+ from typing import Optional
5
+ import numpy as np
6
+ from fastapi import FastAPI, HTTPException, UploadFile, File, Form
7
+ from fastapi.responses import FileResponse, JSONResponse
8
+ from fastapi.middleware.cors import CORSMiddleware
9
+ import soundfile as sf
10
+ import io
11
+
12
# Make the neutts-air checkout importable. The entry is a relative path, so
# it is resolved against the process's current working directory.
sys.path.append("neutts-air")

try:
    from neuttsair.neutts import NeuTTSAir
except ImportError as e:
    # Chain the original ImportError so the traceback shows which module
    # was actually missing, not only this summary message.
    raise RuntimeError(
        f"Failed to import NeuTTS Air: {e}. Make sure neutts-air submodule is initialized."
    ) from e
19
+
20
# ASGI application object; the metadata is passed straight to FastAPI.
app = FastAPI(
    version="1.0.0",
    title="NeuTTS Air Production API",
    description="Production-ready Text-to-Speech with Voice Cloning",
)

# Cross-origin policy: every origin, HTTP method and request header is allowed.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)
34
+
35
# Single module-level model instance shared by every request handler.
# Both the backbone and the codec are placed on the CPU.
tts = NeuTTSAir(
    codec_repo="neuphonic/neucodec",
    codec_device="cpu",
    backbone_repo="neuphonic/neutts-air",
    backbone_device="cpu",
)

# Working directories for uploaded reference audio and generated audio.
for _workdir in ("uploads", "outputs"):
    os.makedirs(_workdir, exist_ok=True)
46
+
47
@app.get("/")
async def root():
    """Return a small JSON payload naming the service and marking it online."""
    payload = dict(status="online", service="NeuTTS Air API")
    return payload
50
+
51
@app.get("/health")
async def health_check():
    """Return a health payload that also reports whether the global model exists."""
    model_ready = not (tts is None)
    return dict(status="healthy", model_loaded=model_ready)
54
+
55
@app.post("/api/v1/synthesize")
async def synthesize_speech(
    ref_text: str = Form(..., description="Reference audio transcript"),
    gen_text: str = Form(..., description="Text to synthesize"),
    ref_audio: UploadFile = File(..., description="Reference audio file (WAV)"),
):
    """Synthesize ``gen_text`` using an uploaded reference recording.

    Args:
        ref_text: Transcript of the reference audio.
        gen_text: Text to synthesize.
        ref_audio: Uploaded reference audio; its filename must end in ``.wav``.

    Returns:
        A ``FileResponse`` streaming the generated WAV file, which is written
        under ``outputs/`` at a 24000 Hz sample rate.

    Raises:
        HTTPException: 400 when the upload is not named ``*.wav``; 500 when
            saving the upload, inference, or writing the output fails.
    """
    # The filename is client-controlled: keep only its last path component
    # (normalising Windows separators) and tolerate a missing filename.
    client_name = os.path.basename((ref_audio.filename or "").replace("\\", "/"))

    # Validate before entering the try block so the 400 is not caught by the
    # generic handler below and re-reported as a 500.
    if not client_name.lower().endswith(".wav"):
        raise HTTPException(400, "Only WAV files are supported as reference audio")

    upload_path = f"uploads/{uuid.uuid4()}_{client_name}"
    try:
        with open(upload_path, "wb") as f:
            f.write(await ref_audio.read())

        # Encode the reference recording, then generate the new utterance.
        ref_codes = tts.encode_reference(upload_path)
        wav = tts.infer(gen_text, ref_codes, ref_text)

        output_path = f"outputs/{uuid.uuid4()}.wav"
        sf.write(output_path, wav, 24000)
    except Exception as e:
        raise HTTPException(500, f"Synthesis failed: {str(e)}") from e
    finally:
        # The upload is only needed while encoding; remove it so the uploads
        # directory does not grow with every request. Best effort: a failed
        # delete must not mask the real result or error.
        try:
            os.remove(upload_path)
        except OSError:
            pass

    # NOTE(review): the generated file is left under outputs/ because
    # FileResponse reads it after this function returns; nothing here
    # deletes it afterwards.
    return FileResponse(
        output_path,
        media_type="audio/wav",
        filename="synthesized_speech.wav",
    )
91
+
92
@app.post("/api/v1/synthesize/b64")
async def synthesize_speech_base64(
    ref_text: str = Form(...),
    gen_text: str = Form(...),
    ref_audio: UploadFile = File(...),
):
    """Synthesize ``gen_text`` and return the audio as base64-encoded WAV.

    Args:
        ref_text: Transcript of the reference audio.
        gen_text: Text to synthesize.
        ref_audio: Uploaded reference audio file.

    Returns:
        A ``JSONResponse`` with keys ``audio_data`` (base64 text of a WAV
        file), ``sample_rate`` (24000) and ``format`` (``"wav"``).

    Raises:
        HTTPException: 500 when saving the upload, inference, or encoding
            fails.
    """
    import base64

    # The filename is client-controlled: keep only its last path component
    # (normalising Windows separators) and tolerate a missing filename.
    client_name = os.path.basename((ref_audio.filename or "").replace("\\", "/"))
    upload_path = f"uploads/{uuid.uuid4()}_{client_name}"

    try:
        with open(upload_path, "wb") as f:
            f.write(await ref_audio.read())

        # Encode the reference recording, then generate the new utterance.
        ref_codes = tts.encode_reference(upload_path)
        wav = tts.infer(gen_text, ref_codes, ref_text)

        # Render the WAV container in memory; nothing is written to outputs/.
        buffer = io.BytesIO()
        sf.write(buffer, wav, 24000, format='WAV')
        audio_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
    except Exception as e:
        raise HTTPException(500, f"Synthesis failed: {str(e)}") from e
    finally:
        # Remove the temporary upload so the uploads directory does not grow
        # with every request. Best effort: a failed delete must not mask the
        # real result or error.
        try:
            os.remove(upload_path)
        except OSError:
            pass

    return JSONResponse({
        "audio_data": audio_b64,
        "sample_rate": 24000,
        "format": "wav",
    })
128
+
129
# Batch processing endpoint
@app.post("/api/v1/batch-synthesize")
async def batch_synthesize(
    ref_text: str = Form(...),
    ref_audio: UploadFile = File(...),
    texts: str = Form(..., description="JSON array of texts to synthesize"),
):
    """Synthesize several texts using one uploaded reference recording.

    The reference audio is encoded once and reused for every text.

    Args:
        ref_text: Transcript of the reference audio.
        ref_audio: Uploaded reference audio file.
        texts: JSON-encoded array of strings, one per utterance to generate.

    Returns:
        ``{"generated_files": [...]}`` listing the ``outputs/`` path of each
        generated WAV file, in the same order as ``texts``.

    Raises:
        HTTPException: 400 when ``texts`` is not a JSON array of strings;
            500 when saving the upload, inference, or writing output fails.
    """
    import json

    # Reject malformed input as a client error. Without the type check a JSON
    # string would be iterated character by character and a JSON object by key.
    try:
        text_list = json.loads(texts)
    except ValueError as e:
        raise HTTPException(400, f"texts must be a valid JSON array: {e}") from e
    if not isinstance(text_list, list) or not all(isinstance(t, str) for t in text_list):
        raise HTTPException(400, "texts must be a JSON array of strings")

    # The filename is client-controlled: keep only its last path component
    # (normalising Windows separators) and tolerate a missing filename.
    client_name = os.path.basename((ref_audio.filename or "").replace("\\", "/"))
    upload_path = f"uploads/{uuid.uuid4()}_{client_name}"

    results = []
    try:
        with open(upload_path, "wb") as f:
            f.write(await ref_audio.read())

        # Encode reference once
        ref_codes = tts.encode_reference(upload_path)

        for text in text_list:
            wav = tts.infer(text, ref_codes, ref_text)
            output_path = f"outputs/{uuid.uuid4()}.wav"
            sf.write(output_path, wav, 24000)
            results.append(output_path)
    except Exception as e:
        raise HTTPException(500, f"Batch synthesis failed: {str(e)}") from e
    finally:
        # Remove the temporary upload so the uploads directory does not grow
        # with every request. Best effort: a failed delete must not mask the
        # real result or error.
        try:
            os.remove(upload_path)
        except OSError:
            pass

    return {"generated_files": results}
163
+
164
if __name__ == "__main__":
    # Direct execution: serve the app with uvicorn on all interfaces, port 7860.
    from uvicorn import run as serve

    serve(app, port=7860, host="0.0.0.0")