yukee1992 committed on
Commit
330aa32
·
verified ·
1 Parent(s): fafe000

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +314 -0
app.py ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import uuid
4
+ from datetime import datetime
5
+ from typing import List, Optional
6
+
7
+ import requests
8
+ from fastapi import FastAPI, HTTPException, Form, UploadFile, File
9
+ from fastapi.middleware.cors import CORSMiddleware
10
+ from pydantic import BaseModel
11
+ import torch
12
+ import numpy as np
13
+ from TTS.api import TTS
14
+
15
# Scratch directories: uploaded voice samples and generated audio live here
os.makedirs("/tmp/voices", exist_ok=True)
os.makedirs("/tmp/output", exist_ok=True)

# Runtime configuration (upload service location, model id, compute device)
OCI_UPLOAD_API_URL = os.getenv("OCI_UPLOAD_API_URL", "http://localhost:7860")
DEFAULT_MODEL = "tts_models/multilingual/multi-dataset/xtts_v2"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# The ASGI application object that the route decorators below attach to
app = FastAPI(
    title="TTS API",
    description="API for text-to-speech with Coqui TTS and voice cloning",
)

# Cross-origin access is left fully open.
# NOTE(review): wildcard origins together with allow_credentials=True is a
# combination worth confirming against the FastAPI/Starlette CORS docs.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

print(f"βœ… Using device: {DEVICE}")

# Load the model once at import time. A failure is reported and leaves
# `tts` as None, which the endpoints check before synthesising.
try:
    tts = TTS(DEFAULT_MODEL).to(DEVICE)
    print("βœ… TTS model loaded successfully")
except Exception as exc:
    print(f"❌ Failed to load TTS model: {exc}")
    tts = None
45
+
46
# Request body for POST /api/tts: one text to synthesise for one project.
class TTSRequest(BaseModel):
    # Text to convert to speech
    text: str
    # Project identifier forwarded to the upload helper
    project_id: str
    # Name of a stored voice sample; "default" is the sentinel the TTS
    # endpoints treat as "no speaker sample"
    voice_name: Optional[str] = "default"
    # Language code passed through to the TTS model
    language: Optional[str] = "en"
51
+
52
# Request body for POST /api/batch-tts: several texts for one project.
class BatchTTSRequest(BaseModel):
    # Texts to convert, processed in list order
    texts: List[str]
    # Project identifier forwarded to the upload helper
    project_id: str
    # Name of a stored voice sample; "default" is the sentinel the TTS
    # endpoints treat as "no speaker sample"
    voice_name: Optional[str] = "default"
    # Language code passed through to the TTS model
    language: Optional[str] = "en"
57
+
58
# Schema describing a voice-clone request.
# NOTE(review): not referenced by any endpoint visible in this file.
class VoiceCloneRequest(BaseModel):
    # Project the voice belongs to
    project_id: str
    # Name to register the voice under
    voice_name: str
    # Optional free-text description; empty by default
    description: Optional[str] = ""
62
+
63
def upload_to_oci(file_path: str, filename: str, project_id: str, file_type="voiceover"):
    """Upload a local file to OCI through the external upload API.

    Posts the file as multipart form data to
    ``{OCI_UPLOAD_API_URL}/api/upload`` together with the project id and the
    target subfolder.

    Args:
        file_path: Path of the local file to send.
        filename: Name given to the file in the upload request.
        project_id: Sent as the ``project_id`` form field.
        file_type: Sent as the ``subfolder`` form field, so the file ends up
            under ``project_id/<file_type>/``. Falls back to ``"voiceover"``
            when empty.

    Returns:
        ``(result, None)`` with the decoded JSON response when the API
        answers HTTP 200 and ``status == "success"``; otherwise
        ``(None, message)`` describing the failure. Errors are reported
        through the tuple rather than raised.
    """
    try:
        if not OCI_UPLOAD_API_URL:
            return None, "OCI upload API URL not configured"

        url = f"{OCI_UPLOAD_API_URL}/api/upload"
        data = {
            "project_id": project_id,
            # Honour the caller's file type instead of a hard-coded folder
            "subfolder": file_type or "voiceover",
        }

        # The file must stay open while the request streams it
        with open(file_path, "rb") as f:
            files = {"file": (filename, f, "audio/wav")}
            response = requests.post(url, files=files, data=data, timeout=30)

        if response.status_code != 200:
            return None, f"Upload failed with status {response.status_code}"

        result = response.json()
        if not isinstance(result, dict):
            # A JSON list/string/number cannot carry a status field
            return None, "Upload failed"
        if result.get("status") == "success":
            return result, None
        return None, result.get("message", "Upload failed")

    except Exception as e:
        # Deliberate catch-all boundary: callers (the retry wrapper) rely on
        # receiving an error tuple for I/O, network and JSON failures alike.
        return None, f"Upload error: {str(e)}"
92
+
93
def upload_to_oci_with_retry(file_path: str, filename: str, project_id: str, file_type="voiceover", max_retries=3):
    """Upload a file to OCI, retrying failed attempts with exponential backoff.

    Calls ``upload_to_oci`` up to ``max_retries`` times. After a failed
    attempt that is not the last one, it waits ``2 ** attempt`` seconds
    (1s, 2s, 4s, ...) before trying again.

    Args:
        file_path: Path of the local file to send.
        filename: Name given to the file in the upload request.
        project_id: Project identifier forwarded to ``upload_to_oci``.
        file_type: File type forwarded to ``upload_to_oci``.
        max_retries: Maximum number of attempts.

    Returns:
        ``(result, None)`` on the first successful attempt, otherwise
        ``(None, message)`` with the last error. With ``max_retries <= 0`` no
        attempt is made and a generic failure message is returned.
    """
    # Imported here because the module's import block does not provide
    # `time`; without it the first retry died with a NameError.
    import time

    for attempt in range(max_retries):
        is_last_attempt = attempt >= max_retries - 1
        try:
            print(f"πŸ”„ Upload attempt {attempt + 1} of {max_retries} for {filename}")
            result, error = upload_to_oci(file_path, filename, project_id, file_type)
        except Exception as e:
            # Defensive: the upload helper reports errors via its return
            # value, but an unexpected exception is retried the same way.
            if is_last_attempt:
                return None, f"Upload failed after {max_retries} attempts: {str(e)}"
            wait_time = 2 ** attempt
            print(f"⏳ Upload exception, retrying in {wait_time}s: {str(e)}")
            time.sleep(wait_time)
            continue

        if not error:
            return result, None
        if is_last_attempt:
            return None, error

        wait_time = 2 ** attempt  # Exponential backoff
        print(f"⏳ Upload failed, retrying in {wait_time}s: {error}")
        time.sleep(wait_time)

    return None, "Upload failed: unexpected error"
121
+
122
@app.post("/api/tts")
async def generate_tts(request: TTSRequest):
    """Generate speech for a single text and upload the result to OCI.

    The audio is written to ``/tmp/output`` under a collision-free name,
    uploaded with retries, and removed locally once the upload succeeded.

    Args:
        request: Text, project id, voice name and language to synthesise.

    Returns:
        A dict with ``status`` ``"success"`` (including ``oci_path``) or
        ``"partial_success"`` when synthesis worked but the upload failed; in
        the latter case the file is kept and reported as ``local_file``.

    Raises:
        HTTPException: 500 if the model is not loaded or synthesis fails,
            400 for an invalid voice name, 404 if the requested voice sample
            does not exist.
    """
    try:
        if tts is None:
            raise HTTPException(status_code=500, detail="TTS model not loaded")

        print(f"πŸ“₯ TTS request for project: {request.project_id}")
        print(f" Text length: {len(request.text)} characters")
        print(f" Voice: {request.voice_name}")
        print(f" Language: {request.language}")

        # Both fields are Optional, so an explicit null falls back to the
        # declared defaults instead of producing e.g. "None.wav".
        voice_name = request.voice_name or "default"
        language = request.language or "en"

        speaker_wav = None
        if voice_name != "default":
            # voice_name is client input that becomes part of a path:
            # refuse anything that is not a plain file name.
            if voice_name in (".", "..") or os.path.basename(voice_name) != voice_name:
                raise HTTPException(status_code=400, detail="Invalid voice name")
            speaker_wav = f"/tmp/voices/{voice_name}.wav"
            if not os.path.isfile(speaker_wav):
                raise HTTPException(status_code=404, detail=f"Voice sample not found: {voice_name}")

        # A second-resolution timestamp alone collides when two requests
        # arrive in the same second, so add a short random suffix.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"voiceover_{timestamp}_{uuid.uuid4().hex[:8]}.wav"
        output_path = f"/tmp/output/{filename}"

        # NOTE(review): synthesis and upload are synchronous calls inside an
        # async handler; confirm that blocking here is acceptable.
        tts.tts_to_file(
            text=request.text,
            speaker_wav=speaker_wav,
            language=language,
            file_path=output_path
        )

        print(f"βœ… TTS generated: {output_path}")

        upload_result, error = upload_to_oci_with_retry(
            output_path, filename, request.project_id, "voiceover"
        )

        if error:
            print(f"❌ OCI upload failed: {error}")
            # Keep the local file so the caller can still retrieve it
            return {
                "status": "partial_success",
                "message": f"TTS generated but upload failed: {error}",
                "local_file": output_path,
                "filename": filename
            }

        print(f"βœ… Upload successful: {filename}")

        # Best-effort cleanup: a leftover temp file must not fail the request
        try:
            os.remove(output_path)
        except OSError as cleanup_error:
            print(f"Could not remove temporary file {output_path}: {cleanup_error}")

        return {
            "status": "success",
            "message": "TTS generated and uploaded successfully",
            "filename": filename,
            "oci_path": upload_result.get("path", f"{request.project_id}/voiceover/{filename}")
        }

    except HTTPException:
        # Deliberate HTTP errors keep their own status code and detail
        raise
    except Exception as e:
        print(f"❌ TTS generation error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"TTS generation failed: {str(e)}")
182
+
183
@app.post("/api/batch-tts")
async def batch_generate_tts(request: BatchTTSRequest):
    """Generate speech for several texts, uploading each with a sequential name.

    Texts are processed in order. Each upload is named ``voiceover_NN.wav``
    (1-based), while the local temporary file carries a per-request prefix so
    concurrent batches do not overwrite each other in ``/tmp/output``.

    Args:
        request: Texts, project id, voice name and language to synthesise.

    Returns:
        A dict with ``status`` ``"completed"``, the ``project_id`` and one
        entry in ``results`` per text. An entry is ``"success"`` (with
        ``oci_path``) or ``"partial_success"`` when the upload failed, in
        which case the local file is kept and reported as ``local_file``.

    Raises:
        HTTPException: 500 if the model is not loaded or synthesis fails,
            400 for an invalid voice name, 404 if the requested voice sample
            does not exist.
    """
    try:
        if tts is None:
            raise HTTPException(status_code=500, detail="TTS model not loaded")

        print(f"πŸ“₯ Batch TTS request for project: {request.project_id}")
        print(f" Number of texts: {len(request.texts)}")
        print(f" Voice: {request.voice_name}")
        print(f" Language: {request.language}")

        # Both fields are Optional, so an explicit null falls back to the
        # declared defaults instead of producing e.g. "None.wav".
        voice_name = request.voice_name or "default"
        language = request.language or "en"

        # Resolve the speaker sample once; it is the same for every text
        speaker_wav = None
        if voice_name != "default":
            # voice_name is client input that becomes part of a path:
            # refuse anything that is not a plain file name.
            if voice_name in (".", "..") or os.path.basename(voice_name) != voice_name:
                raise HTTPException(status_code=400, detail="Invalid voice name")
            speaker_wav = f"/tmp/voices/{voice_name}.wav"
            if not os.path.isfile(speaker_wav):
                raise HTTPException(status_code=404, detail=f"Voice sample not found: {voice_name}")

        # Prefix for local temp files only; uploaded names stay sequential
        batch_id = uuid.uuid4().hex[:8]
        total = len(request.texts)
        results = []

        for i, text in enumerate(request.texts):
            print(f" Processing text {i+1}/{total}")

            filename = f"voiceover_{i+1:02d}.wav"
            output_path = f"/tmp/output/{batch_id}_{filename}"

            tts.tts_to_file(
                text=text,
                speaker_wav=speaker_wav,
                language=language,
                file_path=output_path
            )

            upload_result, error = upload_to_oci_with_retry(
                output_path, filename, request.project_id, "voiceover"
            )

            if error:
                print(f"❌ OCI upload failed for {filename}: {error}")
                # The local file is kept so the reported path stays valid
                results.append({
                    "text_index": i,
                    "status": "partial_success",
                    "message": f"TTS generated but upload failed: {error}",
                    "local_file": output_path,
                    "filename": filename
                })
                continue

            print(f"βœ… Upload successful: {filename}")
            results.append({
                "text_index": i,
                "status": "success",
                "message": "TTS generated and uploaded successfully",
                "filename": filename,
                "oci_path": upload_result.get("path", f"{request.project_id}/voiceover/{filename}")
            })

            # Best-effort cleanup: a leftover temp file must not fail the batch
            try:
                os.remove(output_path)
            except OSError as cleanup_error:
                print(f"Could not remove temporary file {output_path}: {cleanup_error}")

        return {
            "status": "completed",
            "project_id": request.project_id,
            "results": results
        }

    except HTTPException:
        # Deliberate HTTP errors keep their own status code and detail
        raise
    except Exception as e:
        print(f"❌ Batch TTS generation error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Batch TTS generation failed: {str(e)}")
251
+
252
@app.post("/api/upload-voice")
async def upload_voice_sample(
    project_id: str = Form(...),
    voice_name: str = Form(...),
    file: UploadFile = File(...)
):
    """Store an uploaded audio file as a voice sample for cloning.

    The upload is saved as ``/tmp/voices/<voice_name>.wav``, which is the
    path the TTS endpoints look up for a non-default voice.

    Args:
        project_id: Project the upload belongs to (only logged here).
        voice_name: Name to store the sample under; must be a plain file
            name without directory components.
        file: The uploaded audio file (.wav, .mp3, .ogg or .flac).

    Returns:
        A dict with ``status`` ``"success"``, the ``voice_name`` and the
        ``local_path`` the sample was written to.

    Raises:
        HTTPException: 400 for an invalid voice name or a non-audio file
            extension, 500 if reading or saving the upload fails.
    """
    try:
        print(f"πŸ“₯ Voice upload request: {voice_name} for project {project_id}")

        # voice_name is client input that becomes part of the target path:
        # refuse anything that could escape /tmp/voices.
        if not voice_name or voice_name in (".", "..") or os.path.basename(voice_name) != voice_name:
            raise HTTPException(status_code=400, detail="Invalid voice name")

        # The client may omit the filename; treat that as "no extension"
        upload_name = file.filename or ""
        if not upload_name.lower().endswith(('.wav', '.mp3', '.ogg', '.flac')):
            raise HTTPException(status_code=400, detail="Only audio files are allowed")

        # Read first so a failed read does not leave an empty sample behind
        content = await file.read()

        # NOTE(review): the bytes are stored under a .wav name whatever the
        # uploaded format was; confirm the TTS loader copes with mp3/ogg/flac
        # content in a .wav file.
        voice_path = f"/tmp/voices/{voice_name}.wav"
        with open(voice_path, "wb") as f:
            f.write(content)

        print(f"βœ… Voice sample saved: {voice_path}")

        return {
            "status": "success",
            "message": "Voice sample uploaded successfully",
            "voice_name": voice_name,
            "local_path": voice_path
        }

    except HTTPException:
        # Validation errors keep their 400 status instead of becoming a 500
        raise
    except Exception as e:
        print(f"❌ Voice upload error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Voice upload failed: {str(e)}")
284
+
285
@app.get("/api/health")
async def health_check():
    """Report service status, model availability, device and current time."""
    model_ready = tts is not None
    checked_at = datetime.now().isoformat()
    return dict(
        status="healthy",
        tts_loaded=model_ready,
        device=DEVICE,
        timestamp=checked_at,
    )
294
+
295
@app.get("/")
async def root():
    """Describe the API: a banner message, the endpoint list and the model."""
    endpoint_summaries = {
        "POST /api/tts": "Generate TTS for a single text",
        "POST /api/batch-tts": "Generate TTS for multiple texts",
        "POST /api/upload-voice": "Upload a voice sample for cloning",
        "GET /api/health": "Health check",
    }

    # Show the model id only when a model object is actually available
    if tts:
        model_label = DEFAULT_MODEL
    else:
        model_label = "Not loaded"

    return {
        "message": "TTS API with Coqui TTS and Voice Cloning",
        "endpoints": endpoint_summaries,
        "model": model_label,
    }
308
+
309
if __name__ == "__main__":
    # Script entry point: announce the URLs, then serve on all interfaces
    import uvicorn

    for startup_line in (
        "πŸš€ Starting TTS API with Coqui TTS and Voice Cloning...",
        "πŸ“Š API endpoints available at: http://localhost:7860/",
        "πŸ“š Documentation available at: http://localhost:7860/docs",
    ):
        print(startup_line)

    uvicorn.run(app, host="0.0.0.0", port=7860)