Spaces:
Sleeping
Sleeping
Avinyaa
committed on
Commit
·
9a88d9c
1
Parent(s):
c53fcc3
new
Browse files- README.md +109 -1
- app.py +169 -0
- client_example.py +73 -0
- dockerfile +28 -0
- requirements.txt +9 -0
- test.py +10 -0
README.md
CHANGED
|
@@ -7,4 +7,112 @@ sdk: docker
|
|
| 7 |
pinned: false
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
pinned: false
|
| 8 |
---
|
| 9 |
|
| 10 |
+
# TTS API
|
| 11 |
+
|
| 12 |
+
A FastAPI-based Text-to-Speech API using XTTS-v2 for voice cloning.
|
| 13 |
+
|
| 14 |
+
## Features
|
| 15 |
+
|
| 16 |
+
- Convert text to speech using voice cloning
|
| 17 |
+
- Upload reference speaker audio files
|
| 18 |
+
- Support for multiple languages
|
| 19 |
+
- RESTful API with automatic documentation
|
| 20 |
+
- Docker support
|
| 21 |
+
|
| 22 |
+
## Setup
|
| 23 |
+
|
| 24 |
+
### Local Development
|
| 25 |
+
|
| 26 |
+
1. Install dependencies:
|
| 27 |
+
```bash
|
| 28 |
+
pip install -r requirements.txt
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
2. Run the API:
|
| 32 |
+
```bash
|
| 33 |
+
uvicorn app:app --host 0.0.0.0 --port 8000
|
| 34 |
+
```
|
| 35 |
+
|
| 36 |
+
The API will be available at `http://localhost:8000`
|
| 37 |
+
|
| 38 |
+
### Using Docker
|
| 39 |
+
|
| 40 |
+
1. Build the Docker image:
|
| 41 |
+
```bash
|
| 42 |
+
docker build -t tts-api .
|
| 43 |
+
```
|
| 44 |
+
|
| 45 |
+
2. Run the container:
|
| 46 |
+
```bash
|
| 47 |
+
docker run -p 8000:7860 tts-api
|
| 48 |
+
```
|
| 49 |
+
|
| 50 |
+
## API Endpoints
|
| 51 |
+
|
| 52 |
+
### Health Check
|
| 53 |
+
- **GET** `/health` - Check API status
|
| 54 |
+
|
| 55 |
+
### Text-to-Speech
|
| 56 |
+
- **POST** `/tts` - Convert text to speech with uploaded speaker file
|
| 57 |
+
- **Parameters:**
|
| 58 |
+
- `text` (form): Text to convert to speech
|
| 59 |
+
- `language` (form): Language code (default: "en")
|
| 60 |
+
- `speaker_file` (file): Reference speaker audio file
|
| 61 |
+
|
| 62 |
+
### API Documentation
|
| 63 |
+
- **GET** `/docs` - Interactive API documentation (Swagger UI)
|
| 64 |
+
- **GET** `/redoc` - Alternative API documentation
|
| 65 |
+
|
| 66 |
+
## Usage Examples
|
| 67 |
+
|
| 68 |
+
### Using Python requests
|
| 69 |
+
|
| 70 |
+
```python
|
| 71 |
+
import requests
|
| 72 |
+
|
| 73 |
+
# Prepare the request
|
| 74 |
+
url = "http://localhost:8000/tts"
|
| 75 |
+
data = {
|
| 76 |
+
"text": "Hello, this is a test of voice cloning!",
|
| 77 |
+
"language": "en"
|
| 78 |
+
}
|
| 79 |
+
files = {
|
| 80 |
+
"speaker_file": open("path/to/speaker.wav", "rb")
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
# Make the request
|
| 84 |
+
response = requests.post(url, data=data, files=files)
|
| 85 |
+
|
| 86 |
+
# Save the generated audio
|
| 87 |
+
if response.status_code == 200:
|
| 88 |
+
with open("output.wav", "wb") as f:
|
| 89 |
+
f.write(response.content)
|
| 90 |
+
print("Speech generated successfully!")
|
| 91 |
+
```
|
| 92 |
+
|
| 93 |
+
### Using curl
|
| 94 |
+
|
| 95 |
+
```bash
|
| 96 |
+
curl -X POST "http://localhost:8000/tts" \
|
| 97 |
+
-F "text=Hello, this is a test!" \
|
| 98 |
+
-F "language=en" \
|
| 99 |
+
-F "speaker_file=@path/to/speaker.wav" \
|
| 100 |
+
--output generated_speech.wav
|
| 101 |
+
```
|
| 102 |
+
|
| 103 |
+
### Using the provided client example
|
| 104 |
+
|
| 105 |
+
```bash
|
| 106 |
+
python client_example.py
|
| 107 |
+
```
|
| 108 |
+
|
| 109 |
+
## Requirements
|
| 110 |
+
|
| 111 |
+
- Python 3.8+
|
| 112 |
+
- CUDA-compatible GPU (recommended for faster processing)
|
| 113 |
+
- Audio file in supported format (WAV, MP3, etc.) for speaker reference
|
| 114 |
+
|
| 115 |
+
## Model
|
| 116 |
+
|
| 117 |
+
This API uses the XTTS-v2_C3PO model for voice cloning, which is automatically downloaded when building the Docker image.
|
| 118 |
+
|
app.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, HTTPException, UploadFile, File, Form
|
| 2 |
+
from fastapi.responses import FileResponse
|
| 3 |
+
from pydantic import BaseModel
|
| 4 |
+
from TTS.api import TTS
|
| 5 |
+
import os
|
| 6 |
+
import tempfile
|
| 7 |
+
import uuid
|
| 8 |
+
import torch
|
| 9 |
+
from typing import Optional
|
| 10 |
+
import logging
|
| 11 |
+
|
| 12 |
+
# Configure logging
|
| 13 |
+
logging.basicConfig(level=logging.INFO)
|
| 14 |
+
logger = logging.getLogger(__name__)
|
| 15 |
+
|
| 16 |
+
app = FastAPI(title="TTS API", description="Text-to-Speech API using XTTS-v2", version="1.0.0")
|
| 17 |
+
|
| 18 |
+
class TTSRequest(BaseModel):
    """Request model accepted by the ``/tts-with-url`` endpoint."""

    # Text to synthesize; required.
    text: str
    # Language code; defaults to "en" when omitted.
    language: str = "en"
|
| 21 |
+
|
| 22 |
+
class TTSService:
    """Load an XTTS model once and synthesize speech with a reference voice.

    The custom model in ``model_dir`` is tried first. When its ``config.json``
    is missing, or loading the custom model fails, the stock XTTS-v2 model
    named by ``DEFAULT_MODEL_NAME`` is loaded instead.
    """

    DEFAULT_MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"

    def __init__(self, model_dir: str = "/app/XTTS-v2_C3PO"):
        """Select the compute device and load a model onto it.

        Args:
            model_dir: Directory expected to hold the custom model files and
                ``config.json``.

        Raises:
            Exception: Re-raised from the TTS loader when the default model
                cannot be loaded either.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info(f"Using device: {self.device}")

        config_path = os.path.join(model_dir, "config.json")

        if not os.path.exists(config_path):
            logger.warning(f"Custom model config not found at {config_path}")
            # List the directory contents to make a bad image easier to debug.
            if os.path.exists(model_dir):
                logger.info(f"Contents of {model_dir}: {os.listdir(model_dir)}")
            else:
                logger.warning(f"Model directory {model_dir} does not exist")
            self.tts = self._load_default_model()
            return

        try:
            self.tts = TTS(
                # join(..., "") keeps the trailing slash on the directory path.
                model_path=os.path.join(model_dir, ""),
                config_path=config_path,
                progress_bar=False,
                gpu=torch.cuda.is_available(),
            ).to(self.device)
            logger.info("Custom TTS model loaded successfully")
        except Exception as e:
            logger.error(f"Failed to load custom TTS model: {e}")
            self.tts = self._load_default_model()

    def _load_default_model(self):
        """Load the stock XTTS-v2 model onto ``self.device``; log and re-raise on failure."""
        logger.info("Falling back to default XTTS model")
        try:
            model = TTS(self.DEFAULT_MODEL_NAME).to(self.device)
        except Exception as e:
            logger.error(f"Failed to load default TTS model: {e}")
            # Bare raise keeps the original traceback intact.
            raise
        logger.info("Default TTS model loaded successfully")
        return model

    def generate_speech(self, text: str, speaker_wav_path: str, language: str = "en") -> str:
        """Generate speech and return the path to the output file.

        Args:
            text: Text to synthesize.
            speaker_wav_path: Path to the reference speaker audio.
            language: Language code handed to the model.

        Returns:
            Path of a uniquely named ``.wav`` file in the system temp
            directory. The caller is responsible for deleting it.

        Raises:
            HTTPException: Status 500 when synthesis fails for any reason.
        """
        # A unique name per call keeps concurrent requests from colliding.
        output_path = os.path.join(
            tempfile.gettempdir(), f"output_{uuid.uuid4().hex}.wav"
        )

        try:
            self.tts.tts_to_file(
                text=text,
                file_path=output_path,
                speaker_wav=speaker_wav_path,
                language=language,
            )
        except Exception as e:
            logger.error(f"Error generating speech: {e}")
            # Do not leave a partially written file behind in the temp dir.
            if os.path.exists(output_path):
                os.remove(output_path)
            raise HTTPException(
                status_code=500, detail=f"Failed to generate speech: {str(e)}"
            ) from e

        return output_path
|
| 89 |
+
|
| 90 |
+
# Initialize TTS service
|
| 91 |
+
# Shared instance: constructed once when this module is imported (which loads
# the model) and used by the route handlers defined below.
tts_service = TTSService()
|
| 92 |
+
|
| 93 |
+
@app.get("/")
async def root():
    """Return a fixed payload stating that the API is running."""
    payload = {
        "message": "TTS API is running",
        "status": "healthy",
    }
    return payload
|
| 96 |
+
|
| 97 |
+
@app.get("/health")
async def health_check():
    """Return the health status together with the device the service selected."""
    active_device = tts_service.device
    return {"status": "healthy", "device": active_device}
|
| 100 |
+
|
| 101 |
+
def _remove_file(path: str) -> None:
    """Delete *path*, tolerating the case where it is already gone."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


@app.post("/tts")
async def text_to_speech(
    text: str = Form(...),
    language: str = Form("en"),
    speaker_file: UploadFile = File(...),
):
    """
    Convert text to speech using a reference speaker voice

    - **text**: The text to convert to speech
    - **language**: Language code (default: "en")
    - **speaker_file**: Audio file containing the reference speaker voice

    Returns the generated WAV file as an attachment. Responds with 400 for
    empty text, a non-audio upload, or an empty upload, and with 500 when
    synthesis fails.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list being edited.
    from fastapi import BackgroundTasks

    if not text.strip():
        raise HTTPException(status_code=400, detail="Text cannot be empty")

    # content_type is None when the client sends no Content-Type for the part.
    if not (speaker_file.content_type or "").startswith("audio/"):
        raise HTTPException(status_code=400, detail="Speaker file must be an audio file")

    # Keep the upload's extension when it looks sane; fall back to .wav.
    suffix = os.path.splitext(speaker_file.filename or "")[1]
    if not suffix[1:].isalnum():
        suffix = ".wav"

    speaker_temp_path = None
    try:
        content = await speaker_file.read()
        if not content:
            raise HTTPException(status_code=400, detail="Speaker file is empty")

        # A unique temp file per request: a fixed shared path would let
        # concurrent requests overwrite each other's reference audio.
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as buffer:
            speaker_temp_path = buffer.name
            buffer.write(content)

        output_path = tts_service.generate_speech(text, speaker_temp_path, language)
    except HTTPException:
        # Already carries the intended status code and detail.
        raise
    except Exception as e:
        logger.error(f"Error in TTS endpoint: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # The reference audio is only needed during synthesis.
        if speaker_temp_path is not None:
            _remove_file(speaker_temp_path)

    # Delete the generated audio once the response has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(_remove_file, output_path)

    # No explicit Content-Disposition header: FileResponse builds
    # `attachment; filename=...` itself, and a custom header would replace it.
    return FileResponse(
        output_path,
        media_type="audio/wav",
        filename=f"tts_output_{uuid.uuid4().hex}.wav",
        background=cleanup,
    )
|
| 149 |
+
|
| 150 |
+
@app.post("/tts-with-url")
async def text_to_speech_with_url(request: TTSRequest, speaker_wav_url: str):
    """
    Convert text to speech using a reference speaker voice from URL

    - **request**: TTSRequest containing text and language
    - **speaker_wav_url**: URL to the reference speaker audio file

    Not implemented yet: responds with 400 for empty text and 501 otherwise.
    """
    if not request.text.strip():
        raise HTTPException(status_code=400, detail="Text cannot be empty")

    # Downloading and validating the URL is still to be written. The 501 is
    # raised outside any broad try/except so the client receives it as-is
    # instead of a generic 500.
    raise HTTPException(status_code=501, detail="URL-based speaker input not implemented yet")
|
client_example.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
def test_tts_api():
    """Example of how to use the TTS API.

    Posts a fixed sentence and a reference speaker file to the ``/tts``
    endpoint and saves the returned audio as ``generated_speech.wav``.
    Prints a message and returns early when the speaker file is missing.
    """
    # API endpoint
    url = "http://localhost:8000/tts"

    # Text to convert to speech
    text = "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent."

    # Path to your speaker reference audio file
    speaker_file_path = "/path/to/target/speaker.wav"  # Update this path

    if not os.path.exists(speaker_file_path):
        print(f"Error: Speaker file not found at {speaker_file_path}")
        print("Please update the speaker_file_path variable with a valid audio file path")
        return

    data = {
        "text": text,
        "language": "en",
    }

    try:
        # The context manager closes the upload handle on every path.
        with open(speaker_file_path, "rb") as speaker_file:
            print("Sending request to TTS API...")
            response = requests.post(
                url,
                data=data,
                files={"speaker_file": speaker_file},
                # (connect, read) seconds: synthesis can be slow, but an
                # unbounded wait would hang forever on a stuck server.
                timeout=(5, 600),
            )

        if response.status_code == 200:
            # Save the generated audio
            output_filename = "generated_speech.wav"
            with open(output_filename, "wb") as f:
                f.write(response.content)
            print(f"Success! Generated speech saved as {output_filename}")
        else:
            print(f"Error: {response.status_code}")
            print(response.text)

    except requests.exceptions.ConnectionError:
        print("Error: Could not connect to the API. Make sure the server is running on http://localhost:8000")
    except Exception as e:
        print(f"Error: {e}")
|
| 52 |
+
|
| 53 |
+
def check_api_health():
    """Check if the API is running and print the result."""
    try:
        # Bounded wait so an unresponsive server cannot hang the script.
        response = requests.get("http://localhost:8000/health", timeout=5)
    except requests.exceptions.ConnectionError:
        # app.py defines no __main__ entry point, so it is started via uvicorn.
        print("API is not running. Start it with: uvicorn app:app --port 8000")
        return
    except requests.exceptions.Timeout:
        print("API health check timed out")
        return

    if response.status_code == 200:
        print("API is healthy:", response.json())
    else:
        print("API health check failed:", response.status_code)
|
| 63 |
+
|
| 64 |
+
def main():
    """Print a banner, check that the API is up, then run the TTS example."""
    print("TTS API Client Example")
    print("=" * 30)

    # Health check first, then a blank line before the TTS request output.
    check_api_health()
    print()

    test_tts_api()


if __name__ == "__main__":
    main()
|
dockerfile
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11

WORKDIR /app

# Install git and git-lfs (needed to clone the model repository)
RUN apt-get update && apt-get install -y git git-lfs && rm -rf /var/lib/apt/lists/*

# Initialize git lfs
RUN git lfs install

COPY requirements.txt .

RUN pip install uv
RUN uv pip install --no-cache-dir -r requirements.txt --system

# Clone the XTTS-v2_C3PO model and verify it.
# The echo must be part of a RUN instruction; a bare shell command is not a
# valid Dockerfile instruction. The find predicates are grouped so that
# `-type f` applies to all three name patterns.
RUN echo "Cloning the XTTS-v2_C3PO model..." && \
    git clone https://huggingface.co/Borcherding/XTTS-v2_C3PO && \
    ls -la XTTS-v2_C3PO/ && \
    echo "Model directory contents:" && \
    find XTTS-v2_C3PO/ -type f \( -name "*.json" -o -name "*.pth" -o -name "*.pt" \) | head -10

COPY . .

# Expose the port uvicorn listens on (see CMD)
EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
coqui-tts
|
| 2 |
+
pandas
|
| 3 |
+
scikit-learn
|
| 4 |
+
fastapi
|
| 5 |
+
uvicorn[standard]
|
| 6 |
+
python-multipart
|
| 7 |
+
torch
|
| 8 |
+
torchaudio
|
| 9 |
+
requests
|
test.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
from TTS.api import TTS

# Smoke-test script: load the custom XTTS model and clone a voice to output.wav.

# Use the GPU only when one is present; there is no `self` at module scope.
device = "cuda" if torch.cuda.is_available() else "cpu"

tts = TTS(
    model_path="XTTS-v2_C3PO/",
    config_path="XTTS-v2_C3PO/config.json",
    progress_bar=False,
    gpu=(device == "cuda"),
).to(device)

# generate speech by cloning a voice using default settings
tts.tts_to_file(
    text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
    file_path="output.wav",
    speaker_wav="/path/to/target/speaker.wav",
    language="en",
)
|