|
|
import argparse
import base64
import json
import time
import uuid
from argparse import Namespace
from typing import Optional

import uvicorn
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from starlette.responses import StreamingResponse

from tts import MelToWav, TextToMel
from advanced_tts import load_all_models, run_tts_paragraph
|
|
|
|
|
# ASGI application object; the route decorators in this module register on it.
app = FastAPI()
|
|
|
|
|
|
|
|
class TextJson(BaseModel):
    """Request body accepted by the ``/TTS/`` endpoint.

    Only ``text`` is required; every other field falls back to the default
    declared here. The endpoint forwards all fields, unchanged, to
    ``run_tts_paragraph`` as attributes of a ``Namespace``.
    """

    # Text to synthesize; the endpoint rejects an empty string with HTTP 400.
    text: str
    # Language code; defaults to "hi".
    lang: Optional[str] = "hi"
    # Synthesis tuning values passed straight through to the TTS backend.
    # NOTE(review): exact meaning and valid range are defined in advanced_tts.
    noise_scale: Optional[float] = 0.667
    length_scale: Optional[float] = 1.0
    # Integer switches for text pre-processing steps.
    # NOTE(review): presumably 1 enables and 0 disables each step -- confirm
    # against advanced_tts.
    transliteration: Optional[int] = 1
    number_conversion: Optional[int] = 1
    split_sentences: Optional[int] = 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _iter_file_chunks(handle, chunk_size=64 * 1024):
    """Yield the contents of an open binary file in fixed-size chunks.

    The handle is closed when the file is exhausted or the generator is
    closed, so the descriptor is not left open after the response is sent.
    """
    try:
        # iter(callable, sentinel) keeps reading until read() returns b"".
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            yield chunk
    finally:
        handle.close()


@app.post("/TTS/")
async def tts(input: TextJson):
    """Synthesize speech for the posted text and stream it back as WAV.

    Args:
        input: Request body. All of its fields are forwarded to
            ``run_tts_paragraph`` as attributes of a ``Namespace``, together
            with a ``wav`` attribute holding the output file path.

    Returns:
        A ``StreamingResponse`` with media type ``audio/wav`` that streams
        the file found at ``args.wav``.

    Raises:
        HTTPException: status 400 with detail ``{"error": "No text"}`` when
            ``input.text`` is empty.
    """
    # Guard clause: nothing to synthesize.
    if not input.text:
        raise HTTPException(status_code=400, detail={"error": "No text"})

    args = Namespace(**input.dict())

    # A second-resolution timestamp alone collides when two requests arrive
    # within the same second, so a random suffix makes every path unique.
    file_name = f"{int(time.time())}_{uuid.uuid4().hex}.wav"
    args.wav = '../../results/api/' + file_name

    # The return value is not needed here; the audio is read back from
    # args.wav below.
    # NOTE(review): this is a synchronous call inside an async handler, so it
    # presumably blocks the event loop while synthesis runs -- confirm whether
    # that is acceptable for the expected load.
    run_tts_paragraph(args)

    # Open eagerly so a missing output file fails inside the handler rather
    # than midway through the response; the generator closes the handle.
    audio = open(args.wav, mode='rb')
    return StreamingResponse(_iter_file_chunks(audio), media_type="audio/wav")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: parse the command line, load the models, then
    # start the HTTP server.
    parser = argparse.ArgumentParser()

    # (flags, options) for each command-line argument, in declaration order.
    # NOTE(review): --acoustic / --vocoder presumably name model checkpoints;
    # how they are interpreted is up to load_all_models.
    argument_table = (
        (("-a", "--acoustic"), {"required": True, "type": str}),
        (("-v", "--vocoder"), {"required": True, "type": str}),
        (("-d", "--device"), {"type": str, "default": "cpu"}),
        (("-L", "--lang"), {"type": str, "required": True}),
    )
    for flags, options in argument_table:
        parser.add_argument(*flags, **options)

    args = parser.parse_args()

    # Models are loaded once, before the server starts accepting requests.
    load_all_models(args)

    # "api:app" is an import string: uvicorn imports module ``api`` and serves
    # its ``app`` attribute (assumes this file is saved as api.py).
    uvicorn.run("api:app", host="0.0.0.0", port=6006, log_level="debug")
|
|
|