Spaces:
Sleeping
Sleeping
File size: 2,239 Bytes
edc5445 9fe7102 814634e 0300579 5bcf511 9fe7102 8cbcb5c 21c3757 64f8c24 3f674bb f6905a0 f6d0d12 9fe7102 814634e c0b470e 9fe7102 edc5445 3353131 3f674bb f6d0d12 9fe7102 3f674bb 3353131 9fe7102 3f674bb 9fe7102 1537a46 9fe7102 639afb1 3353131 9fe7102 3353131 a5201f5 9fe7102 1537a46 c51fa1f 9fe7102 2db5666 9fe7102 2db5666 1537a46 9fe7102 2db5666 a5201f5 3353131 2db5666 a5201f5 3353131 3f674bb 3353131 afd6df1 3f674bb 579d95d 9fe7102 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from logic import synthesize_voice, plot_data, plot_waveforms
import base64
import sys
import numpy as np
from io import BytesIO
from hifigan.inference_e2e import hifi_gan_inference
# Application instance for the voice-cloning synthesis service.
app = FastAPI()
@app.get("/")
def read_root():
    """Health-check endpoint: confirms the service is reachable."""
    return JSONResponse(content={"Voice": "Cloning", "Status": "Success"})
# Allow cross-origin requests from the browser frontend.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# overly permissive — consider restricting origins to the actual frontend host.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Public URL of the hosted /synthesize endpoint.
# NOTE(review): not referenced anywhere in this file — verify it is used
# elsewhere (e.g. by a client) or remove it.
hugging_face_api_url = "https://huggingface.co/spaces/lord-reso/host/synthesize"
@app.post("/synthesize")
async def synthesize(request: Request):
    """End-to-end text-to-speech endpoint.

    Expects a JSON body with keys:
      - ``input_text``: the text to synthesize.
      - ``font_select``: requested font/voice style (currently read but unused
        in this handler — TODO confirm whether it should select a checkpoint).

    Pipeline: Tacotron2 (``synthesize_voice``) produces mel-spectrograms,
    HiFi-GAN (``hifi_gan_inference``) converts them to audio; spectrogram and
    waveform plots plus the audio itself are returned base64-encoded for the
    frontend.

    Raises ``KeyError`` if a required JSON key is missing.
    """
    print("call successful")
    # Renamed from `json` to avoid shadowing the stdlib `json` module name.
    payload = await request.json()
    print(payload)
    font_type = payload['font_select']  # read for validation; not yet used below
    input_text = payload['input_text']

    print("generating mel-spectrogram")
    # Generate mel-spectrogram using Tacotron2.
    # mel_output_data, mel_output_postnet_data, alignments_data = synthesize_voice(input_text, "Shruti_finetuned.pt")
    mel_output_data, mel_output_postnet_data, alignments_data = synthesize_voice(input_text, "kaggle_12000.pt")
    print("mel generation successful")

    # Render the spectrograms/alignment as a base64 image for display in HTML.
    mel_output_base64 = plot_data([mel_output_data, mel_output_postnet_data, alignments_data])

    # Audio synthesis: serialize the mel to an in-memory .npy blob, which is
    # the input format hifi_gan_inference expects.
    print("Starting audio synthesis")
    buffer = BytesIO()
    np.save(buffer, mel_output_data)
    input_mel = buffer.getvalue()

    hifigan_checkpoint = "generator_v1"
    # Generate audio using HiFi-GAN.
    audio_data = hifi_gan_inference(input_mel, hifigan_checkpoint)

    print("Creating time-domain waveform")
    # Plot the time-domain waveform as a base64 image.
    wave_base64 = plot_waveforms(audio_data)

    # Encode raw audio content as base64 for transport in JSON.
    audio_base64 = base64.b64encode(audio_data).decode('utf-8')

    response_data = {
        'mel_spectrogram': mel_output_base64,
        'audio_data': audio_base64,
        'waveform': wave_base64,
    }
    return JSONResponse(content=response_data)