Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, Request | |
| from fastapi.responses import JSONResponse | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from logic import synthesize_voice, plot_data, plot_waveforms | |
| import base64 | |
| import sys | |
| import numpy as np | |
| from io import BytesIO | |
| from hifigan.inference_e2e import hifi_gan_inference | |
# FastAPI application object for this service.
app = FastAPI()
def read_root():
    """Return a fixed JSON status payload.

    NOTE(review): no route decorator is visible on this function in the
    file as seen here -- confirm how it is registered with the app.
    """
    return JSONResponse(content={"Voice": "Cloning", "Status": "Success"})
# Enable CORS for every origin, HTTP method, and request header.
# NOTE(review): FastAPI's CORS documentation advises against combining
# wildcard origins/methods/headers with allow_credentials=True -- confirm
# whether credentialed cross-origin requests are actually required.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Presumably the public URL of this service's synthesize endpoint.
# NOTE(review): not referenced anywhere in the visible part of this file --
# confirm it is still needed.
hugging_face_api_url = "https://huggingface.co/spaces/lord-reso/host/synthesize"
async def synthesize(request: Request):
    """Synthesize speech for the text supplied in a JSON request body.

    The body must be a JSON object containing an ``input_text`` field.
    A ``font_select`` field may also be sent; it is accepted but not used
    by this handler.

    Pipeline:
      1. ``synthesize_voice`` produces the mel output, the post-net mel
         output, and the alignments for ``input_text``.
      2. ``plot_data`` renders those three arrays for display.
      3. The mel output is serialized in ``.npy`` format and passed to
         ``hifi_gan_inference`` to obtain the audio.
      4. ``plot_waveforms`` renders the audio, and the audio itself is
         Base64-encoded.

    NOTE(review): no route decorator is visible on this function in the
    file as seen here -- confirm how it is registered with the app.
    NOTE(review): the model calls run synchronously inside this coroutine
    and therefore appear to block the event loop for their duration --
    confirm this is acceptable for the expected load.

    Args:
        request: Incoming request whose body carries the JSON payload.

    Returns:
        A ``JSONResponse`` with the keys ``mel_spectrogram``,
        ``audio_data`` and ``waveform`` on success, or a 400
        ``JSONResponse`` with an ``error`` key when the body is not valid
        JSON or lacks ``input_text``.
    """
    print("call successful")

    try:
        payload = await request.json()
    except ValueError:
        # Malformed JSON surfaces as json.JSONDecodeError, a ValueError
        # subclass; report it as a client error instead of a 500.
        return JSONResponse(
            status_code=400,
            content={"error": "Request body must be valid JSON"},
        )
    print(payload)

    if not isinstance(payload, dict) or "input_text" not in payload:
        return JSONResponse(
            status_code=400,
            content={"error": "Missing required field: input_text"},
        )
    input_text = payload["input_text"]

    print("generating mel-spectrogram")
    # Generate mel-spectrogram using Tacotron2.
    # "Shruti_finetuned.pt" is the alternative checkpoint from an earlier,
    # commented-out call.
    mel_output_data, mel_output_postnet_data, alignments_data = synthesize_voice(
        input_text, "kaggle_12000.pt"
    )
    print("mel generation successful")

    # Convert mel-spectrogram to base64 for display in HTML
    mel_output_base64 = plot_data(
        [mel_output_data, mel_output_postnet_data, alignments_data]
    )

    # Audio synthesis begins
    print("Starting audio synthesis")
    # hifi_gan_inference receives the mel output as .npy-serialized bytes.
    with BytesIO() as buffer:
        np.save(buffer, mel_output_data)
        input_mel = buffer.getvalue()

    hifigan_checkpoint = "generator_v1"
    # Generate audio using Hifigan
    audio_data = hifi_gan_inference(input_mel, hifigan_checkpoint)

    print("Creating time-domain waveform")
    # Plot the waveform
    wave_base64 = plot_waveforms(audio_data)

    # Encode audio content as Base64 so it can travel inside the JSON body
    audio_base64 = base64.b64encode(audio_data).decode('utf-8')

    # Payload returned to the frontend
    response_data = {
        'mel_spectrogram': mel_output_base64,
        'audio_data': audio_base64,
        'waveform': wave_base64,
    }
    return JSONResponse(content=response_data)