from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from logic import synthesize_voice, plot_data, plot_waveforms
import base64
import numpy as np
from io import BytesIO
from hifigan.inference_e2e import hifi_gan_inference

app = FastAPI()

# Allow cross-origin requests from any frontend (tighten in production).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Hosted Hugging Face Space endpoint (kept for reference; not used below).
hugging_face_api_url = "https://huggingface.co/spaces/lord-reso/host/synthesize"


@app.get("/")
def read_root():
    # Simple health-check route.
    data = {"Voice": "Cloning", "Status": "Success"}
    return JSONResponse(content=data)


@app.post("/synthesize")
async def synthesize(request: Request):
    print("/synthesize called")
    payload = await request.json()  # renamed from `json` to avoid shadowing the module name
    print(payload)
    font_type = payload['font_select']  # currently unused
    input_text = payload['input_text']

    print("Generating mel-spectrogram")
    # Generate mel-spectrogram using Tacotron2
    # mel_output_data, mel_output_postnet_data, alignments_data = synthesize_voice(input_text, "Shruti_finetuned.pt")
    mel_output_data, mel_output_postnet_data, alignments_data = synthesize_voice(input_text, "kaggle_12000.pt")
    print("Mel generation successful")

    # Render the mel-spectrogram and alignment plots as a base64 image for display in HTML
    mel_output_base64 = plot_data([mel_output_data, mel_output_postnet_data, alignments_data])

    # Audio synthesis: serialize the mel-spectrogram to an in-memory .npy buffer
    print("Starting audio synthesis")
    buffer = BytesIO()
    np.save(buffer, mel_output_data)
    input_mel = buffer.getvalue()

    hifigan_checkpoint = "generator_v1"
    # Generate audio from the mel-spectrogram using HiFi-GAN
    audio_data = hifi_gan_inference(input_mel, hifigan_checkpoint)

    print("Creating time-domain waveform")
    # Plot the time-domain waveform as a base64 image
    wave_base64 = plot_waveforms(audio_data)

    # Encode the raw audio bytes as base64 for transport in JSON
    audio_base64 = base64.b64encode(audio_data).decode('utf-8')

    # Everything the frontend needs to display the result
    response_data = {
        'mel_spectrogram': mel_output_base64,
        'audio_data': audio_base64,
        'waveform': wave_base64,
    }
    return JSONResponse(content=response_data)
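
# --- Usage sketch (illustrative; the module name, port, and payload values
# below are assumptions, not part of the app above) ---
# A minimal client call, assuming this file is main.py, the server listens on
# localhost:8000, and hifi_gan_inference returns audio bytes in a playable
# container format (e.g. WAV):
#
#   import base64, requests
#   resp = requests.post(
#       "http://localhost:8000/synthesize",
#       json={"font_select": "default", "input_text": "Hello world"},
#   )
#   body = resp.json()
#   with open("output.wav", "wb") as f:
#       f.write(base64.b64decode(body["audio_data"]))

# Run the app directly with uvicorn (the ASGI server commonly used with
# FastAPI); equivalent to `uvicorn main:app --host 0.0.0.0 --port 8000`.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)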