# app.py — FastAPI text-to-speech synthesis service
# (Hugging Face Space "host", revision 8081f33 by projanshakya)
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from logic import synthesize_voice, plot_data, plot_waveforms
import base64
from typing import Dict
app = FastAPI()
# You need to replace the placeholders above with the actual URLs for the models.
# Allow requests from your Vercel domain
origins = [
"https://host-test-smoky.vercel.app",
# Add other allowed origins if needed
]
# Set up CORS middleware
app.add_middleware(
CORSMiddleware,
allow_origins=origins,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
#aseda
@app.post("/synthesize", response_model=Dict[str, str])
async def synthesize(request_data: Dict[str, str]):
font_type = request_data['font_select']
input_text = request_data['input_text']
# Font selection logic (customize based on your requirements)
if font_type == 'Preeti':
# Implement Preeti font logic
pass
elif font_type == 'Unicode':
# Implement Unicode font logic
pass
# Generate mel-spectrogram using Tacotron2
mel_output_data, mel_output_postnet_data, alignments_data = synthesize_voice(input_text, "Shruti_finetuned")
# Convert mel-spectrogram to base64 for display in HTML
mel_output_base64 = plot_data([mel_output_data, mel_output_postnet_data, alignments_data])
# Save the generated audio file
audio_file_path = 'audio_output/mel1_generated_e2e.wav'
# Plot the waveform
wave_base64 = plot_waveforms(audio_file_path)
# Encode audio content as Base64
with open(audio_file_path, 'rb') as audio_file:
audio_base64 = base64.b64encode(audio_file.read()).decode('utf-8')
# Customize the response based on the information you want to send to the frontend
response_data = {
'mel_spectrogram': mel_output_base64,
'audio_data': audio_base64,
'waveform': wave_base64,
'some_other_data': 'example_value',
}
return JSONResponse(content=response_data)