File size: 2,058 Bytes
f2fb9a3
 
 
5bcf511
 
f2fb9a3
5bcf511
f2fb9a3
c0b470e
5bcf511
 
 
f2fb9a3
 
 
 
 
 
 
 
 
 
 
 
 
 
8081f33
f2fb9a3
 
 
 
5bcf511
f2fb9a3
5bcf511
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f2fb9a3
 
5bcf511
 
 
f2fb9a3
5bcf511
 
 
f2fb9a3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from logic import synthesize_voice, plot_data, plot_waveforms
import base64
from typing import Dict

# FastAPI application instance; the middleware and routes in this module are
# registered on it.
app = FastAPI()

# Origins allowed to call this API cross-origin (the Vercel-hosted frontend).
# Append further origins to this list if needed.
origins = ["https://host-test-smoky.vercel.app"]

# CORS policy: accept only the origins listed above, while allowing
# credentials, every HTTP method and every request header.
_cors_settings = dict(
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
app.add_middleware(CORSMiddleware, **_cors_settings)
# Speech synthesis endpoint
@app.post("/synthesize", response_model=Dict[str, str])
async def synthesize(request_data: Dict[str, str]):
    """Synthesize speech for the posted text and return it with its plots.

    Args:
        request_data: JSON body of the request. Must contain the keys
            ``font_select`` and ``input_text``.

    Returns:
        A ``JSONResponse`` whose body has the keys ``mel_spectrogram``,
        ``audio_data`` (Base64 of the generated WAV file), ``waveform`` and
        ``some_other_data``.

    Raises:
        HTTPException: 422 when ``font_select`` or ``input_text`` is missing
            from the body (previously this surfaced as an unhandled
            ``KeyError`` and therefore a 500 response).
    """
    # Local import: HTTPException is not among this module's top-level imports.
    from fastapi import HTTPException

    # Reject incomplete bodies with a client error instead of a KeyError.
    missing = [
        key for key in ('font_select', 'input_text') if key not in request_data
    ]
    if missing:
        raise HTTPException(
            status_code=422,
            detail=f"Missing required field(s): {', '.join(missing)}",
        )

    input_text = request_data['input_text']

    # TODO: font-specific handling for 'Preeti' / 'Unicode' is not implemented
    # yet; 'font_select' is required but currently has no effect on the output.

    # Generate mel-spectrogram using Tacotron2
    # NOTE(review): this call runs synchronously inside the coroutine; if it is
    # slow it will stall the event loop. Moving it to a worker thread would
    # also require a per-request output path (see below) - confirm first.
    mel_output_data, mel_output_postnet_data, alignments_data = synthesize_voice(
        input_text, "Shruti_finetuned"
    )

    # Convert mel-spectrogram to base64 for display in HTML
    mel_output_base64 = plot_data(
        [mel_output_data, mel_output_postnet_data, alignments_data]
    )

    # Location of the generated audio file.
    # NOTE(review): presumably synthesize_voice writes its output here - verify
    # in logic.py. The path is fixed, so it is shared by all requests.
    audio_file_path = 'audio_output/mel1_generated_e2e.wav'

    # Plot the waveform
    wave_base64 = plot_waveforms(audio_file_path)

    # Encode audio content as Base64
    with open(audio_file_path, 'rb') as audio_file:
        audio_base64 = base64.b64encode(audio_file.read()).decode('utf-8')

    # Payload sent to the frontend.
    response_data = {
        'mel_spectrogram': mel_output_base64,
        'audio_data': audio_base64,
        'waveform': wave_base64,
        'some_other_data': 'example_value',
    }

    return JSONResponse(content=response_data)