lord-reso committed on
Commit
f6905a0
·
verified ·
1 Parent(s): 40ec177

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -40
app.py CHANGED
@@ -1,54 +1,80 @@
1
- from flask import Flask, render_template, request, jsonify
 
 
2
  from logic import synthesize_voice, plot_data, plot_waveforms
3
  import base64
4
- from flask_cors import CORS, cross_origin
 
5
 
6
- app = Flask(__name__)
7
- CORS(app, support_credentials=True)
8
 
 
 
 
9
 
10
- @app.route('/')
11
- def index():
12
- return {'Voice': 'Cloning'}
13
 
14
- @app.route('/synthesize', methods=['POST'])
15
- def synthesize():
16
- font_type = request.json['font_select']
17
- input_text = request.json['input_text']
 
 
 
18
 
19
- # Font selection logic (you can customize this based on your requirements)
20
- if font_type == 'Preeti':
21
- # Implement Preeti font logic
22
- pass
23
- elif font_type == 'Unicode':
24
- # Implement Unicode font logic
25
- pass
26
 
27
- # Generate mel-spectrogram using Tacotron2
28
- mel_output_data, mel_output_postnet_data, alignments_data = synthesize_voice(input_text, "Shruti_finetuned")
 
 
 
29
 
30
- # Convert mel-spectrogram to base64 for display in HTML
31
- mel_output_base64 = plot_data([mel_output_data, mel_output_postnet_data, alignments_data])
 
 
32
 
33
- # Save the generated audio file
34
- audio_file_path = 'audio_output/mel1_generated_e2e.wav'
35
 
36
- # Plot the waveform
37
- wave_base64 = plot_waveforms(audio_file_path)
38
 
39
- # Encode audio content as Base64
40
- with open(audio_file_path, 'rb') as audio_file:
41
- audio_base64 = base64.b64encode(audio_file.read()).decode('utf-8')
42
-
43
- # You can customize the response based on what information you want to send to the frontend
44
- response_data = {
45
- 'mel_spectrogram': mel_output_base64,
46
- 'audio_data': audio_base64,
47
- 'waveform': wave_base64,
48
- 'some_other_data': 'example_value',
49
- }
50
 
51
- return jsonify(response_data)
 
52
 
53
- if __name__ == '__main__':
54
- app.run(host='0.0.0.0', port=8000, debug=True, threaded=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from fastapi.responses import JSONResponse
3
+ from fastapi.middleware.cors import CORSMiddleware
4
  from logic import synthesize_voice, plot_data, plot_waveforms
5
  import base64
6
+ from typing import Dict
7
+ import httpx
8
 
9
+ app = FastAPI()
 
10
 
11
@app.get("/")
def read_root():
    """Root endpoint: a lightweight liveness check for the service."""
    payload = {"Voice": "Cloning"}
    return payload
14
 
 
 
 
15
 
16
# Allow cross-origin requests so the separately-hosted frontend can call
# this API directly from the browser.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers per the CORS spec (a wildcard origin cannot be paired
# with credentialed requests) — confirm whether credentials are actually
# needed, or pin the concrete frontend origin(s) instead.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Upstream Hugging Face Space endpoint that the /synthesize route forwards
# the incoming request body to before running local synthesis.
hugging_face_api_url = "https://huggingface.co/spaces/lord-reso/host/synthesize"
 
 
 
 
 
 
25
 
26
# response_model=Dict[str, str] was removed: the actual payload nests a dict
# under 'hugging_face_response' (so the declared model was wrong), and
# returning a JSONResponse bypasses response_model serialization anyway.
@app.post("/synthesize")
async def synthesize(request_data: Dict[str, str]):
    """Proxy the request to the hosted Hugging Face Space, then run local
    synthesis and return the results as base64-encoded payloads.

    Expects a JSON body containing 'font_select' and 'input_text'.

    Returns a JSON object with:
      - 'mel_spectrogram': base64 plot of mel outputs and alignments
      - 'audio_data':      base64-encoded WAV content
      - 'waveform':        base64 waveform plot
      - 'hugging_face_response': upstream API response, forwarded verbatim
    On failure, returns {'error': ...} with status 400 (bad input) or 500.
    """
    # Validate input before the remote round-trip: previously a missing key
    # raised KeyError *after* the upstream call, wasting the round-trip and
    # surfacing as a 500 instead of a client error.
    font_type = request_data.get('font_select')
    input_text = request_data.get('input_text')
    if font_type is None or input_text is None:
        return JSONResponse(
            content={"error": "Request must include 'font_select' and 'input_text'"},
            status_code=400,
        )

    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(hugging_face_api_url, json=request_data)

        if response.status_code != 200:
            error_message = f"Error from Hugging Face API: {response.text}"
            # Reuse the already-formatted message instead of re-evaluating
            # the same f-string a second time.
            print(error_message)
            return JSONResponse(content={"error": error_message}, status_code=500)

        # Upstream payload is forwarded to the frontend unchanged below.
        hugging_face_response = response.json()

        # Font selection logic (customize based on your requirements)
        if font_type == 'Preeti':
            # Implement Preeti font logic
            pass
        elif font_type == 'Unicode':
            # Implement Unicode font logic
            pass

        # Generate mel-spectrogram using Tacotron2
        mel_output_data, mel_output_postnet_data, alignments_data = synthesize_voice(input_text, "Shruti_finetuned")

        # Convert mel-spectrogram plots to base64 for display in HTML
        mel_output_base64 = plot_data([mel_output_data, mel_output_postnet_data, alignments_data])

        # Path of the generated audio file.
        # NOTE(review): assumed to be written as a side effect of
        # synthesize_voice — confirm against logic.py.
        audio_file_path = 'audio_output/mel1_generated_e2e.wav'

        # Plot the waveform of the generated audio
        wave_base64 = plot_waveforms(audio_file_path)

        # Encode the raw audio content as base64 for inline playback
        with open(audio_file_path, 'rb') as audio_file:
            audio_base64 = base64.b64encode(audio_file.read()).decode('utf-8')

        response_data = {
            'mel_spectrogram': mel_output_base64,
            'audio_data': audio_base64,
            'waveform': wave_base64,
            'some_other_data': 'example_value',
            'hugging_face_response': hugging_face_response,  # Include Hugging Face API response
        }
        return JSONResponse(content=response_data)
    except Exception as e:
        # Boundary handler: log and return a 500 rather than letting the
        # worker crash; narrower exceptions are not practical here because
        # the body spans network I/O, model inference, and file I/O.
        error_message = f"Error during processing: {str(e)}"
        print(error_message)
        return JSONResponse(content={"error": error_message}, status_code=500)