rishidahiya committed on
Commit
784779a
·
verified ·
1 Parent(s): 9cc8c72

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -0
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify, send_file
2
+ from flask_cors import CORS
3
+ from encoder import inference as encoder_inference
4
+ from synthesizer.inference import Synthesizer
5
+ from vocoder import inference as vocoder_inference
6
+ import librosa
7
+ import soundfile as sf
8
+ from io import BytesIO
9
+ import os
10
+
11
+ app = Flask(__name__)
12
+ CORS(app)
13
+
14
# Warm up all three models at import time so that they are loaded once
# for the whole process instead of once per request.
_ENCODER_WEIGHTS = "saved_models/encoder.pt"
_SYNTHESIZER_WEIGHTS = "saved_models/synthesizer.pt"
_VOCODER_WEIGHTS = "saved_models/vocoder.pt"

print("Loading models...")
encoder_inference.load_model(_ENCODER_WEIGHTS)
synthesizer = Synthesizer(_SYNTHESIZER_WEIGHTS)
vocoder_inference.load_model(_VOCODER_WEIGHTS)
print("✓ Models loaded!")
20
+
21
@app.route('/health', methods=['GET'])
def health():
    """Liveness probe: always answers 200 with ``{"status": "ok"}``."""
    payload = {"status": "ok"}
    return jsonify(payload), 200
24
+
25
@app.route('/clone', methods=['POST'])
def clone_voice():
    """Clone the uploaded voice and synthesize ``text`` with it.

    Expects a ``multipart/form-data`` POST carrying:

    * ``text`` -- form field with the text to speak.
    * ``voice_sample`` -- file field with a reference recording.

    Returns:
        On success, the generated audio as a WAV attachment named
        ``cloned_voice.wav``.
        ``400`` with a JSON ``error`` when a field is missing or the upload
        cannot be stored/decoded as audio.
        ``500`` with a JSON ``error`` when embedding, synthesis or vocoding
        fails (these are server-side failures, not client mistakes).
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    text = request.form.get('text')
    voice_sample = request.files.get('voice_sample')

    if not text or not voice_sample:
        return jsonify({"error": "Missing 'text' or 'voice_sample'"}), 400

    # The client controls `filename`, so it must never become part of a
    # path: "../../x" would escape the temp directory and two uploads with
    # the same name would overwrite each other. Only a short alphanumeric
    # extension is kept, as a format hint for the audio decoder.
    ext = os.path.splitext(voice_sample.filename or "")[1]
    if not ext[1:].isalnum() or len(ext) > 10:
        ext = ""

    try:
        # A private, uniquely named directory that is removed on exit from
        # the `with` block, whether decoding succeeds or raises.
        with tempfile.TemporaryDirectory() as workdir:
            temp_path = os.path.join(workdir, f"voice_sample{ext}")
            voice_sample.save(temp_path)
            # Decode and resample the reference recording to 16 kHz.
            wav, _ = librosa.load(temp_path, sr=16000)
    except Exception as e:
        # An undecodable upload is the caller's problem -> 400.
        return jsonify({"error": str(e)}), 400

    try:
        wav = encoder_inference.preprocess_wav(wav)

        # Speaker embedding computed from the reference recording.
        embed = encoder_inference.embed_utterance(wav)

        # One text with one embedding; mels[0] is its spectrogram.
        mels = synthesizer.synthesize_spectrograms([text], [embed])

        # Turn the spectrogram into audio samples.
        audio = vocoder_inference.vocoder(mels[0])

        # Encode the samples as WAV in memory.
        # NOTE(review): 22050 has to match the rate the vocoder actually
        # produces, otherwise playback speed/pitch is off -- confirm.
        audio_io = BytesIO()
        sf.write(audio_io, audio, 22050, format='WAV')
        audio_io.seek(0)
    except Exception as e:
        # Top-level boundary: log the traceback and report a server error.
        app.logger.exception("Voice cloning failed")
        return jsonify({"error": str(e)}), 500

    return send_file(
        audio_io,
        mimetype='audio/wav',
        as_attachment=True,
        download_name='cloned_voice.wav',
    )
65
+
66
@app.route('/', methods=['GET'])
def index():
    """Serve a small HTML page that explains how to call ``/clone``."""
    usage_page = '''
    <h1>Voice Cloning API</h1>
    <p>POST to /clone with:</p>
    <ul>
    <li>text: Hindi/Kannada text to synthesize</li>
    <li>voice_sample: WAV/OGG audio file (5-10 seconds)</li>
    </ul>
    <p>Returns: WAV audio with cloned voice</p>
    '''
    return usage_page
77
+
78
if __name__ == '__main__':
    # Script entry point: listen on every interface, port 7860, with the
    # debugger turned off.
    app.run(
        host='0.0.0.0',
        port=7860,
        debug=False,
    )