Bhavibond committed on
Commit
0871702
·
verified ·
1 Parent(s): e6265b7

pass input_embeddings

Browse files
Files changed (1) hide show
  1. app.py +6 -2
app.py CHANGED
@@ -43,10 +43,14 @@ def process_audio(audio, target_language):
43
 
44
  # Step 3: Generate speech from translated text
45
  inputs = processor(text=translated_text, return_tensors="pt")
 
 
 
 
46
 
47
  with torch.no_grad():
48
- # Pass the correct input_features to the model
49
- speech = tts.generate_speech(inputs["input_features"], speaker_embeddings)
50
 
51
  # Save generated speech
52
  output_audio_path = "output/generated_speech.wav"
 
43
 
44
  # Step 3: Generate speech from translated text
45
  inputs = processor(text=translated_text, return_tensors="pt")
46
+ input_ids = inputs.input_ids
47
+
48
+ # Convert input_ids to embeddings using model embeddings
49
+ input_embeddings = tts.encoder.embed_tokens(input_ids)
50
 
51
  with torch.no_grad():
52
+ # Generate speech using embeddings and speaker embeddings
53
+ speech = tts.generate_speech(input_embeddings, speaker_embeddings)
54
 
55
  # Save generated speech
56
  output_audio_path = "output/generated_speech.wav"