Bhavibond committed on
Commit
4a54047
·
verified ·
1 Parent(s): 0871702

Fix the encoder for low-end execution

Browse files
Files changed (1) hide show
  1. app.py +4 -8
app.py CHANGED
@@ -15,8 +15,8 @@ tts = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
15
 
16
  # Load speaker embeddings from dataset
17
  embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
18
- speaker_embeddings = embeddings_dataset[7306]["xvector"] # Example speaker embedding
19
- speaker_embeddings = torch.tensor(speaker_embeddings).unsqueeze(0) # Reshape for the model
20
 
21
  # Ensure cache directory for output files
22
  os.makedirs("output", exist_ok=True)
@@ -43,14 +43,10 @@ def process_audio(audio, target_language):
43
 
44
  # Step 3: Generate speech from translated text
45
  inputs = processor(text=translated_text, return_tensors="pt")
46
- input_ids = inputs.input_ids
47
-
48
- # Convert input_ids to embeddings using model embeddings
49
- input_embeddings = tts.encoder.embed_tokens(input_ids)
50
 
51
  with torch.no_grad():
52
- # Generate speech using embeddings and speaker embeddings
53
- speech = tts.generate_speech(input_embeddings, speaker_embeddings)
54
 
55
  # Save generated speech
56
  output_audio_path = "output/generated_speech.wav"
 
15
 
16
  # Load speaker embeddings from dataset
17
  embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
18
+ speaker_embeddings = embeddings_dataset[7306]["xvector"]
19
+ speaker_embeddings = torch.tensor(speaker_embeddings).unsqueeze(0)
20
 
21
  # Ensure cache directory for output files
22
  os.makedirs("output", exist_ok=True)
 
43
 
44
  # Step 3: Generate speech from translated text
45
  inputs = processor(text=translated_text, return_tensors="pt")
46
+ input_features = inputs.input_features
 
 
 
47
 
48
  with torch.no_grad():
49
+ speech = tts.generate_speech(input_features, speaker_embeddings)
 
50
 
51
  # Save generated speech
52
  output_audio_path = "output/generated_speech.wav"