Spaces:
Runtime error
Runtime error
Commit
·
639d737
1
Parent(s):
6f905f8
Upload folder using huggingface_hub
Browse files
app.py
CHANGED
|
@@ -14,12 +14,11 @@ asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large-
|
|
| 14 |
model_id = "Sandiago21/speecht5_finetuned_mozilla_foundation_common_voice_13_german" # update with your model id
|
| 15 |
# pipe = pipeline("automatic-speech-recognition", model=model_id)
|
| 16 |
model = SpeechT5ForTextToSpeech.from_pretrained(model_id)
|
|
|
|
| 17 |
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
|
| 18 |
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
|
| 19 |
speaker_embeddings = torch.tensor(embeddings_dataset[7440]["xvector"]).unsqueeze(0)
|
| 20 |
|
| 21 |
-
processor = SpeechT5Processor.from_pretrained(model_id)
|
| 22 |
-
|
| 23 |
replacements = [
|
| 24 |
("Γ", "E"),
|
| 25 |
("Γ", "E"),
|
|
@@ -112,8 +111,8 @@ def speech_to_speech_translation(audio):
|
|
| 112 |
|
| 113 |
title = "Cascaded STST"
|
| 114 |
description = """
|
| 115 |
-
Demo for cascaded speech-to-speech translation (STST), mapping from source speech in any language to target speech in
|
| 116 |
-
[SpeechT5 TTS](https://huggingface.co/microsoft/speecht5_tts) model for text-to-speech, fine-tuned in
|
| 117 |

|
| 118 |
"""
|
| 119 |
|
|
|
|
| 14 |
model_id = "Sandiago21/speecht5_finetuned_mozilla_foundation_common_voice_13_german" # update with your model id
|
| 15 |
# pipe = pipeline("automatic-speech-recognition", model=model_id)
|
| 16 |
model = SpeechT5ForTextToSpeech.from_pretrained(model_id)
|
| 17 |
+
processor = SpeechT5Processor.from_pretrained(model_id)
|
| 18 |
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
|
| 19 |
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
|
| 20 |
speaker_embeddings = torch.tensor(embeddings_dataset[7440]["xvector"]).unsqueeze(0)
|
| 21 |
|
|
|
|
|
|
|
| 22 |
replacements = [
|
| 23 |
("Γ", "E"),
|
| 24 |
("Γ", "E"),
|
|
|
|
| 111 |
|
| 112 |
title = "Cascaded STST"
|
| 113 |
description = """
|
| 114 |
+
Demo for cascaded speech-to-speech translation (STST), mapping from source speech in any language to target speech in German. Demo uses OpenAI's [Whisper Large v2](https://huggingface.co/openai/whisper-large-v2) model for speech translation, and [Sandiago21/speecht5_finetuned_mozilla_foundation_common_voice_13_german](https://huggingface.co/Sandiago21/speecht5_finetuned_mozilla_foundation_common_voice_13_german) checkpoint for text-to-speech, which is based on Microsoft's
|
| 115 |
+
[SpeechT5 TTS](https://huggingface.co/microsoft/speecht5_tts) model for text-to-speech, fine-tuned in German Audio dataset:
|
| 116 |

|
| 117 |
"""
|
| 118 |
|