Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -38,6 +38,11 @@ class TTS_Interface:
|
|
| 38 |
def __init__(self):
|
| 39 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 40 |
self.utterance_cloner = UtteranceCloner(model_id="Meta", device=self.device)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
self.utterance_cloner.tts.set_language("de")
|
| 42 |
self.acoustic_model = Aligner()
|
| 43 |
self.acoustic_model.load_state_dict(torch.load("Models/Aligner/aligner.pt", map_location='cpu')["asr_model"])
|
|
@@ -46,7 +51,6 @@ class TTS_Interface:
|
|
| 46 |
self.text = "Quellen hattest du ihm, hattest dem Flüchtigen, kühle Schatten geschenkt, und die Gestade sahen, all ihm nach, und es bebte, aus den Wellen ihr lieblich Bild."
|
| 47 |
reference_audio = "reference_audios/2.wav"
|
| 48 |
self.duration, self.pitch, self.energy, _, _ = self.utterance_cloner.extract_prosody(self.text, reference_audio, lang="de", on_line_fine_tune=True)
|
| 49 |
-
self.utterance_cloner.tts.text2phone.use_word_boundaries = False
|
| 50 |
self.phones = self.utterance_cloner.tts.text2phone.get_phone_string(self.text)
|
| 51 |
|
| 52 |
#######
|
|
|
|
| 38 |
def __init__(self):
|
| 39 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 40 |
self.utterance_cloner = UtteranceCloner(model_id="Meta", device=self.device)
|
| 41 |
+
|
| 42 |
+
# Word boundaries are disabled for simplicity: we are using an oracle for this demo, and we have seen enough German data to get by without them.
|
| 43 |
+
self.utterance_cloner.tf.use_word_boundaries = False
|
| 44 |
+
self.utterance_cloner.tts.text2phone.use_word_boundaries = False
|
| 45 |
+
|
| 46 |
self.utterance_cloner.tts.set_language("de")
|
| 47 |
self.acoustic_model = Aligner()
|
| 48 |
self.acoustic_model.load_state_dict(torch.load("Models/Aligner/aligner.pt", map_location='cpu')["asr_model"])
|
|
|
|
| 51 |
self.text = "Quellen hattest du ihm, hattest dem Flüchtigen, kühle Schatten geschenkt, und die Gestade sahen, all ihm nach, und es bebte, aus den Wellen ihr lieblich Bild."
|
| 52 |
reference_audio = "reference_audios/2.wav"
|
| 53 |
self.duration, self.pitch, self.energy, _, _ = self.utterance_cloner.extract_prosody(self.text, reference_audio, lang="de", on_line_fine_tune=True)
|
|
|
|
| 54 |
self.phones = self.utterance_cloner.tts.text2phone.get_phone_string(self.text)
|
| 55 |
|
| 56 |
#######
|