Update README.md
Browse files
README.md
CHANGED
|
@@ -23,7 +23,44 @@ It achieves the following results on the evaluation set:
|
|
| 23 |
|
| 24 |
Trained on Roman Urdu: a transliteration function was used to map standard Urdu script to Roman Urdu.
|
| 25 |
|
| 26 |
-
##
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
More information needed
|
| 29 |
|
|
|
|
| 23 |
|
| 24 |
Trained on Roman Urdu: a transliteration function was used to map standard Urdu script to Roman Urdu.
|
| 25 |
|
| 26 |
+
## Usage
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
from IPython.display import Audio
|
| 30 |
+
|
| 31 |
+
load the model:
|
| 32 |
+
model= SpeechT5ForTextToSpeech.from_pretrained("pocketmonkey/speecht5_tts_urdu")
|
| 33 |
+
|
| 34 |
+
get a speaker embedding:
|
| 35 |
+
example = dataset["test"][304]
|
| 36 |
+
speaker_embeddings = torch.tensor(example["speaker_embeddings"]).unsqueeze(0)
|
| 37 |
+
speaker_embeddings.shape
|
| 38 |
+
|
| 39 |
+
def urdu_to_roman_urdu(text):
    """Transliterate Urdu script into Roman Urdu.

    Each Urdu character is replaced by its Roman equivalent; characters
    not present in the mapping (e.g. Latin letters, digits) are passed
    through unchanged. The hamza diacritic is dropped entirely.
    """
    # str.maketrans accepts multi-character replacement strings
    # (e.g. 'ch', 'kh'), and an empty string deletes the character.
    translit_table = str.maketrans({
        'ا': 'a', 'ب': 'b', 'پ': 'p', 'ت': 't', 'ٹ': 't', 'ث': 's', 'ج': 'j', 'چ': 'ch',
        'ح': 'h', 'خ': 'kh', 'د': 'd', 'ڈ': 'd', 'ذ': 'z', 'ر': 'r', 'ڑ': 'r', 'ز': 'z',
        'ژ': 'zh', 'س': 's', 'ش': 'sh', 'ص': 's', 'ض': 'z', 'ط': 't', 'ظ': 'z', 'ع': 'a',
        'غ': 'gh', 'ف': 'f', 'ق': 'q', 'ک': 'k', 'گ': 'g', 'ل': 'l', 'م': 'm', 'ن': 'n',
        'ں': 'n', 'و': 'w', 'ہ': 'h', 'ء': 'a', 'ی': 'y', 'ے': 'e', 'آ': 'a', 'ؤ': 'o',
        'ئ': 'y', 'ٔ': '', ' ': ' ', '۔': '.', '،': ',', '؛': ';', '؟': '?', 'ھ': 'h',
    })
    return text.translate(translit_table)
|
| 51 |
+
|
| 52 |
+
text = "زندگی میں کامیابی"
|
| 53 |
+
text=urdu_to_roman_urdu(text)
|
| 54 |
+
|
| 55 |
+
inputs = processor(text=text, return_tensors="pt")
|
| 56 |
+
|
| 57 |
+
with torch.no_grad():
    spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings)
    speech = vocoder(spectrogram)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
Audio(speech.numpy(), rate=16000)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
|
| 65 |
More information needed
|
| 66 |
|