{
"cells": [
{
"cell_type": "markdown",
"source": [
"# HebTTS Quickstart\n",
"We created a jupyter notebook in order to easily generate your desired samples!\n",
"\n",
"\n",
"\n",
"\n",
"  [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1f3-6Dqbna9_hI5C9V4qTIG05dixW-r72?usp=sharing) \n",
"  [![GitHub](https://img.shields.io/badge/GitHub-HebTTS-black?logo=github)](https://github.com/slp-rl/HebTTS) \n",
"\n",
"\n",
"\n",
"\n",
"---\n",
"\n",
"\n"
],
"metadata": {
"id": "3wmzNy2fDSsr"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "56kkbeJahIe1"
},
"outputs": [],
"source": [
"#@title Install 💻\n",
"\n",
"%%capture\n",
"! git clone https://github.com/slp-rl/HebTTS.git\n",
"\n",
"! pip install torch torchaudio\n",
"! pip install torchmetrics\n",
"! pip install omegaconf\n",
"! pip install git+https://github.com/lhotse-speech/lhotse\n",
"! pip install librosa\n",
"! pip install encodec\n",
"! pip install phonemizer\n",
"! pip install audiocraft # optional\n",
"# Model checkpoint (checkpoint.pt) hosted on Google Drive.\n",
"! gdown https://drive.google.com/uc?id=11NoOJzMLRX9q1C_Q4sX0w2b9miiDjGrv\n",
"\n",
"import glob\n",
"import os\n",
"from pathlib import Path\n",
"\n",
"from IPython.display import Audio, display\n",
"\n",
"\n",
"def display_audio(prompt_file):\n",
"    \"\"\"Show one generation run: print its text prompt and speaker, then play every .wav in the run directory.\n",
"\n",
"    Expects the layout ./out/<speaker>/<run>/prompt.txt with the generated\n",
"    .wav files stored alongside prompt.txt (see the Generate cell).\n",
"    \"\"\"\n",
"    # Directory layout is out/<speaker>/<run>/prompt.txt, so the speaker\n",
"    # name is two levels above the prompt file.\n",
"    speaker = os.path.basename(os.path.dirname(os.path.dirname(prompt_file)))\n",
"    # sorted() makes playback order deterministic across runs.\n",
"    audio_files = sorted(Path(os.path.dirname(prompt_file)).rglob(\"*.wav\"))\n",
"    with open(prompt_file) as f:\n",
"        print(f\"Prompt: {f.read()}\")\n",
"    print(f\"Speaker: {speaker}\")\n",
"    print(\"-\" * 100)\n",
"\n",
"    for audio_file in audio_files:\n",
"        display(Audio(audio_file, autoplay=False))\n",
"        print(\"-\" * 100)\n",
"        print()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "uIBF-IFwxLjL",
"cellView": "form"
},
"outputs": [],
"source": [
"# @title Enter Hebrew text for generation\n",
"# @markdown You can concatenate prompts with '|' to generate few samples at once\n",
"hebrew_text_to_generate = \"היי מה קורה | ובשביל להבין למה מחיר הדלק כל כך עלה צריך לחזור שנתיים אחרונית\" # @param {type:\"string\"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "RFrYpBJsxQW3",
"cellView": "form"
},
"outputs": [],
"source": [
"\n",
"# @title Choose speaker\n",
"# @markdown Reference speaker for voice cloning; the predefined names map to entries in HebTTS/speakers/speakers.yaml (free-form input is also allowed).\n",
"speaker = \"shaul\" # @param [\"\\\"osim\\\"\", \"\\\"geek\\\"\", \"\\\"shaul\\\"\"] {type:\"raw\", allow-input: true}"
]
},
{
"cell_type": "markdown",
"source": [
"**Note:** inference may be slower depending on the resources allocated by Google Colab. It is recommended to choose a GPU in `Edit` -> `Notebook settings` -> `Hardware accelerator`."
],
"metadata": {
"id": "NMvd3VytEJEZ"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "U8Hszx6axgns"
},
"outputs": [],
"source": [
"#@title Generate!\n",
"from pathlib import Path\n",
"\n",
"speaker_dir = Path(f\"./out/{speaker}\")\n",
"speaker_dir.mkdir(exist_ok=True, parents=True)\n",
"\n",
"# Pick the next free run index under ./out/<speaker>/ so earlier runs are\n",
"# kept. BUGFIX: the original computed max(...) + 1 but never assigned it to\n",
"# i, so every run overwrote directory 0.\n",
"i = 0\n",
"if any(speaker_dir.iterdir()):\n",
"    i = max(int(p.name) for p in speaker_dir.iterdir()) + 1\n",
"output_dir = speaker_dir / f\"{i}\"\n",
"output_dir.mkdir(exist_ok=True, parents=True)\n",
"\n",
"# Save the prompt next to the generated audio so display_audio / the Listen\n",
"# cell can show it later.\n",
"prompt_file = output_dir / \"prompt.txt\"\n",
"with open(prompt_file, \"w\") as f:\n",
"    f.write(hebrew_text_to_generate)\n",
"\n",
"! python HebTTS/infer.py \\\n",
"    --checkpoint checkpoint.pt \\\n",
"    --output-dir $output_dir \\\n",
"    --text \"$hebrew_text_to_generate\" \\\n",
"    --speaker \"$speaker\" \\\n",
"    --speaker-yaml HebTTS/speakers/speakers.yaml \\\n",
"    --tokens-file HebTTS/tokenizer/unique_words_tokens_all.k2symbols \\\n",
"    --vocab-file HebTTS/tokenizer/vocab.txt \\\n",
"    --mbd True\n",
"\n",
"display_audio(prompt_file)"
]
},
{
"cell_type": "code",
"source": [
"#@title Listen\n",
"# Replay every previously generated run under ./out/<speaker>/<run>/.\n",
"for prompt_file in glob.glob(\"./out/*/*/prompt.txt\"):\n",
"  display_audio(prompt_file)"
],
"metadata": {
"cellView": "form",
"id": "CXiMWmNxJzNZ"
},
"execution_count": null,
"outputs": []
}
],
"metadata": {
"colab": {
"provenance": [],
"gpuType": "T4"
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"nbformat": 4,
"nbformat_minor": 0
}