{ "cells": [ { "cell_type": "markdown", "source": [ "# HebTTS Quickstart\n", "We created a jupyter notebook in order to easily generate your desired samples!\n", "\n", "\n", "  \n", "  \n", " [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1f3-6Dqbna9_hI5C9V4qTIG05dixW-r72?usp=sharing)  \n", " [![GitHub](https://badges.aleen42.com/src/github.svg)](https://github.com/slp-rl/HebTTS)  \n", "\n", "\n", "\n", "\n", "---\n", "\n", "\n" ], "metadata": { "id": "3wmzNy2fDSsr" } }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "56kkbeJahIe1" }, "outputs": [], "source": [ "#@title Install 💻\n", "\n", "%%capture\n", "! git clone https://github.com/slp-rl/HebTTS.git\n", "\n", "! pip install torch torchaudio\n", "! pip install torchmetrics\n", "! pip install omegaconf\n", "! pip install git+https://github.com/lhotse-speech/lhotse\n", "! pip install librosa\n", "! pip install encodec\n", "! pip install phonemizer\n", "! pip install audiocraft # optional\n", "! 
gdown https://drive.google.com/uc?id=11NoOJzMLRX9q1C_Q4sX0w2b9miiDjGrv\n", "\n", "from pathlib import Path\n", "import glob\n", "import os\n", "from IPython.display import Audio, display\n", "\n", "\n", "def display_audio(prompt_file):\n", "    \"\"\"Print the prompt text and speaker of one generated sample, then play every .wav beside it.\n", "\n", "    prompt_file: path to out/<speaker>/<run>/prompt.txt; the generated wavs\n", "    live in the same run directory.\n", "    \"\"\"\n", "    # Layout on disk is out/<speaker>/<run>/prompt.txt, so the speaker name\n", "    # is two directory levels above the prompt file.\n", "    speaker = os.path.basename(os.path.dirname(os.path.dirname(prompt_file)))\n", "    audio_files = list(Path(os.path.dirname(prompt_file)).rglob(\"*.wav\"))\n", "    print(f\"Prompt: {Path(prompt_file).read_text()}\")\n", "    print(f\"Speaker: {speaker}\")\n", "    print(\"-\" * 100)\n", "\n", "    for audio_file in audio_files:\n", "        display(Audio(audio_file, autoplay=False))\n", "        print(\"-\" * 100)\n", "        print()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "uIBF-IFwxLjL", "cellView": "form" }, "outputs": [], "source": [ "# @title Enter Hebrew text for generation\n", "# @markdown You can concatenate prompts with '|' to generate few samples at once\n", "hebrew_text_to_generate = \"היי מה קורה | ובשביל להבין למה מחיר הדלק כל כך עלה צריך לחזור שנתיים אחרונית\" # @param {type:\"string\"}" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "RFrYpBJsxQW3", "cellView": "form" }, "outputs": [], "source": [ "\n", "# @title Choose speaker\n", "speaker = \"shaul\" # @param [\"\\\"osim\\\"\", \"\\\"geek\\\"\", \"\\\"shaul\\\"\"] {type:\"raw\", allow-input: true}" ] }, { "cell_type": "markdown", "source": [ "**Note:** inference may be slower based on the allocated resources by google colab. It is recommended to choose a GPU in `Edit` -> `Notebook settings` -> `Hardware accelerator`." 
], "metadata": { "id": "NMvd3VytEJEZ" } }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "U8Hszx6axgns" }, "outputs": [], "source": [ "#@title Generate!\n", "from pathlib import Path\n", "\n", "speaker_dir = Path(f\"./out/{speaker}\")\n", "speaker_dir.mkdir(exist_ok=True, parents=True)\n", "# Pick the next free run index: existing runs are numbered 0, 1, 2, ... under out/<speaker>/.\n", "# (The original code computed max(...) + 1 but never assigned it back to i,\n", "# so every run overwrote out/<speaker>/0.)\n", "i = 0\n", "if any(speaker_dir.iterdir()):\n", "    i = max(int(run.name) for run in speaker_dir.glob(\"*\")) + 1\n", "output_dir = speaker_dir / f\"{i}\"\n", "output_dir.mkdir(exist_ok=True, parents=True)\n", "prompt_file = output_dir / \"prompt.txt\"\n", "with open(prompt_file, \"w\") as f:\n", "    f.write(hebrew_text_to_generate)\n", "\n", "! python HebTTS/infer.py \\\n", "  --checkpoint checkpoint.pt \\\n", "  --output-dir $output_dir \\\n", "  --text \"$hebrew_text_to_generate\" \\\n", "  --speaker \"$speaker\" \\\n", "  --speaker-yaml HebTTS/speakers/speakers.yaml \\\n", "  --tokens-file HebTTS/tokenizer/unique_words_tokens_all.k2symbols \\\n", "  --vocab-file HebTTS/tokenizer/vocab.txt \\\n", "  --mbd True\n", "\n", "display_audio(prompt_file)" ] }, { "cell_type": "code", "source": [ "#@title Listen\n", "for prompt_file in glob.glob(\"./out/*/*/prompt.txt\"):\n", "    display_audio(prompt_file)" ], "metadata": { "cellView": "form", "id": "CXiMWmNxJzNZ" }, "execution_count": null, "outputs": [] } ], "metadata": { "colab": { "provenance": [], "gpuType": "T4" }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" }, "accelerator": "GPU" }, "nbformat": 4, "nbformat_minor": 0 }