Upload 12 files

Browse files

Files changed (13) hide show

.gitattributes +1 -0
LICENSE +395 -0
README.md +96 -3
api.py +63 -0
app.py +179 -0
environment.yml +127 -0
get_phone_mapped_python.py +75 -0
inference.py +153 -0
license.pdf +3 -0
multilingualcharmap.json +1 -0
requirements.txt +10 -0
start.sh +6 -0
text_preprocess_for_inference.py +949 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+license.pdf filter=lfs diff=lfs merge=lfs -text

LICENSE ADDED Viewed

	@@ -0,0 +1,395 @@

+Attribution 4.0 International
+=======================================================================
+Creative Commons Corporation ("Creative Commons") is not a law firm and
+does not provide legal services or legal advice. Distribution of
+Creative Commons public licenses does not create a lawyer-client or
+other relationship. Creative Commons makes its licenses and related
+information available on an "as-is" basis. Creative Commons gives no
+warranties regarding its licenses, any material licensed under their
+terms and conditions, or any related information. Creative Commons
+disclaims all liability for damages resulting from their use to the
+fullest extent possible.
+Using Creative Commons Public Licenses
+Creative Commons public licenses provide a standard set of terms and
+conditions that creators and other rights holders may use to share
+original works of authorship and other material subject to copyright
+and certain other rights specified in the public license below. The
+following considerations are for informational purposes only, are not
+exhaustive, and do not form part of our licenses.
+     Considerations for licensors: Our public licenses are
+     intended for use by those authorized to give the public
+     permission to use material in ways otherwise restricted by
+     copyright and certain other rights. Our licenses are
+     irrevocable. Licensors should read and understand the terms
+     and conditions of the license they choose before applying it.
+     Licensors should also secure all rights necessary before
+     applying our licenses so that the public can reuse the
+     material as expected. Licensors should clearly mark any
+     material not subject to the license. This includes other CC-
+     licensed material, or material used under an exception or
+     limitation to copyright. More considerations for licensors:
+	wiki.creativecommons.org/Considerations_for_licensors
+     Considerations for the public: By using one of our public
+     licenses, a licensor grants the public permission to use the
+     licensed material under specified terms and conditions. If
+     the licensor's permission is not necessary for any reason--for
+     example, because of any applicable exception or limitation to
+     copyright--then that use is not regulated by the license. Our
+     licenses grant only permissions under copyright and certain
+     other rights that a licensor has authority to grant. Use of
+     the licensed material may still be restricted for other
+     reasons, including because others have copyright or other
+     rights in the material. A licensor may make special requests,
+     such as asking that all changes be marked or described.
+     Although not required by our licenses, you are encouraged to
+     respect those requests where reasonable. More_considerations
+     for the public:
+	wiki.creativecommons.org/Considerations_for_licensees
+=======================================================================
+Creative Commons Attribution 4.0 International Public License
+By exercising the Licensed Rights (defined below), You accept and agree
+to be bound by the terms and conditions of this Creative Commons
+Attribution 4.0 International Public License ("Public License"). To the
+extent this Public License may be interpreted as a contract, You are
+granted the Licensed Rights in consideration of Your acceptance of
+these terms and conditions, and the Licensor grants You such rights in
+consideration of benefits the Licensor receives from making the
+Licensed Material available under these terms and conditions.
+Section 1 -- Definitions.
+  a. Adapted Material means material subject to Copyright and Similar
+     Rights that is derived from or based upon the Licensed Material
+     and in which the Licensed Material is translated, altered,
+     arranged, transformed, or otherwise modified in a manner requiring
+     permission under the Copyright and Similar Rights held by the
+     Licensor. For purposes of this Public License, where the Licensed
+     Material is a musical work, performance, or sound recording,
+     Adapted Material is always produced where the Licensed Material is
+     synched in timed relation with a moving image.
+  b. Adapter's License means the license You apply to Your Copyright
+     and Similar Rights in Your contributions to Adapted Material in
+     accordance with the terms and conditions of this Public License.
+  c. Copyright and Similar Rights means copyright and/or similar rights
+     closely related to copyright including, without limitation,
+     performance, broadcast, sound recording, and Sui Generis Database
+     Rights, without regard to how the rights are labeled or
+     categorized. For purposes of this Public License, the rights
+     specified in Section 2(b)(1)-(2) are not Copyright and Similar
+     Rights.
+  d. Effective Technological Measures means those measures that, in the
+     absence of proper authority, may not be circumvented under laws
+     fulfilling obligations under Article 11 of the WIPO Copyright
+     Treaty adopted on December 20, 1996, and/or similar international
+     agreements.
+  e. Exceptions and Limitations means fair use, fair dealing, and/or
+     any other exception or limitation to Copyright and Similar Rights
+     that applies to Your use of the Licensed Material.
+  f. Licensed Material means the artistic or literary work, database,
+     or other material to which the Licensor applied this Public
+     License.
+  g. Licensed Rights means the rights granted to You subject to the
+     terms and conditions of this Public License, which are limited to
+     all Copyright and Similar Rights that apply to Your use of the
+     Licensed Material and that the Licensor has authority to license.
+  h. Licensor means the individual(s) or entity(ies) granting rights
+     under this Public License.
+  i. Share means to provide material to the public by any means or
+     process that requires permission under the Licensed Rights, such
+     as reproduction, public display, public performance, distribution,
+     dissemination, communication, or importation, and to make material
+     available to the public including in ways that members of the
+     public may access the material from a place and at a time
+     individually chosen by them.
+  j. Sui Generis Database Rights means rights other than copyright
+     resulting from Directive 96/9/EC of the European Parliament and of
+     the Council of 11 March 1996 on the legal protection of databases,
+     as amended and/or succeeded, as well as other essentially
+     equivalent rights anywhere in the world.
+  k. You means the individual or entity exercising the Licensed Rights
+     under this Public License. Your has a corresponding meaning.
+Section 2 -- Scope.
+  a. License grant.
+       1. Subject to the terms and conditions of this Public License,
+          the Licensor hereby grants You a worldwide, royalty-free,
+          non-sublicensable, non-exclusive, irrevocable license to
+          exercise the Licensed Rights in the Licensed Material to:
+            a. reproduce and Share the Licensed Material, in whole or
+               in part; and
+            b. produce, reproduce, and Share Adapted Material.
+       2. Exceptions and Limitations. For the avoidance of doubt, where
+          Exceptions and Limitations apply to Your use, this Public
+          License does not apply, and You do not need to comply with
+          its terms and conditions.
+       3. Term. The term of this Public License is specified in Section
+          6(a).
+       4. Media and formats; technical modifications allowed. The
+          Licensor authorizes You to exercise the Licensed Rights in
+          all media and formats whether now known or hereafter created,
+          and to make technical modifications necessary to do so. The
+          Licensor waives and/or agrees not to assert any right or
+          authority to forbid You from making technical modifications
+          necessary to exercise the Licensed Rights, including
+          technical modifications necessary to circumvent Effective
+          Technological Measures. For purposes of this Public License,
+          simply making modifications authorized by this Section 2(a)
+          (4) never produces Adapted Material.
+       5. Downstream recipients.
+            a. Offer from the Licensor -- Licensed Material. Every
+               recipient of the Licensed Material automatically
+               receives an offer from the Licensor to exercise the
+               Licensed Rights under the terms and conditions of this
+               Public License.
+            b. No downstream restrictions. You may not offer or impose
+               any additional or different terms or conditions on, or
+               apply any Effective Technological Measures to, the
+               Licensed Material if doing so restricts exercise of the
+               Licensed Rights by any recipient of the Licensed
+               Material.
+       6. No endorsement. Nothing in this Public License constitutes or
+          may be construed as permission to assert or imply that You
+          are, or that Your use of the Licensed Material is, connected
+          with, or sponsored, endorsed, or granted official status by,
+          the Licensor or others designated to receive attribution as
+          provided in Section 3(a)(1)(A)(i).
+  b. Other rights.
+       1. Moral rights, such as the right of integrity, are not
+          licensed under this Public License, nor are publicity,
+          privacy, and/or other similar personality rights; however, to
+          the extent possible, the Licensor waives and/or agrees not to
+          assert any such rights held by the Licensor to the limited
+          extent necessary to allow You to exercise the Licensed
+          Rights, but not otherwise.
+       2. Patent and trademark rights are not licensed under this
+          Public License.
+       3. To the extent possible, the Licensor waives any right to
+          collect royalties from You for the exercise of the Licensed
+          Rights, whether directly or through a collecting society
+          under any voluntary or waivable statutory or compulsory
+          licensing scheme. In all other cases the Licensor expressly
+          reserves any right to collect such royalties.
+Section 3 -- License Conditions.
+Your exercise of the Licensed Rights is expressly made subject to the
+following conditions.
+  a. Attribution.
+       1. If You Share the Licensed Material (including in modified
+          form), You must:
+            a. retain the following if it is supplied by the Licensor
+               with the Licensed Material:
+                 i. identification of the creator(s) of the Licensed
+                    Material and any others designated to receive
+                    attribution, in any reasonable manner requested by
+                    the Licensor (including by pseudonym if
+                    designated);
+                ii. a copyright notice;
+               iii. a notice that refers to this Public License;
+                iv. a notice that refers to the disclaimer of
+                    warranties;
+                 v. a URI or hyperlink to the Licensed Material to the
+                    extent reasonably practicable;
+            b. indicate if You modified the Licensed Material and
+               retain an indication of any previous modifications; and
+            c. indicate the Licensed Material is licensed under this
+               Public License, and include the text of, or the URI or
+               hyperlink to, this Public License.
+       2. You may satisfy the conditions in Section 3(a)(1) in any
+          reasonable manner based on the medium, means, and context in
+          which You Share the Licensed Material. For example, it may be
+          reasonable to satisfy the conditions by providing a URI or
+          hyperlink to a resource that includes the required
+          information.
+       3. If requested by the Licensor, You must remove any of the
+          information required by Section 3(a)(1)(A) to the extent
+          reasonably practicable.
+       4. If You Share Adapted Material You produce, the Adapter's
+          License You apply must not prevent recipients of the Adapted
+          Material from complying with this Public License.
+Section 4 -- Sui Generis Database Rights.
+Where the Licensed Rights include Sui Generis Database Rights that
+apply to Your use of the Licensed Material:
+  a. for the avoidance of doubt, Section 2(a)(1) grants You the right
+     to extract, reuse, reproduce, and Share all or a substantial
+     portion of the contents of the database;
+  b. if You include all or a substantial portion of the database
+     contents in a database in which You have Sui Generis Database
+     Rights, then the database in which You have Sui Generis Database
+     Rights (but not its individual contents) is Adapted Material; and
+  c. You must comply with the conditions in Section 3(a) if You Share
+     all or a substantial portion of the contents of the database.
+For the avoidance of doubt, this Section 4 supplements and does not
+replace Your obligations under this Public License where the Licensed
+Rights include other Copyright and Similar Rights.
+Section 5 -- Disclaimer of Warranties and Limitation of Liability.
+  a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
+     EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
+     AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
+     ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
+     IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
+     WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
+     PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
+     ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
+     KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
+     ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
+  b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
+     TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
+     NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
+     INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
+     COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
+     USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
+     ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
+     DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
+     IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
+  c. The disclaimer of warranties and limitation of liability provided
+     above shall be interpreted in a manner that, to the extent
+     possible, most closely approximates an absolute disclaimer and
+     waiver of all liability.
+Section 6 -- Term and Termination.
+  a. This Public License applies for the term of the Copyright and
+     Similar Rights licensed here. However, if You fail to comply with
+     this Public License, then Your rights under this Public License
+     terminate automatically.
+  b. Where Your right to use the Licensed Material has terminated under
+     Section 6(a), it reinstates:
+       1. automatically as of the date the violation is cured, provided
+          it is cured within 30 days of Your discovery of the
+          violation; or
+       2. upon express reinstatement by the Licensor.
+     For the avoidance of doubt, this Section 6(b) does not affect any
+     right the Licensor may have to seek remedies for Your violations
+     of this Public License.
+  c. For the avoidance of doubt, the Licensor may also offer the
+     Licensed Material under separate terms or conditions or stop
+     distributing the Licensed Material at any time; however, doing so
+     will not terminate this Public License.
+  d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
+     License.
+Section 7 -- Other Terms and Conditions.
+  a. The Licensor shall not be bound by any additional or different
+     terms or conditions communicated by You unless expressly agreed.
+  b. Any arrangements, understandings, or agreements regarding the
+     Licensed Material not stated herein are separate from and
+     independent of the terms and conditions of this Public License.
+Section 8 -- Interpretation.
+  a. For the avoidance of doubt, this Public License does not, and
+     shall not be interpreted to, reduce, limit, restrict, or impose
+     conditions on any use of the Licensed Material that could lawfully
+     be made without permission under this Public License.
+  b. To the extent possible, if any provision of this Public License is
+     deemed unenforceable, it shall be automatically reformed to the
+     minimum extent necessary to make it enforceable. If the provision
+     cannot be reformed, it shall be severed from this Public License
+     without affecting the enforceability of the remaining terms and
+     conditions.
+  c. No term or condition of this Public License will be waived and no
+     failure to comply consented to unless expressly agreed to by the
+     Licensor.
+  d. Nothing in this Public License constitutes or may be interpreted
+     as a limitation upon, or waiver of, any privileges and immunities
+     that apply to the Licensor or You, including from the legal
+     processes of any jurisdiction or authority.
+=======================================================================
+Creative Commons is not a party to its public
+licenses. Notwithstanding, Creative Commons may elect to apply one of
+its public licenses to material it publishes and in those instances
+will be considered the “Licensor.” The text of the Creative Commons
+public licenses is dedicated to the public domain under the CC0 Public
+Domain Dedication. Except for the limited purpose of indicating that
+material is shared under a Creative Commons public license or as
+otherwise permitted by the Creative Commons policies published at
+creativecommons.org/policies, Creative Commons does not authorize the
+use of the trademark "Creative Commons" or any other trademark or logo
+of Creative Commons without its prior written consent including,
+without limitation, in connection with any unauthorized modifications
+to any of its public licenses or any other arrangements,
+understandings, or agreements concerning use of licensed material. For
+the avoidance of doubt, this paragraph does not form part of the
+public licenses.
+Creative Commons may be contacted at creativecommons.org.

README.md CHANGED Viewed

@@ -1,3 +1,96 @@
----
-license: cc-by-4.0
----

+# Latest Fastspeech2 Models using FLAT Start
+This repository branch `(New-Models)` contains new and high quality Fastspeech2 Models for Indian languages implemented using the Flat Start for speech synthesis. The models are capable of generating mel-spectrograms from text inputs and can be used to synthesize speech.
+**NOTE: The main branch became large in size and underwent a few changes in the inference and preprocessing scripts, necessitating the creation of a separate branch. Training information and the script will be shared after further code optimization and footprint reduction.**
+Clone this branch using the command:
+```
+git clone -b New-Models --single-branch https://github.com/smtiitm/Fastspeech2_HS.git
+```
+The repository is large. The new models are in the `<language>_latest` folders (for example, `hindi_latest`).
+## Model Files
+The model for each language includes the following files:
+- `config.yaml`: Configuration file for the Fastspeech2 Model.
+- `energy_stats.npz`: Energy statistics for normalization during synthesis.
+- `feats_stats.npz`: Features statistics for normalization during synthesis.
+- `feats_type`: Features type information.
+- `pitch_stats.npz`: Pitch statistics for normalization during synthesis.
+- `model.pth`: Pre-trained Fastspeech2 model weights.
+## Installation
+1. Install [Miniconda](https://docs.conda.io/projects/miniconda/en/latest/) first. Create a conda environment using the provided `environment.yml` file:
+```shell
+conda env create -f environment.yml
+```
+2. Activate the conda environment (the environment name is set inside the `environment.yml` file):
+```shell
+conda activate tts-hs-hifigan
+```
+3.  Install PyTorch separately (you can install the specific version based on your requirements):
+```shell
+conda install pytorch cudatoolkit
+pip install torchaudio
+```
+## Vocoder
+For generating WAV files from mel-spectrograms, you can use a vocoder of your choice. One popular option is the [HIFIGAN](https://github.com/jik876/hifi-gan) vocoder (Clone this repo and put it in the current working directory). Please refer to the documentation of the vocoder you choose for installation and usage instructions.
+(**We have used the HIFIGAN V1 vocoder and have provided vocoders for a few languages in the Vocoder folder. If needed, make sure to adjust the path in the inference file.**)
+## Usage
+The directory paths are relative. (If needed, edit the **text_preprocess_for_inference.py** and **inference.py** files and update the folder/file paths wherever required.)
+**Please give the language and gender in lowercase and put the sample text in quotes. Adjust the output speed using the alpha parameter (higher for slower speech and vice versa). The output argument is optional; the provided name will be used for the output file.**
+Use the inference file to synthesize speech from text inputs:
+```shell
+python inference.py --sample_text "Your input text here" --language <language> --gender <gender> --alpha <alpha> --output_file <file_name.wav OR path/to/file_name.wav>
+```
+**Example:**
+```
+python inference.py --sample_text "श्रीलंका और पाकिस्तान में खेला जा रहा एशिया कप अब तक का सबसे विवादित टूर्नामेंट होता जा रहा है।" --language hindi_latest --gender male --alpha 1 --output_file male_hindi_output.wav
+```
+The file will be stored as `male_hindi_output.wav` in the current working directory. If the **--output_file** argument is not given, the output will be stored as `<language>_<gender>_output.wav` in the current working directory.
+**Pass `<language>_latest` as --language to use the latest models.**
+### Citation
+If you use this Fastspeech2 Model in your research or work, please consider citing:
+“
+COPYRIGHT
+2024, Speech Technology Consortium,
+Bhashini, MeiTY and by Hema A Murthy & S Umesh,
+DEPARTMENT OF COMPUTER SCIENCE AND ENGINEERING
+and
+ELECTRICAL ENGINEERING,
+IIT MADRAS. ALL RIGHTS RESERVED”
+Shield: [![CC BY 4.0][cc-by-shield]][cc-by]
+This work is licensed under a
+[Creative Commons Attribution 4.0 International License][cc-by].
+[![CC BY 4.0][cc-by-image]][cc-by]
+[cc-by]: http://creativecommons.org/licenses/by/4.0/
+[cc-by-image]: https://i.creativecommons.org/l/by/4.0/88x31.png
+[cc-by-shield]: https://img.shields.io/badge/License-CC%20BY%204.0-lightgrey.svg

api.py ADDED Viewed

	@@ -0,0 +1,63 @@

+# TTS IITM SPEECH LAB
+import requests
+import json
+import base64
+text = "सुप्रभात, आप कैसे हैं?" # hindi
+# text = "സുപ്രഭാതം, സുഖമാ?" # malayalam
+# text = "সুপ্ৰভাত, তুমি কেনে?" # manipuri
+# text = "सुप्रभात, तुम्ही कसे आहात?" # marathi
+# text = "ಶುಭೋದಯ, ನೀವು ಹೇಗಿದ್ದೀರಿ?" # kannada
+# text = "बसु म्विथ्बो, बरि दिबाबो?" # bodo male not working <---
+# text = "Good morning, how are you?" # english
+# text = "সুপ্ৰভাত, আপুনি কেমন আছে?" # assamese
+# text = "காலை வணக்கம், நீங்கள் எப்படி இருக்கின்றீர்கள்?" # tamil
+# text = "ସୁପ୍ରଭାତ, ଆପଣ କେମିତି ଅଛନ୍ତି?" # odia male not working <---
+# text = "सुप्रभात, आप कैसे छो?" # rajasthani
+# text = "శుభోదయం, మీరు ఎలా ఉన్నారు?" # telugu
+# text = "সুপ্রভাত, আপনি কেমন আছেন?" # bengali male not working <---
+# text = "સુપ્રભાત, તમે કેમ છો?" # gujarati
+lang = 'hindi'
+gender = 'female'
+url = "http://localhost:4005/tts"
+# url = 'http://projects.respark.iitm.ac.in:8009/tts' # proxy
+payload = json.dumps({
+"input": text,
+"gender": gender,
+"lang": lang,
+"alpha": 1,
+"segmentwise":"True"
+})
+headers = {'Content-Type': 'application/json'}
+response = requests.request("POST", url, headers=headers, data=payload).json()
+audio = response['audio']
+file_name = "tts.mp3"
+wav_file = open(file_name,'wb')
+decode_string = base64.b64decode(audio)
+wav_file.write(decode_string)
+wav_file.close()
+'''
+Supported languages
+Assamese
+Bengali
+Bodo
+English
+Gujarati
+Hindi
+Kannada
+Malayalam
+Manipuri
+Marathi
+Odia
+Punjabi
+Rajasthani
+Tamil
+Telugu
+Urdu
+'''

app.py ADDED Viewed

	@@ -0,0 +1,179 @@

+from flask import Flask, render_template, request, send_file, jsonify
+import requests
+import json
+import ssl
+import logging
+import sys
+import os
+import base64
+import io
+#replace the path with your hifigan path to import Generator from models.py
+sys.path.append("hifigan")
+# import argparse
+import torch
+from espnet2.bin.tts_inference import Text2Speech
+from models import Generator
+from scipy.io.wavfile import write
+from meldataset import MAX_WAV_VALUE
+from env import AttrDict
+import json
+import yaml
+from text_preprocess_for_inference import TTSDurAlignPreprocessor
+# import time
+logging.basicConfig(filename='access.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+SAMPLING_RATE = 22050
+if torch.cuda.is_available():
+    device = "cuda"
+else:
+    device = "cpu"
+preprocessor = TTSDurAlignPreprocessor()
+app = Flask(__name__)
+# app.config['SECRET_KEY'] = 'key'
+# socketio = SocketIO(app)
+# @socketio.on('new_user')
+# def handle_new_user(data):
+#     client_id = data['id']
+#     # print('\n'+f"New user connected with ID: {client_id}")
+#     logging.info('\n'+f"New user connected with ID: {client_id}")
+def load_hifigan_vocoder(language, gender, device):
+    # Load HiFi-GAN vocoder configuration file and generator model for the specified language and gender
+    vocoder_config = f"vocoder/{gender}/aryan/hifigan/config.json"
+    vocoder_generator = f"vocoder/{gender}/aryan/hifigan/generator"
+    # Read the contents of the vocoder configuration file
+    with open(vocoder_config, 'r') as f:
+        data = f.read()
+    json_config = json.loads(data)
+    h = AttrDict(json_config)
+    torch.manual_seed(h.seed)
+    # Move the generator model to the specified device (CPU or GPU)
+    device = torch.device(device)
+    generator = Generator(h).to(device)
+    state_dict_g = torch.load(vocoder_generator, device)
+    generator.load_state_dict(state_dict_g['generator'])
+    generator.eval()
+    generator.remove_weight_norm()
+    # Return the loaded and prepared HiFi-GAN generator model
+    return generator
+def load_fastspeech2_model(language, gender, device):
+    #updating the config.yaml fiel based on language and gender
+    with open(f"{language}/{gender}/model/config.yaml", "r") as file:
+     config = yaml.safe_load(file)
+    current_working_directory = os.getcwd()
+    feat="model/feats_stats.npz"
+    pitch="model/pitch_stats.npz"
+    energy="model/energy_stats.npz"
+    feat_path=os.path.join(current_working_directory,language,gender,feat)
+    pitch_path=os.path.join(current_working_directory,language,gender,pitch)
+    energy_path=os.path.join(current_working_directory,language,gender,energy)
+    config["normalize_conf"]["stats_file"]  = feat_path
+    config["pitch_normalize_conf"]["stats_file"]  = pitch_path
+    config["energy_normalize_conf"]["stats_file"]  = energy_path
+    with open(f"{language}/{gender}/model/config.yaml", "w") as file:
+        yaml.dump(config, file)
+    tts_model = f"{language}/{gender}/model/model.pth"
+    tts_config = f"{language}/{gender}/model/config.yaml"
+    return Text2Speech(train_config=tts_config, model_file=tts_model, device=device)
+def text_synthesis(language, gender, sample_text, vocoder, MAX_WAV_VALUE, device, alpha=1):
+    # Perform Text-to-Speech synthesis
+    with torch.no_grad():
+        # Load the FastSpeech2 model for the specified language and gender
+        model = load_fastspeech2_model(language, gender, device)
+        # Generate mel-spectrograms from the input text using the FastSpeech2 model
+        out = model(sample_text, decode_conf={"alpha": alpha})
+        print("TTS Done")
+        x = out["feat_gen_denorm"].T.unsqueeze(0) * 2.3262
+        x = x.to(device)
+        # Use the HiFi-GAN vocoder to convert mel-spectrograms to raw audio waveforms
+        y_g_hat = vocoder(x)
+        audio = y_g_hat.squeeze()
+        audio = audio * MAX_WAV_VALUE
+        audio = audio.cpu().numpy().astype('int16')
+        # Return the synthesized audio
+        return audio
+def setup_app():
+    genders = ['male','female']
+    # to make dummy calls in all languages available
+    languages = {'hindi': "नमस्ते",'malayalam': "ഹലോ",'manipuri': "হ্যালো",'marathi': "हॅलो",'kannada': "ಹಲೋ",'bodo': "हॅलो",'english': "Hello",'assamese': "হ্যালো",'tamil': "ஹலோ",'odia': "ହେଲୋ",'rajasthani': "हॅलो",'telugu': "హలో",'bengali': "হ্যালো",'gujarati': "હલો"}
+    vocoders = {}
+    for gender in genders:
+        vocoders[gender]={}
+        for language,text in languages.items():
+            # Load the HiFi-GAN vocoder with dynamic language and gender
+            vocoder = load_hifigan_vocoder(language, gender, device)
+            vocoders[gender][language] = vocoder
+            # dummy calls
+            print(f"making dummy calls for {language} - {gender}")
+            try:
+                out = text_synthesis(language, gender, text, vocoder, MAX_WAV_VALUE, device)
+            except:
+                message = f"cannot make dummy call for {gender} - {language} <==================="
+                print(message.upper())
+    print("Server Started...")
+    return vocoders
+vocoders = setup_app()
+@app.route('/', methods=['GET'])
+def main():
+    """Answer GET requests on the root URL with the fixed service identifier string."""
+    return "IITM_TTS_V2"
+@app.route('/tts', methods=['GET', 'POST'], strict_slashes=False)
+def tts():
+    try:
+        json_data = request.get_json()
+        text = json_data["input"]
+        if not isinstance(text,str):
+            input_type = type(text)
+            ret = jsonify(status='failure', reason=f"Unsupported input type {input_type}. Input text should be in string format.")
+        gender = json_data["gender"]
+        language = json_data["lang"].lower()
+        alpha = json_data["alpha"]
+        # Preprocess the sample text
+        preprocessed_text, phrases = preprocessor.preprocess(text, language, gender)
+        preprocessed_text = " ".join(preprocessed_text)
+        vocoder = vocoders[gender][language]
+        out = text_synthesis(language, gender, preprocessed_text, vocoder, MAX_WAV_VALUE, device, alpha=alpha)
+        # output_file = f"{language}_{gender}_output.wav"
+        # write(output_file, SAMPLING_RATE, out)
+        # audio_wav_bytes = base64.b64encode(open(output_file, "rb").read())
+        # avoid saving file on disk
+        output_stream = io.BytesIO()
+        write(output_stream, SAMPLING_RATE, out)
+        audio_wav_bytes = base64.b64encode(output_stream.getvalue())
+        ret = jsonify(status="success",audio=audio_wav_bytes.decode('utf-8'))
+    except Exception as err:
+        ret = jsonify(status="failure", reason=str(err))
+    return ret
+if __name__ == '__main__':
+    # ssl_context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
+    # ssl_context.load_cert_chain('./ssl2023/iitm2022.crt','./ssl2023/iitm2022.key')
+    app.run(host='0.0.0.0', port=4005, debug=True)

environment.yml ADDED Viewed

	@@ -0,0 +1,127 @@

+name: tts-hs-hifigan
+channels:
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - _openmp_mutex=5.1=1_gnu
+  - ca-certificates=2022.10.11=h06a4308_0
+  - certifi=2022.9.24=py37h06a4308_0
+  - ld_impl_linux-64=2.38=h1181459_1
+  - libffi=3.3=he6710b0_2
+  - libgcc-ng=11.2.0=h1234567_1
+  - libgomp=11.2.0=h1234567_1
+  - libstdcxx-ng=11.2.0=h1234567_1
+  - ncurses=6.3=h5eee18b_3
+  - openssl=1.1.1s=h7f8727e_0
+  - pip=22.2.2=py37h06a4308_0
+  - python=3.7.15=haa1d7c7_0
+  - readline=8.2=h5eee18b_0
+  - setuptools=65.5.0=py37h06a4308_0
+  - sqlite=3.39.3=h5082296_0
+  - tk=8.6.12=h1ccaba5_0
+  - wheel=0.37.1=pyhd3eb1b0_0
+  - xz=5.2.6=h5eee18b_0
+  - zlib=1.2.13=h5eee18b_0
+  - pip:
+    - aiosignal==1.3.1
+    - appdirs==1.4.4
+    - attrs==22.1.0
+    - audioread==3.0.0
+    - backcall==0.2.0
+    - cffi==1.15.1
+    - charset-normalizer==2.1.1
+    - ci-sdr==0.0.2
+    - click==8.0.4
+    - configargparse==1.5.3
+    - ctc-segmentation==1.7.4
+    - cycler==0.11.0
+    - cython==0.29.32
+    - decorator==5.1.1
+    - distance==0.1.3
+    - distlib==0.3.6
+    - docopt==0.6.2
+    - einops==0.6.0
+    - espnet==202209
+    - espnet-tts-frontend==0.0.3
+    - fast-bss-eval==0.1.3
+    - filelock==3.8.0
+    - flask==2.2.2
+    - fonttools==4.38.0
+    - frozenlist==1.3.3
+    - g2p-en==2.1.0
+    - grpcio==1.50.0
+    - gunicorn==20.1.0
+    - h5py==3.7.0
+    - humanfriendly==10.0
+    - idna==3.4
+    - importlib-metadata==4.13.0
+    - importlib-resources==5.10.0
+    - indic-num2words==1.0.1
+    - indic_unified_parser==1.0.6
+    - inflect==6.0.2
+    - ipython==7.34.0
+    - itsdangerous==2.1.2
+    - jaconv==0.3
+    - jamo==0.4.1
+    - jedi==0.18.2
+    - jinja2==3.1.2
+    - joblib==1.2.0
+    - jsonschema==4.17.0
+    - kaldiio==2.17.2
+    - kiwisolver==1.4.4
+    - librosa==0.9.2
+    - llvmlite==0.39.1
+    - markupsafe==2.1.1
+    - matplotlib==3.5.3
+    - matplotlib-inline==0.1.6
+    - msgpack==1.0.4
+    - nltk==3.7
+    - numba==0.56.4
+    - numpy==1.21.6
+    - packaging==21.3
+    - pandas==1.3.5
+    - parso==0.8.3
+    - pexpect==4.8.0
+    - pickleshare==0.7.5
+    - pillow==9.3.0
+    - pkgutil-resolve-name==1.3.10
+    - platformdirs==2.5.4
+    - pooch==1.6.0
+    - prompt-toolkit==3.0.36
+    - protobuf==3.20.1
+    - ptyprocess==0.7.0
+    - pycparser==2.21
+    - pydantic==1.10.2
+    - pydub==0.25.1
+    - pygments==2.14.0
+    - pyparsing==3.0.9
+    - pypinyin==0.44.0
+    - pyrsistent==0.19.2
+    - python-dateutil==2.8.2
+    - pytorch-wpe==0.0.1
+    - pytz==2022.6
+    - pyworld==0.3.2
+    - pyyaml==6.0
+    - ray==2.1.0
+    - regex==2022.10.31
+    - requests==2.28.1
+    - resampy==0.4.2
+    - scikit-learn==1.0.2
+    - scipy==1.7.3
+    - sentencepiece==0.1.97
+    - six==1.16.0
+    - soundfile==0.11.0
+    - threadpoolctl==3.1.0
+    - torch-complex==0.4.3
+    - tqdm==4.64.1
+    - traitlets==5.8.0
+    - typeguard==2.13.3
+    - typing-extensions==4.4.0
+    - unidecode==1.3.6
+    - urllib3==1.26.12
+    - virtualenv==20.16.7
+    - wcwidth==0.2.5
+    - webvtt-py==0.4.6
+    - werkzeug==2.2.2
+    - zipp==3.10.0
+prefix: /speech/Apps/Flask_app_env/conda_dir/envs/tts-hs-hifigan

get_phone_mapped_python.py ADDED Viewed

	@@ -0,0 +1,75 @@

+class TextReplacer:
+    def __init__(self):
+        self.replacements = {
+        'aa':'A',
+        'ae':'ऍ',
+        'ag':'ऽ',
+        'ai':'ऐ',
+        'au':'औ',
+        'axx':'अ',
+        'ax':'ऑ',
+        'bh':'B',
+        'ch':'C',
+        'dh':'ध',
+        'dxhq':'T',
+        'dxh':'ढ',
+        'dxq':'D',
+        'dx':'ड',
+        'ee':'E',
+        'ei':'ऐ',
+        'eu':'உ',
+        'gh':'घ',
+        'gq':'G',
+        'hq':'H',
+        'ii':'I',
+        'jh':'J',
+        'khq':'K',
+        'kh':'ख',
+        'kq':'क',
+        'ln':'ൾ',
+        'lw':'ൽ',
+        'lx':'ള',
+        'mq':'M',
+        'nd':'ऩ',
+        'ng':'ङ',
+        'nj':'ञ',
+        'nk':'Y',
+        'nn':'N',
+        'nw':'ൺ',
+        'nx':'ण',
+        'oo':'O',
+        'ou':'औ',
+        'ph':'P',
+        'rqw':'ॠ',
+        'rq':'R',
+        'rw':'ർ',
+        'rx':'ऱ',
+        'sh':'श',
+        'sx':'ष',
+        'txh':'ठ',
+        'th':'थ',
+        'tx':'ट',
+        'uu':'U',
+        'wv':'W',
+        'zh':'Z'
+    # ... Add more replacements as needed
+        }
+    def apply_replacements(self, text):
+        for key, value in self.replacements.items():
+            # print('KEY AND VALUE OF PARSED OUTPUT',key, value)
+            text = text.replace(key, value)
+        temp=""
+        for i in range(len(text)):
+            if text[i]!=" ":
+                temp=temp+text[i]
+        return temp
+    def apply_replacements_by_phonems(self, text):
+        ans=self.replacements[text]
+        # for key, value in self.replacements.items():
+        #     # print('KEY AND VALUE OF PARSED OUTPUT',key, value)
+        #     text = text.replace(key, value)
+        return ans

inference.py ADDED Viewed

	@@ -0,0 +1,153 @@

+import sys
+import os
+#replace the path with your hifigan path to import Generator from models.py
+sys.path.append("hifigan")
+import argparse
+import torch
+from espnet2.bin.tts_inference import Text2Speech
+from models import Generator
+from scipy.io.wavfile import write
+from meldataset import MAX_WAV_VALUE
+from env import AttrDict
+import json
+import yaml
+import concurrent.futures
+import numpy as np
+import time
+from text_preprocess_for_inference import TTSDurAlignPreprocessor, CharTextPreprocessor, TTSPreprocessor
+SAMPLING_RATE = 48000
+def load_hifigan_vocoder(language, gender, device):
+    # Load HiFi-GAN vocoder configuration file and generator model for the specified language and gender
+    vocoder_config = f"vocoder/{gender}/{language}/config.json"
+    vocoder_generator = f"vocoder/{gender}/{language}/generator"
+    # Read the contents of the vocoder configuration file
+    with open(vocoder_config, 'r') as f:
+        data = f.read()
+    json_config = json.loads(data)
+    h = AttrDict(json_config)
+    torch.manual_seed(h.seed)
+    # Move the generator model to the specified device (CPU or GPU)
+    device = torch.device(device)
+    generator = Generator(h).to(device)
+    state_dict_g = torch.load(vocoder_generator, device)
+    generator.load_state_dict(state_dict_g['generator'])
+    generator.eval()
+    generator.remove_weight_norm()
+    # Return the loaded and prepared HiFi-GAN generator model
+    return generator
+def load_fastspeech2_model(language, gender, device):
+    #updating the config.yaml fiel based on language and gender
+    with open(f"{language}/{gender}/model/config.yaml", "r") as file:
+     config = yaml.safe_load(file)
+    current_working_directory = os.getcwd()
+    feat="model/feats_stats.npz"
+    pitch="model/pitch_stats.npz"
+    energy="model/energy_stats.npz"
+    feat_path=os.path.join(current_working_directory,language,gender,feat)
+    pitch_path=os.path.join(current_working_directory,language,gender,pitch)
+    energy_path=os.path.join(current_working_directory,language,gender,energy)
+    config["normalize_conf"]["stats_file"]  = feat_path
+    config["pitch_normalize_conf"]["stats_file"]  = pitch_path
+    config["energy_normalize_conf"]["stats_file"]  = energy_path
+    with open(f"{language}/{gender}/model/config.yaml", "w") as file:
+        yaml.dump(config, file)
+    tts_model = f"{language}/{gender}/model/model.pth"
+    tts_config = f"{language}/{gender}/model/config.yaml"
+    return Text2Speech(train_config=tts_config, model_file=tts_model, device=device)
+def text_synthesis(language, gender, sample_text, vocoder, MAX_WAV_VALUE, device, alpha):
+    # Perform Text-to-Speech synthesis
+    with torch.no_grad():
+        # Load the FastSpeech2 model for the specified language and gender
+        model = load_fastspeech2_model(language, gender, device)
+        # Generate mel-spectrograms from the input text using the FastSpeech2 model
+        out = model(sample_text, decode_conf={"alpha": alpha})
+        print("TTS Done")
+        x = out["feat_gen_denorm"].T.unsqueeze(0) * 2.3262
+        x = x.to(device)
+        # Use the HiFi-GAN vocoder to convert mel-spectrograms to raw audio waveforms
+        y_g_hat = vocoder(x)
+        audio = y_g_hat.squeeze()
+        audio = audio * MAX_WAV_VALUE
+        audio = audio.cpu().numpy().astype('int16')
+        # Return the synthesized audio
+        return audio
+def split_into_chunks(text, words_per_chunk=100):
+    words = text.split()
+    chunks = [words[i:i + words_per_chunk] for i in range(0, len(words), words_per_chunk)]
+    return [' '.join(chunk) for chunk in chunks]
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Text-to-Speech Inference")
+    parser.add_argument("--language", type=str, required=True, help="Language (e.g., hindi)")
+    parser.add_argument("--gender", type=str, required=True, help="Gender (e.g., female)")
+    parser.add_argument("--sample_text", type=str, required=True, help="Text to be synthesized")
+    parser.add_argument("--output_file", type=str, default="", help="Output WAV file path")
+    parser.add_argument("--alpha", type=float, default=1, help="Alpha Parameter for speed control (e.g. 1.1 (slow) or 0.8 (fast))")
+    args = parser.parse_args()
+    phone_dictionary = {}
+    # Set the device
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    # Load the HiFi-GAN vocoder with dynamic language and gender
+    vocoder = load_hifigan_vocoder(args.language, args.gender, device)
+    if args.language == "urdu" or args.language == "punjabi":
+            preprocessor = CharTextPreprocessor()
+    elif args.language == "english":
+            preprocessor = TTSPreprocessor()
+    else:
+            preprocessor = TTSDurAlignPreprocessor()
+    start_time = time.time()
+    audio_arr = []
+    result = split_into_chunks(args.sample_text)
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        # Process each text sample concurrently
+        for sample_text in result:
+            # Preprocess the text and obtain a list of phrases
+            preprocessed_text, phrases = preprocessor.preprocess(sample_text, args.language, args.gender, phone_dictionary)
+            preprocessed_text = " ".join(preprocessed_text)
+            # Generate audio from the preprocessed text using a text-to-speech synthesis function
+            audio = text_synthesis(args.language, args.gender, preprocessed_text, vocoder, MAX_WAV_VALUE, device, args.alpha)
+            # Set the output file name
+            if args.output_file:
+                output_file = f"{args.output_file}"
+            else:
+                output_file = f"{args.language}_{args.gender}_output.wav"
+            # Append the generated audio to the list
+            audio_arr.append(audio)
+    result_array = np.concatenate(audio_arr, axis=0)
+    write(output_file, SAMPLING_RATE, result_array)

license.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e45a02755dcbb6015e3ff0a8e6de54a929ea5a85233e49773cb8c0fd6177b6ae
+size 138348

multilingualcharmap.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"assamese_male": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", "J": "j", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "l", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "D", "T": "\u0922", "f": "P", "\u0930": "r", "M": "M", "q": "q", "H": "h", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "l", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "assamese_female": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", "J": "j", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "l", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "D", "T": "\u0922", "f": "P", "\u0930": "r", "M": "M", "q": "q", "H": "h", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "l", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "bengali_male": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", 
"\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "l", "w": "b", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "D", "T": "\u0922", "f": "P", "\u0930": "r", "M": "M", "q": "q", "H": "h", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "l", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "bengali_female": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "l", "w": "b", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "D", "T": "\u0922", "f": "P", "\u0930": "r", "M": "M", "q": "q", "H": "h", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "l", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "bodo_female": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", 
"\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "y", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0921", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "l", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "D", "T": "\u0921", "f": "P", "\u0930": "r", "M": "n", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "l", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "gujarati_male": {"a": "a", "\u0911": "\u0911", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "\u090d", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "n", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "\u0921", "T": "\u0922", "f": "P", "\u0930": "r", "M": "M", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "gujarati_female": {"a": "a", "\u0911": "\u0911", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "\u090d", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", 
"\u0918": "\u0918", "\u0919": "n", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "\u0921", "T": "\u0922", "f": "P", "\u0930": "r", "M": "M", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "hindi_male": {"a": "a", "\u0911": "\u0911", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "\u090d", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "\u0915", "K": "K", "G": "G", "z": "z", "D": "D", "T": "T", "f": "f", "\u0930": "r", "M": "M", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "hindi_female": {"a": "a", "\u0911": "\u0911", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "\u090d", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", 
"J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "\u0915", "K": "K", "G": "G", "z": "z", "D": "D", "T": "T", "f": "f", "\u0930": "r", "M": "M", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "kannada_male": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "e", "E": "E", "\u0910": "\u0910", "o": "o", "O": "O", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "n", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "\u0921", "T": "\u0922", "f": "P", "\u0930": "r", "M": "n", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "kannada_female": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "e", "E": "E", "\u0910": "\u0910", "o": "o", "O": "O", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "n", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", 
"\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "\u0921", "T": "\u0922", "f": "P", "\u0930": "r", "M": "n", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "malayalam_male": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "e", "E": "E", "\u0910": "\u0910", "o": "o", "O": "O", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", "J": "j", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "\u0921", "T": "\u0922", "f": "P", "\u0930": "\u0930", "M": "n", "q": "q", "H": "H", "Z": "Z", "\u0928": "n", "N": "N", "\u0d7e": "\u0d7e", "\u0d7d": "\u0d7d", "\u0d7a": "\u0d7a", "\u0d7c": "\u0d7c", "\u0960": "R"}, "malayalam_female": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "e", "E": "E", "\u0910": "\u0910", "o": "o", "O": "O", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", "J": "j", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", 
"\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "\u0921", "T": "\u0922", "f": "P", "\u0930": "\u0930", "M": "n", "q": "q", "H": "H", "Z": "Z", "\u0928": "n", "N": "N", "\u0d7e": "\u0d7e", "\u0d7d": "\u0d7d", "\u0d7a": "\u0d7a", "\u0d7c": "\u0d7c", "\u0960": "R"}, "manipuri_male": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "r", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "g", "\u0919": "\u0919", "c": "c", "C": "c", "j": "j", "J": "j", "\u091e": "y", "\u091f": "\u091f", "\u0920": "\u091f", "\u0921": "\u091f", "\u0922": "\u091f", "\u0923": "n", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "d", "n": "n", "p": "p", "P": "P", "b": "b", "B": "b", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "l", "w": "w", "\u0936": "\u0936", "\u0937": "\u0936", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "\u091f", "T": "\u091f", "f": "P", "\u0930": "r", "M": "n", "q": "q", "H": "h", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "l", "\u0d7d": "l", "\u0d7a": "n", "\u0d7c": "r", "\u0960": "r"}, "manipuri_female": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "r", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "g", "\u0919": "\u0919", "c": "c", "C": "c", "j": "j", "J": "j", "\u091e": "y", "\u091f": "\u091f", "\u0920": "\u091f", "\u0921": "\u091f", "\u0922": "\u091f", "\u0923": "n", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "d", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", 
"y": "y", "r": "r", "l": "l", "\u0d33": "l", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "\u091f", "T": "\u091f", "f": "P", "\u0930": "r", "M": "n", "q": "q", "H": "h", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "l", "\u0d7d": "l", "\u0d7a": "n", "\u0d7c": "r", "\u0960": "r"}, "marathi_male": {"a": "a", "\u0911": "\u0911", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "\u090d", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "n", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "\u0921", "T": "\u0922", "f": "f", "\u0930": "\u0930", "M": "M", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "marathi_female": {"a": "a", "\u0911": "\u0911", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "\u090d", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", 
"\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "z", "D": "\u0921", "T": "\u0922", "f": "f", "\u0930": "\u0930", "M": "M", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "odia_male": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "D", "T": "T", "f": "P", "\u0930": "r", "M": "M", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "odia_female": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "E", "o": "o", "O": "o", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": 
"\u0916", "G": "g", "z": "j", "D": "D", "T": "T", "f": "P", "\u0930": "r", "M": "M", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "rajasthani_male": {"a": "a", "\u0911": "\u0911", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "n", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "z", "D": "D", "T": "T", "f": "f", "\u0930": "r", "M": "M", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "rajasthani_female": {"a": "a", "\u0911": "\u0911", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "n", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "y", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "z", "D": "D", "T": "\u0922", "f": "f", "\u0930": "r", "M": "n", 
"q": "q", "H": "h", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "tamil_male": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "\u0b89", "U": "U", "R": "r", "e": "e", "E": "E", "\u0910": "\u0910", "o": "o", "O": "O", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "k", "g": "g", "\u0918": "g", "\u0919": "\u0919", "c": "c", "C": "c", "j": "j", "J": "j", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u091f", "\u0921": "\u0921", "\u0922": "\u0921", "\u0923": "\u0923", "t": "t", "\u0925": "t", "d": "d", "\u0927": "d", "n": "n", "p": "p", "P": "p", "b": "b", "B": "b", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0937", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "k", "G": "g", "z": "j", "D": "\u0921", "T": "\u0921", "f": "f", "\u0930": "\u0930", "M": "n", "q": "n", "H": "h", "Z": "Z", "\u0928": "\u0928", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "r"}, "tamil_female": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "\u0b89", "U": "U", "R": "r", "e": "e", "E": "E", "\u0910": "\u0910", "o": "o", "O": "O", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "k", "g": "g", "\u0918": "g", "\u0919": "\u0919", "c": "c", "C": "c", "j": "j", "J": "j", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u091f", "\u0921": "\u0921", "\u0922": "\u0921", "\u0923": "\u0923", "t": "t", "\u0925": "t", "d": "d", "\u0927": "d", "n": "n", "p": "p", "P": "p", "b": "b", "B": "b", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0937", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "k", "G": "g", "z": "j", "D": "\u0921", "T": "\u0921", "f": "f", "\u0930": "\u0930", "M": "n", "q": "n", "H": "h", "Z": "Z", "\u0928": "\u0928", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", 
"\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "r"}, "telugu_male": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "e", "E": "E", "\u0910": "\u0910", "o": "o", "O": "O", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "n", "c": "c", "C": "C", "j": "j", "J": "j", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "\u0921", "T": "\u0922", "f": "P", "\u0930": "\u0930", "M": "n", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "telugu_female": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "e", "E": "E", "\u0910": "\u0910", "o": "o", "O": "O", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "n", "c": "c", "C": "C", "j": "j", "J": "j", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "\u0921", "T": "\u0922", "f": "P", "\u0930": "\u0930", "M": "n", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}}

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+# use this requirements file if you are using pip instead of conda
+# create the tts-hs-hifigan virtual environment using "python3 -m venv tts-hs-hifigan" > "source tts-hs-hifigan/bin/activate" > "pip install -r requirements.txt"
+flask
+requests
+torch
+espnet
+matplotlib
+pandas
+indic-num2words
+gunicorn

start.sh ADDED Viewed

	@@ -0,0 +1,6 @@

+source tts-hs-hifigan/bin/activate
+CUDA_VISIBLE_DEVICES="" gunicorn -w 2 -b 0.0.0.0:4005 app:app --timeout 600 #--daemon # to run in cpu
+# CUDA_VISIBLE_DEVICES=1 gunicorn -w 2 -b 0.0.0.0:4005 app:app --timeout 600 --daemon # to run in specific gpu
+# CUDA_VISIBLE_DEVICES="" > to make all the GPUs available invisible

text_preprocess_for_inference.py ADDED Viewed

	@@ -0,0 +1,949 @@

+'''
+TTS Preprocessing
+Developed by Arun Kumar A(CS20S013) - November 2022
+Code Changes by Utkarsh - 2023
+'''
+import os
+import re
+import json
+import pandas as pd
+import string
+from collections import defaultdict
+import time
+import subprocess
+import shutil
+from multiprocessing import Process
+import traceback
+#imports of dependencies from environment.yml
+from num_to_words import num_to_word
+from g2p_en import G2p
+def add_to_dictionary(dict_to_add, dict_file):
+    append_string = ""
+    for key, value in dict_to_add.items():
+        append_string += (str(key) + " " + str(value) + "\n")
+    if os.path.isfile(dict_file):
+        # make a copy of the dictionary
+        source_dir = os.path.dirname(dict_file)
+        dict_file_name = os.path.basename(dict_file)
+        temp_file_name = "." + dict_file_name + ".temp"
+        temp_dict_file = os.path.join(source_dir, temp_file_name)
+        shutil.copy(dict_file, temp_dict_file)
+        # append the new words in the dictionary to the temp file
+        with open(temp_dict_file, "a") as f:
+            f.write(append_string)
+        # check if the write is successful and then replace the temp file as the dict file
+        try:
+            df_orig = pd.read_csv(dict_file, delimiter=" ", header=None, dtype=str)
+            df_temp = pd.read_csv(temp_dict_file, delimiter=" ", header=None, dtype=str)
+            if len(df_temp) > len(df_orig):
+                os.rename(temp_dict_file, dict_file)
+                print(f"{len(dict_to_add)} new words appended to Dictionary: {dict_file}")
+        except:
+            print(traceback.format_exc())
+    else:
+        # create a new dictionary
+        with open(dict_file, "a") as f:
+            f.write(append_string)
+        print(f"New Dictionary: {dict_file} created with {len(dict_to_add)} words")
+class TextCleaner:
+    def __init__(self):
+        # this is a static set of cleaning rules to be applied
+        self.cleaning_rules = {
+            " +" : " ",
+            "^ +" : "",
+            " +$" : "",
+            "#" : "",
+            "[.,;।!](\r\n)*" : "# ",
+            "[.,;।!](\n)*" : "# ",
+            "(\r\n)+" : "# ",
+            "(\n)+" : "# ",
+            "(\r)+" : "# ",
+            """[?;:)(!|&’‘,।\."]""": "",
+            "[/']" : "",
+            "[-–]" : " ",
+        }
+    def clean(self, text):
+        for key, replacement in self.cleaning_rules.items():
+            text = re.sub(key, replacement, text)
+        return text
+    def clean_list(self, text):
+        # input is supposed to be a list of strings
+        output_text = []
+        for line in text:
+            line = line.strip()
+            for key, replacement in self.cleaning_rules.items():
+                line = re.sub(key, replacement, line)
+            output_text.append(line)
+        return output_text
+class Phonifier:
+    def __init__(self, dict_location=None):
+        if dict_location is None:
+            dict_location = "phone_dict"
+        self.dict_location = dict_location
+        # self.phone_dictionary = {}
+        # # load dictionary for all the available languages
+        # for dict_file in os.listdir(dict_location):
+        #     try:
+        #         if dict_file.startswith("."):
+        #             # ignore hidden files
+        #             continue
+        #         language = dict_file
+        #         dict_file_path = os.path.join(dict_location, dict_file)
+        #         df = pd.read_csv(dict_file_path, delimiter=" ", header=None, dtype=str)
+        #         self.phone_dictionary[language] = df.set_index(0).to_dict('dict')[1]
+        #     except Exception as e:
+        #         print(traceback.format_exc())
+        # print("Phone dictionary loaded for the following languages:", list(self.phone_dictionary.keys()))
+        self.g2p = G2p()
+        print('Loading G2P model... Done!')
+        # Mapping between the cmu phones and the iitm cls
+        self.cmu_2_cls_map = {
+            "AA" : "aa",
+            "AA0" : "aa",
+            "AA1" : "aa",
+            "AA2" : "aa",
+            "AE" : "axx",
+            "AE0" : "axx",
+            "AE1" : "axx",
+            "AE2" : "axx",
+            "AH" : "a",
+            "AH0" : "a",
+            "AH1" : "a",
+            "AH2" : "a",
+            "AO" : "ax",
+            "AO0" : "ax",
+            "AO1" : "ax",
+            "AO2" : "ax",
+            "AW" : "ou",
+            "AW0" : "ou",
+            "AW1" : "ou",
+            "AW2" : "ou",
+            "AX" : "a",
+            "AY" : "ei",
+            "AY0" : "ei",
+            "AY1" : "ei",
+            "AY2" : "ei",
+            "B" : "b",
+            "CH" : "c",
+            "D" : "dx",
+            "DH" : "d",
+            "EH" : "ee",
+            "EH0" : "ee",
+            "EH1" : "ee",
+            "EH2" : "ee",
+            "ER" : "a r",
+            "ER0" : "a r",
+            "ER1" : "a r",
+            "ER2" : "a r",
+            "EY" : "ee",
+            "EY0" : "ee",
+            "EY1" : "ee",
+            "EY2" : "ee",
+            "F" : "f",
+            "G" : "g",
+            "HH" : "h",
+            "IH" : "i",
+            "IH0" : "i",
+            "IH1" : "i",
+            "IH2" : "i",
+            "IY" : "ii",
+            "IY0" : "ii",
+            "IY1" : "ii",
+            "IY2" : "ii",
+            "JH" : "j",
+            "K" : "k",
+            "L" : "l",
+            "M" : "m",
+            "N" : "n",
+            "NG" : "ng",
+            "OW" : "o",
+            "OW0" : "o",
+            "OW1" : "o",
+            "OW2" : "o",
+            "OY" : "ei",
+            "OY0" : "ei",
+            "OY1" : "ei",
+            "OY2" : "ei",
+            "P" : "p",
+            "R" : "r",
+            "S" : "s",
+            "SH" : "sh",
+            "T" : "tx",
+            "TH" : "t",
+            "UH" : "u",
+            "UH0" : "u",
+            "UH1" : "u",
+            "UH2" : "u",
+            "UW" : "uu",
+            "UW0" : "uu",
+            "UW1" : "uu",
+            "UW2" : "uu",
+            "V" : "w",
+            "W" : "w",
+            "Y" : "y",
+            "Z" : "z",
+            "ZH" : "sh",
+        }
+        # Mapping between the iitm cls and iitm char
+        self.cls_2_chr_map = {
+            "aa" : "A",
+            "ii" : "I",
+            "uu" : "U",
+            "ee" : "E",
+            "oo" : "O",
+            "nn" : "N",
+            "ae" : "ऍ",
+            "ag" : "ऽ",
+            "au" : "औ",
+            "axx" : "अ",
+            "ax" : "ऑ",
+            "bh" : "B",
+            "ch" : "C",
+            "dh" : "ध",
+            "dx" : "ड",
+            "dxh" : "ढ",
+            "dxhq" : "T",
+            "dxq" : "D",
+            "ei" : "ऐ",
+            "ai" : "ऐ",
+            "eu" : "உ",
+            "gh" : "घ",
+            "gq" : "G",
+            "hq" : "H",
+            "jh" : "J",
+            "kh" : "ख",
+            "khq" : "K",
+            "kq" : "क",
+            "ln" : "ൾ",
+            "lw" : "ൽ",
+            "lx" : "ള",
+            "mq" : "M",
+            "nd" : "न",
+            "ng" : "ङ",
+            "nj" : "ञ",
+            "nk" : "Y",
+            "nw" : "ൺ",
+            "nx" : "ण",
+            "ou" : "औ",
+            "ph" : "P",
+            "rq" : "R",
+            "rqw" : "ॠ",
+            "rw" : "ർ",
+            "rx" : "र",
+            "sh" : "श",
+            "sx" : "ष",
+            "th" : "थ",
+            "tx" : "ट",
+            "txh" : "ठ",
+            "wv" : "W",
+            "zh" : "Z",
+        }
+        # Multilingual support for OOV characters
+        oov_map_json_file = 'multilingualcharmap.json'
+        with open(oov_map_json_file, 'r') as oov_file:
+            self.oov_map = json.load(oov_file)
+    def load_lang_dict(self, language, phone_dictionary):
+        # load dictionary for requested language
+        try:
+            dict_file = language
+            print("language", language)
+            dict_file_path = os.path.join(self.dict_location, dict_file)
+            print("dict_file_path", dict_file_path)
+            df = pd.read_csv(dict_file_path, delimiter=" ", header=None, dtype=str)
+            phone_dictionary[language] = df.set_index(0).to_dict('dict')[1]
+            dict_file = 'english'
+            dict_file_path = os.path.join(self.dict_location, dict_file)
+            df = pd.read_csv(dict_file_path, delimiter=" ", header=None, dtype=str)
+            phone_dictionary['english'] = df.set_index(0).to_dict('dict')[1]
+        except Exception as e:
+            print(traceback.format_exc())
+        return phone_dictionary
+    def __is_float(self, word):
+        parts = word.split('.')
+        if len(parts) != 2:
+            return False
+        return parts[0].isdecimal() and parts[1].isdecimal()
+    def en_g2p(self, word):
+        phn_out = self.g2p(word)
+        # print(f"phn_out: {phn_out}")
+        # iterate over the string list and replace each word with the corresponding value from the dictionary
+        for i, phn in enumerate(phn_out):
+            if phn in self.cmu_2_cls_map.keys():
+                phn_out[i] = self.cmu_2_cls_map[phn]
+                # cls_out = self.cmu_2_cls_map[phn]
+                if phn_out[i] in self.cls_2_chr_map.keys():
+                    phn_out[i] = self.cls_2_chr_map[phn_out[i]]
+                else:
+                    pass
+            else:
+                pass  # ignore words that are not in the dictionary
+            # print(f"i: {i}, phn: {phn}, cls_out: {cls_out}, phn_out: {phn_out[i]}")
+        return ("".join(phn_out)).strip().replace(" ", "")
+    def __post_phonify(self, text, language, gender):
+        language_gender_id = language+'_'+gender
+        if language_gender_id in self.oov_map.keys():
+            output_string = ''
+            for char in text:
+                if char in self.oov_map[language_gender_id].keys():
+                    output_string += self.oov_map[language_gender_id][char]
+                else:
+                    output_string += char
+                # output_string += self.oov_map['language_gender_id']['char']
+            return output_string
+        else:
+            return text
+    def __is_english_word(self, word):
+        maxchar = max(word)
+        if u'\u0000' <= maxchar <= u'\u007f':
+            return True
+        return False
+    def __phonify(self, text, language, gender, phone_dictionary):
+        # text is expected to be a list of strings
+        words = set((" ".join(text)).split(" "))
+        #print(f"words test: {words}")
+        non_dict_words = []
+        if language in phone_dictionary:
+            for word in words:
+                # print(f"word: {word}")
+                if word not in phone_dictionary[language] and (language == "english" or (not self.__is_english_word(word))):
+                    non_dict_words.append(word)
+                    #print('INSIDE IF CONDITION OF ADDING WORDS')
+        else:
+            non_dict_words = words
+        print(f"word not in dict: {non_dict_words}")
+        if len(non_dict_words) > 0:
+            # unified parser has to be run for the non dictionary words
+            os.makedirs("tmp", exist_ok=True)
+            timestamp = str(time.time())
+            non_dict_words_file = os.path.abspath("tmp/non_dict_words_" + timestamp)
+            out_dict_file = os.path.abspath("tmp/out_dict_" + timestamp)
+            with open(non_dict_words_file, "w") as f:
+                f.write("\n".join(non_dict_words))
+            if(language == 'tamil'):
+                current_directory = os.getcwd()
+                #tamil_parser_cmd = "tamil_parser.sh"
+                tamil_parser_cmd = f"{current_directory}/ssn_parser_new/tamil_parser.py"
+                #subprocess.run(["bash", tamil_parser_cmd, non_dict_words_file, out_dict_file, timestamp, "ssn_parser"])
+                subprocess.run(["python", tamil_parser_cmd, non_dict_words_file, out_dict_file, timestamp, f"{current_directory}/ssn_parser_new"])
+            elif(language == 'english'):
+                phn_out_dict = {}
+                for i in range(0,len(non_dict_words)):
+                    phn_out_dict[non_dict_words[i]] = self.en_g2p(non_dict_words[i])
+                # Create a string representation of the dictionary
+                data_str = "\n".join([f"{key}\t{value}" for key, value in phn_out_dict.items()])
+                print(f"data_str: {data_str}")
+                with open(out_dict_file, "w") as f:
+                    f.write(data_str)
+            else:
+                out_dict_file = os.path.abspath("tmp/out_dict_" + timestamp)
+                from get_phone_mapped_python import TextReplacer
+                from indic_unified_parser.uparser import wordparse
+                text_replacer=TextReplacer()
+                # def write_output_to_file(output_text, file_path):
+                #     with open(file_path, 'w') as f:
+                #         f.write(output_text)
+                parsed_output_list = []
+                for word in non_dict_words:
+                    parsed_word = wordparse(word, 0, 0, 1)
+                    parsed_output_list.append(parsed_word)
+                replaced_output_list = [text_replacer.apply_replacements(parsed_word) for parsed_word in parsed_output_list]
+                with open(out_dict_file, 'w', encoding='utf-8') as file:
+                    for original_word, formatted_word in zip(non_dict_words, replaced_output_list):
+                        line = f"{original_word}\t{formatted_word}\n"
+                        file.write(line)
+                        print(line, end='')
+            try:
+                df = pd.read_csv(out_dict_file, delimiter="\t", header=None, dtype=str)
+                #print('DATAFRAME OUTPUT FILE', df.head())
+                new_dict = df.dropna().set_index(0).to_dict('dict')[1]
+                #print("new dict",new_dict)
+                if language not in phone_dictionary:
+                    phone_dictionary[language] = new_dict
+                else:
+                    phone_dictionary[language].update(new_dict)
+                # run a non-blocking child process to update the dictionary file
+                #print("phone_dict", self.phone_dictionary)
+                p = Process(target=add_to_dictionary, args=(new_dict, os.path.join(self.dict_location, language)))
+                p.start()
+            except Exception as err:
+                print(f"Error: While loading {out_dict_file}")
+                traceback.print_exc()
+        # phonify text with dictionary
+        text_phonified = []
+        for phrase in text:
+            phrase_phonified = []
+            for word in phrase.split(" "):
+                if self.__is_english_word(word):
+                    if word in phone_dictionary["english"]:
+                        phrase_phonified.append(str(phone_dictionary["english"][word]))
+                    else:
+                        phrase_phonified.append(str(self.en_g2p(word)))
+                elif word in phone_dictionary[language]:
+                    # if a word could not be parsed, skip it
+                    phrase_phonified.append(str(phone_dictionary[language][word]))
+            # text_phonified.append(self.__post_phonify(" ".join(phrase_phonified),language, gender))
+            text_phonified.append(" ".join(phrase_phonified))
+        return text_phonified
+    def __merge_lists(self, lists):
+        merged_string = ""
+        for list in lists:
+            for word in list:
+                merged_string += word + " "
+        return merged_string.strip()
+    def __phonify_list(self, text, language, gender, phone_dictionary):
+        # text is expected to be a list of list of strings
+        words = set(self.__merge_lists(text).split(" "))
+        non_dict_words = []
+        if language in phone_dictionary:
+            for word in words:
+                if word not in phone_dictionary[language] and (language == "english" or (not self.__is_english_word(word))):
+                    non_dict_words.append(word)
+        else:
+            non_dict_words = words
+        if len(non_dict_words) > 0:
+            print(len(non_dict_words))
+            print(non_dict_words)
+            # unified parser has to be run for the non dictionary words
+            os.makedirs("tmp", exist_ok=True)
+            timestamp = str(time.time())
+            non_dict_words_file = os.path.abspath("tmp/non_dict_words_" + timestamp)
+            out_dict_file = os.path.abspath("tmp/out_dict_" + timestamp)
+            with open(non_dict_words_file, "w") as f:
+                f.write("\n".join(non_dict_words))
+            if(language == 'tamil'):
+                current_directory = os.getcwd()
+                #tamil_parser_cmd = "tamil_parser.sh"
+                tamil_parser_cmd = f"{current_directory}/ssn_parser_new/tamil_parser.py"
+                #subprocess.run(["bash", tamil_parser_cmd, non_dict_words_file, out_dict_file, timestamp, "ssn_parser"])
+                subprocess.run(["python", tamil_parser_cmd, non_dict_words_file, out_dict_file, timestamp, f"{current_directory}/ssn_parser_new"])
+            elif(language == 'english'):
+                phn_out_dict = {}
+                for i in range(0,len(non_dict_words)):
+                    phn_out_dict[non_dict_words[i]] = self.en_g2p(non_dict_words[i])
+                # Create a string representation of the dictionary
+                data_str = "\n".join([f"{key}\t{value}" for key, value in phn_out_dict.items()])
+                print(f"data_str: {data_str}")
+                with open(out_dict_file, "w") as f:
+                    f.write(data_str)
+            else:
+                out_dict_file = os.path.abspath("tmp/out_dict_" + timestamp)
+                from get_phone_mapped_python import TextReplacer
+                from indic_unified_parser.uparser import wordparse
+                text_replacer=TextReplacer()
+                parsed_output_list = []
+                for word in non_dict_words:
+                    parsed_word = wordparse(word, 0, 0, 1)
+                    parsed_output_list.append(parsed_word)
+                replaced_output_list = [text_replacer.apply_replacements(parsed_word) for parsed_word in parsed_output_list]
+                with open(out_dict_file, 'w', encoding='utf-8') as file:
+                    for original_word, formatted_word in zip(non_dict_words, replaced_output_list):
+                        line = f"{original_word}\t{formatted_word}\n"
+                        file.write(line)
+                        print(line, end='')
+            try:
+                df = pd.read_csv(out_dict_file, delimiter="\t", header=None, dtype=str)
+                new_dict = df.dropna().set_index(0).to_dict('dict')[1]
+                print(new_dict)
+                if language not in phone_dictionary:
+                    phone_dictionary[language] = new_dict
+                else:
+                    phone_dictionary[language].update(new_dict)
+                # run a non-blocking child process to update the dictionary file
+                p = Process(target=add_to_dictionary, args=(new_dict, os.path.join(self.dict_location, language)))
+                p.start()
+            except Exception as err:
+                traceback.print_exc()
+        # phonify text with dictionary
+        text_phonified = []
+        for line in text:
+            line_phonified = []
+            for phrase in line:
+                phrase_phonified = []
+                for word in phrase.split(" "):
+                    if self.__is_english_word(word):
+                        if word in phone_dictionary["english"]:
+                            phrase_phonified.append(str(phone_dictionary["english"][word]))
+                        else:
+                            phrase_phonified.append(str(self.en_g2p(word)))
+                    elif word in phone_dictionary[language]:
+                        # if a word could not be parsed, skip it
+                        phrase_phonified.append(str(phone_dictionary[language][word]))
+                # line_phonified.append(self.__post_phonify(" ".join(phrase_phonified), language, gender))
+                line_phonified.append(" ".join(phrase_phonified))
+            text_phonified.append(line_phonified)
+        return text_phonified
+    def phonify(self, text, language, gender, phone_dictionary):
+        if not isinstance(text, list):
+            out = self.__phonify([text], language, gender)
+            return out[0]
+        return self.__phonify(text, language, gender, phone_dictionary)
+    def phonify_list(self, text, language, gender, phone_dictionary):
+        if isinstance(text, list):
+            return self.__phonify_list(text, language, gender, phone_dictionary)
+        else:
+            print("Error!! Expected to have a list as input.")
+class TextNormalizer:
+    def __init__(self, char_map_location=None):
+        # self.phonifier = phonifier
+        if char_map_location is None:
+            char_map_location = "charmap"
+        # this is a static set of cleaning rules to be applied
+        self.cleaning_rules = {
+            " +" : " ",
+            "^ +" : "",
+            " +$" : "",
+            "#$" : "",
+            "# +$" : "",
+        }
+        # this is the list of languages supported by num_to_words
+        self.keydict = {"english" : "en",
+            "hindi" : "hi",
+            "gujarati" : "gu",
+            "marathi" : "mr",
+            "bengali" : "bn",
+            "telugu" : "te",
+            "tamil" : "ta",
+            "kannada" : "kn",
+            "odia" : "or",
+            "punjabi" : "pa"
+        }
+        # self.g2p = G2p()
+        # print('Loading G2P model... Done!')
+    def __post_cleaning(self, text):
+        for key, replacement in self.cleaning_rules.items():
+            text = re.sub(key, replacement, text)
+        return text
+    def __post_cleaning_list(self, text):
+        # input is supposed to be a list of strings
+        output_text = []
+        for line in text:
+            for key, replacement in self.cleaning_rules.items():
+                line = re.sub(key, replacement, line)
+            output_text.append(line)
+        return output_text
+    def __check_char_type(self, str_c):
+        # Determine the type of the character
+        if str_c.isnumeric():
+            char_type = "number"
+        elif str_c in string.punctuation:
+            char_type = "punctuation"
+        elif str_c in string.whitespace:
+            char_type = "whitespace"
+        elif str_c.isalpha() and str_c.isascii():
+            char_type = "ascii"
+        else:
+            char_type = "non-ascii"
+        return char_type
+    def insert_space(self, text):
+        '''
+        Check if the text contains numbers and English words and if they are without space inserts space between them.
+        '''
+        # Initialize variables to track the previous character type and whether a space should be inserted
+        prev_char_type = None
+        next_char_type = None
+        insert_space = False
+        # Output string
+        output_string = ""
+        # Iterate through each character in the text
+        for i, c in enumerate(text):
+            # Determine the type of the character
+            char_type = self.__check_char_type(c)
+            if i == (len(text) - 1):
+                next_char_type = None
+            else:
+                next_char_type = self.__check_char_type(text[i+1])
+            # print(f"{i}: {c} is a {char_type} character and next character is a {next_char_type}")
+            # If the character type has changed from the previous character, check if a space should be inserted
+            if (char_type != prev_char_type and prev_char_type != None and char_type != "punctuation" and char_type != "whitespace"):
+                if next_char_type != "punctuation" or next_char_type != "whitespace":
+                    insert_space = True
+            # Insert a space if needed
+            if insert_space:
+                output_string += " "+c
+                insert_space = False
+            else:
+                output_string += c
+            # Update the previous character type
+            prev_char_type = char_type
+        # Print the modified text
+        output_string = re.sub(r' +', ' ', output_string)
+        return output_string
+    def insert_space_list(self, text):
+        '''
+        Expect the input to be in form of list of string.
+        Check if the text contains numbers and English words and if they are without space inserts space between them.
+        '''
+        # Output string list
+        output_list = []
+        for line in text:
+            # Initialize variables to track the previous character type and whether a space should be inserted
+            prev_char_type = None
+            next_char_type = None
+            insert_space = False
+            # Output string
+            output_string = ""
+            # Iterate through each character in the line
+            for i, c in enumerate(line):
+                # Determine the type of the character
+                char_type = self.__check_char_type(c)
+                if i == (len(line) - 1):
+                    next_char_type = None
+                else:
+                    next_char_type = self.__check_char_type(line[i+1])
+                # print(f"{i}: {c} is a {char_type} character and next character is a {next_char_type}")
+                # If the character type has changed from the previous character, check if a space should be inserted
+                if (char_type != prev_char_type and prev_char_type != None and char_type != "punctuation" and char_type != "whitespace"):
+                    if next_char_type != "punctuation" or next_char_type != "whitespace":
+                        insert_space = True
+                # Insert a space if needed
+                if insert_space:
+                    output_string += " "+c
+                    insert_space = False
+                else:
+                    output_string += c
+                # Update the previous character type
+                prev_char_type = char_type
+            # Print the modified line
+            output_string = re.sub(r' +', ' ', output_string)
+            output_list.append(output_string)
+        return output_list
+    def num2text(self, text, language):
+        if language in self.keydict.keys():
+            digits = sorted(list(map(int, re.findall(r'\d+', text))),reverse=True)
+            if digits:
+                for digit in digits:
+                    text = re.sub(str(digit), ' '+num_to_word(digit, self.keydict[language])+' ', text)
+            return self.__post_cleaning(text)
+        else:
+            print(f"No num-to-char for the given language {language}.")
+            return self.__post_cleaning(text)
+    def num2text_list(self, text, language):
+        # input is supposed to be a list of strings
+        if language in self.keydict.keys():
+            output_text = []
+            for line in text:
+                digits = sorted(list(map(int, re.findall(r'\d+', line))),reverse=True)
+                if digits:
+                    for digit in digits:
+                        line = re.sub(str(digit), ' '+num_to_word(digit, self.keydict[language])+' ', line)
+                output_text.append(line)
+            return self.__post_cleaning_list(output_text)
+        else:
+            print(f"No num-to-char for the given language {language}.")
+            return self.__post_cleaning_list(text)
+    def numberToTextConverter(self, text, language):
+        if language in self.keydict.keys():
+            matches = re.findall(r'\d+\.\d+|\d+', text)
+            digits = sorted([int(match) if match.isdigit() else match if re.match(r'^\d+(\.\d+)?$', match) else str(match) for match in matches], key=lambda x: float(x) if isinstance(x, str) and '.' in x else x, reverse=True)
+            if digits:
+                for digit in digits:
+                    if isinstance(digit, int):
+                        text = re.sub(str(digit), ' '+num_to_word(digit, self.keydict[language]).replace(",", "")+' ', text)
+                    else:
+                        parts = str(digit).split('.')
+                        integer_part = int(parts[0])
+                        data1 = num_to_word(integer_part, self.keydict[language]).replace(",", "")
+                        decimal_part = str(parts[1])
+                        data2 = ''
+                        for i in decimal_part:
+                            data2 = data2+' '+num_to_word(i, self.keydict[language])
+                        if language == 'hindi':
+                            final_data = f'{data1} दशमलव {data2}'
+                        elif language == 'tamil':
+                            final_data = f'{data1} புள்ளி {data2}'
+                        else:
+                            final_data = f'{data1} point {data2}'
+                        text = re.sub(str(digit), ' '+final_data+' ', text)
+            return self.__post_cleaning(text)
+        else:
+            words = {
+                '0': 'zero', '1': 'one', '2': 'two', '3': 'three', '4': 'four',
+                '5': 'five', '6': 'six', '7': 'seven', '8': 'eight', '9': 'nine'
+            }
+            # Use regular expression to find and replace decimal points in numbers
+            text = re.sub(r'(?<=\d)\.(?=\d)', ' point ', text)
+            # Find all occurrences of numbers with decimal points and convert them to words
+            matches = re.findall(r'point (\d+)', text)
+            for match in matches:
+                replacement = ' '.join(words[digit] for digit in match)
+                text = text.replace(f'point {match}', f'point {replacement}', 1)
+            return self.__post_cleaning(text)
+    def normalize(self, text, language):
+        return self.__post_cleaning(text)
+    def normalize_list(self, text, language):
+        # input is supposed to be a list of strings
+        return self.__post_cleaning_list(text)
+class TextPhrasifier:
+    @classmethod
+    def phrasify(cls, text):
+        phrase_list = []
+        for phrase in text.split("#"):
+            phrase = phrase.strip()
+            if phrase != "":
+                phrase_list.append(phrase)
+        return phrase_list
+class TextPhrasifier_List:
+    @classmethod
+    def phrasify(cls, text):
+        # input is supposed to be a list of strings
+        # output is list of list of strings
+        output_list = []
+        for line in text:
+            phrase_list = []
+            for phrase in line.split("#"):
+                phrase = phrase.strip()
+                if phrase != "":
+                    phrase_list.append(phrase)
+            output_list.append(phrase_list)
+        return output_list
+class DurAlignTextProcessor:
+    def __init__(self):
+        # this is a static set of cleaning rules to be applied
+        self.cleaning_rules = {
+            " +" : " ",
+           "^" : "$",
+            "$" : ".",
+        }
+        self.cleaning_rules_English = {
+            " +" : " ",
+            "$" : ".",
+        }
+    def textProcesor(self, text):
+        for key, replacement in self.cleaning_rules.items():
+            for idx in range(0,len(text)):
+                text[idx] = re.sub(key, replacement, text[idx])
+        return text
+    def textProcesorForEnglish(self, text):
+        for key, replacement in self.cleaning_rules_English.items():
+            for idx in range(0,len(text)):
+                text[idx] = re.sub(key, replacement, text[idx])
+        return text
+    def textProcesor_list(self, text):
+        # input expected in 'list of list of string' format
+        output_text = []
+        for line in text:
+            for key, replacement in self.cleaning_rules.items():
+                for idx in range(0,len(line)):
+                    line[idx] = re.sub(key, replacement, line[idx])
+            output_text.append(line)
+        return output_text
+class SharedInit:
+    def __init__(self,
+                text_cleaner = TextCleaner(),
+                text_normalizer=TextNormalizer(),
+                phonifier = Phonifier(),
+                text_phrasefier = TextPhrasifier(),
+                post_processor = DurAlignTextProcessor()):
+        self.text_cleaner = text_cleaner
+        self.text_normalizer = text_normalizer
+        self.phonifier = phonifier
+        self.text_phrasefier = text_phrasefier
+        self.post_processor = post_processor
+class TTSDurAlignPreprocessor(SharedInit):
+    def preprocess(self, text, language, gender, phone_dictionary):
+        # text = text.strip()
+        #print(text)
+        text = self.text_normalizer.numberToTextConverter(text, language)
+        text = self.text_cleaner.clean(text)
+        #print("cleaned text", text)
+        # text = self.text_normalizer.insert_space(text)
+        #text = self.text_normalizer.num2text(text, language)
+        # print(text)
+        text = self.text_normalizer.normalize(text, language)
+        # print(text)
+        phrasified_text = TextPhrasifier.phrasify(text)
+        #print("phrased",phrasified_text)
+        if language not in list(phone_dictionary.keys()):
+            phone_dictionary = self.phonifier.load_lang_dict(language, phone_dictionary)
+        #print(phone_dictionary.keys())
+        phonified_text = self.phonifier.phonify(phrasified_text, language, gender, phone_dictionary)
+        #print("phonetext",phonified_text)
+        phonified_text = self.post_processor.textProcesor(phonified_text)
+        #print(phonified_text)
+        return phonified_text, phrasified_text
+class TTSDurAlignPreprocessor_VTT(SharedInit):
+    def preprocess(self, text, language, gender):
+        # text = text.strip()
+        text = self.text_cleaner.clean_list(text)
+        # text = self.text_normalizer.insert_space_list(text)
+        text = self.text_normalizer.num2text_list(text, language)
+        text = self.text_normalizer.normalize_list(text, language)
+        phrasified_text = TextPhrasifier_List.phrasify(text)
+        phonified_text = self.phonifier.phonify_list(phrasified_text, language, gender)
+        phonified_text = self.post_processor.textProcesor_list(phonified_text)
+        return phonified_text, phrasified_text
+class CharTextPreprocessor(SharedInit):
+    def preprocess(self, text, language, gender=None, phone_dictionary=None):
+        text = text.strip()
+        text = self.text_normalizer.numberToTextConverter(text, language)
+        text = self.text_cleaner.clean(text)
+        # text = self.text_normalizer.insert_space(text)
+        #text = self.text_normalizer.num2text(text, language)
+        text = self.text_normalizer.normalize(text, language)
+        phrasified_text = TextPhrasifier.phrasify(text)
+        phonified_text = phrasified_text # No phonification for character TTS models
+        return phonified_text, phrasified_text
+class CharTextPreprocessor_VTT(SharedInit):
+    def preprocess(self, text, language, gender=None):
+        # text = text.strip()
+        text = self.text_cleaner.clean_list(text)
+        # text = self.text_normalizer.insert_space_list(text)
+        text = self.text_normalizer.num2text_list(text, language)
+        text = self.text_normalizer.normalize_list(text, language)
+        phrasified_text = TextPhrasifier_List.phrasify(text)
+        phonified_text = phrasified_text # No phonification for character TTS models
+        return phonified_text, phrasified_text
+class TTSPreprocessor(SharedInit):
+    def preprocess(self, text, language, gender, phone_dictionary):
+        text = text.strip()
+        text = self.text_normalizer.numberToTextConverter(text, language)
+        text = self.text_cleaner.clean(text)
+        # text = self.text_normalizer.insert_space(text)
+        #text = self.text_normalizer.num2text(text, language)
+        text = self.text_normalizer.normalize(text, language)
+        phrasified_text = TextPhrasifier.phrasify(text)
+        if language not in list(phone_dictionary.keys()):
+            phone_dictionary = self.phonifier.load_lang_dict(language, phone_dictionary)
+        phonified_text = self.phonifier.phonify(phrasified_text, language, gender, phone_dictionary)
+        #print(phonified_text)
+        phonified_text = self.post_processor.textProcesorForEnglish(phonified_text)
+        #print(phonified_text)
+        return phonified_text, phrasified_text
+class TTSPreprocessor_VTT(SharedInit):
+    def preprocess(self, text, language, gender):
+        # print(f"Original text: {text}")
+        text = self.text_cleaner.clean_list(text)
+        # print(f"After text cleaner: {text}")
+        # text = self.text_normalizer.insert_space_list(text)
+        # print(f"After insert space: {text}")
+        text = self.text_normalizer.num2text_list(text, language)
+        # print(f"After num2text: {text}")
+        text = self.text_normalizer.normalize_list(text, language)
+        # print(f"After text normalizer: {text}")
+        phrasified_text = TextPhrasifier_List.phrasify(text)
+        # print(f"phrasified_text: {phrasified_text}")
+        phonified_text = self.phonifier.phonify_list(phrasified_text, language, gender)
+        # print(f"phonified_text: {phonified_text}")
+        return phonified_text, phrasified_text