diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..34faeeaf6de1371d278305addfd76de6ed9e74bf 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,26 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +data/input/reference_audio/phase3_assamese_female_48k.wav filter=lfs diff=lfs merge=lfs -text +data/input/reference_audio/phase3_assamese_male_48k.wav filter=lfs diff=lfs merge=lfs -text +data/input/reference_audio/phase3_bengali_male_48k.wav filter=lfs diff=lfs merge=lfs -text +data/input/reference_audio/phase3_bodo_female_48k.wav filter=lfs diff=lfs merge=lfs -text +data/input/reference_audio/phase3_bodo_male_48k.wav filter=lfs diff=lfs merge=lfs -text +data/input/reference_audio/phase3_gujarati_female_48k.wav filter=lfs diff=lfs merge=lfs -text +data/input/reference_audio/phase3_gujarati_male_48k.wav filter=lfs diff=lfs merge=lfs -text +data/input/reference_audio/phase3_hindi_female_48k.wav filter=lfs diff=lfs merge=lfs -text +data/input/reference_audio/phase3_hindi_male_48k.wav filter=lfs diff=lfs merge=lfs -text +data/input/reference_audio/phase3_manipuri_female_48k.wav filter=lfs diff=lfs merge=lfs -text +data/input/reference_audio/phase3_manipuri_male_48k.wav filter=lfs diff=lfs merge=lfs -text +data/input/reference_audio/phase3_nepali_female_48k.wav filter=lfs diff=lfs merge=lfs -text +data/input/reference_audio/phase3_nepali_male_48k.wav filter=lfs diff=lfs merge=lfs -text +data/input/reference_audio/phase3_punjabi_female_48k.wav filter=lfs diff=lfs merge=lfs -text +data/input/reference_audio/phase3_punjabi_male_48k.wav filter=lfs diff=lfs merge=lfs -text +data/input/reference_audio/phase3_sanskrit_female_48k.wav filter=lfs diff=lfs merge=lfs -text +data/input/reference_audio/phase3_sanskrit_male_48k.wav filter=lfs diff=lfs merge=lfs -text 
+data/input/reference_audio/phase3_tamil_female_48k.wav filter=lfs diff=lfs merge=lfs -text
+data/input/reference_audio/phase3_tamil_male_48k.wav filter=lfs diff=lfs merge=lfs -text
+data/input/reference_audio/phase3_telugu_female_48k.wav filter=lfs diff=lfs merge=lfs -text
+data/input/reference_audio/phase3_telugu_male_48k.wav filter=lfs diff=lfs merge=lfs -text
+data/output/1_te_phase3_telugu_male_48k.wav filter=lfs diff=lfs merge=lfs -text
+data/output/2_mr_phase3_manipuri_male_48k.wav filter=lfs diff=lfs merge=lfs -text
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..fd8dd2407e1de6780b8a22a8d0e5c52f726e64d4
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,42 @@
+FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime
+
+# Set working directory
+WORKDIR /app
+
+# Enforce the supported Python range (>=3.9.0, <3.12); fail the build otherwise
+RUN python --version && python -c "import sys; assert (3, 9) <= sys.version_info[:2] < (3, 12), sys.version"
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    git \
+    libsndfile1 \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Cython first (crucial for building extensions)
+RUN pip install --no-cache-dir Cython packaging
+
+# Clone the repository
+RUN git clone https://github.com/PranavDBhat/LIMMITS-24-Coquiai.git /app/LIMMITS-24-Coquiai
+
+# Install only the main requirements (not dev/notebooks requirements)
+# This directly uses the requirements.txt file from the repository
+RUN cd /app/LIMMITS-24-Coquiai && \
+    pip install --no-cache-dir -r requirements.txt
+
+# Install the package in development mode
+# This will build the Cython extensions
+RUN cd /app/LIMMITS-24-Coquiai && \
+    pip install --no-cache-dir -e .
+
+# Create directories for models, input, and output
+RUN mkdir -p /app/models /app/data/input /app/data/output
+
+# Copy the inference script
+COPY infer.py /app/
+
+# Set the entrypoint to run the inference script
+ENTRYPOINT ["python", "infer.py"]
+
+# Default command (can be overridden)
+CMD ["--help"]
\ No newline at end of file
diff --git a/build_and_run.sh b/build_and_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d13c468d3d0178f850f622b1c14fec071d4b925e
--- /dev/null
+++ b/build_and_run.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+#
+# Build the tts-baseline Docker image, then synthesize every entry of
+# data/input/texts.txt into data/output.
+#
+# Optional environment overrides:
+#   MODEL_FILE  checkpoint file name inside models/ (auto-detected when unset)
+#   DEVICE      "cuda" (default) or "cpu"
+
+# Abort on the first failing command so that a failed `docker build` is never
+# followed by "Build complete!" and a run of a stale image.
+set -euo pipefail
+
+# Ensure directories exist. model_related is bind-mounted below; creating it
+# here keeps Docker from creating it on the host as a root-owned directory.
+mkdir -p models data/input/reference_audio data/output model_related
+
+# Pick the checkpoint: an explicit MODEL_FILE wins, otherwise accept either the
+# file name infer.py defaults to or the plain best_model.pth.
+MODEL_FILE="${MODEL_FILE:-}"
+if [ -z "$MODEL_FILE" ]; then
+    for candidate in best_model_479919.pth best_model.pth; do
+        if [ -f "models/$candidate" ]; then
+            MODEL_FILE="$candidate"
+            break
+        fi
+    done
+fi
+
+# Check if model files exist
+if [ -z "$MODEL_FILE" ] || [ ! -f "models/$MODEL_FILE" ] || [ ! -f "models/config.json" ]; then
+    echo "ERROR: Model files not found in models/ directory!"
+    echo "Please place model files in models/ directory:"
+    echo "- models/best_model_479919.pth (or models/best_model.pth)"
+    echo "- models/config.json"
+    exit 1
+fi
+
+# Check if sample inputs exist
+if [ ! -f "data/input/texts.txt" ]; then
+    echo "WARNING: No texts.txt found. Creating sample file..."
+    echo -e "1\tte\tవడ్రంగి, క్షురక వృత్తులలో పెట్టుబడి ప్రధానమై ఇతరులు కూడా ఈ వృత్తిలో ప్రవేశించి వ్యాపారంగా మార్చేసార\tspeaker1.wav" > data/input/texts.txt
+    echo "Please add reference audio files to data/input/reference_audio/"
+fi
+
+# Build the Docker image
+echo "Building Docker image (this may take some time)..."
+docker build -t tts-baseline .
+
+echo ""
+echo "Build complete!"
+echo ""
+
+# Run inference in the container. Any arguments are passed to `docker run` as
+# extra options (e.g. --gpus all).
+run_inference() {
+    docker run "$@" \
+        -v "$(pwd)/models:/app/models" \
+        -v "$(pwd)/data/input:/app/data/input" \
+        -v "$(pwd)/data/output:/app/data/output" \
+        -v "$(pwd)/model_related:/app/model_related" \
+        tts-baseline \
+        --text_file /app/data/input/texts.txt \
+        --ref_dir /app/data/input/reference_audio \
+        --savedir /app/data/output \
+        --model_path "/app/models/$MODEL_FILE" \
+        --config_path /app/models/config.json \
+        --device "$DEVICE"
+}
+
+DEVICE="${DEVICE:-cuda}"
+if [ "$DEVICE" = "cpu" ]; then
+    # CPU inference: no GPU passthrough needed.
+    run_inference
+else
+    run_inference --gpus all
+fi
diff --git a/data/input/reference_audio/phase3_assamese_female_48k.wav b/data/input/reference_audio/phase3_assamese_female_48k.wav
new file mode 100644
index 0000000000000000000000000000000000000000..d080e2413a12099bca043a5143dc241595a09d04
--- /dev/null
+++ b/data/input/reference_audio/phase3_assamese_female_48k.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cac46ed6d99adee425d8e56f26b94672632f77b8bb4e9149538327625bf8590f
+size 878636
diff --git a/data/input/reference_audio/phase3_assamese_male_48k.wav b/data/input/reference_audio/phase3_assamese_male_48k.wav
new file mode 100644
index 0000000000000000000000000000000000000000..9292094f5ca155f41c76aefb260a6b9571839997
--- /dev/null
+++ b/data/input/reference_audio/phase3_assamese_male_48k.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ae4ba4cd789dc4a260d2ee96ac3b56250545d148bc1b8a5ad306c32fae87b64
+size 622636
diff --git a/data/input/reference_audio/phase3_bengali_male_48k.wav b/data/input/reference_audio/phase3_bengali_male_48k.wav
new file mode 100644
index 0000000000000000000000000000000000000000..7384bf3bc22e84b23a027b0c181902ef042143b8
--- /dev/null
+++ b/data/input/reference_audio/phase3_bengali_male_48k.wav
@@ -0,0 +1,3 @@
+version
https://git-lfs.github.com/spec/v1 +oid sha256:5774878765083e411d9d45eb1e593b2b593114d469411082b40172c36990589b +size 645164 diff --git a/data/input/reference_audio/phase3_bodo_female_48k.wav b/data/input/reference_audio/phase3_bodo_female_48k.wav new file mode 100644 index 0000000000000000000000000000000000000000..2c4955e98c32afae3539323609a78484ead1c037 --- /dev/null +++ b/data/input/reference_audio/phase3_bodo_female_48k.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5aa6b50a42e585f8a5165c0d1364f7867b178eb3c77f28cb77cc5649ff1246b7 +size 585772 diff --git a/data/input/reference_audio/phase3_bodo_male_48k.wav b/data/input/reference_audio/phase3_bodo_male_48k.wav new file mode 100644 index 0000000000000000000000000000000000000000..2ef0dd1727289502024d432937abe71af0b0a122 --- /dev/null +++ b/data/input/reference_audio/phase3_bodo_male_48k.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7cbbab9e5fdd8f0d213278a9055ee248ef25e23489d497036c049bba33d5462 +size 528428 diff --git a/data/input/reference_audio/phase3_gujarati_female_48k.wav b/data/input/reference_audio/phase3_gujarati_female_48k.wav new file mode 100644 index 0000000000000000000000000000000000000000..2dc5b098e6dd5c9887e01942e32cbb6a252e5e01 --- /dev/null +++ b/data/input/reference_audio/phase3_gujarati_female_48k.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92bbae9ecf28b340056a68a0c41f1827a16b6ad90d04ef5fb1466044ce8342d3 +size 438316 diff --git a/data/input/reference_audio/phase3_gujarati_male_48k.wav b/data/input/reference_audio/phase3_gujarati_male_48k.wav new file mode 100644 index 0000000000000000000000000000000000000000..303bdf9d063cda7d1adbe573669e3ac86cc1f20b --- /dev/null +++ b/data/input/reference_audio/phase3_gujarati_male_48k.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca7b082fc6ffa75a001b462b144881c67b3a574b6fc23ffb89b2c4e0d7a9a6db +size 452652 diff --git 
a/data/input/reference_audio/phase3_hindi_female_48k.wav b/data/input/reference_audio/phase3_hindi_female_48k.wav new file mode 100644 index 0000000000000000000000000000000000000000..f80ac799a869711af834b093d56c315e52c74f13 --- /dev/null +++ b/data/input/reference_audio/phase3_hindi_female_48k.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84b8ca1f8cf8d14eab7f10887b5ea02d5fe91a135aef9b11b572d9fa2af1a289 +size 520236 diff --git a/data/input/reference_audio/phase3_hindi_male_48k.wav b/data/input/reference_audio/phase3_hindi_male_48k.wav new file mode 100644 index 0000000000000000000000000000000000000000..ee5394756e32b894ea9f271df5085241622eb6cd --- /dev/null +++ b/data/input/reference_audio/phase3_hindi_male_48k.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdbda05aa4e2cbfa1947a16416cc9cca2af29a6a02a947204d41ebaf31470fbc +size 428076 diff --git a/data/input/reference_audio/phase3_manipuri_female_48k.wav b/data/input/reference_audio/phase3_manipuri_female_48k.wav new file mode 100644 index 0000000000000000000000000000000000000000..47c2d344a428fcd5ffb3e9454905392458592da6 --- /dev/null +++ b/data/input/reference_audio/phase3_manipuri_female_48k.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebbddaa5903c08c85a190ba7dbbacfa2ed0f3e81f960de2bf35053eb5efad732 +size 696364 diff --git a/data/input/reference_audio/phase3_manipuri_male_48k.wav b/data/input/reference_audio/phase3_manipuri_male_48k.wav new file mode 100644 index 0000000000000000000000000000000000000000..c219e11964b104f8615d6531a61568046ce83a01 --- /dev/null +++ b/data/input/reference_audio/phase3_manipuri_male_48k.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3d91517ef59af3ad21592f7046c6c3f9ca20844804473caa578dd89c69f5fcd +size 688172 diff --git a/data/input/reference_audio/phase3_nepali_female_48k.wav b/data/input/reference_audio/phase3_nepali_female_48k.wav new file mode 100644 index 
0000000000000000000000000000000000000000..e323265d92d357119fb033bf2592992a4195c7a5 --- /dev/null +++ b/data/input/reference_audio/phase3_nepali_female_48k.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:484f636799da0a1c7c5b24062aa53c55de5bed3ad0b7fe769b8b4dacc61e32ea +size 628780 diff --git a/data/input/reference_audio/phase3_nepali_male_48k.wav b/data/input/reference_audio/phase3_nepali_male_48k.wav new file mode 100644 index 0000000000000000000000000000000000000000..7d8aeae6ede4bd62cef53ee6092ded220c0d95ea --- /dev/null +++ b/data/input/reference_audio/phase3_nepali_male_48k.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a21ef0dcf83df7a98c9522181bb0e6af121c58be58dedf989ad62b305c8fcc76 +size 518188 diff --git a/data/input/reference_audio/phase3_punjabi_female_48k.wav b/data/input/reference_audio/phase3_punjabi_female_48k.wav new file mode 100644 index 0000000000000000000000000000000000000000..52cd2bfaba3ecdbee5e38fba31a6d9e085d17753 --- /dev/null +++ b/data/input/reference_audio/phase3_punjabi_female_48k.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9382ebca3054cec66e70d3264622d8896d17bcd46ec19ab17c8334fc3d0097ef +size 413740 diff --git a/data/input/reference_audio/phase3_punjabi_male_48k.wav b/data/input/reference_audio/phase3_punjabi_male_48k.wav new file mode 100644 index 0000000000000000000000000000000000000000..d06f685f0440bc61f92cfaddcef08c3017309dca --- /dev/null +++ b/data/input/reference_audio/phase3_punjabi_male_48k.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb6c66455e0669f22a24f0b06ea4457337dd0a9f4bfc3e253d58a2f9a0953da8 +size 378924 diff --git a/data/input/reference_audio/phase3_sanskrit_female_48k.wav b/data/input/reference_audio/phase3_sanskrit_female_48k.wav new file mode 100644 index 0000000000000000000000000000000000000000..ceda007b4ba77bd7ec03532b7b956f30d7a37e0d --- /dev/null +++ 
b/data/input/reference_audio/phase3_sanskrit_female_48k.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:382581b53343e6bcad461cec7f24a687d9439b4388791396021b34473bd396dc +size 1557548 diff --git a/data/input/reference_audio/phase3_sanskrit_male_48k.wav b/data/input/reference_audio/phase3_sanskrit_male_48k.wav new file mode 100644 index 0000000000000000000000000000000000000000..1a744ba0c3a637e4ecf32330bc62b8cd85953436 --- /dev/null +++ b/data/input/reference_audio/phase3_sanskrit_male_48k.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8dda08b50cd05d50b20558ada30516c483464b8241fe81a33471411b841737f +size 1234988 diff --git a/data/input/reference_audio/phase3_tamil_female_48k.wav b/data/input/reference_audio/phase3_tamil_female_48k.wav new file mode 100644 index 0000000000000000000000000000000000000000..6ce523d8be7dc4f1790fb0ba5d400365866f9ab1 --- /dev/null +++ b/data/input/reference_audio/phase3_tamil_female_48k.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f9a67918b9466c725329998bdaed0b2279b12b9dfe43f88fd1613c8fe6411eb +size 2347052 diff --git a/data/input/reference_audio/phase3_tamil_male_48k.wav b/data/input/reference_audio/phase3_tamil_male_48k.wav new file mode 100644 index 0000000000000000000000000000000000000000..d6ace1a998282b8d7a741e0692ececda2eef08be --- /dev/null +++ b/data/input/reference_audio/phase3_tamil_male_48k.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36990873db9fedce95bd28d78ae5689b02459ebdee9cbbd81e5f947da0be08af +size 2007084 diff --git a/data/input/reference_audio/phase3_telugu_female_48k.wav b/data/input/reference_audio/phase3_telugu_female_48k.wav new file mode 100644 index 0000000000000000000000000000000000000000..75c8a76264ff2cb0cadd01211b28cc8264af9802 --- /dev/null +++ b/data/input/reference_audio/phase3_telugu_female_48k.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:baa4775b307b9a5c80b7c6b61e302adcbef6b288890c4d54efa903f0a2f89a19 +size 571436 diff --git a/data/input/reference_audio/phase3_telugu_male_48k.wav b/data/input/reference_audio/phase3_telugu_male_48k.wav new file mode 100644 index 0000000000000000000000000000000000000000..6bf8554e809b026f3793be34b02fe83ef1843518 --- /dev/null +++ b/data/input/reference_audio/phase3_telugu_male_48k.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b72435776f2ee7ea0ac0b6fa14419436846cf303a5ad1343e23f071c6dc34f9 +size 499756 diff --git a/data/input/texts.txt b/data/input/texts.txt new file mode 100644 index 0000000000000000000000000000000000000000..beaae0ca23b3134fa4c12d84548b07772c173b2a --- /dev/null +++ b/data/input/texts.txt @@ -0,0 +1,2 @@ +1 te వడ్రంగి, క్షురక వృత్తులలో పెట్టుబడి ప్రధానమై ఇతరులు కూడా ఈ వృత్తిలో ప్రవేశించి వ్యాపారంగా మార్చేసార phase3_telugu_male_48k.wav +2 mr जायकवाडी धरणातून तब्बल अडीच ते तीन लाख हेक्टर शेतीच्या सिंचनासाठी पाणी सोडलं जातं phase3_manipuri_male_48k.wav \ No newline at end of file diff --git a/data/output/1_te_phase3_telugu_male_48k.wav b/data/output/1_te_phase3_telugu_male_48k.wav new file mode 100644 index 0000000000000000000000000000000000000000..250aec8e465dbc23cc7d12aa7e2e8d521a94c06b --- /dev/null +++ b/data/output/1_te_phase3_telugu_male_48k.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d92f6ba7245ff2a222132eb4bfb6cf23239ff15f3f71b5dcfad9e3addc5f1454 +size 425036 diff --git a/data/output/2_mr_phase3_manipuri_male_48k.wav b/data/output/2_mr_phase3_manipuri_male_48k.wav new file mode 100644 index 0000000000000000000000000000000000000000..16be71da3f1e5e343054a8e9780bfa8b7f4c7ce0 --- /dev/null +++ b/data/output/2_mr_phase3_manipuri_male_48k.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73dd8edae221d33baaa2276bb364e9e836d1ee39dffc8a379668bc2256796c99 +size 499276 diff --git a/infer.py b/infer.py new file mode 100644 index 
0000000000000000000000000000000000000000..73eab938df6fa01bb40df02221a01bb8bc638fcd
--- /dev/null
+++ b/infer.py
@@ -0,0 +1,118 @@
+"""Batch text-to-speech inference with a reference speaker per utterance.
+
+Each line of the input text file holds four tab-separated fields:
+
+    <id> <TAB> <language code> <TAB> <text> <TAB> <reference wav file name>
+
+For every valid line the text is synthesized in the voice of the reference
+audio and saved as ``<id>_<language code>_<reference wav file name>`` in the
+output directory.
+"""
+import argparse
+import os
+
+import torch
+from tqdm import tqdm
+from TTS.api import TTS
+
+
+def parse_args():
+    """Define the command-line options and parse them."""
+    parser = argparse.ArgumentParser(description="Text-to-Speech Synthesis")
+    parser.add_argument('-t', '--text_file', type=str, required=True,
+                        help='Path to text file containing text and audio reference files')
+    parser.add_argument('-r', '--ref_dir', type=str, required=True,
+                        help='Root directory containing reference audio files')
+    parser.add_argument('-s', '--savedir', type=str, required=True,
+                        help='Directory to store synthesized audio files')
+    parser.add_argument('-d', '--device', type=str, required=True,
+                        help='Device to use for synthesis (cpu, cuda or cuda:<index>)')
+    parser.add_argument('-m', '--model_path', type=str,
+                        default="/app/models/best_model_479919.pth",
+                        help='Path to the model file')
+    parser.add_argument('-c', '--config_path', type=str,
+                        default="/app/models/config.json",
+                        help='Path to the config file')
+    return parser.parse_args()
+
+
+def resolve_device(requested):
+    """Map the ``--device`` value to a device string usable on this machine.
+
+    ``cpu``, ``cuda`` and ``cuda:<index>`` are honoured as given, except that a
+    CUDA request falls back to ``cpu`` (with a warning) when CUDA is not
+    available. Any other value auto-selects ``cuda`` if available, else ``cpu``.
+    """
+    cuda_available = torch.cuda.is_available()
+    if requested == "cpu":
+        return "cpu"
+    if requested == "cuda" or requested.startswith("cuda:"):
+        if cuda_available:
+            return requested
+        print(f"Warning: {requested} requested but CUDA is not available. Falling back to cpu.")
+        return "cpu"
+    return "cuda" if cuda_available else "cpu"
+
+
+def synthesize_all(tts, lines, ref_dir, savedir):
+    """Synthesize one audio file per well-formed line of ``lines``.
+
+    Blank lines are ignored. Lines that do not have exactly four tab-separated
+    fields, or whose reference audio file does not exist, are skipped with a
+    warning so that one bad entry does not abort the whole batch.
+    """
+    for line_no, line in enumerate(tqdm(lines), start=1):
+        stripped = line.strip()
+        if not stripped:
+            continue
+
+        parts = stripped.split('\t')
+        if len(parts) != 4:
+            tqdm.write(f"Warning: Line {line_no} does not have 4 tab-separated parts. Skipping.")
+            continue
+
+        idx, lang, text, ref_file = parts
+        ref_path = os.path.join(ref_dir, ref_file)
+        if not os.path.isfile(ref_path):
+            tqdm.write(f"Warning: Line {line_no}: reference audio {ref_path} not found. Skipping.")
+            continue
+
+        save_path = os.path.join(savedir, f"{idx}_{lang}_{os.path.basename(ref_file)}")
+
+        # tqdm.write keeps these messages from breaking up the progress bar.
+        tqdm.write(f"Synthesizing: {text[:30]}... using reference {ref_path}")
+        tts.tts_to_file(text=text, speaker_wav=ref_path, language=lang, file_path=save_path)
+        tqdm.write(f"Saved to: {save_path}")
+
+
+def main():
+    """Load the model, read the text file and synthesize every entry."""
+    args = parse_args()
+
+    device = resolve_device(args.device)
+    print(f"Using device: {device}")
+
+    print(f"Loading model from {args.model_path} with config {args.config_path}")
+    tts = TTS(
+        model_path=args.model_path,
+        config_path=args.config_path,
+        progress_bar=False,
+    ).to(device)
+
+    os.makedirs(args.savedir, exist_ok=True)
+    print(f"Output directory: {args.savedir}")
+
+    # The input holds Indic-script text, so decode it as UTF-8 explicitly
+    # instead of relying on the locale's default encoding.
+    print(f"Reading text file: {args.text_file}")
+    with open(args.text_file, 'r', encoding='utf-8') as f:
+        lines = f.readlines()
+
+    print(f"Processing {len(lines)} entries...")
+    synthesize_all(tts, lines, args.ref_dir, args.savedir)
+
+    print("Synthesis complete!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/model_related/Bengali_Female/speakers.pth
b/model_related/Bengali_Female/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..22db7a9f9ec188c7158b70d63697ddf61b0c2904 --- /dev/null +++ b/model_related/Bengali_Female/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56f54ff1a806ebe607b0095b489f553546cce0b9904d38c531e78e4a86f09e67 +size 100098080 diff --git a/model_related/Bengali_Male/speakers.pth b/model_related/Bengali_Male/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..5725107cdbce54ebeb846e1bee407e7ec751d57c --- /dev/null +++ b/model_related/Bengali_Male/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a860399950439f46083e8e6e7e5c057e10cd7994c6232a09b33fd85f0dd1c1cb +size 119368032 diff --git a/model_related/Bhojpuri_Female/speakers.pth b/model_related/Bhojpuri_Female/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..99844387c3254168f94f467a5f5604bcda9cda6b --- /dev/null +++ b/model_related/Bhojpuri_Female/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80b05743e5e5c2d8cc10ac96a7e66006f7ed046fc7c7663f83971607afed31e3 +size 129451808 diff --git a/model_related/Bhojpuri_Male/speakers.pth b/model_related/Bhojpuri_Male/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..ad0a3aa01f1c43b724ceb1fdaaf51141ef849de3 --- /dev/null +++ b/model_related/Bhojpuri_Male/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60b03f732526e59e5165fe17c243fdb3338dc69d6b4539cd94009e6152955511 +size 123140960 diff --git a/model_related/Chhattisgarhi_Female/speakers.pth b/model_related/Chhattisgarhi_Female/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..f12abfe28f1e0030bf1a2cfc6103110206d934d6 --- /dev/null +++ b/model_related/Chhattisgarhi_Female/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:3ab3dc8e3a8946d1a170030f7da2a829441be4a70d9dd9f9689d03360d0bc876 +size 130934816 diff --git a/model_related/Chhattisgarhi_Male/speakers.pth b/model_related/Chhattisgarhi_Male/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..434472237cbdd776406ac7fff0bda45bb0ee5d98 --- /dev/null +++ b/model_related/Chhattisgarhi_Male/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:973c6233369aade6066051802d3d8165c9199be7bc0144ea0a4ffee18cb315cc +size 108560992 diff --git a/model_related/English_Female/speakers.pth b/model_related/English_Female/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..4f16bb2c76cd2fe3218b9cb0b4db71ec70624232 --- /dev/null +++ b/model_related/English_Female/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef4b8a8c9e23c9ab0f2e0c053ce85e55b0f3ea34e23dccf2b463502218b7ba1f +size 113899296 diff --git a/model_related/English_Male/speakers.pth b/model_related/English_Male/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..164ae9873809930cd1342c39eb7b1d1f1f8a9825 --- /dev/null +++ b/model_related/English_Male/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1a36e7af9cd38a35a70227b25ec4de0ea9cfdf171bf445bec5731abb65eded4 +size 118719648 diff --git a/model_related/Gujarati_Female/speakers.pth b/model_related/Gujarati_Female/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..5328a28c5f812366cf696e84353dd2baa6e037c8 --- /dev/null +++ b/model_related/Gujarati_Female/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81e435824571f839e07984422a3528cb6243da654137560df67b03d2858a75b5 +size 35408544 diff --git a/model_related/Gujarati_Male/speakers.pth b/model_related/Gujarati_Male/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..61b5c1f9691f2c387ffa1e0ddde0dac5693c26c1 
--- /dev/null +++ b/model_related/Gujarati_Male/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:315f8348df2662c0d04297df825a02190108ede342b3b2926fb9c83b22cbb3cc +size 35875360 diff --git a/model_related/Hindi_Female/speakers.pth b/model_related/Hindi_Female/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..cbef98e8bc1f6f24a33e9adde22c62140b0b4a77 --- /dev/null +++ b/model_related/Hindi_Female/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d36722d9cfa2714f7682d8cd932182b0ecf71663151131a12db01352c6d32672 +size 103680032 diff --git a/model_related/Hindi_Male/speakers.pth b/model_related/Hindi_Male/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..96c2441f8a87be6e39bb632f5dff13009c4662dc --- /dev/null +++ b/model_related/Hindi_Male/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea4a7831a9fa5961f56bbab0d97d63df34a8189fad2d5294dee2effc5a4e2f24 +size 116714528 diff --git a/model_related/Kannada_Female/speakers.pth b/model_related/Kannada_Female/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..abafc788cb51063b95e236bd4369b2a041cc559c --- /dev/null +++ b/model_related/Kannada_Female/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12ac883789029ddab4f2f6cf0340a0ad52210035513ba2a6676f21f6438a4ee3 +size 80812384 diff --git a/model_related/Kannada_Male/speakers.pth b/model_related/Kannada_Male/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..c1b05431942c7ac372aa07620ce844f6defa124a --- /dev/null +++ b/model_related/Kannada_Male/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2af52780734b88535adff48246df32ca4e5b2cf20f0c9f7efcb849d8f024eba4 +size 97633824 diff --git a/model_related/Magahi_Female/speakers.pth b/model_related/Magahi_Female/speakers.pth new file mode 
100644 index 0000000000000000000000000000000000000000..4ea3ba64929bc455e2ef1705af811dd2cd1d7b3c --- /dev/null +++ b/model_related/Magahi_Female/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8000b755a4ffd5e6b0a95e2a06dabd02d6f59dbae46776d9a662e214f7f93fb9 +size 145807392 diff --git a/model_related/Magahi_Male/speakers.pth b/model_related/Magahi_Male/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..db80cfe27d0f13053f2748b1a4f798df21037d90 --- /dev/null +++ b/model_related/Magahi_Male/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7a9007a83759fb8f1cd4bcef79a5b083839d09e1703484caa9d727dd5a1bce8 +size 153767712 diff --git a/model_related/Maithili_Female/speakers.pth b/model_related/Maithili_Female/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..ccfff07ba5f66db9eb4a056251fbecd3a65cc1d4 --- /dev/null +++ b/model_related/Maithili_Female/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4f587283f8604bcc95cba0b6e2320041619b76a715b6b0c4253a584acc595a2 +size 163039520 diff --git a/model_related/Maithili_Male/speakers.pth b/model_related/Maithili_Male/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..84cfd02beec09777dc1cb3c75b18cbfa0d8eb5d5 --- /dev/null +++ b/model_related/Maithili_Male/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7f910296937f346e330e06fc5c7c0afcad3f05f252afd3e941711f2557d197b +size 152221920 diff --git a/model_related/Marathi_Female/speakers.pth b/model_related/Marathi_Female/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..c53493fdd42299459f8c6828774175f915643a02 --- /dev/null +++ b/model_related/Marathi_Female/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e67881879aac91f6653e96f559af5129e023e2879f76fa7bc8e24b77a8236b3 +size 103038496 
diff --git a/model_related/Marathi_Male/speakers.pth b/model_related/Marathi_Male/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f21d3e8180f772f160e7e77524a8bbc7d237d66 --- /dev/null +++ b/model_related/Marathi_Male/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bde8d12edc5745f6c1151f7a0e53bce657a416ad16259a98da14480e330472d +size 98007520 diff --git a/model_related/Telugu_Female/speakers.pth b/model_related/Telugu_Female/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c52de5acfaa6020ce2d0378b2d72d8e5c9a3585 --- /dev/null +++ b/model_related/Telugu_Female/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70be39a4e6967100c91ce142d617d1ee23fbba24bd84141bfc67cc71a3cd6ed8 +size 103993632 diff --git a/model_related/Telugu_Male/speakers.pth b/model_related/Telugu_Male/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..04515025ae3b59fb7346e0d996888783028c9b6a --- /dev/null +++ b/model_related/Telugu_Male/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b419a9b129289c861f6d2f6d8e0f176e9a245879b5a0c43e5b75ccf0ad9ed93d +size 101432480 diff --git a/models/best_model.pth b/models/best_model.pth new file mode 100644 index 0000000000000000000000000000000000000000..4cb7263ec46776fdc6ed0678751ad646a6a7b32e --- /dev/null +++ b/models/best_model.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df50d69a41b53bb81037c4ef8dcb264fb379618c1afc958211ff809dcd4d3080 +size 1043587902 diff --git a/models/config.json b/models/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e527058530cffdeb0abb3a72ea55343c51eca27 --- /dev/null +++ b/models/config.json @@ -0,0 +1,662 @@ +{ + "output_path": "/home1/jesuraj/speechlm/espnet/egs2/LIMMITS_25/speechlm1/downloads", + "logger_uri": null, + "run_name": "yourtts_syspin_baseline", + 
"project_name": "YourTTS", + "run_description": "\n - Original YourTTS trained using VCTK dataset\n ", + "print_step": 50, + "plot_step": 100, + "model_param_stats": false, + "wandb_entity": null, + "dashboard_logger": "tensorboard", + "save_on_interrupt": true, + "log_model_step": 1000, + "save_step": 10000, + "save_n_checkpoints": 10, + "save_checkpoints": true, + "save_all_best": false, + "save_best_after": 0, + "target_loss": "loss_1", + "print_eval": false, + "test_delay_epochs": 0, + "run_eval": true, + "run_eval_steps": null, + "distributed_backend": "nccl", + "distributed_url": "tcp://localhost:54321", + "mixed_precision": false, + "precision": "fp16", + "epochs": 1000, + "batch_size": 16, + "eval_batch_size": 16, + "grad_clip": [ + 1000.0, + 1000.0 + ], + "scheduler_after_epoch": true, + "lr": 0.001, + "optimizer": "AdamW", + "optimizer_params": { + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "weight_decay": 0.01 + }, + "lr_scheduler": null, + "lr_scheduler_params": {}, + "use_grad_scaler": false, + "allow_tf32": false, + "cudnn_enable": true, + "cudnn_deterministic": false, + "cudnn_benchmark": false, + "training_seed": 54321, + "model": "vits", + "num_loader_workers": 8, + "num_eval_loader_workers": 0, + "use_noise_augment": false, + "audio": { + "fft_size": 1024, + "sample_rate": 22050, + "win_length": 1024, + "hop_length": 256, + "num_mels": 80, + "mel_fmin": 0, + "mel_fmax": null + }, + "use_phonemes": false, + "phonemizer": "espeak", + "phoneme_language": "en", + "compute_input_seq_cache": true, + "text_cleaner": "multilingual_cleaners", + "enable_eos_bos_chars": false, + "test_sentences_file": "", + "phoneme_cache_path": null, + "characters": { + "characters_class": "TTS.tts.models.vits.VitsCharacters", + "vocab_dict": null, + "pad": "_", + "eos": "&", + "bos": "*", + "blank": null, + "characters": 
"\u0cc8\u092f\u2013\u0951\u0905\u0c1f\u0c2c\u0a8f\ufe0f\u0a82\u099dH\u0c9bM\u2026\u0997\u0926\u0cb5\u099f\u00e8\u0c89\u0917\u0c32\u0914\u09b9\u0c4cY\u0aa2\u0a8d\u0c02\u0c4b\u0c13\u09b0\u09a8\u0aac\u09a5\u0a88\u0c93\u0c47\u0936\u0a9b\u0c09\u0919\u0c2a\u0c17\u099e\u0c0b\u098aD\u0986\u0aa1 \u0ccd\u0a95\u0c12\u0aa0RI\u090a\u0cd5\u091f\u0c97\u0c36\u0cb9\u0ab3\u09b7\u0a9e\u0aaa\u09aa\u0c37\u0cb8j\u0a86Wk\u0a81\u0958\u0937\u0cab\u09be\u095eqn\\\u0ca8\u0ce0\u091b\u09aesz\u09a3\u0995\u0c2b\u0aa3\u0aa6\u00bb1\u09bfA5\u0ca5\u092c\u091d\u09c8\u09a2\u0c9e'\u0ac9\u0c16\u0acc\u0c9c\u0ccc\u0924\u0cc0E\u00e0J,\u0c8bu\u0ca2c]\u0cca\u0caa\u099aT\u0a89\u09ce\u0a98\u0ca6\u0987\u0aa5\u0a97\u0cbf\u0c98\u09c7v\u0ca7\u0901\u0c28\u09dc\u0ac2o\u0ab2\u0c40\u0c25\u0c21\u0c82\u095a\u0c95\u09cd\u0ca0\u0c1e#\u095b\u00e9\u0cb2\u0927\u0948\u0cad\u0959\u0ab6\u093e\u0cac\u0abe\u0acdl\u09df\u0ca3\u0abf\u0ab9\u0988\u09ac\u0a96\u095d\u0909\u090b\u0c31\u0993\u0945\u0983\u0c14\u0ab7\u09f0\u0a8a\u0907\u0c19\u0989\u0c48\u09c0\u0955\u094b\u09cb\u0c1d\u0cc1\u0999\u0c99\u0c2f\u0a93\u0931\u099b\u00e2\"9\u0c92\u0c30\u0998\u0c9aZ\u09a0\u0a90\u00ef\u0c07\u092b\u0c1a\u0cbd\u0cc3(\u0cdeOB2\u09b2\u0ac1\u0c3f\u0ac5\u0922\u0c8a\u0ac8\u09af\u091e\u0ac7\u0c06we\u09f7\u0c35\u0c26\u0932hQ\u0c18a\u0c8fi\u0911\u0943\u092a\u0939\u0c4d\u0c41\u09adP3C4\u098f\u0ca4\u09ab\u0c69\u09c1\u0a9f\u0972\u0970\u0a9a\u0c2e\u0941\u0a83\u0c1bx\u09b8\u0c10r\u0cb6\u092e\u0923\u0925\u0c24\u0c33\u0c38\u0cc2\u0930[\u0942\u0949\u0964\u0a85\u0ccb\u0908\u0cbe\u0cbc*\u0caf\u0aab\u0c42\u0a8b\u0a87\u0c27\u0c3e\u09a1X`\u0c0e\u0954\u0aa8\u0ab8\u093f\u0ac3\u09c3N\u0cb3\u0cb0\u0c96\u0981\u095c\u0a9cL\u0c90\u0cb1\u0aad\u0990\u0910\u0920\u0960\u0947\u09c2\u0c66U\u091aV\u09dd\u0c15\u098b\u0c46\u0ac0G\u0921\u0c9d\u0c88\u09bc\u0c22\u093c\u0c9f\u099c\u092df\u0916\u0c05\u0cc7K\u0a9d\u090f\u09a6?\u0c4a\u0902\u0c94\u0ae0\u0c87\u094a\u0a94\u0c0f\u0985\u0c08\u0abc\u0cc67\u0933\u0918\u00f4\u0928@\u0c8e\u091c\u0915\u0c238\u0996\u0cb7t\u0982\u0c43m\u0c03\u0994\u09cc\u0c3
9\u0940\u0ab5\u093d\u0a91\u090d\u0c2d\u09b6\u0cae\u0938bF\u0c83\u09a7\u0aa4y\u0935p\u09030{&\u0c0a\u0ca1\u0906\u0ab0\u0acb\u0c86\u094dg\u09a4\u0aae\u0950\u0c1c\u094c\u0aa7\u0934\u0aaf\u0c20\u0c85\u095fSd\u0913", + "punctuations": "!'(),-.:;? ", + "phonemes": "", + "is_unique": true, + "is_sorted": true + }, + "add_blank": true, + "batch_group_size": 32, + "loss_masking": null, + "min_audio_len": 1, + "max_audio_len": Infinity, + "min_text_len": 1, + "max_text_len": Infinity, + "compute_f0": false, + "compute_energy": false, + "compute_linear_spec": true, + "precompute_num_workers": 12, + "start_by_longest": true, + "shuffle": false, + "drop_last": false, + "datasets": [ + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Bengali_Female", + "meta_file_train": "SyspinSpeakers/Bengali_Female.tsv", + "ignored_speakers": null, + "language": "bn", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Chhattisgarhi_Male", + "meta_file_train": "SyspinSpeakers/Chhattisgarhi_Male.tsv", + "ignored_speakers": null, + "language": "hne", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Magahi_Male", + "meta_file_train": "SyspinSpeakers/Magahi_Male.tsv", + "ignored_speakers": null, + "language": "mag", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Marathi_Male", + "meta_file_train": "SyspinSpeakers/Marathi_Male.tsv", + "ignored_speakers": null, + "language": "mr", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Maithili_Female", + "meta_file_train": "SyspinSpeakers/Maithili_Female.tsv", + "ignored_speakers": null, + "language": "mai", + 
"phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/English_Male", + "meta_file_train": "SyspinSpeakers/English_Male.tsv", + "ignored_speakers": null, + "language": "en", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Chhattisgarhi_Female", + "meta_file_train": "SyspinSpeakers/Chhattisgarhi_Female.tsv", + "ignored_speakers": null, + "language": "hne", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Kannada_Male", + "meta_file_train": "SyspinSpeakers/Kannada_Male.tsv", + "ignored_speakers": null, + "language": "kn", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Bhojpuri_Female", + "meta_file_train": "SyspinSpeakers/Bhojpuri_Female.tsv", + "ignored_speakers": null, + "language": "bho", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Kannada_Female", + "meta_file_train": "SyspinSpeakers/Kannada_Female.tsv", + "ignored_speakers": null, + "language": "kn", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Maithili_Male", + "meta_file_train": "SyspinSpeakers/Maithili_Male.tsv", + "ignored_speakers": null, + "language": "mai", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Telugu_Male", + "meta_file_train": "SyspinSpeakers/Telugu_Male.tsv", + "ignored_speakers": null, + "language": "te", + "phonemizer": "", + "meta_file_val": "", + 
"meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Telugu_Female", + "meta_file_train": "SyspinSpeakers/Telugu_Female.tsv", + "ignored_speakers": null, + "language": "te", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Bengali_Male", + "meta_file_train": "SyspinSpeakers/Bengali_Male.tsv", + "ignored_speakers": null, + "language": "bn", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Bhojpuri_Male", + "meta_file_train": "SyspinSpeakers/Bhojpuri_Male.tsv", + "ignored_speakers": null, + "language": "bho", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Gujarati_Female", + "meta_file_train": "SyspinSpeakers/Gujarati_Female.tsv", + "ignored_speakers": null, + "language": "gu", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Marathi_Female", + "meta_file_train": "SyspinSpeakers/Marathi_Female.tsv", + "ignored_speakers": null, + "language": "mr", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Hindi_Female", + "meta_file_train": "SyspinSpeakers/Hindi_Female.tsv", + "ignored_speakers": null, + "language": "hi", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Gujarati_Male", + "meta_file_train": "SyspinSpeakers/Gujarati_Male.tsv", + "ignored_speakers": null, + "language": "gu", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + 
"dataset_name": "", + "path": "model_related/Hindi_Male", + "meta_file_train": "SyspinSpeakers/Hindi_Male.tsv", + "ignored_speakers": null, + "language": "hi", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Magahi_Female", + "meta_file_train": "SyspinSpeakers/Magahi_Female.tsv", + "ignored_speakers": null, + "language": "mag", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/English_Female", + "meta_file_train": "SyspinSpeakers/English_Female.tsv", + "ignored_speakers": null, + "language": "en", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + } + ], + "test_sentences": [ + [ + "\u091c\u093e\u092f\u0915\u0935\u093e\u0921\u0940 \u0927\u0930\u0923\u093e\u0924\u0942\u0928 \u0924\u092c\u094d\u092c\u0932 \u0905\u0921\u0940\u091a \u0924\u0947 \u0924\u0940\u0928 \u0932\u093e\u0916 \u0939\u0947\u0915\u094d\u091f\u0930 \u0936\u0947\u0924\u0940\u091a\u094d\u092f\u093e \u0938\u093f\u0902\u091a\u0928\u093e\u0938\u093e\u0920\u0940 \u092a\u093e\u0923\u0940 \u0938\u094b\u0921\u0932\u0902 \u091c\u093e\u0924\u0902", + "Marathi_Male", + null, + "mr" + ], + [ + "\u091c\u093e\u092f\u0915\u0935\u093e\u0921\u0940 \u0927\u0930\u0923\u093e\u0924\u0942\u0928 \u0924\u092c\u094d\u092c\u0932 \u0905\u0921\u0940\u091a \u0924\u0947 \u0924\u0940\u0928 \u0932\u093e\u0916 \u0939\u0947\u0915\u094d\u091f\u0930 \u0936\u0947\u0924\u0940\u091a\u094d\u092f\u093e \u0938\u093f\u0902\u091a\u0928\u093e\u0938\u093e\u0920\u0940 \u092a\u093e\u0923\u0940 \u0938\u094b\u0921\u0932\u0902 \u091c\u093e\u0924\u0902", + "Marathi_Female", + null, + "mr" + ], + [ + "\u0915\u0941\u091b \u0938\u092e\u092f \u092c\u093e\u0926 \u0935\u0947 \u0905\u0938\u0939\u093e\u092c\u0947 \u0938\u0941\u095e\u094d\u095e\u093e\u0939 \u0915\u0947 \u0928\u093e\u092e \u0938\u0947 
\u092a\u094d\u0930\u0938\u093f\u0926\u094d\u0927 \u0939\u094b \u0917\u090f\u0964", + "Hindi_Male", + null, + "hi" + ], + [ + "\u0915\u0941\u091b \u0938\u092e\u092f \u092c\u093e\u0926 \u0935\u0947 \u0905\u0938\u0939\u093e\u092c\u0947 \u0938\u0941\u095e\u094d\u095e\u093e\u0939 \u0915\u0947 \u0928\u093e\u092e \u0938\u0947 \u092a\u094d\u0930\u0938\u093f\u0926\u094d\u0927 \u0939\u094b \u0917\u090f\u0964", + "Hindi_Female", + null, + "hi" + ], + [ + "\u0c35\u0c21\u0c4d\u0c30\u0c02\u0c17\u0c3f, \u0c15\u0c4d\u0c37\u0c41\u0c30\u0c15 \u0c35\u0c43\u0c24\u0c4d\u0c24\u0c41\u0c32\u0c32\u0c4b \u0c2a\u0c46\u0c1f\u0c4d\u0c1f\u0c41\u0c2c\u0c21\u0c3f \u0c2a\u0c4d\u0c30\u0c27\u0c3e\u0c28\u0c2e\u0c48 \u0c07\u0c24\u0c30\u0c41\u0c32\u0c41 \u0c15\u0c42\u0c21\u0c3e \u0c08 \u0c35\u0c43\u0c24\u0c4d\u0c24\u0c3f\u0c32\u0c4b \u0c2a\u0c4d\u0c30\u0c35\u0c47\u0c36\u0c3f\u0c02\u0c1a\u0c3f \u0c35\u0c4d\u0c2f\u0c3e\u0c2a\u0c3e\u0c30\u0c02\u0c17\u0c3e \u0c2e\u0c3e\u0c30\u0c4d\u0c1a\u0c47\u0c38\u0c3e\u0c30", + "Telugu_Female", + null, + "te" + ], + [ + "\u0c35\u0c21\u0c4d\u0c30\u0c02\u0c17\u0c3f, \u0c15\u0c4d\u0c37\u0c41\u0c30\u0c15 \u0c35\u0c43\u0c24\u0c4d\u0c24\u0c41\u0c32\u0c32\u0c4b \u0c2a\u0c46\u0c1f\u0c4d\u0c1f\u0c41\u0c2c\u0c21\u0c3f \u0c2a\u0c4d\u0c30\u0c27\u0c3e\u0c28\u0c2e\u0c48 \u0c07\u0c24\u0c30\u0c41\u0c32\u0c41 \u0c15\u0c42\u0c21\u0c3e \u0c08 \u0c35\u0c43\u0c24\u0c4d\u0c24\u0c3f\u0c32\u0c4b \u0c2a\u0c4d\u0c30\u0c35\u0c47\u0c36\u0c3f\u0c02\u0c1a\u0c3f \u0c35\u0c4d\u0c2f\u0c3e\u0c2a\u0c3e\u0c30\u0c02\u0c17\u0c3e \u0c2e\u0c3e\u0c30\u0c4d\u0c1a\u0c47\u0c38\u0c3e\u0c30", + "Telugu_Male", + null, + "te" + ], + [ + "\u0915\u0932\u0915\u0924\u094d\u0924\u093e \u091c\u093e\u092f \u0916\u093e\u0924\u093f\u0930 \u092a\u0941\u0937\u094d\u092a\u093e \u0939 \u0911\u0928\u0932\u093e\u0907\u0928 \u091f\u093f\u0915\u093f\u091f \u0915\u0930\u0935\u093e\u092f \u0930\u093f\u0939\u093f\u0938 \u0939\u0935\u092f", + "Chhattisgarhi_Female", + null, + "hne" + ], + [ + "\u0915\u0932\u0915\u0924\u094d\u0924\u093e 
\u091c\u093e\u092f \u0916\u093e\u0924\u093f\u0930 \u092a\u0941\u0937\u094d\u092a\u093e \u0939 \u0911\u0928\u0932\u093e\u0907\u0928 \u091f\u093f\u0915\u093f\u091f \u0915\u0930\u0935\u093e\u092f \u0930\u093f\u0939\u093f\u0938 \u0939\u0935\u092f", + "Chhattisgarhi_Male", + null, + "hne" + ], + [ + "\u09a8\u09bf\u09ae\u09cd\u09a8\u09cb\u0995\u09cd\u09a4 \u09b8\u09be\u09b0\u09a3\u09bf \u0985\u09ac\u09b2\u09ae\u09cd\u09ac\u09a8\u09c7 \u09ad\u09be\u09b0\u09a4\u09c0\u09df \u09ac\u09cd\u09af\u09ac\u09b8\u09cd\u09a5\u09be\u09df \u099c\u09c0\u09ac\u09bf\u0995\u09be\u09b0 \u0995\u09be\u09a0\u09be\u09ae\u09cb\u09b0 \u098f\u0995\u099f\u09bf \u09aa\u09be\u0987 \u099a\u09bf\u09a4\u09cd\u09b0 \u09a4\u09c8\u09b0\u09bf \u0995\u09b0\u09cb \u0995\u09c3\u09b7\u09bf \u09ad\u09b0\u09cd\u09a4\u09c1\u0995\u09bf\u09b0 \u09aa\u0995\u09cd\u09b7\u09c7 \u098f\u09ac\u0982 \u09ac\u09bf\u09aa\u0995\u09cd\u09b7\u09c7\u09b0 \u09af\u09c1\u0995\u09cd\u09a4\u09bf\u0997\u09c1\u09b2\u09cb \u09aa\u09b0\u09cd\u09af\u09be\u09b2\u09cb\u099a\u09a8\u09be \u0995\u09b0\u09cb\u0964", + "Bengali_Male", + null, + "bn" + ], + [ + "\u09a8\u09bf\u09ae\u09cd\u09a8\u09cb\u0995\u09cd\u09a4 \u09b8\u09be\u09b0\u09a3\u09bf \u0985\u09ac\u09b2\u09ae\u09cd\u09ac\u09a8\u09c7 \u09ad\u09be\u09b0\u09a4\u09c0\u09df \u09ac\u09cd\u09af\u09ac\u09b8\u09cd\u09a5\u09be\u09df \u099c\u09c0\u09ac\u09bf\u0995\u09be\u09b0 \u0995\u09be\u09a0\u09be\u09ae\u09cb\u09b0 \u098f\u0995\u099f\u09bf \u09aa\u09be\u0987 \u099a\u09bf\u09a4\u09cd\u09b0 \u09a4\u09c8\u09b0\u09bf \u0995\u09b0\u09cb \u0995\u09c3\u09b7\u09bf \u09ad\u09b0\u09cd\u09a4\u09c1\u0995\u09bf\u09b0 \u09aa\u0995\u09cd\u09b7\u09c7 \u098f\u09ac\u0982 \u09ac\u09bf\u09aa\u0995\u09cd\u09b7\u09c7\u09b0 \u09af\u09c1\u0995\u09cd\u09a4\u09bf\u0997\u09c1\u09b2\u09cb \u09aa\u09b0\u09cd\u09af\u09be\u09b2\u09cb\u099a\u09a8\u09be \u0995\u09b0\u09cb\u0964", + "Bengali_Female", + null, + "bn" + ], + [ + "\u0cb9\u0cb8\u0ccd\u0ca6\u0cc7\u0cb5\u0ccd \u0ca8\u0ca6\u0cbf, \u0cb0\u0cbf\u0cb9\u0c82\u0ca1\u0ccd 
\u0ca8\u0ca6\u0cbf \u0cae\u0ca4\u0ccd\u0ca4\u0cc1 \u0c95\u0ca8\u0ccd\u0cb9\u0cb0\u0ccd \u0ca8\u0ca6\u0cbf\u0c97\u0cb3\u0cc1 \u0cb8\u0cc1\u0cb0\u0ccd\u0c97\u0cc1\u0c9c\u0cbe\u0ca6 \u0cae\u0cc1\u0c96\u0c9c \u0cad\u0cc2\u0cae\u0cbf\u0caf\u0cb2\u0ccd\u0cb2\u0cbf \u0cb9\u0cb0\u0cbf\u0caf\u0cc1\u0ca4\u0ccd\u0ca4\u0cb5\u0cc6.", + "Kannada_Female", + null, + "kn" + ], + [ + "\u0cb9\u0cb8\u0ccd\u0ca6\u0cc7\u0cb5\u0ccd \u0ca8\u0ca6\u0cbf, \u0cb0\u0cbf\u0cb9\u0c82\u0ca1\u0ccd \u0ca8\u0ca6\u0cbf \u0cae\u0ca4\u0ccd\u0ca4\u0cc1 \u0c95\u0ca8\u0ccd\u0cb9\u0cb0\u0ccd \u0ca8\u0ca6\u0cbf\u0c97\u0cb3\u0cc1 \u0cb8\u0cc1\u0cb0\u0ccd\u0c97\u0cc1\u0c9c\u0cbe\u0ca6 \u0cae\u0cc1\u0c96\u0c9c \u0cad\u0cc2\u0cae\u0cbf\u0caf\u0cb2\u0ccd\u0cb2\u0cbf \u0cb9\u0cb0\u0cbf\u0caf\u0cc1\u0ca4\u0ccd\u0ca4\u0cb5\u0cc6.", + "Kannada_Male", + null, + "kn" + ], + [ + "the russian leader added that the united states and russia have a common interest in preventing nuclear proliferation, in north korea and elsewhere", + "English_Male", + null, + "en" + ], + [ + "the russian leader added that the united states and russia have a common interest in preventing nuclear proliferation, in north korea and elsewhere", + "English_Female", + null, + "en" + ], + [ + "\u0915\u093e\u0928\u0942\u0928 \u0915\u0947 \u0936\u093e\u0938\u0928 \u0915\u0947 \u0938\u0916\u094d\u0924 \u0905\u0928\u0941\u092a\u093e\u0932\u0928 \u0932\u094b\u0915\u0924\u0902\u0924\u094d\u0930 \u0915\u0947 \u092a\u094d\u0930\u092e\u0941\u0916 \u0906\u0932\u094b\u091a\u0928\u093e \u092e\u0947\u0902 \u0938\u0947 \u090f\u0917\u094b \u0939\u0908", + "Magahi_Male", + null, + "mag" + ], + [ + "\u0915\u093e\u0928\u0942\u0928 \u0915\u0947 \u0936\u093e\u0938\u0928 \u0915\u0947 \u0938\u0916\u094d\u0924 \u0905\u0928\u0941\u092a\u093e\u0932\u0928 \u0932\u094b\u0915\u0924\u0902\u0924\u094d\u0930 \u0915\u0947 \u092a\u094d\u0930\u092e\u0941\u0916 \u0906\u0932\u094b\u091a\u0928\u093e \u092e\u0947\u0902 \u0938\u0947 \u090f\u0917\u094b \u0939\u0908", + "Magahi_Female", + 
null, + "mag" + ], + [ + "\u092a\u093e\u0915\u0936\u093e\u0932\u093e \u0938\u0902\u092c\u0902\u0927\u0940 \u0915\u0932\u093e \u092e\u0947\u0902 \u092d\u094b\u091c\u0928 \u092a\u0915\u093e\u092c\u0948 \u0938\u093d \u0932\u092f \u0915\u093d \u0913\u0915\u0930 \u0915\u0941\u0936\u0932 \u092a\u094d\u0930\u092c\u0902\u0927\u0928 \u0914\u0930 \u092d\u093e\u0928\u0938 \u0918\u0930\u093d\u0915 \u0909\u091a\u093f\u0924 \u0930\u0916 \u0930\u0916\u093e\u0935 \u0938\u0947\u0939\u094b \u0936\u093e\u092e\u093f\u0932 \u091b\u0948\u0915", + "Maithili_Female", + null, + "mai" + ], + [ + "\u092a\u093e\u0915\u0936\u093e\u0932\u093e \u0938\u0902\u092c\u0902\u0927\u0940 \u0915\u0932\u093e \u092e\u0947\u0902 \u092d\u094b\u091c\u0928 \u092a\u0915\u093e\u092c\u0948 \u0938\u093d \u0932\u092f \u0915\u093d \u0913\u0915\u0930 \u0915\u0941\u0936\u0932 \u092a\u094d\u0930\u092c\u0902\u0927\u0928 \u0914\u0930 \u092d\u093e\u0928\u0938 \u0918\u0930\u093d\u0915 \u0909\u091a\u093f\u0924 \u0930\u0916 \u0930\u0916\u093e\u0935 \u0938\u0947\u0939\u094b \u0936\u093e\u092e\u093f\u0932 \u091b\u0948\u0915", + "Maithili_Male", + null, + "mai" + ], + [ + "\u0aa4\u0acd\u0aaf\u0abe\u0ab0\u0aac\u0abe\u0aa6 \u0aaa\u0abe\u0a9f\u0ac0\u0aa6\u0abe\u0ab0\u0acb \u0aa6\u0acd\u0ab5\u0abe\u0ab0\u0abe \u0a85\u0aa8\u0ac7\u0a95 \u0aa8\u0abe\u0aa8\u0abe\u0aae\u0acb\u0a9f\u0abe \u0a95\u0abe\u0ab0\u0acd\u0aaf\u0a95\u0acd\u0ab0\u0aae\u0acb \u0aaf\u0acb\u0a9c\u0ab5\u0abe\u0aae\u0abe\u0a82 \u0a86\u0ab5\u0aa4\u0abe \u0ab9\u0aa4\u0abe \u0a9c\u0acb\u0a95\u0ac7 \u0a8f\u0a95 \u0aaa\u0aa3 \u0a95\u0abe\u0ab0\u0acd\u0aaf\u0a95\u0acd\u0ab0\u0aae\u0aa8\u0ac7 \u0aa4\u0a82\u0aa4\u0acd\u0ab0 \u0aa6\u0acd\u0ab5\u0abe\u0ab0\u0abe \u0aae\u0a82\u0a9c\u0ac2\u0ab0\u0ac0 \u0a86\u0aaa\u0ab5\u0abe\u0aae\u0abe\u0a82 \u0a86\u0ab5\u0aa4\u0ac0 \u0aa8\u0ab9\u0acb\u0aa4", + "Gujarati_Female", + null, + "gu" + ], + [ + "\u0aa4\u0acd\u0aaf\u0abe\u0ab0\u0aac\u0abe\u0aa6 \u0aaa\u0abe\u0a9f\u0ac0\u0aa6\u0abe\u0ab0\u0acb \u0aa6\u0acd\u0ab5\u0abe\u0ab0\u0abe 
\u0a85\u0aa8\u0ac7\u0a95 \u0aa8\u0abe\u0aa8\u0abe\u0aae\u0acb\u0a9f\u0abe \u0a95\u0abe\u0ab0\u0acd\u0aaf\u0a95\u0acd\u0ab0\u0aae\u0acb \u0aaf\u0acb\u0a9c\u0ab5\u0abe\u0aae\u0abe\u0a82 \u0a86\u0ab5\u0aa4\u0abe \u0ab9\u0aa4\u0abe \u0a9c\u0acb\u0a95\u0ac7 \u0a8f\u0a95 \u0aaa\u0aa3 \u0a95\u0abe\u0ab0\u0acd\u0aaf\u0a95\u0acd\u0ab0\u0aae\u0aa8\u0ac7 \u0aa4\u0a82\u0aa4\u0acd\u0ab0 \u0aa6\u0acd\u0ab5\u0abe\u0ab0\u0abe \u0aae\u0a82\u0a9c\u0ac2\u0ab0\u0ac0 \u0a86\u0aaa\u0ab5\u0abe\u0aae\u0abe\u0a82 \u0a86\u0ab5\u0aa4\u0ac0 \u0aa8\u0ab9\u0acb\u0aa4", + "Gujarati_Male", + null, + "gu" + ], + [ + "\u090f\u0928\u094d\u091f\u094d\u0930\u093e\u092a\u0940 \u0915\u0902\u092a\u094d\u092f\u0942\u091f\u093f\u0902\u0917 \u092e\u0947\u0902 \u090f\u0928\u094d\u091f\u094d\u0930\u094b\u092a\u0940 \u090a \u0911\u092a\u0930\u0947\u091f\u093f\u0902\u0917 \u0938\u093f\u0938\u094d\u091f\u092e \u0939 \u091c\u0947 \u092a\u0947 \u0938\u0930\u093e \u0915\u094d\u0930\u093f\u092a\u094d\u091f\u094b\u0917\u094d\u0930\u093e\u092b\u093f\u0915 \u092b\u0902\u0915\u094d\u0936\u0928 \u0938\u092c \u0915\u093e\u092e \u0915\u0930\u0947 \u0932\u0947\u0902", + "Bhojpuri_Male", + null, + "bho" + ], + [ + "\u090f\u0928\u094d\u091f\u094d\u0930\u093e\u092a\u0940 \u0915\u0902\u092a\u094d\u092f\u0942\u091f\u093f\u0902\u0917 \u092e\u0947\u0902 \u090f\u0928\u094d\u091f\u094d\u0930\u094b\u092a\u0940 \u090a \u0911\u092a\u0930\u0947\u091f\u093f\u0902\u0917 \u0938\u093f\u0938\u094d\u091f\u092e \u0939 \u091c\u0947 \u092a\u0947 \u0938\u0930\u093e \u0915\u094d\u0930\u093f\u092a\u094d\u091f\u094b\u0917\u094d\u0930\u093e\u092b\u093f\u0915 \u092b\u0902\u0915\u094d\u0936\u0928 \u0938\u092c \u0915\u093e\u092e \u0915\u0930\u0947 \u0932\u0947\u0902", + "Bhojpuri_Female", + null, + "bho" + ] + ], + "eval_split_max_size": 256, + "eval_split_size": 0.01, + "use_speaker_weighted_sampler": false, + "speaker_weighted_sampler_alpha": 1.0, + "use_language_weighted_sampler": false, + "language_weighted_sampler_alpha": 1.0, + 
"use_length_weighted_sampler": false, + "length_weighted_sampler_alpha": 1.0, + "model_args": { + "num_chars": 444, + "out_channels": 513, + "spec_segment_size": 32, + "hidden_channels": 192, + "hidden_channels_ffn_text_encoder": 768, + "num_heads_text_encoder": 2, + "num_layers_text_encoder": 10, + "kernel_size_text_encoder": 3, + "dropout_p_text_encoder": 0.1, + "dropout_p_duration_predictor": 0.5, + "kernel_size_posterior_encoder": 5, + "dilation_rate_posterior_encoder": 1, + "num_layers_posterior_encoder": 16, + "kernel_size_flow": 5, + "dilation_rate_flow": 1, + "num_layers_flow": 4, + "resblock_type_decoder": "2", + "resblock_kernel_sizes_decoder": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes_decoder": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates_decoder": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel_decoder": 512, + "upsample_kernel_sizes_decoder": [ + 16, + 16, + 4, + 4 + ], + "periods_multi_period_discriminator": [ + 2, + 3, + 5, + 7, + 11 + ], + "use_sdp": true, + "noise_scale": 1.0, + "inference_noise_scale": 0.667, + "length_scale": 1.0, + "noise_scale_dp": 1.0, + "inference_noise_scale_dp": 1.0, + "max_inference_len": null, + "init_discriminator": true, + "use_spectral_norm_disriminator": false, + "use_speaker_embedding": false, + "num_speakers": 0, + "speakers_file": "/app/models/speakers.pth", + "d_vector_file": [ + "model_related/Bengali_Female/speakers.pth", + "model_related/Chhattisgarhi_Male/speakers.pth", + "model_related/Magahi_Male/speakers.pth", + "model_related/Marathi_Male/speakers.pth", + "model_related/Maithili_Female/speakers.pth", + "model_related/English_Male/speakers.pth", + "model_related/Chhattisgarhi_Female/speakers.pth", + "model_related/Kannada_Male/speakers.pth", + "model_related/Bhojpuri_Female/speakers.pth", + "model_related/Kannada_Female/speakers.pth", + "model_related/Maithili_Male/speakers.pth", + "model_related/Telugu_Male/speakers.pth", + 
"model_related/Telugu_Female/speakers.pth", + "model_related/Bengali_Male/speakers.pth", + "model_related/Bhojpuri_Male/speakers.pth", + "model_related/Gujarati_Female/speakers.pth", + "model_related/Marathi_Female/speakers.pth", + "model_related/Hindi_Female/speakers.pth", + "model_related/Gujarati_Male/speakers.pth", + "model_related/Hindi_Male/speakers.pth", + "model_related/Magahi_Female/speakers.pth", + "model_related/English_Female/speakers.pth" + ], + "speaker_embedding_channels": 256, + "use_d_vector_file": true, + "d_vector_dim": 512, + "detach_dp_input": true, + "use_language_embedding": true, + "embedded_language_dim": 4, + "num_languages": 11, + "language_ids_file": "/app/models/language_ids.json", + "use_speaker_encoder_as_loss": false, + "speaker_encoder_config_path": "https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/config_se.json", + "speaker_encoder_model_path": "https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/model_se.pth.tar", + "condition_dp_on_speaker": true, + "freeze_encoder": false, + "freeze_DP": false, + "freeze_PE": false, + "freeze_flow_decoder": false, + "freeze_waveform_decoder": false, + "encoder_sample_rate": null, + "interpolate_z": true, + "reinit_DP": false, + "reinit_text_encoder": false + }, + "lr_gen": 0.0002, + "lr_disc": 0.0002, + "lr_scheduler_gen": "ExponentialLR", + "lr_scheduler_gen_params": { + "gamma": 0.999875, + "last_epoch": -1 + }, + "lr_scheduler_disc": "ExponentialLR", + "lr_scheduler_disc_params": { + "gamma": 0.999875, + "last_epoch": -1 + }, + "kl_loss_alpha": 1.0, + "disc_loss_alpha": 1.0, + "gen_loss_alpha": 1.0, + "feat_loss_alpha": 1.0, + "mel_loss_alpha": 45.0, + "dur_loss_alpha": 1.0, + "speaker_encoder_loss_alpha": 9.0, + "return_wav": true, + "use_weighted_sampler": true, + "weighted_sampler_attrs": {}, + "weighted_sampler_multipliers": {}, + "r": 1, + "num_speakers": 0, + "use_speaker_embedding": false, + "speakers_file": "/app/models/speakers.pth", + 
"speaker_embedding_channels": 256, + "language_ids_file": "/app/models/language_ids.json", + "use_language_embedding": true, + "use_d_vector_file": true, + "d_vector_file": [ + "model_related/Bengali_Female/speakers.pth", + "model_related/Chhattisgarhi_Male/speakers.pth", + "model_related/Magahi_Male/speakers.pth", + "model_related/Marathi_Male/speakers.pth", + "model_related/Maithili_Female/speakers.pth", + "model_related/English_Male/speakers.pth", + "model_related/Chhattisgarhi_Female/speakers.pth", + "model_related/Kannada_Male/speakers.pth", + "model_related/Bhojpuri_Female/speakers.pth", + "model_related/Kannada_Female/speakers.pth", + "model_related/Maithili_Male/speakers.pth", + "model_related/Telugu_Male/speakers.pth", + "model_related/Telugu_Female/speakers.pth", + "model_related/Bengali_Male/speakers.pth", + "model_related/Bhojpuri_Male/speakers.pth", + "model_related/Gujarati_Female/speakers.pth", + "model_related/Marathi_Female/speakers.pth", + "model_related/Hindi_Female/speakers.pth", + "model_related/Gujarati_Male/speakers.pth", + "model_related/Hindi_Male/speakers.pth", + "model_related/Magahi_Female/speakers.pth", + "model_related/English_Female/speakers.pth" + ], + "d_vector_dim": 512 +} \ No newline at end of file diff --git a/models/language_ids.json b/models/language_ids.json new file mode 100644 index 0000000000000000000000000000000000000000..0508531c0f7c0eb580fddf43f74151156927f1af --- /dev/null +++ b/models/language_ids.json @@ -0,0 +1,13 @@ +{ + "bho": 0, + "bn": 1, + "en": 2, + "gu": 3, + "hi": 4, + "hne": 5, + "kn": 6, + "mag": 7, + "mai": 8, + "mr": 9, + "te": 10 +} \ No newline at end of file diff --git a/models/speakers.pth b/models/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..c5ec4596175b333e0dd9e00b3395225b4a0e89e7 --- /dev/null +++ b/models/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cbdf9efca833df8eb54f7366f8a6a21097b28bea6b1490b11277ea41716a6cc +size 
1312