diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..383fe4dab926a398560bd5e1083f9b73d2d15ded Binary files /dev/null and b/.DS_Store differ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..c3dad975c4f9dfbbf8c63d6058e378389d4e7cc4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +# Ignore generated audio + specs +outputs/ +*.wav +Voicetech API Specification.pdf diff --git a/Procfile b/Procfile new file mode 100644 index 0000000000000000000000000000000000000000..464322058406a4605d0adaa32b99b155ad4ec065 --- /dev/null +++ b/Procfile @@ -0,0 +1 @@ +web: python download_models.py && uvicorn src.api:app --host 0.0.0.0 --port ${PORT:-8000} diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0ba4dc42a9a89fc33fb1d84eaeac0213100146a4 --- /dev/null +++ b/README.md @@ -0,0 +1,246 @@ +# Voice Tech for All - Multi-lingual TTS System + +A lightweight, multi-lingual Text-to-Speech system supporting **11 Indian languages** with **style/prosody control** and REST API. + +## 🎯 Hackathon: Voice Tech for All + +Built for the healthcare assistant use case - helping pregnant mothers in low-income communities access healthcare information in their native languages. + +## ✨ Features + +- **11 Indian Languages**: Hindi, Bengali, Marathi, Telugu, Kannada, Bhojpuri, Chhattisgarhi, Maithili, Magahi, English, **Gujarati** +- **21 Voice Options**: Male & Female voices for each language +- **Style/Prosody Control**: 9 presets (happy, sad, calm, excited, etc.) +- **Pitch & Speed Control**: Fine-tune voice characteristics +- **Lightweight**: VITS-based models optimized for fast inference +- **REST API**: FastAPI-powered server with OpenAPI docs +- **Text Normalization**: Handles numbers, punctuation for Indian scripts + +## 🚀 Quick Start + +### 1. 
Installation + +```bash +# Clone and navigate +git clone https://github.com/harshil748/VoiceAPI +cd VoiceAPI + +# Create virtual environment +python3 -m venv tts +source tts/bin/activate + +# Install dependencies +pip install -r requirements.txt +``` + +### 2. Download Models + +```bash +# Download Hindi models (male + female) +python -m src.cli download --lang hi + +# Or download a specific voice +python -m src.cli download --voice hi_male + +# Gujarati uses Facebook MMS (auto-downloads on first use) +``` + +### 3. Synthesize Speech + +```bash +# Basic synthesis +python -m src.cli synthesize --text "नमस्ते दोस्तों" --voice hi_male --output hello.wav + +# Play the audio (macOS) +afplay hello.wav +``` + +### 4. Start API Server + +```bash +python -m src.cli serve --port 8000 +``` + +Visit `http://localhost:8000/docs` for interactive API documentation. + +## 🎨 Style Presets + +| Preset | Speed | Pitch | Energy | Best For | +| --------- | ----- | ----- | ------ | ----------------------- | +| `default` | 1.0 | 1.0 | 1.0 | Normal speech | +| `slow` | 0.75 | 1.0 | 1.0 | Elderly users, clarity | +| `fast` | 1.25 | 1.0 | 1.0 | Quick information | +| `soft` | 0.9 | 0.95 | 0.7 | Calming content | +| `loud` | 1.0 | 1.05 | 1.3 | Alerts, emphasis | +| `happy` | 1.1 | 1.1 | 1.2 | Positive messages | +| `sad` | 0.85 | 0.9 | 0.8 | Empathetic responses | +| `calm` | 0.9 | 0.95 | 0.85 | **Healthcare guidance** | +| `excited` | 1.2 | 1.15 | 1.3 | Celebrations | + +## 📡 API Usage + +### 🏆 Hackathon API - GET /Get_Inference + +**This is the official hackathon endpoint** that follows the Voice Tech for All specification: + +```python +import requests + +base_url = 'http://localhost:8000/Get_Inference' +WavPath = 'path/to/reference.wav' + +params = { + 'text': 'ಮಾದರಿಯು ಸರಿಯಾಗಿ ಕಾರ್ಯನಿರ್ವಹಿಸುತ್ತಿದೆಯೇ ಎಂದು ಖಚಿತಪಡಿಸಿಕೊಳ್ಳಲು ಬಳಸಲಾಗುವ ಪರೀಕ್ಷಾ ವಾಕ್ಯ ಇದು.', + 'lang': 'kannada', +} + +with open(WavPath, "rb") as AudioFile: + response = requests.get(base_url, params=params, files={'speaker_wav': 
AudioFile}) + +if response.status_code == 200: + with open('output.wav', 'wb') as f: + f.write(response.content) + print("Audio saved as 'output.wav'") +``` + +**Query Parameters:** + +| Parameter | Type | Required | Description | +| ------------- | ------ | --------- | ---------------------------------------------------------------------------------------------------------------- | +| `text` | string | Mandatory | Input text to convert to speech. For English, text must be lowercase. | +| `lang` | string | Mandatory | Language: bhojpuri, bengali, english, gujarati, hindi, chhattisgarhi, kannada, magahi, maithili, marathi, telugu | +| `speaker_wav` | file | Mandatory | Reference WAV file for speaker voice | + +**Response:** `200 OK` with `Content-Type: audio/wav` + +--- + +### Synthesize with Style (POST) + +```bash +curl -X POST "http://localhost:8000/synthesize" \ + -H "Content-Type: application/json" \ + -d '{ + "text": "आपका दिन शुभ हो", + "voice": "hi_female", + "style": "happy", + "speed": 1.0, + "pitch": 1.0 + }' \ + --output speech.wav +``` + +### Gujarati Synthesis + +```bash +curl -X POST "http://localhost:8000/synthesize" \ + -H "Content-Type: application/json" \ + -d '{"text": "નમસ્તે, કેમ છો?", "voice": "gu_mms", "style": "calm"}' \ + --output gujarati.wav +``` + +### List Style Presets + +```bash +curl http://localhost:8000/styles +``` + +## 🎤 Available Voices + +| Language | Code | Male | Female | Notes | +| ------------- | ---- | ----------- | ------------- | ------------ | +| Hindi | hi | ✅ hi_male | ✅ hi_female | SYSPIN | +| Bengali | bn | ✅ bn_male | ✅ bn_female | SYSPIN | +| Marathi | mr | ✅ mr_male | ✅ mr_female | SYSPIN | +| Telugu | te | ✅ te_male | ✅ te_female | SYSPIN | +| Kannada | kn | ✅ kn_male | ✅ kn_female | SYSPIN | +| Bhojpuri | bho | ✅ bho_male | ✅ bho_female | SYSPIN | +| Chhattisgarhi | hne | ✅ hne_male | ✅ hne_female | SYSPIN | +| Maithili | mai | ✅ mai_male | ✅ mai_female | SYSPIN | +| Magahi | mag | ✅ mag_male | ✅ mag_female | 
SYSPIN | +| English | en | ✅ en_male | ✅ en_female | SYSPIN | +| **Gujarati** | gu | ✅ gu_mms | - | Facebook MMS | + +## 🐍 Python API + +```python +from src.engine import TTSEngine + +# Initialize engine +engine = TTSEngine(device="auto") + +# Basic synthesis +output = engine.synthesize( + text="गर्भावस्था में स्वस्थ आहार महत्वपूर्ण है", + voice="hi_female" +) + +# With style control +output = engine.synthesize( + text="आपका दिन शुभ हो", + voice="hi_male", + style="happy", # Use preset + pitch=1.1, # Or manual control + speed=1.0, + energy=1.2 +) + +# Gujarati +output = engine.synthesize( + text="સ્વસ્થ રહો, ખુશ રહો", + voice="gu_mms", + style="calm" +) + +# Save to file +engine.synthesize_to_file( + text="નમસ્તે", + output_path="hello.wav", + voice="gu_mms", + style="calm" +) +``` + +## 📁 Project Structure + +```text +VoiceAPI/ +├── src/ +│ ├── config.py # Language/voice/style configurations +│ ├── tokenizer.py # Text tokenization & normalization +│ ├── engine.py # Main TTS engine with style processor +│ ├── downloader.py # HuggingFace model downloader +│ ├── api.py # FastAPI REST server +│ └── cli.py # Command-line interface +├── models/ # Downloaded models +├── dataset/ # SPICOR dataset (for fine-tuning) +├── technical_report.md +├── requirements.txt +└── README.md +``` + +## 📊 Performance + +| Metric | Value | +| -------------- | ------------------------------- | +| Languages | 11 | +| Voice Variants | 21 | +| Style Presets | 9 | +| Model Size | ~300MB (VITS), ~145MB (MMS) | +| Inference Time | ~0.3s (M2 Mac, CPU) | +| Sample Rate | 22050 Hz (VITS), 16000 Hz (MMS) | + +## 🙏 Credits + +- **SYSPIN Models**: [IISc Bangalore](https://huggingface.co/SYSPIN) +- **MMS Models**: [Facebook Research](https://huggingface.co/facebook/mms-tts-guj) +- **Architecture**: VITS (Coqui AI) +- **Dataset**: SPICOR TTS Project, IISc SPIRE Lab + +## 📜 License + +CC BY 4.0 (SYSPIN), CC BY-NC 4.0 (MMS) + +--- + +Built with ❤️ for **Voice Tech for All Hackathon** diff --git 
a/download_models.py b/download_models.py new file mode 100644 index 0000000000000000000000000000000000000000..e80e448751f37f7ce10ff40c94de192188461998 --- /dev/null +++ b/download_models.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +""" +Download all required TTS models from HuggingFace +Run this on deployment to fetch models before starting the server +""" + +import os +import sys + +# Add src to path +sys.path.insert(0, os.path.dirname(__file__)) + +from src.downloader import ModelDownloader +from src.config import LANGUAGE_CONFIGS + + +def main(): + print("=" * 60) + print("Downloading TTS Models from HuggingFace...") + print("=" * 60) + + downloader = ModelDownloader() + + # Download all configured models + voices = list(LANGUAGE_CONFIGS.keys()) + print(f"\nModels to download: {len(voices)}") + for v in voices: + print(f" - {v}") + + print("\n") + + success = 0 + failed = [] + + for voice in voices: + try: + print(f"Downloading {voice}...") + downloader.download_model(voice) + success += 1 + print(f" ✓ {voice} downloaded\n") + except Exception as e: + print(f" ✗ {voice} failed: {e}\n") + failed.append(voice) + + print("=" * 60) + print(f"Download complete: {success}/{len(voices)} models") + if failed: + print(f"Failed: {', '.join(failed)}") + return 1 + print("=" * 60) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/models/.DS_Store b/models/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..dd1a176ef704ae26c7296ecc13817dd4a541fde2 Binary files /dev/null and b/models/.DS_Store differ diff --git a/models/bho_female/.gitattributes b/models/bho_female/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/bho_female/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs 
-text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/bho_female/README.md b/models/bho_female/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b187bb7e7d837a367ccd0862441947ad412c77f7 --- /dev/null +++ b/models/bho_female/README.md @@ -0,0 +1,3 @@ +--- +license: cc-by-4.0 +--- diff --git a/models/bho_female/checkpoint_340000.pth b/models/bho_female/checkpoint_340000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8948e3dc7253f6b609a63abfdf085e07330a740d --- /dev/null +++ b/models/bho_female/checkpoint_340000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:2182258024b05f739bf79002cb52cfa863605d54ee2eee5b4a5cd1fbaac797ab +size 997764677 diff --git a/models/bho_female/config.json b/models/bho_female/config.json new file mode 100644 index 0000000000000000000000000000000000000000..eef8cc870e9c27cf692af980a0a66cc220db8ca4 --- /dev/null +++ b/models/bho_female/config.json @@ -0,0 +1,257 @@ +{ + "output_path": ".", + "logger_uri": null, + "run_name": "vits_Bhojpuri_Female_30hrs", + "project_name": null, + "run_description": "\ud83d\udc38Coqui trainer run.", + "print_step": 25, + "plot_step": 100, + "model_param_stats": false, + "wandb_entity": null, + "dashboard_logger": "tensorboard", + "log_model_step": null, + "save_step": 20000, + "save_n_checkpoints": 1000, + "save_checkpoints": true, + "save_all_best": false, + "save_best_after": 10000, + "target_loss": null, + "print_eval": true, + "test_delay_epochs": -1, + "run_eval": true, + "run_eval_steps": null, + "distributed_backend": "nccl", + "distributed_url": "tcp://localhost:54321", + "mixed_precision": true, + "epochs": 1000, + "batch_size": 40, + "eval_batch_size": 16, + "grad_clip": [ + 1000, + 1000 + ], + "scheduler_after_epoch": true, + "lr": 0.001, + "optimizer": "AdamW", + "optimizer_params": { + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "weight_decay": 0.01 + }, + "lr_scheduler": null, + "lr_scheduler_params": {}, + "use_grad_scaler": false, + "cudnn_enable": true, + "cudnn_deterministic": false, + "cudnn_benchmark": false, + "training_seed": 54321, + "model": "vits", + "num_loader_workers": 8, + "num_eval_loader_workers": 4, + "use_noise_augment": false, + "audio": { + "fft_size": 1024, + "sample_rate": 22050, + "win_length": 1024, + "hop_length": 256, + "num_mels": 80, + "mel_fmin": 0, + "mel_fmax": null + }, + "use_phonemes": false, + "phonemizer": null, + "phoneme_language": "en-us", + "compute_input_seq_cache": true, + "text_cleaner": "multilingual_cleaners", + "enable_eos_bos_chars": false, + "test_sentences_file": "", + "phoneme_cache_path": 
"./phoneme_cache", + "characters": { + "characters_class": "TTS.tts.models.vits.VitsCharacters", + "vocab_dict": null, + "pad": "", + "eos": "", + "bos": "", + "blank": "", + "characters": "\u091a.\u0947\u0910\u0925\u092e\u0959\u091d\u0906\u0949?\u092d\u092a \u0939\u0928\u093d\u091f\u0940\u0938\u0935\u091b\u0923\u0921\u091e\u0926\u094b\u0915\u0924\u0948\u0943\u095b\u0941\u095e\u092c\u0908\u094c\u0927\u090b\u093e\u0922\u0907\u093c\u0902\u0937\u0920\u0905\u095c\u0913\u092f,\u093f\u0930\u0914\u0901\u092b\u0909\u0916\u0911\u094d\u0932\u091c\u090f\u090a\u0917\u0936\u095d\u0919\u0918\u0942", + "punctuations": "!\u00a1'(),-.:;\u00bf? ", + "phonemes": null, + "is_unique": true, + "is_sorted": true + }, + "add_blank": true, + "batch_group_size": 5, + "loss_masking": null, + "min_audio_len": 1, + "max_audio_len": Infinity, + "min_text_len": 1, + "max_text_len": Infinity, + "compute_f0": false, + "compute_energy": false, + "compute_linear_spec": true, + "precompute_num_workers": 0, + "start_by_longest": false, + "shuffle": false, + "drop_last": false, + "datasets": [ + { + "formatter": "syspin", + "dataset_name": "", + "path": ".", + "meta_file_train": "../manifests/Bhojpuri_Female/30hrs.tsv", + "ignored_speakers": null, + "language": "", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + } + ], + "test_sentences": [ + [ + "\u090f\u0928\u094d\u091f\u094d\u0930\u093e\u092a\u0940 \u0915\u0902\u092a\u094d\u092f\u0942\u091f\u093f\u0902\u0917 \u092e\u0947\u0902 \u090f\u0928\u094d\u091f\u094d\u0930\u094b\u092a\u0940 \u090a \u0911\u092a\u0930\u0947\u091f\u093f\u0902\u0917 \u0938\u093f\u0938\u094d\u091f\u092e \u0939 \u091c\u0947 \u092a\u0947 \u0938\u0930\u093e \u0915\u094d\u0930\u093f\u092a\u094d\u091f\u094b\u0917\u094d\u0930\u093e\u092b\u093f\u0915 \u092b\u0902\u0915\u094d\u0936\u0928 \u0938\u092c \u0915\u093e\u092e \u0915\u0930\u0947 \u0932\u0947\u0902", + "Bhojpuri_Female", + null, + "bh" + ] + ], + "eval_split_max_size": null, + "eval_split_size": 0.01, + 
"use_speaker_weighted_sampler": false, + "speaker_weighted_sampler_alpha": 1.0, + "use_language_weighted_sampler": false, + "language_weighted_sampler_alpha": 1.0, + "use_length_weighted_sampler": false, + "length_weighted_sampler_alpha": 1.0, + "model_args": { + "num_chars": 85, + "out_channels": 513, + "spec_segment_size": 32, + "hidden_channels": 192, + "hidden_channels_ffn_text_encoder": 768, + "num_heads_text_encoder": 2, + "num_layers_text_encoder": 6, + "kernel_size_text_encoder": 3, + "dropout_p_text_encoder": 0.1, + "dropout_p_duration_predictor": 0.5, + "kernel_size_posterior_encoder": 5, + "dilation_rate_posterior_encoder": 1, + "num_layers_posterior_encoder": 16, + "kernel_size_flow": 5, + "dilation_rate_flow": 1, + "num_layers_flow": 4, + "resblock_type_decoder": "1", + "resblock_kernel_sizes_decoder": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes_decoder": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates_decoder": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel_decoder": 512, + "upsample_kernel_sizes_decoder": [ + 16, + 16, + 4, + 4 + ], + "periods_multi_period_discriminator": [ + 2, + 3, + 5, + 7, + 11 + ], + "use_sdp": true, + "noise_scale": 1.0, + "inference_noise_scale": 0.667, + "length_scale": 1, + "noise_scale_dp": 1.0, + "inference_noise_scale_dp": 1.0, + "max_inference_len": null, + "init_discriminator": true, + "use_spectral_norm_disriminator": false, + "use_speaker_embedding": false, + "num_speakers": 0, + "speakers_file": null, + "d_vector_file": null, + "speaker_embedding_channels": 256, + "use_d_vector_file": false, + "d_vector_dim": 0, + "detach_dp_input": true, + "use_language_embedding": false, + "embedded_language_dim": 4, + "num_languages": 0, + "language_ids_file": null, + "use_speaker_encoder_as_loss": false, + "speaker_encoder_config_path": "", + "speaker_encoder_model_path": "", + "condition_dp_on_speaker": true, + "freeze_encoder": false, + "freeze_DP": false, + "freeze_PE": 
false, + "freeze_flow_decoder": false, + "freeze_waveform_decoder": false, + "encoder_sample_rate": null, + "interpolate_z": true, + "reinit_DP": false, + "reinit_text_encoder": false + }, + "lr_gen": 0.0002, + "lr_disc": 0.0002, + "lr_scheduler_gen": "ExponentialLR", + "lr_scheduler_gen_params": { + "gamma": 0.999875, + "last_epoch": -1 + }, + "lr_scheduler_disc": "ExponentialLR", + "lr_scheduler_disc_params": { + "gamma": 0.999875, + "last_epoch": -1 + }, + "kl_loss_alpha": 1.0, + "disc_loss_alpha": 1.0, + "gen_loss_alpha": 1.0, + "feat_loss_alpha": 1.0, + "mel_loss_alpha": 45.0, + "dur_loss_alpha": 1.0, + "speaker_encoder_loss_alpha": 1.0, + "return_wav": true, + "use_weighted_sampler": false, + "weighted_sampler_attrs": {}, + "weighted_sampler_multipliers": {}, + "r": 1, + "num_speakers": 0, + "use_speaker_embedding": false, + "speakers_file": null, + "speaker_embedding_channels": 256, + "language_ids_file": null, + "use_language_embedding": false, + "use_d_vector_file": false, + "d_vector_file": null, + "d_vector_dim": 0, + "github_branch": "* dev" +} \ No newline at end of file diff --git a/models/bho_male/.gitattributes b/models/bho_male/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/bho_male/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs 
merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/bho_male/README.md b/models/bho_male/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b187bb7e7d837a367ccd0862441947ad412c77f7 --- /dev/null +++ b/models/bho_male/README.md @@ -0,0 +1,3 @@ +--- +license: cc-by-4.0 +--- diff --git a/models/bho_male/checkpoint_200000.pth b/models/bho_male/checkpoint_200000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ab1bfe154a650d7a6a35e4027b3c101ac671ac5b --- /dev/null +++ b/models/bho_male/checkpoint_200000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4fb6ce54092c79ab526d4e9bc70514d7ea7f820b0184ef99e6ad3a7b9b72abc +size 997766981 diff --git a/models/bho_male/config.json b/models/bho_male/config.json new file mode 100644 index 0000000000000000000000000000000000000000..318970a1594beec5431e955b7b2f8c3ef16e4411 --- /dev/null +++ b/models/bho_male/config.json @@ -0,0 +1,257 @@ +{ + "output_path": ".", + "logger_uri": null, + "run_name": "vits_Bhojpuri_Male_30hrs", + "project_name": null, + "run_description": "\ud83d\udc38Coqui trainer 
run.", + "print_step": 25, + "plot_step": 100, + "model_param_stats": false, + "wandb_entity": null, + "dashboard_logger": "tensorboard", + "log_model_step": null, + "save_step": 20000, + "save_n_checkpoints": 1000, + "save_checkpoints": true, + "save_all_best": false, + "save_best_after": 10000, + "target_loss": null, + "print_eval": true, + "test_delay_epochs": -1, + "run_eval": true, + "run_eval_steps": null, + "distributed_backend": "nccl", + "distributed_url": "tcp://localhost:54321", + "mixed_precision": true, + "epochs": 500, + "batch_size": 40, + "eval_batch_size": 16, + "grad_clip": [ + 1000, + 1000 + ], + "scheduler_after_epoch": true, + "lr": 0.001, + "optimizer": "AdamW", + "optimizer_params": { + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "weight_decay": 0.01 + }, + "lr_scheduler": null, + "lr_scheduler_params": {}, + "use_grad_scaler": false, + "cudnn_enable": true, + "cudnn_deterministic": false, + "cudnn_benchmark": false, + "training_seed": 54321, + "model": "vits", + "num_loader_workers": 8, + "num_eval_loader_workers": 4, + "use_noise_augment": false, + "audio": { + "fft_size": 1024, + "sample_rate": 22050, + "win_length": 1024, + "hop_length": 256, + "num_mels": 80, + "mel_fmin": 0, + "mel_fmax": null + }, + "use_phonemes": false, + "phonemizer": null, + "phoneme_language": "en-us", + "compute_input_seq_cache": true, + "text_cleaner": "multilingual_cleaners", + "enable_eos_bos_chars": false, + "test_sentences_file": "", + "phoneme_cache_path": "./phoneme_cache", + "characters": { + "characters_class": "TTS.tts.models.vits.VitsCharacters", + "vocab_dict": null, + "pad": "", + "eos": "", + "bos": "", + "blank": "", + "characters": "\u091a.\u0947\u0910\u0925\u092e\u0959\u091d\u0906\u0949?\u092d 
\u092a\u0939\u0928\u093d\u091f\u0938\u0935\u0940\u091b\u0923\u0921\u091e\u0926\u094b\u0915\u0924\u0948\u0943\u095b\u0941\u095e\u092c\u0908\u0946\u094c\u0927\u090b\u093e\u0922\u0907\u093c\u0902\u0905\u0937\u0920\u095c\u0913\u092f,\u093f\u0930\u0901\u0914\u092b\u0909\u0916\u0911\u094d\u0932\u091c\u090f\u090a\u0917\u0936\u095d\u0919\u0918\u0942", + "punctuations": "!\u00a1'(),-.:;\u00bf? ", + "phonemes": null, + "is_unique": true, + "is_sorted": true + }, + "add_blank": true, + "batch_group_size": 5, + "loss_masking": null, + "min_audio_len": 1, + "max_audio_len": Infinity, + "min_text_len": 1, + "max_text_len": Infinity, + "compute_f0": false, + "compute_energy": false, + "compute_linear_spec": true, + "precompute_num_workers": 0, + "start_by_longest": false, + "shuffle": false, + "drop_last": false, + "datasets": [ + { + "formatter": "syspin", + "dataset_name": "", + "path": ".", + "meta_file_train": "../manifests/Bhojpuri_Male/30hrs.tsv", + "ignored_speakers": null, + "language": "", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + } + ], + "test_sentences": [ + [ + "\u090f\u0928\u094d\u091f\u094d\u0930\u093e\u092a\u0940 \u0915\u0902\u092a\u094d\u092f\u0942\u091f\u093f\u0902\u0917 \u092e\u0947\u0902 \u090f\u0928\u094d\u091f\u094d\u0930\u094b\u092a\u0940 \u090a \u0911\u092a\u0930\u0947\u091f\u093f\u0902\u0917 \u0938\u093f\u0938\u094d\u091f\u092e \u0939 \u091c\u0947 \u092a\u0947 \u0938\u0930\u093e \u0915\u094d\u0930\u093f\u092a\u094d\u091f\u094b\u0917\u094d\u0930\u093e\u092b\u093f\u0915 \u092b\u0902\u0915\u094d\u0936\u0928 \u0938\u092c \u0915\u093e\u092e \u0915\u0930\u0947 \u0932\u0947\u0902", + "Bhojpuri_Male", + null, + "bh" + ] + ], + "eval_split_max_size": null, + "eval_split_size": 0.01, + "use_speaker_weighted_sampler": false, + "speaker_weighted_sampler_alpha": 1.0, + "use_language_weighted_sampler": false, + "language_weighted_sampler_alpha": 1.0, + "use_length_weighted_sampler": false, + "length_weighted_sampler_alpha": 1.0, + 
"model_args": { + "num_chars": 86, + "out_channels": 513, + "spec_segment_size": 32, + "hidden_channels": 192, + "hidden_channels_ffn_text_encoder": 768, + "num_heads_text_encoder": 2, + "num_layers_text_encoder": 6, + "kernel_size_text_encoder": 3, + "dropout_p_text_encoder": 0.1, + "dropout_p_duration_predictor": 0.5, + "kernel_size_posterior_encoder": 5, + "dilation_rate_posterior_encoder": 1, + "num_layers_posterior_encoder": 16, + "kernel_size_flow": 5, + "dilation_rate_flow": 1, + "num_layers_flow": 4, + "resblock_type_decoder": "1", + "resblock_kernel_sizes_decoder": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes_decoder": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates_decoder": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel_decoder": 512, + "upsample_kernel_sizes_decoder": [ + 16, + 16, + 4, + 4 + ], + "periods_multi_period_discriminator": [ + 2, + 3, + 5, + 7, + 11 + ], + "use_sdp": true, + "noise_scale": 1.0, + "inference_noise_scale": 0.667, + "length_scale": 1, + "noise_scale_dp": 1.0, + "inference_noise_scale_dp": 1.0, + "max_inference_len": null, + "init_discriminator": true, + "use_spectral_norm_disriminator": false, + "use_speaker_embedding": false, + "num_speakers": 0, + "speakers_file": null, + "d_vector_file": null, + "speaker_embedding_channels": 256, + "use_d_vector_file": false, + "d_vector_dim": 0, + "detach_dp_input": true, + "use_language_embedding": false, + "embedded_language_dim": 4, + "num_languages": 0, + "language_ids_file": null, + "use_speaker_encoder_as_loss": false, + "speaker_encoder_config_path": "", + "speaker_encoder_model_path": "", + "condition_dp_on_speaker": true, + "freeze_encoder": false, + "freeze_DP": false, + "freeze_PE": false, + "freeze_flow_decoder": false, + "freeze_waveform_decoder": false, + "encoder_sample_rate": null, + "interpolate_z": true, + "reinit_DP": false, + "reinit_text_encoder": false + }, + "lr_gen": 0.0002, + "lr_disc": 0.0002, + 
"lr_scheduler_gen": "ExponentialLR", + "lr_scheduler_gen_params": { + "gamma": 0.999875, + "last_epoch": -1 + }, + "lr_scheduler_disc": "ExponentialLR", + "lr_scheduler_disc_params": { + "gamma": 0.999875, + "last_epoch": -1 + }, + "kl_loss_alpha": 1.0, + "disc_loss_alpha": 1.0, + "gen_loss_alpha": 1.0, + "feat_loss_alpha": 1.0, + "mel_loss_alpha": 45.0, + "dur_loss_alpha": 1.0, + "speaker_encoder_loss_alpha": 1.0, + "return_wav": true, + "use_weighted_sampler": false, + "weighted_sampler_attrs": {}, + "weighted_sampler_multipliers": {}, + "r": 1, + "num_speakers": 0, + "use_speaker_embedding": false, + "speakers_file": null, + "speaker_embedding_channels": 256, + "language_ids_file": null, + "use_language_embedding": false, + "use_d_vector_file": false, + "d_vector_file": null, + "d_vector_dim": 0, + "github_branch": "* dev" +} \ No newline at end of file diff --git a/models/bn_female/bn_female_vits_30hrs.pt b/models/bn_female/bn_female_vits_30hrs.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e88dbfb9975ccad5fcc43e825b39a718ac303f3 --- /dev/null +++ b/models/bn_female/bn_female_vits_30hrs.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53208e056050bb485df9192a0d444d3fa72eefe15b2c04840e9a500e4ac1bbf4 +size 333255366 diff --git a/models/bn_female/chars.txt b/models/bn_female/chars.txt new file mode 100644 index 0000000000000000000000000000000000000000..8e997bcd0f621c7a99b764a001b65bddac06c94b --- /dev/null +++ b/models/bn_female/chars.txt @@ -0,0 +1 @@ +ূঞংঘঔদলৌআডখরথটোৗঙঐানষঝবছঅঢ়ঁপউধঢশগয়।?িক,যঈস্ত়ফঋৈজ'ীঠৰণওৎঃমচঊড়ইুভে এ"ৃহ diff --git a/models/bn_female/jit_infer.py b/models/bn_female/jit_infer.py new file mode 100644 index 0000000000000000000000000000000000000000..74f3059f55458a7f3f6d83af99a04db85da1bd7e --- /dev/null +++ b/models/bn_female/jit_infer.py @@ -0,0 +1,32 @@ +import os +from extra import TTSTokenizer, VitsConfig, CharactersConfig, VitsCharacters +import torch +import numpy as np + +#ch female +with 
open("chars.txt", 'r') as f: + letters = f.read().strip('\n') +model="bn_female_vits_30hrs.pt" +text = " হলেও আমাদের সবার সার্বিক শৃঙ্খলা বোধের উন্নতি হবে" + +config = VitsConfig( + text_cleaner="multilingual_cleaners", + characters=CharactersConfig( + characters_class=VitsCharacters, + pad="", + eos="", + bos="", + blank="", + characters=letters, + punctuations="!¡'(),-.:;¿? ", + phonemes=None) + ) +tokenizer, config = TTSTokenizer.init_from_config(config) + +x = tokenizer.text_to_ids(text) +x = torch.from_numpy(np.array(x)).unsqueeze(0) +net = torch.jit.load(model) +with torch.no_grad(): + out2 = net(x) +import soundfile as sf +sf.write("jit.wav", out2.squeeze().cpu().numpy(), 22050) \ No newline at end of file diff --git a/models/bn_male/bn_male_vits_30hrs.pt b/models/bn_male/bn_male_vits_30hrs.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8b838040e97148124533118ef60e8122a5caf8a --- /dev/null +++ b/models/bn_male/bn_male_vits_30hrs.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9d8d52f0bc33ef01d733eef36fb00f1e17192b8c86123a0ccf84a24dbb80d0e +size 333249868 diff --git a/models/bn_male/chars.txt b/models/bn_male/chars.txt new file mode 100644 index 0000000000000000000000000000000000000000..b3f82b65168b3f2f15030f9f41ba1f27d449394f --- /dev/null +++ b/models/bn_male/chars.txt @@ -0,0 +1 @@ +ূঞংঘঔদলৌআডখরঃটোৗঙঐনাঝষবঅছঢ়ঁপউধঢশগয়।?িক,যঈসত্ৈফ়ঊজ'ীঠৎণওঋৰমচড়ভুইে থএ"ৃহ diff --git a/models/bn_male/extra.py b/models/bn_male/extra.py new file mode 100644 index 0000000000000000000000000000000000000000..c7db561351da270a7c3931bfe0afefa7bc6d4853 --- /dev/null +++ b/models/bn_male/extra.py @@ -0,0 +1,787 @@ +from typing import Callable, Dict, List, Union +from dataclasses import asdict, dataclass, field + + +import re +from dataclasses import replace +from typing import Dict +_whitespace_re = re.compile(r"\s+") + +from dataclasses import dataclass, field +from typing import List + +# from TTS.tts.configs.shared_configs 
import BaseTTSConfig +# from TTS.tts.models.vits import VitsArgs, VitsAudioConfig + +@dataclass +class CharactersConfig(): + + characters_class: str = None + + # using BaseVocabulary + vocab_dict: Dict = None + + # using on BaseCharacters + pad: str = None + eos: str = None + bos: str = None + blank: str = None + characters: str = None + punctuations: str = None + phonemes: str = None + is_unique: bool = True # for backwards compatibility of models trained with char sets with duplicates + is_sorted: bool = True + + +@dataclass +class BaseTTSConfig(): + + # audio: BaseAudioConfig = field(default_factory=BaseAudioConfig) + # phoneme settings + use_phonemes: bool = False + phonemizer: str = None + phoneme_language: str = None + compute_input_seq_cache: bool = False + text_cleaner: str = None + enable_eos_bos_chars: bool = False + test_sentences_file: str = "" + phoneme_cache_path: str = None + # vocabulary parameters + characters: CharactersConfig = None + add_blank: bool = False + # training params + batch_group_size: int = 0 + loss_masking: bool = None + # dataloading + min_audio_len: int = 1 + max_audio_len: int = float("inf") + min_text_len: int = 1 + max_text_len: int = float("inf") + compute_f0: bool = False + compute_energy: bool = False + compute_linear_spec: bool = False + precompute_num_workers: int = 0 + use_noise_augment: bool = False + start_by_longest: bool = False + shuffle: bool = False + drop_last: bool = False + # dataset + datasets: str = None + # optimizer + optimizer: str = "radam" + optimizer_params: dict = None + # scheduler + lr_scheduler: str = None + lr_scheduler_params: dict = field(default_factory=lambda: {}) + # testing + test_sentences: List[str] = field(default_factory=lambda: []) + # evaluation + eval_split_max_size: int = None + eval_split_size: float = 0.01 + # weighted samplers + use_speaker_weighted_sampler: bool = False + speaker_weighted_sampler_alpha: float = 1.0 + use_language_weighted_sampler: bool = False + 
language_weighted_sampler_alpha: float = 1.0 + use_length_weighted_sampler: bool = False + length_weighted_sampler_alpha: float = 1.0 + + +@dataclass +class VitsAudioConfig(): + fft_size: int = 1024 + sample_rate: int = 22050 + win_length: int = 1024 + hop_length: int = 256 + num_mels: int = 80 + mel_fmin: int = 0 + mel_fmax: int = None + +@dataclass +class VitsArgs(): + num_chars: int = 100 + out_channels: int = 513 + spec_segment_size: int = 32 + hidden_channels: int = 192 + hidden_channels_ffn_text_encoder: int = 768 + num_heads_text_encoder: int = 2 + num_layers_text_encoder: int = 6 + kernel_size_text_encoder: int = 3 + dropout_p_text_encoder: float = 0.1 + dropout_p_duration_predictor: float = 0.5 + kernel_size_posterior_encoder: int = 5 + dilation_rate_posterior_encoder: int = 1 + num_layers_posterior_encoder: int = 16 + kernel_size_flow: int = 5 + dilation_rate_flow: int = 1 + num_layers_flow: int = 4 + resblock_type_decoder: str = "1" + resblock_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [3, 7, 11]) + resblock_dilation_sizes_decoder: List[List[int]] = field(default_factory=lambda: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]) + upsample_rates_decoder: List[int] = field(default_factory=lambda: [8, 8, 2, 2]) + upsample_initial_channel_decoder: int = 512 + upsample_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [16, 16, 4, 4]) + periods_multi_period_discriminator: List[int] = field(default_factory=lambda: [2, 3, 5, 7, 11]) + use_sdp: bool = True + noise_scale: float = 1.0 + inference_noise_scale: float = 0.667 + length_scale: float = 1 + noise_scale_dp: float = 1.0 + inference_noise_scale_dp: float = 1.0 + max_inference_len: int = None + init_discriminator: bool = True + use_spectral_norm_disriminator: bool = False + use_speaker_embedding: bool = False + num_speakers: int = 0 + speakers_file: str = None + d_vector_file: List[str] = None + speaker_embedding_channels: int = 256 + use_d_vector_file: bool = False + d_vector_dim: int = 0 
+ detach_dp_input: bool = True + use_language_embedding: bool = False + embedded_language_dim: int = 4 + num_languages: int = 0 + language_ids_file: str = None + use_speaker_encoder_as_loss: bool = False + speaker_encoder_config_path: str = "" + speaker_encoder_model_path: str = "" + condition_dp_on_speaker: bool = True + freeze_encoder: bool = False + freeze_DP: bool = False + freeze_PE: bool = False + freeze_flow_decoder: bool = False + freeze_waveform_decoder: bool = False + encoder_sample_rate: int = None + interpolate_z: bool = True + reinit_DP: bool = False + reinit_text_encoder: bool = False +@dataclass +class VitsConfig(BaseTTSConfig): + + model: str = "vits" + # model specific params + model_args: VitsArgs = field(default_factory=VitsArgs) + audio: VitsAudioConfig = field(default_factory=VitsAudioConfig) + + # optimizer + grad_clip: List[float] = field(default_factory=lambda: [1000, 1000]) + lr_gen: float = 0.0002 + lr_disc: float = 0.0002 + lr_scheduler_gen: str = "ExponentialLR" + lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + lr_scheduler_disc: str = "ExponentialLR" + lr_scheduler_disc_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + scheduler_after_epoch: bool = True + optimizer: str = "AdamW" + optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "eps": 1e-9, "weight_decay": 0.01}) + + # loss params + kl_loss_alpha: float = 1.0 + disc_loss_alpha: float = 1.0 + gen_loss_alpha: float = 1.0 + feat_loss_alpha: float = 1.0 + mel_loss_alpha: float = 45.0 + dur_loss_alpha: float = 1.0 + speaker_encoder_loss_alpha: float = 1.0 + + # data loader params + return_wav: bool = True + compute_linear_spec: bool = True + + # sampler params + use_weighted_sampler: bool = False # TODO: move it to the base config + weighted_sampler_attrs: dict = field(default_factory=lambda: {}) + weighted_sampler_multipliers: dict = field(default_factory=lambda: {}) + + 
# overrides + r: int = 1 # DO NOT CHANGE + add_blank: bool = True + + # testing + test_sentences: List[List] = field( + default_factory=lambda: [ + ["It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent."], + ["Be a voice, not an echo."], + ["I'm sorry Dave. I'm afraid I can't do that."], + ["This cake is great. It's so delicious and moist."], + ["Prior to November 22, 1963."], + ] + ) + + # multi-speaker settings + # use speaker embedding layer + num_speakers: int = 0 + use_speaker_embedding: bool = False + speakers_file: str = None + speaker_embedding_channels: int = 256 + language_ids_file: str = None + use_language_embedding: bool = False + + # use d-vectors + use_d_vector_file: bool = False + d_vector_file: List[str] = None + d_vector_dim: int = None + + def __post_init__(self): + pass + # for key, val in self.model_args.items(): + # if hasattr(self, key): + # self[key] = val + + + + + +def parse_symbols(): + return { + "pad": _pad, + "eos": _eos, + "bos": _bos, + "characters": _characters, + "punctuations": _punctuations, + "phonemes": _phonemes, + } + + +# DEFAULT SET OF GRAPHEMES +_pad = "" +_eos = "" +_bos = "" +_blank = "" # TODO: check if we need this alongside with PAD +_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" +_punctuations = "!'(),-.:;? " + + +# DEFAULT SET OF IPA PHONEMES +# Phonemes definition (All IPA characters) +_vowels = "iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ" +_non_pulmonic_consonants = "ʘɓǀɗǃʄǂɠǁʛ" +_pulmonic_consonants = "pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ" +_suprasegmentals = "ˈˌːˑ" +_other_symbols = "ʍwɥʜʢʡɕʑɺɧʲ" +_diacrilics = "ɚ˞ɫ" +_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics + + +class BaseVocabulary: + """Base Vocabulary class. + + This class only needs a vocabulary dictionary without specifying the characters. 
+ + Args: + vocab (Dict): A dictionary of characters and their corresponding indices. + """ + + def __init__(self, vocab: Dict, pad: str = None, blank: str = None, bos: str = None, eos: str = None): + self.vocab = vocab + self.pad = pad + self.blank = blank + self.bos = bos + self.eos = eos + + @property + def pad_id(self) -> int: + """Return the index of the padding character. If the padding character is not specified, return the length + of the vocabulary.""" + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + """Return the index of the blank character. If the blank character is not specified, return the length of + the vocabulary.""" + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def bos_id(self) -> int: + """Return the index of the bos character. If the bos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def eos_id(self) -> int: + """Return the index of the eos character. 
If the eos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def vocab(self): + """Return the vocabulary dictionary.""" + return self._vocab + + @vocab.setter + def vocab(self, vocab): + """Set the vocabulary dictionary and character mapping dictionaries.""" + self._vocab, self._char_to_id, self._id_to_char = None, None, None + if vocab is not None: + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self._vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self._vocab) # pylint: disable=unnecessary-comprehension + } + + @staticmethod + def init_from_config(config, **kwargs): + """Initialize from the given config.""" + if config.characters is not None and "vocab_dict" in config.characters and config.characters.vocab_dict: + return ( + BaseVocabulary( + config.characters.vocab_dict, + config.characters.pad, + config.characters.blank, + config.characters.bos, + config.characters.eos, + ), + config, + ) + return BaseVocabulary(**kwargs), config + + def to_config(self): + return CharactersConfig( + vocab_dict=self._vocab, + pad=self.pad, + eos=self.eos, + bos=self.bos, + blank=self.blank, + is_unique=False, + is_sorted=False, + ) + + @property + def num_chars(self): + """Return number of tokens in the vocabulary.""" + return len(self._vocab) + + def char_to_id(self, char: str) -> int: + """Map a character to an token ID.""" + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] 
{repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + """Map an token ID to a character.""" + return self._id_to_char[idx] + + +class BaseCharacters: + + + def __init__( + self, + characters: str = None, + punctuations: str = None, + pad: str = None, + eos: str = None, + bos: str = None, + blank: str = None, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + self._characters = characters + self._punctuations = punctuations + self._pad = pad + self._eos = eos + self._bos = bos + self._blank = blank + self.is_unique = is_unique + self.is_sorted = is_sorted + self._create_vocab() + + @property + def pad_id(self) -> int: + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def eos_id(self) -> int: + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def bos_id(self) -> int: + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, characters): + self._characters = characters + self._create_vocab() + + @property + def punctuations(self): + return self._punctuations + + @punctuations.setter + def punctuations(self, punctuations): + self._punctuations = punctuations + self._create_vocab() + + @property + def pad(self): + return self._pad + + @pad.setter + def pad(self, pad): + self._pad = pad + self._create_vocab() + + @property + def eos(self): + return self._eos + + @eos.setter + def eos(self, eos): + self._eos = eos + self._create_vocab() + + @property + def bos(self): + return self._bos + + @bos.setter + def bos(self, bos): + self._bos = bos + self._create_vocab() + + @property + def blank(self): + return self._blank + + @blank.setter + def blank(self, blank): + self._blank = blank + self._create_vocab() + + @property + 
def vocab(self): + return self._vocab + + @vocab.setter + def vocab(self, vocab): + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self.vocab) # pylint: disable=unnecessary-comprehension + } + + @property + def num_chars(self): + return len(self._vocab) + + def _create_vocab(self): + _vocab = self._characters + if self.is_unique: + _vocab = list(set(_vocab)) + if self.is_sorted: + _vocab = sorted(_vocab) + _vocab = list(_vocab) + _vocab = [self._blank] + _vocab if self._blank is not None and len(self._blank) > 0 else _vocab + _vocab = [self._bos] + _vocab if self._bos is not None and len(self._bos) > 0 else _vocab + _vocab = [self._eos] + _vocab if self._eos is not None and len(self._eos) > 0 else _vocab + _vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab + self.vocab = _vocab + list(self._punctuations) + if self.is_unique: + duplicates = {x for x in self.vocab if self.vocab.count(x) > 1} + assert ( + len(self.vocab) == len(self._char_to_id) == len(self._id_to_char) + ), f" [!] There are duplicate characters in the character set. {duplicates}" + + def char_to_id(self, char: str) -> int: + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + return self._id_to_char[idx] + + def print_log(self, level: int = 0): + """ + Prints the vocabulary in a nice format. 
+ """ + indent = "\t" * level + print(f"{indent}| > Characters: {self._characters}") + print(f"{indent}| > Punctuations: {self._punctuations}") + print(f"{indent}| > Pad: {self._pad}") + print(f"{indent}| > EOS: {self._eos}") + print(f"{indent}| > BOS: {self._bos}") + print(f"{indent}| > Blank: {self._blank}") + print(f"{indent}| > Vocab: {self.vocab}") + print(f"{indent}| > Num chars: {self.num_chars}") + + @staticmethod + def init_from_config(config: "Coqpit"): # pylint: disable=unused-argument + """Init your character class from a config. + + Implement this method for your subclass. + """ + # use character set from config + if config.characters is not None: + return BaseCharacters(**config.characters), config + # return default character set + characters = BaseCharacters() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + punctuations=self._punctuations, + pad=self._pad, + eos=self._eos, + bos=self._bos, + blank=self._blank, + is_unique=self.is_unique, + is_sorted=self.is_sorted, + ) + + +class IPAPhonemes(BaseCharacters): + + + def __init__( + self, + characters: str = _phonemes, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a IPAPhonemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + # band-aid for compatibility with old models + if "characters" in config and config.characters is not None: + if "phonemes" in config.characters and config.characters.phonemes is not None: + config.characters["characters"] = config.characters["phonemes"] + return ( + IPAPhonemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + # use character set from config + if config.characters is not None: + return IPAPhonemes(**config.characters), config + # return default character set + characters = IPAPhonemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +class Graphemes(BaseCharacters): + + + def __init__( + self, + characters: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a Graphemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + if config.characters is not None: + # band-aid for compatibility with old models + if "phonemes" in config.characters: + return ( + Graphemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + return Graphemes(**config.characters), config + characters = Graphemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +if __name__ == "__main__": + gr = Graphemes() + ph = IPAPhonemes() + gr.print_log() + ph.print_log() + + +class VitsCharacters(BaseCharacters): + """Characters class for VITs model for compatibility with pre-trained models""" + + def __init__( + self, + graphemes: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + ipa_characters: str = _phonemes, + ) -> None: + if ipa_characters is not None: + graphemes += ipa_characters + super().__init__(graphemes, punctuations, pad, None, None, "", is_unique=False, is_sorted=True) + + def _create_vocab(self): + self._vocab = [self._pad] + list(self._punctuations) + list(self._characters) + [self._blank] + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + # pylint: disable=unnecessary-comprehension + self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)} + + @staticmethod + def init_from_config(config): + _pad = config.characters.pad + _punctuations = config.characters.punctuations + _letters = config.characters.characters + _letters_ipa = config.characters.phonemes + return ( + VitsCharacters(graphemes=_letters, ipa_characters=_letters_ipa, punctuations=_punctuations, pad=_pad), + config, + ) + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + 
punctuations=self._punctuations, + pad=self._pad, + eos=None, + bos=None, + blank=self._blank, + is_unique=False, + is_sorted=True, + ) + +class TTSTokenizer: + def __init__( + self, + text_cleaner: Callable = None, + characters: "BaseCharacters" = None, + ): + self.text_cleaner = text_cleaner + self.characters = characters + self.not_found_characters = [] + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, new_characters): + self._characters = new_characters + self.pad_id = self.characters.char_to_id(self.characters.pad) if self.characters.pad else None + self.blank_id = self.characters.char_to_id(self.characters.blank) if self.characters.blank else None + + def encode(self, text: str) -> List[int]: + """Encodes a string of text as a sequence of IDs.""" + token_ids = [] + for char in text: + try: + idx = self.characters.char_to_id(char) + token_ids.append(idx) + except KeyError: + # discard but store not found characters + if char not in self.not_found_characters: + self.not_found_characters.append(char) + print(text) + print(f" [!] Character {repr(char)} not found in the vocabulary. Discarding it.") + return token_ids + + def text_to_ids(self, text: str, language: str = None) -> List[int]: # pylint: disable=unused-argument + text = self.text_cleaner(text) + text = self.encode(text) + text = self.intersperse_blank_char(text, True) + return text + + def pad_with_bos_eos(self, char_sequence: List[str]): + """Pads a sequence with the special BOS and EOS characters.""" + return [self.characters.bos_id] + list(char_sequence) + [self.characters.eos_id] + + def intersperse_blank_char(self, char_sequence: List[str], use_blank_char: bool = False): + """Intersperses the blank character between characters in a sequence. + + Use the ```blank``` character if defined else use the ```pad``` character. 
+ """ + char_to_use = self.characters.blank_id if use_blank_char else self.characters.pad + result = [char_to_use] * (len(char_sequence) * 2 + 1) + result[1::2] = char_sequence + return result + + @staticmethod + def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None): + text_cleaner = multilingual_cleaners + CharactersClass = VitsCharacters + characters, new_config = CharactersClass.init_from_config(config) + # new_config.characters.characters_class = get_import_path(characters) + new_config.characters.characters_class = VitsCharacters + return ( + TTSTokenizer(text_cleaner, characters),new_config) + + +def multilingual_cleaners(text): + """Pipeline for multilingual text""" + text = lowercase(text) + text = replace_symbols(text, lang=None) + text = remove_aux_symbols(text) + text = collapse_whitespace(text) + return text + +def lowercase(text): + return text.lower() + +def collapse_whitespace(text): + return re.sub(_whitespace_re, " ", text).strip() + +def replace_symbols(text, lang="en"): + + text = text.replace(";", ",") + text = text.replace("-", " ") if lang != "ca" else text.replace("-", "") + text = text.replace(":", ",") + if lang == "en": + text = text.replace("&", " and ") + elif lang == "fr": + text = text.replace("&", " et ") + elif lang == "pt": + text = text.replace("&", " e ") + elif lang == "ca": + text = text.replace("&", " i ") + text = text.replace("'", "") + return text + +def remove_aux_symbols(text): + text = re.sub(r"[\<\>\(\)\[\]\"]+", "", text) + return text \ No newline at end of file diff --git a/models/bn_male/jit_infer.py b/models/bn_male/jit_infer.py new file mode 100644 index 0000000000000000000000000000000000000000..839f09b0845a9b6fea0501dce553719435c46e10 --- /dev/null +++ b/models/bn_male/jit_infer.py @@ -0,0 +1,32 @@ +import os +from extra import TTSTokenizer, VitsConfig, CharactersConfig, VitsCharacters +import torch +import numpy as np + +#ch female +with open("chars.txt", 'r') as f: + letters = 
f.read().strip('\n') +model="bn_male_vits_30hrs.pt" +text = " হলেও আমাদের সবার সার্বিক শৃঙ্খলা বোধের উন্নতি হবে" + +config = VitsConfig( + text_cleaner="multilingual_cleaners", + characters=CharactersConfig( + characters_class=VitsCharacters, + pad="", + eos="", + bos="", + blank="", + characters=letters, + punctuations="!¡'(),-.:;¿? ", + phonemes=None) + ) +tokenizer, config = TTSTokenizer.init_from_config(config) + +x = tokenizer.text_to_ids(text) +x = torch.from_numpy(np.array(x)).unsqueeze(0) +net = torch.jit.load(model) +with torch.no_grad(): + out2 = net(x) +import soundfile as sf +sf.write("jit.wav", out2.squeeze().cpu().numpy(), 22050) \ No newline at end of file diff --git a/models/en_female/.gitattributes b/models/en_female/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/en_female/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text 
+*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/en_female/README.md b/models/en_female/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b187bb7e7d837a367ccd0862441947ad412c77f7 --- /dev/null +++ b/models/en_female/README.md @@ -0,0 +1,3 @@ +--- +license: cc-by-4.0 +--- diff --git a/models/en_female/chars.txt b/models/en_female/chars.txt new file mode 100644 index 0000000000000000000000000000000000000000..0f31392545923a9ab8ea07cd41796cbe5dcc0089 --- /dev/null +++ b/models/en_female/chars.txt @@ -0,0 +1 @@ +pqw'"sgufmxre?d!lcab,zk.iytoh jvn diff --git a/models/en_female/en_female_vits_30hrs.pt b/models/en_female/en_female_vits_30hrs.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4ab91056595dbac6ae6581c729dcb5914462e20 --- /dev/null +++ b/models/en_female/en_female_vits_30hrs.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dfa80f08da6ca7222a16cb6d919251fb733d3f03042848a20201fa6ae0d0b9c +size 333229574 diff --git a/models/en_female/extra.py b/models/en_female/extra.py new file mode 100644 index 0000000000000000000000000000000000000000..c7db561351da270a7c3931bfe0afefa7bc6d4853 --- /dev/null +++ b/models/en_female/extra.py @@ -0,0 +1,787 @@ +from typing import Callable, Dict, List, Union +from dataclasses import asdict, dataclass, field + + +import re +from dataclasses import replace +from typing import Dict +_whitespace_re = re.compile(r"\s+") + +from dataclasses import dataclass, field +from typing import List + +# from 
TTS.tts.configs.shared_configs import BaseTTSConfig +# from TTS.tts.models.vits import VitsArgs, VitsAudioConfig + +@dataclass +class CharactersConfig(): + + characters_class: str = None + + # using BaseVocabulary + vocab_dict: Dict = None + + # using on BaseCharacters + pad: str = None + eos: str = None + bos: str = None + blank: str = None + characters: str = None + punctuations: str = None + phonemes: str = None + is_unique: bool = True # for backwards compatibility of models trained with char sets with duplicates + is_sorted: bool = True + + +@dataclass +class BaseTTSConfig(): + + # audio: BaseAudioConfig = field(default_factory=BaseAudioConfig) + # phoneme settings + use_phonemes: bool = False + phonemizer: str = None + phoneme_language: str = None + compute_input_seq_cache: bool = False + text_cleaner: str = None + enable_eos_bos_chars: bool = False + test_sentences_file: str = "" + phoneme_cache_path: str = None + # vocabulary parameters + characters: CharactersConfig = None + add_blank: bool = False + # training params + batch_group_size: int = 0 + loss_masking: bool = None + # dataloading + min_audio_len: int = 1 + max_audio_len: int = float("inf") + min_text_len: int = 1 + max_text_len: int = float("inf") + compute_f0: bool = False + compute_energy: bool = False + compute_linear_spec: bool = False + precompute_num_workers: int = 0 + use_noise_augment: bool = False + start_by_longest: bool = False + shuffle: bool = False + drop_last: bool = False + # dataset + datasets: str = None + # optimizer + optimizer: str = "radam" + optimizer_params: dict = None + # scheduler + lr_scheduler: str = None + lr_scheduler_params: dict = field(default_factory=lambda: {}) + # testing + test_sentences: List[str] = field(default_factory=lambda: []) + # evaluation + eval_split_max_size: int = None + eval_split_size: float = 0.01 + # weighted samplers + use_speaker_weighted_sampler: bool = False + speaker_weighted_sampler_alpha: float = 1.0 + use_language_weighted_sampler: 
bool = False + language_weighted_sampler_alpha: float = 1.0 + use_length_weighted_sampler: bool = False + length_weighted_sampler_alpha: float = 1.0 + + +@dataclass +class VitsAudioConfig(): + fft_size: int = 1024 + sample_rate: int = 22050 + win_length: int = 1024 + hop_length: int = 256 + num_mels: int = 80 + mel_fmin: int = 0 + mel_fmax: int = None + +@dataclass +class VitsArgs(): + num_chars: int = 100 + out_channels: int = 513 + spec_segment_size: int = 32 + hidden_channels: int = 192 + hidden_channels_ffn_text_encoder: int = 768 + num_heads_text_encoder: int = 2 + num_layers_text_encoder: int = 6 + kernel_size_text_encoder: int = 3 + dropout_p_text_encoder: float = 0.1 + dropout_p_duration_predictor: float = 0.5 + kernel_size_posterior_encoder: int = 5 + dilation_rate_posterior_encoder: int = 1 + num_layers_posterior_encoder: int = 16 + kernel_size_flow: int = 5 + dilation_rate_flow: int = 1 + num_layers_flow: int = 4 + resblock_type_decoder: str = "1" + resblock_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [3, 7, 11]) + resblock_dilation_sizes_decoder: List[List[int]] = field(default_factory=lambda: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]) + upsample_rates_decoder: List[int] = field(default_factory=lambda: [8, 8, 2, 2]) + upsample_initial_channel_decoder: int = 512 + upsample_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [16, 16, 4, 4]) + periods_multi_period_discriminator: List[int] = field(default_factory=lambda: [2, 3, 5, 7, 11]) + use_sdp: bool = True + noise_scale: float = 1.0 + inference_noise_scale: float = 0.667 + length_scale: float = 1 + noise_scale_dp: float = 1.0 + inference_noise_scale_dp: float = 1.0 + max_inference_len: int = None + init_discriminator: bool = True + use_spectral_norm_disriminator: bool = False + use_speaker_embedding: bool = False + num_speakers: int = 0 + speakers_file: str = None + d_vector_file: List[str] = None + speaker_embedding_channels: int = 256 + use_d_vector_file: bool = False + 
d_vector_dim: int = 0 + detach_dp_input: bool = True + use_language_embedding: bool = False + embedded_language_dim: int = 4 + num_languages: int = 0 + language_ids_file: str = None + use_speaker_encoder_as_loss: bool = False + speaker_encoder_config_path: str = "" + speaker_encoder_model_path: str = "" + condition_dp_on_speaker: bool = True + freeze_encoder: bool = False + freeze_DP: bool = False + freeze_PE: bool = False + freeze_flow_decoder: bool = False + freeze_waveform_decoder: bool = False + encoder_sample_rate: int = None + interpolate_z: bool = True + reinit_DP: bool = False + reinit_text_encoder: bool = False +@dataclass +class VitsConfig(BaseTTSConfig): + + model: str = "vits" + # model specific params + model_args: VitsArgs = field(default_factory=VitsArgs) + audio: VitsAudioConfig = field(default_factory=VitsAudioConfig) + + # optimizer + grad_clip: List[float] = field(default_factory=lambda: [1000, 1000]) + lr_gen: float = 0.0002 + lr_disc: float = 0.0002 + lr_scheduler_gen: str = "ExponentialLR" + lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + lr_scheduler_disc: str = "ExponentialLR" + lr_scheduler_disc_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + scheduler_after_epoch: bool = True + optimizer: str = "AdamW" + optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "eps": 1e-9, "weight_decay": 0.01}) + + # loss params + kl_loss_alpha: float = 1.0 + disc_loss_alpha: float = 1.0 + gen_loss_alpha: float = 1.0 + feat_loss_alpha: float = 1.0 + mel_loss_alpha: float = 45.0 + dur_loss_alpha: float = 1.0 + speaker_encoder_loss_alpha: float = 1.0 + + # data loader params + return_wav: bool = True + compute_linear_spec: bool = True + + # sampler params + use_weighted_sampler: bool = False # TODO: move it to the base config + weighted_sampler_attrs: dict = field(default_factory=lambda: {}) + weighted_sampler_multipliers: dict = 
field(default_factory=lambda: {}) + + # overrides + r: int = 1 # DO NOT CHANGE + add_blank: bool = True + + # testing + test_sentences: List[List] = field( + default_factory=lambda: [ + ["It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent."], + ["Be a voice, not an echo."], + ["I'm sorry Dave. I'm afraid I can't do that."], + ["This cake is great. It's so delicious and moist."], + ["Prior to November 22, 1963."], + ] + ) + + # multi-speaker settings + # use speaker embedding layer + num_speakers: int = 0 + use_speaker_embedding: bool = False + speakers_file: str = None + speaker_embedding_channels: int = 256 + language_ids_file: str = None + use_language_embedding: bool = False + + # use d-vectors + use_d_vector_file: bool = False + d_vector_file: List[str] = None + d_vector_dim: int = None + + def __post_init__(self): + pass + # for key, val in self.model_args.items(): + # if hasattr(self, key): + # self[key] = val + + + + + +def parse_symbols(): + return { + "pad": _pad, + "eos": _eos, + "bos": _bos, + "characters": _characters, + "punctuations": _punctuations, + "phonemes": _phonemes, + } + + +# DEFAULT SET OF GRAPHEMES +_pad = "" +_eos = "" +_bos = "" +_blank = "" # TODO: check if we need this alongside with PAD +_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" +_punctuations = "!'(),-.:;? " + + +# DEFAULT SET OF IPA PHONEMES +# Phonemes definition (All IPA characters) +_vowels = "iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ" +_non_pulmonic_consonants = "ʘɓǀɗǃʄǂɠǁʛ" +_pulmonic_consonants = "pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ" +_suprasegmentals = "ˈˌːˑ" +_other_symbols = "ʍwɥʜʢʡɕʑɺɧʲ" +_diacrilics = "ɚ˞ɫ" +_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics + + +class BaseVocabulary: + """Base Vocabulary class. + + This class only needs a vocabulary dictionary without specifying the characters. 
+ + Args: + vocab (Dict): A dictionary of characters and their corresponding indices. + """ + + def __init__(self, vocab: Dict, pad: str = None, blank: str = None, bos: str = None, eos: str = None): + self.vocab = vocab + self.pad = pad + self.blank = blank + self.bos = bos + self.eos = eos + + @property + def pad_id(self) -> int: + """Return the index of the padding character. If the padding character is not specified, return the length + of the vocabulary.""" + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + """Return the index of the blank character. If the blank character is not specified, return the length of + the vocabulary.""" + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def bos_id(self) -> int: + """Return the index of the bos character. If the bos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def eos_id(self) -> int: + """Return the index of the eos character. 
If the eos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def vocab(self): + """Return the vocabulary dictionary.""" + return self._vocab + + @vocab.setter + def vocab(self, vocab): + """Set the vocabulary dictionary and character mapping dictionaries.""" + self._vocab, self._char_to_id, self._id_to_char = None, None, None + if vocab is not None: + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self._vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self._vocab) # pylint: disable=unnecessary-comprehension + } + + @staticmethod + def init_from_config(config, **kwargs): + """Initialize from the given config.""" + if config.characters is not None and "vocab_dict" in config.characters and config.characters.vocab_dict: + return ( + BaseVocabulary( + config.characters.vocab_dict, + config.characters.pad, + config.characters.blank, + config.characters.bos, + config.characters.eos, + ), + config, + ) + return BaseVocabulary(**kwargs), config + + def to_config(self): + return CharactersConfig( + vocab_dict=self._vocab, + pad=self.pad, + eos=self.eos, + bos=self.bos, + blank=self.blank, + is_unique=False, + is_sorted=False, + ) + + @property + def num_chars(self): + """Return number of tokens in the vocabulary.""" + return len(self._vocab) + + def char_to_id(self, char: str) -> int: + """Map a character to an token ID.""" + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] 
{repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + """Map an token ID to a character.""" + return self._id_to_char[idx] + + +class BaseCharacters: + + + def __init__( + self, + characters: str = None, + punctuations: str = None, + pad: str = None, + eos: str = None, + bos: str = None, + blank: str = None, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + self._characters = characters + self._punctuations = punctuations + self._pad = pad + self._eos = eos + self._bos = bos + self._blank = blank + self.is_unique = is_unique + self.is_sorted = is_sorted + self._create_vocab() + + @property + def pad_id(self) -> int: + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def eos_id(self) -> int: + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def bos_id(self) -> int: + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, characters): + self._characters = characters + self._create_vocab() + + @property + def punctuations(self): + return self._punctuations + + @punctuations.setter + def punctuations(self, punctuations): + self._punctuations = punctuations + self._create_vocab() + + @property + def pad(self): + return self._pad + + @pad.setter + def pad(self, pad): + self._pad = pad + self._create_vocab() + + @property + def eos(self): + return self._eos + + @eos.setter + def eos(self, eos): + self._eos = eos + self._create_vocab() + + @property + def bos(self): + return self._bos + + @bos.setter + def bos(self, bos): + self._bos = bos + self._create_vocab() + + @property + def blank(self): + return self._blank + + @blank.setter + def blank(self, blank): + self._blank = blank + self._create_vocab() + + @property + 
def vocab(self): + return self._vocab + + @vocab.setter + def vocab(self, vocab): + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self.vocab) # pylint: disable=unnecessary-comprehension + } + + @property + def num_chars(self): + return len(self._vocab) + + def _create_vocab(self): + _vocab = self._characters + if self.is_unique: + _vocab = list(set(_vocab)) + if self.is_sorted: + _vocab = sorted(_vocab) + _vocab = list(_vocab) + _vocab = [self._blank] + _vocab if self._blank is not None and len(self._blank) > 0 else _vocab + _vocab = [self._bos] + _vocab if self._bos is not None and len(self._bos) > 0 else _vocab + _vocab = [self._eos] + _vocab if self._eos is not None and len(self._eos) > 0 else _vocab + _vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab + self.vocab = _vocab + list(self._punctuations) + if self.is_unique: + duplicates = {x for x in self.vocab if self.vocab.count(x) > 1} + assert ( + len(self.vocab) == len(self._char_to_id) == len(self._id_to_char) + ), f" [!] There are duplicate characters in the character set. {duplicates}" + + def char_to_id(self, char: str) -> int: + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + return self._id_to_char[idx] + + def print_log(self, level: int = 0): + """ + Prints the vocabulary in a nice format. 
+ """ + indent = "\t" * level + print(f"{indent}| > Characters: {self._characters}") + print(f"{indent}| > Punctuations: {self._punctuations}") + print(f"{indent}| > Pad: {self._pad}") + print(f"{indent}| > EOS: {self._eos}") + print(f"{indent}| > BOS: {self._bos}") + print(f"{indent}| > Blank: {self._blank}") + print(f"{indent}| > Vocab: {self.vocab}") + print(f"{indent}| > Num chars: {self.num_chars}") + + @staticmethod + def init_from_config(config: "Coqpit"): # pylint: disable=unused-argument + """Init your character class from a config. + + Implement this method for your subclass. + """ + # use character set from config + if config.characters is not None: + return BaseCharacters(**config.characters), config + # return default character set + characters = BaseCharacters() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + punctuations=self._punctuations, + pad=self._pad, + eos=self._eos, + bos=self._bos, + blank=self._blank, + is_unique=self.is_unique, + is_sorted=self.is_sorted, + ) + + +class IPAPhonemes(BaseCharacters): + + + def __init__( + self, + characters: str = _phonemes, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a IPAPhonemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + # band-aid for compatibility with old models + if "characters" in config and config.characters is not None: + if "phonemes" in config.characters and config.characters.phonemes is not None: + config.characters["characters"] = config.characters["phonemes"] + return ( + IPAPhonemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + # use character set from config + if config.characters is not None: + return IPAPhonemes(**config.characters), config + # return default character set + characters = IPAPhonemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +class Graphemes(BaseCharacters): + + + def __init__( + self, + characters: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a Graphemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + if config.characters is not None: + # band-aid for compatibility with old models + if "phonemes" in config.characters: + return ( + Graphemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + return Graphemes(**config.characters), config + characters = Graphemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +if __name__ == "__main__": + gr = Graphemes() + ph = IPAPhonemes() + gr.print_log() + ph.print_log() + + +class VitsCharacters(BaseCharacters): + """Characters class for VITs model for compatibility with pre-trained models""" + + def __init__( + self, + graphemes: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + ipa_characters: str = _phonemes, + ) -> None: + if ipa_characters is not None: + graphemes += ipa_characters + super().__init__(graphemes, punctuations, pad, None, None, "", is_unique=False, is_sorted=True) + + def _create_vocab(self): + self._vocab = [self._pad] + list(self._punctuations) + list(self._characters) + [self._blank] + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + # pylint: disable=unnecessary-comprehension + self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)} + + @staticmethod + def init_from_config(config): + _pad = config.characters.pad + _punctuations = config.characters.punctuations + _letters = config.characters.characters + _letters_ipa = config.characters.phonemes + return ( + VitsCharacters(graphemes=_letters, ipa_characters=_letters_ipa, punctuations=_punctuations, pad=_pad), + config, + ) + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + 
punctuations=self._punctuations, + pad=self._pad, + eos=None, + bos=None, + blank=self._blank, + is_unique=False, + is_sorted=True, + ) + +class TTSTokenizer: + def __init__( + self, + text_cleaner: Callable = None, + characters: "BaseCharacters" = None, + ): + self.text_cleaner = text_cleaner + self.characters = characters + self.not_found_characters = [] + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, new_characters): + self._characters = new_characters + self.pad_id = self.characters.char_to_id(self.characters.pad) if self.characters.pad else None + self.blank_id = self.characters.char_to_id(self.characters.blank) if self.characters.blank else None + + def encode(self, text: str) -> List[int]: + """Encodes a string of text as a sequence of IDs.""" + token_ids = [] + for char in text: + try: + idx = self.characters.char_to_id(char) + token_ids.append(idx) + except KeyError: + # discard but store not found characters + if char not in self.not_found_characters: + self.not_found_characters.append(char) + print(text) + print(f" [!] Character {repr(char)} not found in the vocabulary. Discarding it.") + return token_ids + + def text_to_ids(self, text: str, language: str = None) -> List[int]: # pylint: disable=unused-argument + text = self.text_cleaner(text) + text = self.encode(text) + text = self.intersperse_blank_char(text, True) + return text + + def pad_with_bos_eos(self, char_sequence: List[str]): + """Pads a sequence with the special BOS and EOS characters.""" + return [self.characters.bos_id] + list(char_sequence) + [self.characters.eos_id] + + def intersperse_blank_char(self, char_sequence: List[str], use_blank_char: bool = False): + """Intersperses the blank character between characters in a sequence. + + Use the ```blank``` character if defined else use the ```pad``` character. 
+ """ + char_to_use = self.characters.blank_id if use_blank_char else self.characters.pad + result = [char_to_use] * (len(char_sequence) * 2 + 1) + result[1::2] = char_sequence + return result + + @staticmethod + def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None): + text_cleaner = multilingual_cleaners + CharactersClass = VitsCharacters + characters, new_config = CharactersClass.init_from_config(config) + # new_config.characters.characters_class = get_import_path(characters) + new_config.characters.characters_class = VitsCharacters + return ( + TTSTokenizer(text_cleaner, characters),new_config) + + +def multilingual_cleaners(text): + """Pipeline for multilingual text""" + text = lowercase(text) + text = replace_symbols(text, lang=None) + text = remove_aux_symbols(text) + text = collapse_whitespace(text) + return text + +def lowercase(text): + return text.lower() + +def collapse_whitespace(text): + return re.sub(_whitespace_re, " ", text).strip() + +def replace_symbols(text, lang="en"): + + text = text.replace(";", ",") + text = text.replace("-", " ") if lang != "ca" else text.replace("-", "") + text = text.replace(":", ",") + if lang == "en": + text = text.replace("&", " and ") + elif lang == "fr": + text = text.replace("&", " et ") + elif lang == "pt": + text = text.replace("&", " e ") + elif lang == "ca": + text = text.replace("&", " i ") + text = text.replace("'", "") + return text + +def remove_aux_symbols(text): + text = re.sub(r"[\<\>\(\)\[\]\"]+", "", text) + return text \ No newline at end of file diff --git a/models/en_female/jit_infer.py b/models/en_female/jit_infer.py new file mode 100644 index 0000000000000000000000000000000000000000..4100e1602fec0c68cdaf80a3b84547c68f3527fb --- /dev/null +++ b/models/en_female/jit_infer.py @@ -0,0 +1,33 @@ +import os +from extra import TTSTokenizer, VitsConfig, CharactersConfig, VitsCharacters +import torch +import numpy as np + +#ch female +with open("chars.txt", 'r') as f: + letters = 
f.read().strip('\n') +model="en_female_vits_30hrs.pt" +# text = " হলেও আমাদের সবার সার্বিক শৃঙ্খলা বোধের উন্নতি হবে" +text = "My name is g p t, chat g p t" + +config = VitsConfig( + text_cleaner="multilingual_cleaners", + characters=CharactersConfig( + characters_class=VitsCharacters, + pad="", + eos="", + bos="", + blank="", + characters=letters, + punctuations="!¡'(),-.:;¿? ", + phonemes=None) + ) +tokenizer, config = TTSTokenizer.init_from_config(config) + +x = tokenizer.text_to_ids(text) +x = torch.from_numpy(np.array(x)).unsqueeze(0) +net = torch.jit.load(model) +with torch.no_grad(): + out2 = net(x) +import soundfile as sf +sf.write("jit.wav", out2.squeeze().cpu().numpy(), 22050) \ No newline at end of file diff --git a/models/en_male/.gitattributes b/models/en_male/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/en_male/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar 
filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/en_male/README.md b/models/en_male/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b187bb7e7d837a367ccd0862441947ad412c77f7 --- /dev/null +++ b/models/en_male/README.md @@ -0,0 +1,3 @@ +--- +license: cc-by-4.0 +--- diff --git a/models/en_male/chars.txt b/models/en_male/chars.txt new file mode 100644 index 0000000000000000000000000000000000000000..0f31392545923a9ab8ea07cd41796cbe5dcc0089 --- /dev/null +++ b/models/en_male/chars.txt @@ -0,0 +1 @@ +pqw'"sgufmxre?d!lcab,zk.iytoh jvn diff --git a/models/en_male/en_male_vits_30hrs.pt b/models/en_male/en_male_vits_30hrs.pt new file mode 100644 index 0000000000000000000000000000000000000000..c3696d77ea524c447d817292dbf0af7a06b0ce95 --- /dev/null +++ b/models/en_male/en_male_vits_30hrs.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffa1099438a58c8a13e437d39ec304b530644156ef445032e64422d83e558666 +size 333224012 diff --git a/models/en_male/extra.py b/models/en_male/extra.py new file mode 100644 index 0000000000000000000000000000000000000000..c7db561351da270a7c3931bfe0afefa7bc6d4853 --- /dev/null +++ b/models/en_male/extra.py @@ -0,0 +1,787 @@ +from typing import Callable, Dict, List, Union +from dataclasses import asdict, dataclass, field + + +import re +from dataclasses import replace +from typing import Dict +_whitespace_re = re.compile(r"\s+") + +from dataclasses import dataclass, field +from typing import List + +# 
from TTS.tts.configs.shared_configs import BaseTTSConfig +# from TTS.tts.models.vits import VitsArgs, VitsAudioConfig + +@dataclass +class CharactersConfig(): + + characters_class: str = None + + # using BaseVocabulary + vocab_dict: Dict = None + + # using on BaseCharacters + pad: str = None + eos: str = None + bos: str = None + blank: str = None + characters: str = None + punctuations: str = None + phonemes: str = None + is_unique: bool = True # for backwards compatibility of models trained with char sets with duplicates + is_sorted: bool = True + + +@dataclass +class BaseTTSConfig(): + + # audio: BaseAudioConfig = field(default_factory=BaseAudioConfig) + # phoneme settings + use_phonemes: bool = False + phonemizer: str = None + phoneme_language: str = None + compute_input_seq_cache: bool = False + text_cleaner: str = None + enable_eos_bos_chars: bool = False + test_sentences_file: str = "" + phoneme_cache_path: str = None + # vocabulary parameters + characters: CharactersConfig = None + add_blank: bool = False + # training params + batch_group_size: int = 0 + loss_masking: bool = None + # dataloading + min_audio_len: int = 1 + max_audio_len: int = float("inf") + min_text_len: int = 1 + max_text_len: int = float("inf") + compute_f0: bool = False + compute_energy: bool = False + compute_linear_spec: bool = False + precompute_num_workers: int = 0 + use_noise_augment: bool = False + start_by_longest: bool = False + shuffle: bool = False + drop_last: bool = False + # dataset + datasets: str = None + # optimizer + optimizer: str = "radam" + optimizer_params: dict = None + # scheduler + lr_scheduler: str = None + lr_scheduler_params: dict = field(default_factory=lambda: {}) + # testing + test_sentences: List[str] = field(default_factory=lambda: []) + # evaluation + eval_split_max_size: int = None + eval_split_size: float = 0.01 + # weighted samplers + use_speaker_weighted_sampler: bool = False + speaker_weighted_sampler_alpha: float = 1.0 + 
use_language_weighted_sampler: bool = False + language_weighted_sampler_alpha: float = 1.0 + use_length_weighted_sampler: bool = False + length_weighted_sampler_alpha: float = 1.0 + + +@dataclass +class VitsAudioConfig(): + fft_size: int = 1024 + sample_rate: int = 22050 + win_length: int = 1024 + hop_length: int = 256 + num_mels: int = 80 + mel_fmin: int = 0 + mel_fmax: int = None + +@dataclass +class VitsArgs(): + num_chars: int = 100 + out_channels: int = 513 + spec_segment_size: int = 32 + hidden_channels: int = 192 + hidden_channels_ffn_text_encoder: int = 768 + num_heads_text_encoder: int = 2 + num_layers_text_encoder: int = 6 + kernel_size_text_encoder: int = 3 + dropout_p_text_encoder: float = 0.1 + dropout_p_duration_predictor: float = 0.5 + kernel_size_posterior_encoder: int = 5 + dilation_rate_posterior_encoder: int = 1 + num_layers_posterior_encoder: int = 16 + kernel_size_flow: int = 5 + dilation_rate_flow: int = 1 + num_layers_flow: int = 4 + resblock_type_decoder: str = "1" + resblock_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [3, 7, 11]) + resblock_dilation_sizes_decoder: List[List[int]] = field(default_factory=lambda: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]) + upsample_rates_decoder: List[int] = field(default_factory=lambda: [8, 8, 2, 2]) + upsample_initial_channel_decoder: int = 512 + upsample_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [16, 16, 4, 4]) + periods_multi_period_discriminator: List[int] = field(default_factory=lambda: [2, 3, 5, 7, 11]) + use_sdp: bool = True + noise_scale: float = 1.0 + inference_noise_scale: float = 0.667 + length_scale: float = 1 + noise_scale_dp: float = 1.0 + inference_noise_scale_dp: float = 1.0 + max_inference_len: int = None + init_discriminator: bool = True + use_spectral_norm_disriminator: bool = False + use_speaker_embedding: bool = False + num_speakers: int = 0 + speakers_file: str = None + d_vector_file: List[str] = None + speaker_embedding_channels: int = 256 + 
use_d_vector_file: bool = False + d_vector_dim: int = 0 + detach_dp_input: bool = True + use_language_embedding: bool = False + embedded_language_dim: int = 4 + num_languages: int = 0 + language_ids_file: str = None + use_speaker_encoder_as_loss: bool = False + speaker_encoder_config_path: str = "" + speaker_encoder_model_path: str = "" + condition_dp_on_speaker: bool = True + freeze_encoder: bool = False + freeze_DP: bool = False + freeze_PE: bool = False + freeze_flow_decoder: bool = False + freeze_waveform_decoder: bool = False + encoder_sample_rate: int = None + interpolate_z: bool = True + reinit_DP: bool = False + reinit_text_encoder: bool = False +@dataclass +class VitsConfig(BaseTTSConfig): + + model: str = "vits" + # model specific params + model_args: VitsArgs = field(default_factory=VitsArgs) + audio: VitsAudioConfig = field(default_factory=VitsAudioConfig) + + # optimizer + grad_clip: List[float] = field(default_factory=lambda: [1000, 1000]) + lr_gen: float = 0.0002 + lr_disc: float = 0.0002 + lr_scheduler_gen: str = "ExponentialLR" + lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + lr_scheduler_disc: str = "ExponentialLR" + lr_scheduler_disc_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + scheduler_after_epoch: bool = True + optimizer: str = "AdamW" + optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "eps": 1e-9, "weight_decay": 0.01}) + + # loss params + kl_loss_alpha: float = 1.0 + disc_loss_alpha: float = 1.0 + gen_loss_alpha: float = 1.0 + feat_loss_alpha: float = 1.0 + mel_loss_alpha: float = 45.0 + dur_loss_alpha: float = 1.0 + speaker_encoder_loss_alpha: float = 1.0 + + # data loader params + return_wav: bool = True + compute_linear_spec: bool = True + + # sampler params + use_weighted_sampler: bool = False # TODO: move it to the base config + weighted_sampler_attrs: dict = field(default_factory=lambda: {}) + 
weighted_sampler_multipliers: dict = field(default_factory=lambda: {}) + + # overrides + r: int = 1 # DO NOT CHANGE + add_blank: bool = True + + # testing + test_sentences: List[List] = field( + default_factory=lambda: [ + ["It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent."], + ["Be a voice, not an echo."], + ["I'm sorry Dave. I'm afraid I can't do that."], + ["This cake is great. It's so delicious and moist."], + ["Prior to November 22, 1963."], + ] + ) + + # multi-speaker settings + # use speaker embedding layer + num_speakers: int = 0 + use_speaker_embedding: bool = False + speakers_file: str = None + speaker_embedding_channels: int = 256 + language_ids_file: str = None + use_language_embedding: bool = False + + # use d-vectors + use_d_vector_file: bool = False + d_vector_file: List[str] = None + d_vector_dim: int = None + + def __post_init__(self): + pass + # for key, val in self.model_args.items(): + # if hasattr(self, key): + # self[key] = val + + + + + +def parse_symbols(): + return { + "pad": _pad, + "eos": _eos, + "bos": _bos, + "characters": _characters, + "punctuations": _punctuations, + "phonemes": _phonemes, + } + + +# DEFAULT SET OF GRAPHEMES +_pad = "" +_eos = "" +_bos = "" +_blank = "" # TODO: check if we need this alongside with PAD +_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" +_punctuations = "!'(),-.:;? " + + +# DEFAULT SET OF IPA PHONEMES +# Phonemes definition (All IPA characters) +_vowels = "iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ" +_non_pulmonic_consonants = "ʘɓǀɗǃʄǂɠǁʛ" +_pulmonic_consonants = "pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ" +_suprasegmentals = "ˈˌːˑ" +_other_symbols = "ʍwɥʜʢʡɕʑɺɧʲ" +_diacrilics = "ɚ˞ɫ" +_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics + + +class BaseVocabulary: + """Base Vocabulary class. 
+ + This class only needs a vocabulary dictionary without specifying the characters. + + Args: + vocab (Dict): A dictionary of characters and their corresponding indices. + """ + + def __init__(self, vocab: Dict, pad: str = None, blank: str = None, bos: str = None, eos: str = None): + self.vocab = vocab + self.pad = pad + self.blank = blank + self.bos = bos + self.eos = eos + + @property + def pad_id(self) -> int: + """Return the index of the padding character. If the padding character is not specified, return the length + of the vocabulary.""" + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + """Return the index of the blank character. If the blank character is not specified, return the length of + the vocabulary.""" + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def bos_id(self) -> int: + """Return the index of the bos character. If the bos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def eos_id(self) -> int: + """Return the index of the eos character. 
If the eos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def vocab(self): + """Return the vocabulary dictionary.""" + return self._vocab + + @vocab.setter + def vocab(self, vocab): + """Set the vocabulary dictionary and character mapping dictionaries.""" + self._vocab, self._char_to_id, self._id_to_char = None, None, None + if vocab is not None: + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self._vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self._vocab) # pylint: disable=unnecessary-comprehension + } + + @staticmethod + def init_from_config(config, **kwargs): + """Initialize from the given config.""" + if config.characters is not None and "vocab_dict" in config.characters and config.characters.vocab_dict: + return ( + BaseVocabulary( + config.characters.vocab_dict, + config.characters.pad, + config.characters.blank, + config.characters.bos, + config.characters.eos, + ), + config, + ) + return BaseVocabulary(**kwargs), config + + def to_config(self): + return CharactersConfig( + vocab_dict=self._vocab, + pad=self.pad, + eos=self.eos, + bos=self.bos, + blank=self.blank, + is_unique=False, + is_sorted=False, + ) + + @property + def num_chars(self): + """Return number of tokens in the vocabulary.""" + return len(self._vocab) + + def char_to_id(self, char: str) -> int: + """Map a character to an token ID.""" + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] 
{repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + """Map an token ID to a character.""" + return self._id_to_char[idx] + + +class BaseCharacters: + + + def __init__( + self, + characters: str = None, + punctuations: str = None, + pad: str = None, + eos: str = None, + bos: str = None, + blank: str = None, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + self._characters = characters + self._punctuations = punctuations + self._pad = pad + self._eos = eos + self._bos = bos + self._blank = blank + self.is_unique = is_unique + self.is_sorted = is_sorted + self._create_vocab() + + @property + def pad_id(self) -> int: + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def eos_id(self) -> int: + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def bos_id(self) -> int: + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, characters): + self._characters = characters + self._create_vocab() + + @property + def punctuations(self): + return self._punctuations + + @punctuations.setter + def punctuations(self, punctuations): + self._punctuations = punctuations + self._create_vocab() + + @property + def pad(self): + return self._pad + + @pad.setter + def pad(self, pad): + self._pad = pad + self._create_vocab() + + @property + def eos(self): + return self._eos + + @eos.setter + def eos(self, eos): + self._eos = eos + self._create_vocab() + + @property + def bos(self): + return self._bos + + @bos.setter + def bos(self, bos): + self._bos = bos + self._create_vocab() + + @property + def blank(self): + return self._blank + + @blank.setter + def blank(self, blank): + self._blank = blank + self._create_vocab() + + @property + 
    def vocab(self):
        # Full token list: [pad] [eos] [bos] [blank] + characters + punctuations.
        return self._vocab

    @vocab.setter
    def vocab(self, vocab):
        # Rebuild both lookup tables whenever the vocabulary is replaced.
        self._vocab = vocab
        self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)}
        self._id_to_char = {
            idx: char for idx, char in enumerate(self.vocab)  # pylint: disable=unnecessary-comprehension
        }

    @property
    def num_chars(self):
        # Total number of tokens, special symbols and punctuation included.
        return len(self._vocab)

    def _create_vocab(self):
        # Build the ordered vocabulary from the raw character string.
        _vocab = self._characters
        if self.is_unique:
            # Drop duplicate characters; ordering is restored by the sort below.
            _vocab = list(set(_vocab))
        if self.is_sorted:
            _vocab = sorted(_vocab)
        _vocab = list(_vocab)
        # Prepend special symbols one by one so that, when all are set,
        # the final order is [pad, eos, bos, blank, ...characters...].
        _vocab = [self._blank] + _vocab if self._blank is not None and len(self._blank) > 0 else _vocab
        _vocab = [self._bos] + _vocab if self._bos is not None and len(self._bos) > 0 else _vocab
        _vocab = [self._eos] + _vocab if self._eos is not None and len(self._eos) > 0 else _vocab
        _vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab
        self.vocab = _vocab + list(self._punctuations)
        if self.is_unique:
            # Mapping sizes diverge from the list length iff duplicates collapsed onto one key.
            duplicates = {x for x in self.vocab if self.vocab.count(x) > 1}
            assert (
                len(self.vocab) == len(self._char_to_id) == len(self._id_to_char)
            ), f" [!] There are duplicate characters in the character set. {duplicates}"

    def char_to_id(self, char: str) -> int:
        # EAFP lookup; re-raise with a clearer message for unknown characters.
        try:
            return self._char_to_id[char]
        except KeyError as e:
            raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e

    def id_to_char(self, idx: int) -> str:
        return self._id_to_char[idx]

    def print_log(self, level: int = 0):
        """
        Prints the vocabulary in a nice format.
+ """ + indent = "\t" * level + print(f"{indent}| > Characters: {self._characters}") + print(f"{indent}| > Punctuations: {self._punctuations}") + print(f"{indent}| > Pad: {self._pad}") + print(f"{indent}| > EOS: {self._eos}") + print(f"{indent}| > BOS: {self._bos}") + print(f"{indent}| > Blank: {self._blank}") + print(f"{indent}| > Vocab: {self.vocab}") + print(f"{indent}| > Num chars: {self.num_chars}") + + @staticmethod + def init_from_config(config: "Coqpit"): # pylint: disable=unused-argument + """Init your character class from a config. + + Implement this method for your subclass. + """ + # use character set from config + if config.characters is not None: + return BaseCharacters(**config.characters), config + # return default character set + characters = BaseCharacters() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + punctuations=self._punctuations, + pad=self._pad, + eos=self._eos, + bos=self._bos, + blank=self._blank, + is_unique=self.is_unique, + is_sorted=self.is_sorted, + ) + + +class IPAPhonemes(BaseCharacters): + + + def __init__( + self, + characters: str = _phonemes, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a IPAPhonemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + # band-aid for compatibility with old models + if "characters" in config and config.characters is not None: + if "phonemes" in config.characters and config.characters.phonemes is not None: + config.characters["characters"] = config.characters["phonemes"] + return ( + IPAPhonemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + # use character set from config + if config.characters is not None: + return IPAPhonemes(**config.characters), config + # return default character set + characters = IPAPhonemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +class Graphemes(BaseCharacters): + + + def __init__( + self, + characters: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a Graphemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + if config.characters is not None: + # band-aid for compatibility with old models + if "phonemes" in config.characters: + return ( + Graphemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + return Graphemes(**config.characters), config + characters = Graphemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +if __name__ == "__main__": + gr = Graphemes() + ph = IPAPhonemes() + gr.print_log() + ph.print_log() + + +class VitsCharacters(BaseCharacters): + """Characters class for VITs model for compatibility with pre-trained models""" + + def __init__( + self, + graphemes: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + ipa_characters: str = _phonemes, + ) -> None: + if ipa_characters is not None: + graphemes += ipa_characters + super().__init__(graphemes, punctuations, pad, None, None, "", is_unique=False, is_sorted=True) + + def _create_vocab(self): + self._vocab = [self._pad] + list(self._punctuations) + list(self._characters) + [self._blank] + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + # pylint: disable=unnecessary-comprehension + self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)} + + @staticmethod + def init_from_config(config): + _pad = config.characters.pad + _punctuations = config.characters.punctuations + _letters = config.characters.characters + _letters_ipa = config.characters.phonemes + return ( + VitsCharacters(graphemes=_letters, ipa_characters=_letters_ipa, punctuations=_punctuations, pad=_pad), + config, + ) + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + 
punctuations=self._punctuations, + pad=self._pad, + eos=None, + bos=None, + blank=self._blank, + is_unique=False, + is_sorted=True, + ) + +class TTSTokenizer: + def __init__( + self, + text_cleaner: Callable = None, + characters: "BaseCharacters" = None, + ): + self.text_cleaner = text_cleaner + self.characters = characters + self.not_found_characters = [] + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, new_characters): + self._characters = new_characters + self.pad_id = self.characters.char_to_id(self.characters.pad) if self.characters.pad else None + self.blank_id = self.characters.char_to_id(self.characters.blank) if self.characters.blank else None + + def encode(self, text: str) -> List[int]: + """Encodes a string of text as a sequence of IDs.""" + token_ids = [] + for char in text: + try: + idx = self.characters.char_to_id(char) + token_ids.append(idx) + except KeyError: + # discard but store not found characters + if char not in self.not_found_characters: + self.not_found_characters.append(char) + print(text) + print(f" [!] Character {repr(char)} not found in the vocabulary. Discarding it.") + return token_ids + + def text_to_ids(self, text: str, language: str = None) -> List[int]: # pylint: disable=unused-argument + text = self.text_cleaner(text) + text = self.encode(text) + text = self.intersperse_blank_char(text, True) + return text + + def pad_with_bos_eos(self, char_sequence: List[str]): + """Pads a sequence with the special BOS and EOS characters.""" + return [self.characters.bos_id] + list(char_sequence) + [self.characters.eos_id] + + def intersperse_blank_char(self, char_sequence: List[str], use_blank_char: bool = False): + """Intersperses the blank character between characters in a sequence. + + Use the ```blank``` character if defined else use the ```pad``` character. 
+ """ + char_to_use = self.characters.blank_id if use_blank_char else self.characters.pad + result = [char_to_use] * (len(char_sequence) * 2 + 1) + result[1::2] = char_sequence + return result + + @staticmethod + def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None): + text_cleaner = multilingual_cleaners + CharactersClass = VitsCharacters + characters, new_config = CharactersClass.init_from_config(config) + # new_config.characters.characters_class = get_import_path(characters) + new_config.characters.characters_class = VitsCharacters + return ( + TTSTokenizer(text_cleaner, characters),new_config) + + +def multilingual_cleaners(text): + """Pipeline for multilingual text""" + text = lowercase(text) + text = replace_symbols(text, lang=None) + text = remove_aux_symbols(text) + text = collapse_whitespace(text) + return text + +def lowercase(text): + return text.lower() + +def collapse_whitespace(text): + return re.sub(_whitespace_re, " ", text).strip() + +def replace_symbols(text, lang="en"): + + text = text.replace(";", ",") + text = text.replace("-", " ") if lang != "ca" else text.replace("-", "") + text = text.replace(":", ",") + if lang == "en": + text = text.replace("&", " and ") + elif lang == "fr": + text = text.replace("&", " et ") + elif lang == "pt": + text = text.replace("&", " e ") + elif lang == "ca": + text = text.replace("&", " i ") + text = text.replace("'", "") + return text + +def remove_aux_symbols(text): + text = re.sub(r"[\<\>\(\)\[\]\"]+", "", text) + return text \ No newline at end of file diff --git a/models/en_male/jit_infer.py b/models/en_male/jit_infer.py new file mode 100644 index 0000000000000000000000000000000000000000..e056529b46c4be7a060acca1efaf4f06d783c11a --- /dev/null +++ b/models/en_male/jit_infer.py @@ -0,0 +1,32 @@ +import os +from extra import TTSTokenizer, VitsConfig, CharactersConfig, VitsCharacters +import torch +import numpy as np + +#ch female +with open("chars.txt", 'r') as f: + letters = 
f.read().strip('\n') +model="en_male_vits_30hrs.pt" +text = "This is a text to b spoken" + +config = VitsConfig( + text_cleaner="multilingual_cleaners", + characters=CharactersConfig( + characters_class=VitsCharacters, + pad="", + eos="", + bos="", + blank="", + characters=letters, + punctuations="!¡'(),-.:;¿? ", + phonemes=None) + ) +tokenizer, config = TTSTokenizer.init_from_config(config) + +x = tokenizer.text_to_ids(text) +x = torch.from_numpy(np.array(x)).unsqueeze(0) +net = torch.jit.load(model) +with torch.no_grad(): + out2 = net(x) +import soundfile as sf +sf.write("jit.wav", out2.squeeze().cpu().numpy(), 22050) \ No newline at end of file diff --git a/models/gu_mms/config.json b/models/gu_mms/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f0020805fe7b3288a23eb66b7093bffc8542763a --- /dev/null +++ b/models/gu_mms/config.json @@ -0,0 +1,82 @@ +{ + "activation_dropout": 0.1, + "architectures": [ + "VitsModel" + ], + "attention_dropout": 0.1, + "depth_separable_channels": 2, + "depth_separable_num_layers": 3, + "duration_predictor_dropout": 0.5, + "duration_predictor_filter_channels": 256, + "duration_predictor_flow_bins": 10, + "duration_predictor_kernel_size": 3, + "duration_predictor_num_flows": 4, + "duration_predictor_tail_bound": 5.0, + "ffn_dim": 768, + "ffn_kernel_size": 3, + "flow_size": 192, + "hidden_act": "relu", + "hidden_dropout": 0.1, + "hidden_size": 192, + "initializer_range": 0.02, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "leaky_relu_slope": 0.1, + "model_type": "vits", + "noise_scale": 0.667, + "noise_scale_duration": 0.8, + "num_attention_heads": 2, + "num_hidden_layers": 6, + "num_speakers": 1, + "posterior_encoder_num_wavenet_layers": 16, + "prior_encoder_num_flows": 4, + "prior_encoder_num_wavenet_layers": 4, + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "sampling_rate": 16000, + 
"speaker_embedding_size": 0, + "speaking_rate": 1.0, + "spectrogram_bins": 513, + "torch_dtype": "float32", + "transformers_version": "4.33.0.dev0", + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "use_bias": true, + "use_stochastic_duration_prediction": true, + "vocab_size": 60, + "wavenet_dilation_rate": 1, + "wavenet_dropout": 0.0, + "wavenet_kernel_size": 5, + "window_size": 4 +} diff --git a/models/gu_mms/special_tokens_map.json b/models/gu_mms/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..d7b57bd9216b39a1535356cfc46d4fe83c31a10d --- /dev/null +++ b/models/gu_mms/special_tokens_map.json @@ -0,0 +1,4 @@ +{ + "pad_token": "|", + "unk_token": "" +} diff --git a/models/gu_mms/tokenizer_config.json b/models/gu_mms/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..534efc7ec43b38fb928d6f8cc672f8b09c11a432 --- /dev/null +++ b/models/gu_mms/tokenizer_config.json @@ -0,0 +1,12 @@ +{ + "add_blank": true, + "clean_up_tokenization_spaces": true, + "is_uroman": false, + "language": "guj", + "model_max_length": 1000000000000000019884624838656, + "normalize": true, + "pad_token": "|", + "phonemize": false, + "tokenizer_class": "VitsTokenizer", + "unk_token": "" +} diff --git a/models/gu_mms/vocab.json b/models/gu_mms/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..97a03a93bc6bf4fa3caba55ebb651fe32ca2457c --- /dev/null +++ b/models/gu_mms/vocab.json @@ -0,0 +1,62 @@ +{ + " ": 59, + "'": 47, + "-": 56, + "|": 0, + "ં": 10, + "ઃ": 54, + "અ": 28, + "આ": 26, + "ઇ": 49, + "ઈ": 30, + "ઉ": 42, + "ઊ": 48, + "ઋ": 57, + "એ": 29, + "ઐ": 58, + "ઓ": 27, + "ક": 9, + "ખ": 33, + "ગ": 32, + "ઘ": 44, + "ચ": 39, + "છ": 23, + "જ": 18, + "ઝ": 51, + "ઞ": 50, + "ટ": 36, + "ઠ": 45, + "ડ": 40, + "ઢ": 52, + "ણ": 22, + "ત": 3, + "થ": 19, + "દ": 25, + "ધ": 34, + "ન": 4, + "પ": 12, + "ફ": 43, + "બ": 
31, + "ભ": 35, + "મ": 7, + "ય": 16, + "ર": 5, + "લ": 24, + "ળ": 37, + "વ": 13, + "શ": 21, + "ષ": 41, + "સ": 15, + "હ": 17, + "ા": 1, + "િ": 20, + "ી": 8, + "ુ": 14, + "ૂ": 38, + "ૃ": 46, + "ે": 2, + "ૈ": 53, + "ો": 11, + "ૌ": 55, + "્": 6 +} diff --git a/models/hi_female/__pycache__/extra.cpython-310.pyc b/models/hi_female/__pycache__/extra.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8446dd4ff3487525f86633e06f9ecbc2d761941b Binary files /dev/null and b/models/hi_female/__pycache__/extra.cpython-310.pyc differ diff --git a/models/hi_female/chars.txt b/models/hi_female/chars.txt new file mode 100644 index 0000000000000000000000000000000000000000..55c592df71c5de3197265c7512b6159ddc22bc9b --- /dev/null +++ b/models/hi_female/chars.txt @@ -0,0 +1 @@ +शदऊतसओषमऱढै?ख़ौक़ड़ःिअनठय़ज़फ़्खँे।ंऋउ'हछङझ" ुणऔघयञृएईॆीपचॉॠवगडटइ,बॅूऐफकजलग़आधोथाभढ़ऑ diff --git a/models/hi_female/extra.py b/models/hi_female/extra.py new file mode 100644 index 0000000000000000000000000000000000000000..c7db561351da270a7c3931bfe0afefa7bc6d4853 --- /dev/null +++ b/models/hi_female/extra.py @@ -0,0 +1,787 @@ +from typing import Callable, Dict, List, Union +from dataclasses import asdict, dataclass, field + + +import re +from dataclasses import replace +from typing import Dict +_whitespace_re = re.compile(r"\s+") + +from dataclasses import dataclass, field +from typing import List + +# from TTS.tts.configs.shared_configs import BaseTTSConfig +# from TTS.tts.models.vits import VitsArgs, VitsAudioConfig + +@dataclass +class CharactersConfig(): + + characters_class: str = None + + # using BaseVocabulary + vocab_dict: Dict = None + + # using on BaseCharacters + pad: str = None + eos: str = None + bos: str = None + blank: str = None + characters: str = None + punctuations: str = None + phonemes: str = None + is_unique: bool = True # for backwards compatibility of models trained with char sets with duplicates + is_sorted: bool = True + + +@dataclass +class BaseTTSConfig(): 
+ + # audio: BaseAudioConfig = field(default_factory=BaseAudioConfig) + # phoneme settings + use_phonemes: bool = False + phonemizer: str = None + phoneme_language: str = None + compute_input_seq_cache: bool = False + text_cleaner: str = None + enable_eos_bos_chars: bool = False + test_sentences_file: str = "" + phoneme_cache_path: str = None + # vocabulary parameters + characters: CharactersConfig = None + add_blank: bool = False + # training params + batch_group_size: int = 0 + loss_masking: bool = None + # dataloading + min_audio_len: int = 1 + max_audio_len: int = float("inf") + min_text_len: int = 1 + max_text_len: int = float("inf") + compute_f0: bool = False + compute_energy: bool = False + compute_linear_spec: bool = False + precompute_num_workers: int = 0 + use_noise_augment: bool = False + start_by_longest: bool = False + shuffle: bool = False + drop_last: bool = False + # dataset + datasets: str = None + # optimizer + optimizer: str = "radam" + optimizer_params: dict = None + # scheduler + lr_scheduler: str = None + lr_scheduler_params: dict = field(default_factory=lambda: {}) + # testing + test_sentences: List[str] = field(default_factory=lambda: []) + # evaluation + eval_split_max_size: int = None + eval_split_size: float = 0.01 + # weighted samplers + use_speaker_weighted_sampler: bool = False + speaker_weighted_sampler_alpha: float = 1.0 + use_language_weighted_sampler: bool = False + language_weighted_sampler_alpha: float = 1.0 + use_length_weighted_sampler: bool = False + length_weighted_sampler_alpha: float = 1.0 + + +@dataclass +class VitsAudioConfig(): + fft_size: int = 1024 + sample_rate: int = 22050 + win_length: int = 1024 + hop_length: int = 256 + num_mels: int = 80 + mel_fmin: int = 0 + mel_fmax: int = None + +@dataclass +class VitsArgs(): + num_chars: int = 100 + out_channels: int = 513 + spec_segment_size: int = 32 + hidden_channels: int = 192 + hidden_channels_ffn_text_encoder: int = 768 + num_heads_text_encoder: int = 2 + 
num_layers_text_encoder: int = 6 + kernel_size_text_encoder: int = 3 + dropout_p_text_encoder: float = 0.1 + dropout_p_duration_predictor: float = 0.5 + kernel_size_posterior_encoder: int = 5 + dilation_rate_posterior_encoder: int = 1 + num_layers_posterior_encoder: int = 16 + kernel_size_flow: int = 5 + dilation_rate_flow: int = 1 + num_layers_flow: int = 4 + resblock_type_decoder: str = "1" + resblock_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [3, 7, 11]) + resblock_dilation_sizes_decoder: List[List[int]] = field(default_factory=lambda: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]) + upsample_rates_decoder: List[int] = field(default_factory=lambda: [8, 8, 2, 2]) + upsample_initial_channel_decoder: int = 512 + upsample_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [16, 16, 4, 4]) + periods_multi_period_discriminator: List[int] = field(default_factory=lambda: [2, 3, 5, 7, 11]) + use_sdp: bool = True + noise_scale: float = 1.0 + inference_noise_scale: float = 0.667 + length_scale: float = 1 + noise_scale_dp: float = 1.0 + inference_noise_scale_dp: float = 1.0 + max_inference_len: int = None + init_discriminator: bool = True + use_spectral_norm_disriminator: bool = False + use_speaker_embedding: bool = False + num_speakers: int = 0 + speakers_file: str = None + d_vector_file: List[str] = None + speaker_embedding_channels: int = 256 + use_d_vector_file: bool = False + d_vector_dim: int = 0 + detach_dp_input: bool = True + use_language_embedding: bool = False + embedded_language_dim: int = 4 + num_languages: int = 0 + language_ids_file: str = None + use_speaker_encoder_as_loss: bool = False + speaker_encoder_config_path: str = "" + speaker_encoder_model_path: str = "" + condition_dp_on_speaker: bool = True + freeze_encoder: bool = False + freeze_DP: bool = False + freeze_PE: bool = False + freeze_flow_decoder: bool = False + freeze_waveform_decoder: bool = False + encoder_sample_rate: int = None + interpolate_z: bool = True + reinit_DP: bool 
= False + reinit_text_encoder: bool = False +@dataclass +class VitsConfig(BaseTTSConfig): + + model: str = "vits" + # model specific params + model_args: VitsArgs = field(default_factory=VitsArgs) + audio: VitsAudioConfig = field(default_factory=VitsAudioConfig) + + # optimizer + grad_clip: List[float] = field(default_factory=lambda: [1000, 1000]) + lr_gen: float = 0.0002 + lr_disc: float = 0.0002 + lr_scheduler_gen: str = "ExponentialLR" + lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + lr_scheduler_disc: str = "ExponentialLR" + lr_scheduler_disc_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + scheduler_after_epoch: bool = True + optimizer: str = "AdamW" + optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "eps": 1e-9, "weight_decay": 0.01}) + + # loss params + kl_loss_alpha: float = 1.0 + disc_loss_alpha: float = 1.0 + gen_loss_alpha: float = 1.0 + feat_loss_alpha: float = 1.0 + mel_loss_alpha: float = 45.0 + dur_loss_alpha: float = 1.0 + speaker_encoder_loss_alpha: float = 1.0 + + # data loader params + return_wav: bool = True + compute_linear_spec: bool = True + + # sampler params + use_weighted_sampler: bool = False # TODO: move it to the base config + weighted_sampler_attrs: dict = field(default_factory=lambda: {}) + weighted_sampler_multipliers: dict = field(default_factory=lambda: {}) + + # overrides + r: int = 1 # DO NOT CHANGE + add_blank: bool = True + + # testing + test_sentences: List[List] = field( + default_factory=lambda: [ + ["It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent."], + ["Be a voice, not an echo."], + ["I'm sorry Dave. I'm afraid I can't do that."], + ["This cake is great. 
It's so delicious and moist."], + ["Prior to November 22, 1963."], + ] + ) + + # multi-speaker settings + # use speaker embedding layer + num_speakers: int = 0 + use_speaker_embedding: bool = False + speakers_file: str = None + speaker_embedding_channels: int = 256 + language_ids_file: str = None + use_language_embedding: bool = False + + # use d-vectors + use_d_vector_file: bool = False + d_vector_file: List[str] = None + d_vector_dim: int = None + + def __post_init__(self): + pass + # for key, val in self.model_args.items(): + # if hasattr(self, key): + # self[key] = val + + + + + +def parse_symbols(): + return { + "pad": _pad, + "eos": _eos, + "bos": _bos, + "characters": _characters, + "punctuations": _punctuations, + "phonemes": _phonemes, + } + + +# DEFAULT SET OF GRAPHEMES +_pad = "" +_eos = "" +_bos = "" +_blank = "" # TODO: check if we need this alongside with PAD +_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" +_punctuations = "!'(),-.:;? " + + +# DEFAULT SET OF IPA PHONEMES +# Phonemes definition (All IPA characters) +_vowels = "iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ" +_non_pulmonic_consonants = "ʘɓǀɗǃʄǂɠǁʛ" +_pulmonic_consonants = "pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ" +_suprasegmentals = "ˈˌːˑ" +_other_symbols = "ʍwɥʜʢʡɕʑɺɧʲ" +_diacrilics = "ɚ˞ɫ" +_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics + + +class BaseVocabulary: + """Base Vocabulary class. + + This class only needs a vocabulary dictionary without specifying the characters. + + Args: + vocab (Dict): A dictionary of characters and their corresponding indices. + """ + + def __init__(self, vocab: Dict, pad: str = None, blank: str = None, bos: str = None, eos: str = None): + self.vocab = vocab + self.pad = pad + self.blank = blank + self.bos = bos + self.eos = eos + + @property + def pad_id(self) -> int: + """Return the index of the padding character. 
If the padding character is not specified, return the length + of the vocabulary.""" + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + """Return the index of the blank character. If the blank character is not specified, return the length of + the vocabulary.""" + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def bos_id(self) -> int: + """Return the index of the bos character. If the bos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def eos_id(self) -> int: + """Return the index of the eos character. If the eos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def vocab(self): + """Return the vocabulary dictionary.""" + return self._vocab + + @vocab.setter + def vocab(self, vocab): + """Set the vocabulary dictionary and character mapping dictionaries.""" + self._vocab, self._char_to_id, self._id_to_char = None, None, None + if vocab is not None: + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self._vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self._vocab) # pylint: disable=unnecessary-comprehension + } + + @staticmethod + def init_from_config(config, **kwargs): + """Initialize from the given config.""" + if config.characters is not None and "vocab_dict" in config.characters and config.characters.vocab_dict: + return ( + BaseVocabulary( + config.characters.vocab_dict, + config.characters.pad, + config.characters.blank, + config.characters.bos, + config.characters.eos, + ), + config, + ) + return BaseVocabulary(**kwargs), config + + def to_config(self): + return CharactersConfig( + vocab_dict=self._vocab, + pad=self.pad, + eos=self.eos, + bos=self.bos, + blank=self.blank, + is_unique=False, + 
is_sorted=False, + ) + + @property + def num_chars(self): + """Return number of tokens in the vocabulary.""" + return len(self._vocab) + + def char_to_id(self, char: str) -> int: + """Map a character to an token ID.""" + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + """Map an token ID to a character.""" + return self._id_to_char[idx] + + +class BaseCharacters: + + + def __init__( + self, + characters: str = None, + punctuations: str = None, + pad: str = None, + eos: str = None, + bos: str = None, + blank: str = None, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + self._characters = characters + self._punctuations = punctuations + self._pad = pad + self._eos = eos + self._bos = bos + self._blank = blank + self.is_unique = is_unique + self.is_sorted = is_sorted + self._create_vocab() + + @property + def pad_id(self) -> int: + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def eos_id(self) -> int: + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def bos_id(self) -> int: + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, characters): + self._characters = characters + self._create_vocab() + + @property + def punctuations(self): + return self._punctuations + + @punctuations.setter + def punctuations(self, punctuations): + self._punctuations = punctuations + self._create_vocab() + + @property + def pad(self): + return self._pad + + @pad.setter + def pad(self, pad): + self._pad = pad + self._create_vocab() + + @property + def eos(self): + return self._eos + + @eos.setter + def eos(self, eos): + self._eos = eos + 
self._create_vocab() + + @property + def bos(self): + return self._bos + + @bos.setter + def bos(self, bos): + self._bos = bos + self._create_vocab() + + @property + def blank(self): + return self._blank + + @blank.setter + def blank(self, blank): + self._blank = blank + self._create_vocab() + + @property + def vocab(self): + return self._vocab + + @vocab.setter + def vocab(self, vocab): + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self.vocab) # pylint: disable=unnecessary-comprehension + } + + @property + def num_chars(self): + return len(self._vocab) + + def _create_vocab(self): + _vocab = self._characters + if self.is_unique: + _vocab = list(set(_vocab)) + if self.is_sorted: + _vocab = sorted(_vocab) + _vocab = list(_vocab) + _vocab = [self._blank] + _vocab if self._blank is not None and len(self._blank) > 0 else _vocab + _vocab = [self._bos] + _vocab if self._bos is not None and len(self._bos) > 0 else _vocab + _vocab = [self._eos] + _vocab if self._eos is not None and len(self._eos) > 0 else _vocab + _vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab + self.vocab = _vocab + list(self._punctuations) + if self.is_unique: + duplicates = {x for x in self.vocab if self.vocab.count(x) > 1} + assert ( + len(self.vocab) == len(self._char_to_id) == len(self._id_to_char) + ), f" [!] There are duplicate characters in the character set. {duplicates}" + + def char_to_id(self, char: str) -> int: + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + return self._id_to_char[idx] + + def print_log(self, level: int = 0): + """ + Prints the vocabulary in a nice format. 
+ """ + indent = "\t" * level + print(f"{indent}| > Characters: {self._characters}") + print(f"{indent}| > Punctuations: {self._punctuations}") + print(f"{indent}| > Pad: {self._pad}") + print(f"{indent}| > EOS: {self._eos}") + print(f"{indent}| > BOS: {self._bos}") + print(f"{indent}| > Blank: {self._blank}") + print(f"{indent}| > Vocab: {self.vocab}") + print(f"{indent}| > Num chars: {self.num_chars}") + + @staticmethod + def init_from_config(config: "Coqpit"): # pylint: disable=unused-argument + """Init your character class from a config. + + Implement this method for your subclass. + """ + # use character set from config + if config.characters is not None: + return BaseCharacters(**config.characters), config + # return default character set + characters = BaseCharacters() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + punctuations=self._punctuations, + pad=self._pad, + eos=self._eos, + bos=self._bos, + blank=self._blank, + is_unique=self.is_unique, + is_sorted=self.is_sorted, + ) + + +class IPAPhonemes(BaseCharacters): + + + def __init__( + self, + characters: str = _phonemes, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a IPAPhonemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
class IPAPhonemes(BaseCharacters):
    """``BaseCharacters`` preloaded with the default IPA phoneme inventory."""

    def __init__(
        self,
        characters: str = _phonemes,
        punctuations: str = _punctuations,
        pad: str = _pad,
        eos: str = _eos,
        bos: str = _bos,
        blank: str = _blank,
        is_unique: bool = False,
        is_sorted: bool = True,
    ) -> None:
        super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted)

    @staticmethod
    def init_from_config(config: "Coqpit"):
        """Init an ``IPAPhonemes`` object from a model config.

        Falls back to the default character set (and updates the config) when
        the config defines none.

        NOTE(review): uses dict-style access (``in`` / ``[]``) on the config;
        that works with Coqpit configs but not with the plain dataclasses
        defined in this file — confirm the intended config type.
        """
        # band-aid for compatibility with old models
        if "characters" in config and config.characters is not None:
            if "phonemes" in config.characters and config.characters.phonemes is not None:
                config.characters["characters"] = config.characters["phonemes"]
            return (
                IPAPhonemes(
                    characters=config.characters["characters"],
                    punctuations=config.characters["punctuations"],
                    pad=config.characters["pad"],
                    eos=config.characters["eos"],
                    bos=config.characters["bos"],
                    blank=config.characters["blank"],
                    is_unique=config.characters["is_unique"],
                    is_sorted=config.characters["is_sorted"],
                ),
                config,
            )
        if config.characters is not None:
            return IPAPhonemes(**config.characters), config
        characters = IPAPhonemes()
        new_config = replace(config, characters=characters.to_config())
        return characters, new_config


class Graphemes(BaseCharacters):
    """``BaseCharacters`` preloaded with the default ASCII grapheme set."""

    def __init__(
        self,
        characters: str = _characters,
        punctuations: str = _punctuations,
        pad: str = _pad,
        eos: str = _eos,
        bos: str = _bos,
        blank: str = _blank,
        is_unique: bool = False,
        is_sorted: bool = True,
    ) -> None:
        super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted)

    @staticmethod
    def init_from_config(config: "Coqpit"):
        """Init a ``Graphemes`` object from a model config.

        Falls back to the default character set (and updates the config) when
        the config defines none.

        NOTE(review): same dict-style config access caveat as ``IPAPhonemes``.
        """
        if config.characters is not None:
            # band-aid for compatibility with old models
            if "phonemes" in config.characters:
                return (
                    Graphemes(
                        characters=config.characters["characters"],
                        punctuations=config.characters["punctuations"],
                        pad=config.characters["pad"],
                        eos=config.characters["eos"],
                        bos=config.characters["bos"],
                        blank=config.characters["blank"],
                        is_unique=config.characters["is_unique"],
                        is_sorted=config.characters["is_sorted"],
                    ),
                    config,
                )
            return Graphemes(**config.characters), config
        characters = Graphemes()
        new_config = replace(config, characters=characters.to_config())
        return characters, new_config


if __name__ == "__main__":
    # Manual smoke test: dump both default character sets.
    # NOTE(review): this guard sits mid-module, before VitsCharacters and
    # TTSTokenizer are defined — harmless, but unusual placement.
    gr = Graphemes()
    ph = IPAPhonemes()
    gr.print_log()
    ph.print_log()


class VitsCharacters(BaseCharacters):
    """Characters class for VITs model for compatibility with pre-trained models."""

    def __init__(
        self,
        graphemes: str = _characters,
        punctuations: str = _punctuations,
        pad: str = _pad,
        ipa_characters: str = _phonemes,
    ) -> None:
        # Graphemes and IPA symbols share one flat character string.
        if ipa_characters is not None:
            graphemes = graphemes + ipa_characters
        super().__init__(graphemes, punctuations, pad, None, None, "", is_unique=False, is_sorted=True)

    def _create_vocab(self):
        # Fixed layout expected by pre-trained VITS checkpoints:
        # [pad] + punctuation + characters + [blank].
        self._vocab = [self._pad] + list(self._punctuations) + list(self._characters) + [self._blank]
        self._char_to_id = {token: index for index, token in enumerate(self.vocab)}
        self._id_to_char = {index: token for index, token in enumerate(self.vocab)}

    @staticmethod
    def init_from_config(config):
        """Build a ``VitsCharacters`` from ``config.characters`` fields."""
        chars = config.characters
        return (
            VitsCharacters(
                graphemes=chars.characters,
                ipa_characters=chars.phonemes,
                punctuations=chars.punctuations,
                pad=chars.pad,
            ),
            config,
        )

    def to_config(self) -> "CharactersConfig":
        """Serialize back into a ``CharactersConfig`` (no bos/eos in VITS)."""
        return CharactersConfig(
            characters=self._characters,
            punctuations=self._punctuations,
            pad=self._pad,
            eos=None,
            bos=None,
            blank=self._blank,
            is_unique=False,
            is_sorted=True,
        )
class TTSTokenizer:
    """Turn raw text into model token IDs: clean -> encode -> intersperse blanks."""

    def __init__(
        self,
        text_cleaner: Callable = None,
        characters: "BaseCharacters" = None,
    ):
        self.text_cleaner = text_cleaner
        self.characters = characters
        self.not_found_characters = []

    @property
    def characters(self):
        return self._characters

    @characters.setter
    def characters(self, new_characters):
        # Re-derive the cached special-token ids whenever the charset changes.
        self._characters = new_characters
        self.pad_id = self.characters.char_to_id(self.characters.pad) if self.characters.pad else None
        self.blank_id = self.characters.char_to_id(self.characters.blank) if self.characters.blank else None

    def encode(self, text: str) -> List[int]:
        """Encode ``text`` as a list of token IDs.

        Unknown characters are dropped; each one is remembered (and reported
        once) via ``self.not_found_characters``.
        """
        token_ids = []
        for char in text:
            try:
                token_ids.append(self.characters.char_to_id(char))
            except KeyError:
                # Discard but remember characters missing from the vocabulary.
                if char not in self.not_found_characters:
                    self.not_found_characters.append(char)
                    # BUGFIX: removed the stray debug ``print(text)`` that
                    # dumped the whole input on every new unknown character.
                    print(f" [!] Character {repr(char)} not found in the vocabulary. Discarding it.")
        return token_ids

    def text_to_ids(self, text: str, language: str = None) -> List[int]:  # pylint: disable=unused-argument
        """Clean, encode and blank-intersperse ``text``."""
        text = self.text_cleaner(text)
        text = self.encode(text)
        text = self.intersperse_blank_char(text, True)
        return text

    def pad_with_bos_eos(self, char_sequence: List[str]):
        """Pads a sequence with the special BOS and EOS token ids."""
        return [self.characters.bos_id] + list(char_sequence) + [self.characters.eos_id]

    def intersperse_blank_char(self, char_sequence: List[str], use_blank_char: bool = False):
        """Intersperses the blank token between tokens in a sequence.

        Uses the blank id if requested, else the pad id.
        """
        # BUGFIX: use ``pad_id`` (an int) rather than ``pad`` (a character) so
        # the result is a homogeneous list of token ids in both branches.
        filler = self.characters.blank_id if use_blank_char else self.characters.pad_id
        result = [filler] * (len(char_sequence) * 2 + 1)
        result[1::2] = char_sequence
        return result

    @staticmethod
    def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None):
        """Build a tokenizer (and updated config) wired to the VITS charset."""
        characters, new_config = VitsCharacters.init_from_config(config)
        new_config.characters.characters_class = VitsCharacters
        return TTSTokenizer(multilingual_cleaners, characters), new_config


def multilingual_cleaners(text):
    """Pipeline for multilingual text."""
    text = lowercase(text)
    text = replace_symbols(text, lang=None)
    text = remove_aux_symbols(text)
    text = collapse_whitespace(text)
    return text


def lowercase(text):
    return text.lower()


def collapse_whitespace(text):
    # Pattern inlined so this helper has no module-level dependency;
    # ``re`` caches compiled patterns, so the cost is unchanged.
    return re.sub(r"\s+", " ", text).strip()


def replace_symbols(text, lang="en"):
    """Normalize punctuation and expand '&' according to ``lang``."""
    text = text.replace(";", ",")
    text = text.replace("-", " ") if lang != "ca" else text.replace("-", "")
    text = text.replace(":", ",")
    if lang == "en":
        text = text.replace("&", " and ")
    elif lang == "fr":
        text = text.replace("&", " et ")
    elif lang == "pt":
        text = text.replace("&", " e ")
    elif lang == "ca":
        text = text.replace("&", " i ")
    text = text.replace("'", "")
    return text


def remove_aux_symbols(text):
    """Strip angle/round/square brackets and double quotes."""
    return re.sub(r"[\<\>\(\)\[\]\"]+", "", text)

# --- patch metadata preserved from the original diff ---
# models/hi_female/hi_female_vits_30hrs.pt (new file, git-lfs pointer):
#   version https://git-lfs.github.com/spec/v1
#   oid sha256:2bcfb47f599b36e7cbfec27142604c366e538c17e89980a40519291f92a46327
#   size 333261446
# diff --git
a/models/hi_female/jit_infer.py b/models/hi_female/jit_infer.py new file mode 100644 index 0000000000000000000000000000000000000000..99fa30c61140d20bc3eaef4ff41fdf8f6e50de34 --- /dev/null +++ b/models/hi_female/jit_infer.py @@ -0,0 +1,32 @@ +import os +from extra import TTSTokenizer, VitsConfig, CharactersConfig, VitsCharacters +import torch +import numpy as np + +#ch female +with open("chars.txt", 'r') as f: + letters = f.read().strip('\n') +model="hi_female_vits_30hrs.pt" +text = "फिल्म गर्दिश में अमरीश पुरी के साथ जैकी श्रॉफ, ऐश्वर्या, डिंपल कपाड़िया" + +config = VitsConfig( + text_cleaner="multilingual_cleaners", + characters=CharactersConfig( + characters_class=VitsCharacters, + pad="", + eos="", + bos="", + blank="", + characters=letters, + punctuations="!¡'(),-.:;¿? ", + phonemes=None) + ) +tokenizer, config = TTSTokenizer.init_from_config(config) + +x = tokenizer.text_to_ids(text) +x = torch.from_numpy(np.array(x)).unsqueeze(0) +net = torch.jit.load(model) +with torch.no_grad(): + out2 = net(x) +import soundfile as sf +sf.write("jit.wav", out2.squeeze().cpu().numpy(), 22050) \ No newline at end of file diff --git a/models/hi_male/chars.txt b/models/hi_male/chars.txt new file mode 100644 index 0000000000000000000000000000000000000000..5600167b081855cecb9aaf12b14f4fd6fc3ddee6 --- /dev/null +++ b/models/hi_male/chars.txt @@ -0,0 +1 @@ +शदऊतओसषमऱढै?ख़ौक़ड़ःिअनठय़ज़फ़्खँे।ंऋउ'हछङझ" ुणऔयघञृएईॆीपचॉॠवगडटइ,बॅूऐफजकलग़आधोथाभढ़ऑ diff --git a/models/hi_male/extra.py b/models/hi_male/extra.py new file mode 100644 index 0000000000000000000000000000000000000000..c7db561351da270a7c3931bfe0afefa7bc6d4853 --- /dev/null +++ b/models/hi_male/extra.py @@ -0,0 +1,787 @@ +from typing import Callable, Dict, List, Union +from dataclasses import asdict, dataclass, field + + +import re +from dataclasses import replace +from typing import Dict +_whitespace_re = re.compile(r"\s+") + +from dataclasses import dataclass, field +from typing import List + +# from 
# from TTS.tts.models.vits import VitsArgs, VitsAudioConfig


@dataclass
class CharactersConfig:
    """Character-set section of a TTS config (vocab or explicit char strings)."""

    characters_class: str = None
    # using BaseVocabulary
    vocab_dict: Dict = None
    # using on BaseCharacters
    pad: str = None
    eos: str = None
    bos: str = None
    blank: str = None
    characters: str = None
    punctuations: str = None
    phonemes: str = None
    is_unique: bool = True  # for backwards compatibility of models trained with char sets with duplicates
    is_sorted: bool = True


@dataclass
class BaseTTSConfig:
    """Shared TTS training/inference options (trimmed-down Coqui base config)."""

    # audio: BaseAudioConfig = field(default_factory=BaseAudioConfig)
    # phoneme settings
    use_phonemes: bool = False
    phonemizer: str = None
    phoneme_language: str = None
    compute_input_seq_cache: bool = False
    text_cleaner: str = None
    enable_eos_bos_chars: bool = False
    test_sentences_file: str = ""
    phoneme_cache_path: str = None
    # vocabulary parameters
    characters: CharactersConfig = None
    add_blank: bool = False
    # training params
    batch_group_size: int = 0
    loss_masking: bool = None
    # dataloading
    min_audio_len: int = 1
    max_audio_len: int = float("inf")
    min_text_len: int = 1
    max_text_len: int = float("inf")
    compute_f0: bool = False
    compute_energy: bool = False
    compute_linear_spec: bool = False
    precompute_num_workers: int = 0
    use_noise_augment: bool = False
    start_by_longest: bool = False
    shuffle: bool = False
    drop_last: bool = False
    # dataset
    datasets: str = None
    # optimizer
    optimizer: str = "radam"
    optimizer_params: dict = None
    # scheduler
    lr_scheduler: str = None
    lr_scheduler_params: dict = field(default_factory=lambda: {})
    # testing
    test_sentences: List[str] = field(default_factory=lambda: [])
    # evaluation
    eval_split_max_size: int = None
    eval_split_size: float = 0.01
    # weighted samplers
    use_speaker_weighted_sampler: bool = False
    speaker_weighted_sampler_alpha: float = 1.0
    use_language_weighted_sampler: bool = False
    language_weighted_sampler_alpha: float = 1.0
    use_length_weighted_sampler: bool = False
    length_weighted_sampler_alpha: float = 1.0


@dataclass
class VitsAudioConfig:
    """Audio/spectrogram front-end parameters for VITS."""

    fft_size: int = 1024
    sample_rate: int = 22050
    win_length: int = 1024
    hop_length: int = 256
    num_mels: int = 80
    mel_fmin: int = 0
    mel_fmax: int = None


@dataclass
class VitsArgs:
    """VITS architecture hyper-parameters."""

    num_chars: int = 100
    out_channels: int = 513
    spec_segment_size: int = 32
    hidden_channels: int = 192
    hidden_channels_ffn_text_encoder: int = 768
    num_heads_text_encoder: int = 2
    num_layers_text_encoder: int = 6
    kernel_size_text_encoder: int = 3
    dropout_p_text_encoder: float = 0.1
    dropout_p_duration_predictor: float = 0.5
    kernel_size_posterior_encoder: int = 5
    dilation_rate_posterior_encoder: int = 1
    num_layers_posterior_encoder: int = 16
    kernel_size_flow: int = 5
    dilation_rate_flow: int = 1
    num_layers_flow: int = 4
    resblock_type_decoder: str = "1"
    resblock_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [3, 7, 11])
    resblock_dilation_sizes_decoder: List[List[int]] = field(default_factory=lambda: [[1, 3, 5], [1, 3, 5], [1, 3, 5]])
    upsample_rates_decoder: List[int] = field(default_factory=lambda: [8, 8, 2, 2])
    upsample_initial_channel_decoder: int = 512
    upsample_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [16, 16, 4, 4])
    periods_multi_period_discriminator: List[int] = field(default_factory=lambda: [2, 3, 5, 7, 11])
    use_sdp: bool = True
    noise_scale: float = 1.0
    inference_noise_scale: float = 0.667
    length_scale: float = 1
    noise_scale_dp: float = 1.0
    inference_noise_scale_dp: float = 1.0
    max_inference_len: int = None
    init_discriminator: bool = True
    use_spectral_norm_disriminator: bool = False
    use_speaker_embedding: bool = False
    num_speakers: int = 0
    speakers_file: str = None
    d_vector_file: List[str] = None
    speaker_embedding_channels: int = 256
    use_d_vector_file: bool = False
    d_vector_dim: int = 0
    detach_dp_input: bool = True
    use_language_embedding: bool = False
    embedded_language_dim: int = 4
    num_languages: int = 0
    language_ids_file: str = None
    use_speaker_encoder_as_loss: bool = False
    speaker_encoder_config_path: str = ""
    speaker_encoder_model_path: str = ""
    condition_dp_on_speaker: bool = True
    freeze_encoder: bool = False
    freeze_DP: bool = False
    freeze_PE: bool = False
    freeze_flow_decoder: bool = False
    freeze_waveform_decoder: bool = False
    encoder_sample_rate: int = None
    interpolate_z: bool = True
    reinit_DP: bool = False
    reinit_text_encoder: bool = False


@dataclass
class VitsConfig(BaseTTSConfig):
    """Full VITS model configuration (training + inference defaults)."""

    model: str = "vits"
    # model specific params
    model_args: VitsArgs = field(default_factory=VitsArgs)
    audio: VitsAudioConfig = field(default_factory=VitsAudioConfig)

    # optimizer
    grad_clip: List[float] = field(default_factory=lambda: [1000, 1000])
    lr_gen: float = 0.0002
    lr_disc: float = 0.0002
    lr_scheduler_gen: str = "ExponentialLR"
    lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1})
    lr_scheduler_disc: str = "ExponentialLR"
    lr_scheduler_disc_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1})
    scheduler_after_epoch: bool = True
    optimizer: str = "AdamW"
    optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "eps": 1e-9, "weight_decay": 0.01})

    # loss params
    kl_loss_alpha: float = 1.0
    disc_loss_alpha: float = 1.0
    gen_loss_alpha: float = 1.0
    feat_loss_alpha: float = 1.0
    mel_loss_alpha: float = 45.0
    dur_loss_alpha: float = 1.0
    speaker_encoder_loss_alpha: float = 1.0

    # data loader params
    return_wav: bool = True
    compute_linear_spec: bool = True

    # sampler params
    use_weighted_sampler: bool = False  # TODO: move it to the base config
    weighted_sampler_attrs: dict = field(default_factory=lambda: {})
    weighted_sampler_multipliers: dict = field(default_factory=lambda: {})

    # overrides
    r: int = 1  # DO NOT CHANGE
    add_blank: bool = True

    # testing
    test_sentences: List[List] = field(
        default_factory=lambda: [
            ["It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent."],
            ["Be a voice, not an echo."],
            ["I'm sorry Dave. I'm afraid I can't do that."],
            ["This cake is great. It's so delicious and moist."],
            ["Prior to November 22, 1963."],
        ]
    )

    # multi-speaker settings
    # use speaker embedding layer
    num_speakers: int = 0
    use_speaker_embedding: bool = False
    speakers_file: str = None
    speaker_embedding_channels: int = 256
    language_ids_file: str = None
    use_language_embedding: bool = False

    # use d-vectors
    use_d_vector_file: bool = False
    d_vector_file: List[str] = None
    d_vector_dim: int = None

    def __post_init__(self):
        # Intentionally a no-op (the upstream model-args merge was disabled).
        pass


def parse_symbols():
    """Return the default symbol set as a plain dict."""
    return {
        "pad": _pad,
        "eos": _eos,
        "bos": _bos,
        "characters": _characters,
        "punctuations": _punctuations,
        "phonemes": _phonemes,
    }


# DEFAULT SET OF GRAPHEMES
_pad = ""
_eos = ""
_bos = ""
_blank = ""  # TODO: check if we need this alongside with PAD
_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
_punctuations = "!'(),-.:;? "


# DEFAULT SET OF IPA PHONEMES
# Phonemes definition (All IPA characters)
_vowels = "iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ"
_non_pulmonic_consonants = "ʘɓǀɗǃʄǂɠǁʛ"
_pulmonic_consonants = "pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ"
_suprasegmentals = "ˈˌːˑ"
_other_symbols = "ʍwɥʜʢʡɕʑɺɧʲ"
_diacrilics = "ɚ˞ɫ"
_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics
class BaseVocabulary:
    """Base Vocabulary class.

    Wraps a caller-supplied vocabulary container and derives the
    token<->id lookup tables from it.

    Args:
        vocab (Dict): A dictionary of characters and their corresponding indices.
    """

    def __init__(self, vocab: Dict, pad: str = None, blank: str = None, bos: str = None, eos: str = None):
        self.vocab = vocab
        self.pad = pad
        self.blank = blank
        self.bos = bos
        self.eos = eos

    @property
    def pad_id(self) -> int:
        """Index of the padding character, or ``len(vocab)`` when pad is unset."""
        return self.char_to_id(self.pad) if self.pad else len(self.vocab)

    @property
    def blank_id(self) -> int:
        """Index of the blank character, or ``len(vocab)`` when blank is unset."""
        return self.char_to_id(self.blank) if self.blank else len(self.vocab)

    @property
    def bos_id(self) -> int:
        """Index of the BOS character, or ``len(vocab)`` when BOS is unset."""
        return self.char_to_id(self.bos) if self.bos else len(self.vocab)

    @property
    def eos_id(self) -> int:
        """Index of the EOS character, or ``len(vocab)`` when EOS is unset."""
        return self.char_to_id(self.eos) if self.eos else len(self.vocab)

    @property
    def vocab(self):
        """Return the vocabulary container."""
        return self._vocab

    @vocab.setter
    def vocab(self, vocab):
        """Store the vocabulary and rebuild both lookup tables."""
        self._vocab, self._char_to_id, self._id_to_char = None, None, None
        if vocab is not None:
            self._vocab = vocab
            self._char_to_id = {token: index for index, token in enumerate(self._vocab)}
            self._id_to_char = {index: token for index, token in enumerate(self._vocab)}

    @staticmethod
    def init_from_config(config, **kwargs):
        """Build a vocabulary from ``config.characters.vocab_dict`` when present,
        otherwise from ``kwargs``.

        NOTE(review): uses mapping-style membership on the characters config;
        works with Coqpit-style configs, not with the plain dataclasses in
        this file — confirm the config type used at the call site.
        """
        chars = config.characters
        if chars is not None and "vocab_dict" in chars and chars.vocab_dict:
            return BaseVocabulary(chars.vocab_dict, chars.pad, chars.blank, chars.bos, chars.eos), config
        return BaseVocabulary(**kwargs), config

    def to_config(self):
        """Serialize this vocabulary back into a ``CharactersConfig``."""
        return CharactersConfig(
            vocab_dict=self._vocab,
            pad=self.pad,
            eos=self.eos,
            bos=self.bos,
            blank=self.blank,
            is_unique=False,
            is_sorted=False,
        )

    @property
    def num_chars(self):
        """Number of tokens in the vocabulary."""
        return len(self._vocab)

    def char_to_id(self, char: str) -> int:
        """Map a character to a token ID; raise ``KeyError`` for unknown chars."""
        try:
            return self._char_to_id[char]
        except KeyError as e:
            raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e

    def id_to_char(self, idx: int) -> str:
        """Map a token ID back to its character."""
        return self._id_to_char[idx]
class BaseCharacters:
    """Character set built from character/punctuation strings plus optional
    pad/eos/bos/blank specials; derives the id lookup tables on every change."""

    def __init__(
        self,
        characters: str = None,
        punctuations: str = None,
        pad: str = None,
        eos: str = None,
        bos: str = None,
        blank: str = None,
        is_unique: bool = False,
        is_sorted: bool = True,
    ) -> None:
        self._characters = characters
        self._punctuations = punctuations
        self._pad = pad
        self._eos = eos
        self._bos = bos
        self._blank = blank
        self.is_unique = is_unique
        self.is_sorted = is_sorted
        self._create_vocab()

    @property
    def pad_id(self) -> int:
        """Index of the pad character, or ``len(vocab)`` when pad is unset."""
        return self.char_to_id(self.pad) if self.pad else len(self.vocab)

    @property
    def blank_id(self) -> int:
        """Index of the blank character, or ``len(vocab)`` when blank is unset."""
        return self.char_to_id(self.blank) if self.blank else len(self.vocab)

    @property
    def eos_id(self) -> int:
        """Index of the EOS character, or ``len(vocab)`` when EOS is unset."""
        return self.char_to_id(self.eos) if self.eos else len(self.vocab)

    @property
    def bos_id(self) -> int:
        """Index of the BOS character, or ``len(vocab)`` when BOS is unset."""
        return self.char_to_id(self.bos) if self.bos else len(self.vocab)

    @property
    def characters(self):
        return self._characters

    @characters.setter
    def characters(self, characters):
        self._characters = characters
        self._create_vocab()

    @property
    def punctuations(self):
        return self._punctuations

    @punctuations.setter
    def punctuations(self, punctuations):
        self._punctuations = punctuations
        self._create_vocab()

    @property
    def pad(self):
        return self._pad

    @pad.setter
    def pad(self, pad):
        self._pad = pad
        self._create_vocab()

    @property
    def eos(self):
        return self._eos

    @eos.setter
    def eos(self, eos):
        self._eos = eos
        self._create_vocab()

    @property
    def bos(self):
        return self._bos

    @bos.setter
    def bos(self, bos):
        self._bos = bos
        self._create_vocab()

    @property
    def blank(self):
        return self._blank

    @blank.setter
    def blank(self, blank):
        self._blank = blank
        self._create_vocab()

    @property
    def vocab(self):
        return self._vocab

    @vocab.setter
    def vocab(self, vocab):
        self._vocab = vocab
        self._char_to_id = {token: index for index, token in enumerate(self.vocab)}
        self._id_to_char = {index: token for index, token in enumerate(self.vocab)}

    @property
    def num_chars(self):
        return len(self._vocab)

    def _create_vocab(self):
        """Rebuild ``vocab`` (and the lookup tables) from the current settings."""
        symbols = self._characters
        if self.is_unique:
            symbols = list(set(symbols))
        if self.is_sorted:
            symbols = sorted(symbols)
        symbols = list(symbols)
        # Specials end up ordered as [pad, eos, bos, blank, *characters].
        for special in (self._blank, self._bos, self._eos, self._pad):
            if special is not None and len(special) > 0:
                symbols.insert(0, special)
        self.vocab = symbols + list(self._punctuations)
        if self.is_unique:
            duplicates = {x for x in self.vocab if self.vocab.count(x) > 1}
            assert (
                len(self.vocab) == len(self._char_to_id) == len(self._id_to_char)
            ), f" [!] There are duplicate characters in the character set. {duplicates}"

    def char_to_id(self, char: str) -> int:
        """Map a character to a token ID; raise ``KeyError`` for unknown chars."""
        try:
            return self._char_to_id[char]
        except KeyError as e:
            raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e

    def id_to_char(self, idx: int) -> str:
        """Map a token ID back to its character."""
        return self._id_to_char[idx]

    def print_log(self, level: int = 0):
        """Print the character set in a human-readable, indented format."""
        indent = "\t" * level
        print(f"{indent}| > Characters: {self._characters}")
        print(f"{indent}| > Punctuations: {self._punctuations}")
        print(f"{indent}| > Pad: {self._pad}")
        print(f"{indent}| > EOS: {self._eos}")
        print(f"{indent}| > BOS: {self._bos}")
        print(f"{indent}| > Blank: {self._blank}")
        print(f"{indent}| > Vocab: {self.vocab}")
        print(f"{indent}| > Num chars: {self.num_chars}")

    @staticmethod
    def init_from_config(config: "Coqpit"):  # pylint: disable=unused-argument
        """Init a character class from a config; subclasses override this.

        Falls back to the default set (and updates the config) when none given.
        """
        if config.characters is not None:
            return BaseCharacters(**config.characters), config
        characters = BaseCharacters()
        new_config = replace(config, characters=characters.to_config())
        return characters, new_config

    def to_config(self) -> "CharactersConfig":
        """Serialize this character set back into a ``CharactersConfig``."""
        return CharactersConfig(
            characters=self._characters,
            punctuations=self._punctuations,
            pad=self._pad,
            eos=self._eos,
            bos=self._bos,
            blank=self._blank,
            is_unique=self.is_unique,
            is_sorted=self.is_sorted,
        )
+ """ + # band-aid for compatibility with old models + if "characters" in config and config.characters is not None: + if "phonemes" in config.characters and config.characters.phonemes is not None: + config.characters["characters"] = config.characters["phonemes"] + return ( + IPAPhonemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + # use character set from config + if config.characters is not None: + return IPAPhonemes(**config.characters), config + # return default character set + characters = IPAPhonemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +class Graphemes(BaseCharacters): + + + def __init__( + self, + characters: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a Graphemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + if config.characters is not None: + # band-aid for compatibility with old models + if "phonemes" in config.characters: + return ( + Graphemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + return Graphemes(**config.characters), config + characters = Graphemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +if __name__ == "__main__": + gr = Graphemes() + ph = IPAPhonemes() + gr.print_log() + ph.print_log() + + +class VitsCharacters(BaseCharacters): + """Characters class for VITs model for compatibility with pre-trained models""" + + def __init__( + self, + graphemes: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + ipa_characters: str = _phonemes, + ) -> None: + if ipa_characters is not None: + graphemes += ipa_characters + super().__init__(graphemes, punctuations, pad, None, None, "", is_unique=False, is_sorted=True) + + def _create_vocab(self): + self._vocab = [self._pad] + list(self._punctuations) + list(self._characters) + [self._blank] + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + # pylint: disable=unnecessary-comprehension + self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)} + + @staticmethod + def init_from_config(config): + _pad = config.characters.pad + _punctuations = config.characters.punctuations + _letters = config.characters.characters + _letters_ipa = config.characters.phonemes + return ( + VitsCharacters(graphemes=_letters, ipa_characters=_letters_ipa, punctuations=_punctuations, pad=_pad), + config, + ) + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + 
punctuations=self._punctuations, + pad=self._pad, + eos=None, + bos=None, + blank=self._blank, + is_unique=False, + is_sorted=True, + ) + +class TTSTokenizer: + def __init__( + self, + text_cleaner: Callable = None, + characters: "BaseCharacters" = None, + ): + self.text_cleaner = text_cleaner + self.characters = characters + self.not_found_characters = [] + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, new_characters): + self._characters = new_characters + self.pad_id = self.characters.char_to_id(self.characters.pad) if self.characters.pad else None + self.blank_id = self.characters.char_to_id(self.characters.blank) if self.characters.blank else None + + def encode(self, text: str) -> List[int]: + """Encodes a string of text as a sequence of IDs.""" + token_ids = [] + for char in text: + try: + idx = self.characters.char_to_id(char) + token_ids.append(idx) + except KeyError: + # discard but store not found characters + if char not in self.not_found_characters: + self.not_found_characters.append(char) + print(text) + print(f" [!] Character {repr(char)} not found in the vocabulary. Discarding it.") + return token_ids + + def text_to_ids(self, text: str, language: str = None) -> List[int]: # pylint: disable=unused-argument + text = self.text_cleaner(text) + text = self.encode(text) + text = self.intersperse_blank_char(text, True) + return text + + def pad_with_bos_eos(self, char_sequence: List[str]): + """Pads a sequence with the special BOS and EOS characters.""" + return [self.characters.bos_id] + list(char_sequence) + [self.characters.eos_id] + + def intersperse_blank_char(self, char_sequence: List[str], use_blank_char: bool = False): + """Intersperses the blank character between characters in a sequence. + + Use the ```blank``` character if defined else use the ```pad``` character. 
+ """ + char_to_use = self.characters.blank_id if use_blank_char else self.characters.pad + result = [char_to_use] * (len(char_sequence) * 2 + 1) + result[1::2] = char_sequence + return result + + @staticmethod + def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None): + text_cleaner = multilingual_cleaners + CharactersClass = VitsCharacters + characters, new_config = CharactersClass.init_from_config(config) + # new_config.characters.characters_class = get_import_path(characters) + new_config.characters.characters_class = VitsCharacters + return ( + TTSTokenizer(text_cleaner, characters),new_config) + + +def multilingual_cleaners(text): + """Pipeline for multilingual text""" + text = lowercase(text) + text = replace_symbols(text, lang=None) + text = remove_aux_symbols(text) + text = collapse_whitespace(text) + return text + +def lowercase(text): + return text.lower() + +def collapse_whitespace(text): + return re.sub(_whitespace_re, " ", text).strip() + +def replace_symbols(text, lang="en"): + + text = text.replace(";", ",") + text = text.replace("-", " ") if lang != "ca" else text.replace("-", "") + text = text.replace(":", ",") + if lang == "en": + text = text.replace("&", " and ") + elif lang == "fr": + text = text.replace("&", " et ") + elif lang == "pt": + text = text.replace("&", " e ") + elif lang == "ca": + text = text.replace("&", " i ") + text = text.replace("'", "") + return text + +def remove_aux_symbols(text): + text = re.sub(r"[\<\>\(\)\[\]\"]+", "", text) + return text \ No newline at end of file diff --git a/models/hi_male/hi_male_vits_30hrs.pt b/models/hi_male/hi_male_vits_30hrs.pt new file mode 100644 index 0000000000000000000000000000000000000000..4cf441fbed06adc5e914b34d605ff52b5061ced7 --- /dev/null +++ b/models/hi_male/hi_male_vits_30hrs.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb36eca2d90214662f1647e83eb6979ead93b72f269606c6411f52959acf77a8 +size 333256012 diff --git 
a/models/hi_male/jit_infer.py b/models/hi_male/jit_infer.py new file mode 100644 index 0000000000000000000000000000000000000000..52d2c95b6e2bc4464b1cd3e6698d40d515b100a8 --- /dev/null +++ b/models/hi_male/jit_infer.py @@ -0,0 +1,32 @@ +import os +from extra import TTSTokenizer, VitsConfig, CharactersConfig, VitsCharacters +import torch +import numpy as np + +#ch female +with open("chars.txt", 'r') as f: + letters = f.read().strip('\n') +model="hi_male_vits_30hrs.pt" +text = "फिल्म गर्दिश में अमरीश पुरी के साथ जैकी श्रॉफ, ऐश्वर्या, डिंपल कपाड़िया" + +config = VitsConfig( + text_cleaner="multilingual_cleaners", + characters=CharactersConfig( + characters_class=VitsCharacters, + pad="", + eos="", + bos="", + blank="", + characters=letters, + punctuations="!¡'(),-.:;¿? ", + phonemes=None) + ) +tokenizer, config = TTSTokenizer.init_from_config(config) + +x = tokenizer.text_to_ids(text) +x = torch.from_numpy(np.array(x)).unsqueeze(0) +net = torch.jit.load(model) +with torch.no_grad(): + out2 = net(x) +import soundfile as sf +sf.write("jit.wav", out2.squeeze().cpu().numpy(), 22050) \ No newline at end of file diff --git a/models/hne_female/.gitattributes b/models/hne_female/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/hne_female/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text 
+*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/hne_female/README.md b/models/hne_female/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b187bb7e7d837a367ccd0862441947ad412c77f7 --- /dev/null +++ b/models/hne_female/README.md @@ -0,0 +1,3 @@ +--- +license: cc-by-4.0 +--- diff --git a/models/hne_female/ch_female_vits_30hrs.pt b/models/hne_female/ch_female_vits_30hrs.pt new file mode 100644 index 0000000000000000000000000000000000000000..95af5a39c3696e8d46e320a0539e544a71e4342a --- /dev/null +++ b/models/hne_female/ch_female_vits_30hrs.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3393916262f03807d8338aa8dce79379582c71a0ada346457e36ea6f72a6635 +size 333255366 diff --git a/models/hne_female/chars.txt b/models/hne_female/chars.txt new file mode 100644 index 0000000000000000000000000000000000000000..e313e83b700327e5ddc2798352b8a6ad642a697c --- /dev/null +++ b/models/hne_female/chars.txt @@ -0,0 +1 @@ +खछगचऊुलशौढ़इणज़झैठढजफ़औ्ड़फूेानटॅयव़ऋदप.थअँऑआघहतषरसभउञडएईऐक़ िओ?धी,ॉंख़कोबमृ diff --git a/models/hne_female/extra.py 
b/models/hne_female/extra.py new file mode 100644 index 0000000000000000000000000000000000000000..c7db561351da270a7c3931bfe0afefa7bc6d4853 --- /dev/null +++ b/models/hne_female/extra.py @@ -0,0 +1,787 @@ +from typing import Callable, Dict, List, Union +from dataclasses import asdict, dataclass, field + + +import re +from dataclasses import replace +from typing import Dict +_whitespace_re = re.compile(r"\s+") + +from dataclasses import dataclass, field +from typing import List + +# from TTS.tts.configs.shared_configs import BaseTTSConfig +# from TTS.tts.models.vits import VitsArgs, VitsAudioConfig + +@dataclass +class CharactersConfig(): + + characters_class: str = None + + # using BaseVocabulary + vocab_dict: Dict = None + + # using on BaseCharacters + pad: str = None + eos: str = None + bos: str = None + blank: str = None + characters: str = None + punctuations: str = None + phonemes: str = None + is_unique: bool = True # for backwards compatibility of models trained with char sets with duplicates + is_sorted: bool = True + + +@dataclass +class BaseTTSConfig(): + + # audio: BaseAudioConfig = field(default_factory=BaseAudioConfig) + # phoneme settings + use_phonemes: bool = False + phonemizer: str = None + phoneme_language: str = None + compute_input_seq_cache: bool = False + text_cleaner: str = None + enable_eos_bos_chars: bool = False + test_sentences_file: str = "" + phoneme_cache_path: str = None + # vocabulary parameters + characters: CharactersConfig = None + add_blank: bool = False + # training params + batch_group_size: int = 0 + loss_masking: bool = None + # dataloading + min_audio_len: int = 1 + max_audio_len: int = float("inf") + min_text_len: int = 1 + max_text_len: int = float("inf") + compute_f0: bool = False + compute_energy: bool = False + compute_linear_spec: bool = False + precompute_num_workers: int = 0 + use_noise_augment: bool = False + start_by_longest: bool = False + shuffle: bool = False + drop_last: bool = False + # dataset + datasets: str 
= None + # optimizer + optimizer: str = "radam" + optimizer_params: dict = None + # scheduler + lr_scheduler: str = None + lr_scheduler_params: dict = field(default_factory=lambda: {}) + # testing + test_sentences: List[str] = field(default_factory=lambda: []) + # evaluation + eval_split_max_size: int = None + eval_split_size: float = 0.01 + # weighted samplers + use_speaker_weighted_sampler: bool = False + speaker_weighted_sampler_alpha: float = 1.0 + use_language_weighted_sampler: bool = False + language_weighted_sampler_alpha: float = 1.0 + use_length_weighted_sampler: bool = False + length_weighted_sampler_alpha: float = 1.0 + + +@dataclass +class VitsAudioConfig(): + fft_size: int = 1024 + sample_rate: int = 22050 + win_length: int = 1024 + hop_length: int = 256 + num_mels: int = 80 + mel_fmin: int = 0 + mel_fmax: int = None + +@dataclass +class VitsArgs(): + num_chars: int = 100 + out_channels: int = 513 + spec_segment_size: int = 32 + hidden_channels: int = 192 + hidden_channels_ffn_text_encoder: int = 768 + num_heads_text_encoder: int = 2 + num_layers_text_encoder: int = 6 + kernel_size_text_encoder: int = 3 + dropout_p_text_encoder: float = 0.1 + dropout_p_duration_predictor: float = 0.5 + kernel_size_posterior_encoder: int = 5 + dilation_rate_posterior_encoder: int = 1 + num_layers_posterior_encoder: int = 16 + kernel_size_flow: int = 5 + dilation_rate_flow: int = 1 + num_layers_flow: int = 4 + resblock_type_decoder: str = "1" + resblock_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [3, 7, 11]) + resblock_dilation_sizes_decoder: List[List[int]] = field(default_factory=lambda: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]) + upsample_rates_decoder: List[int] = field(default_factory=lambda: [8, 8, 2, 2]) + upsample_initial_channel_decoder: int = 512 + upsample_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [16, 16, 4, 4]) + periods_multi_period_discriminator: List[int] = field(default_factory=lambda: [2, 3, 5, 7, 11]) + use_sdp: bool 
= True + noise_scale: float = 1.0 + inference_noise_scale: float = 0.667 + length_scale: float = 1 + noise_scale_dp: float = 1.0 + inference_noise_scale_dp: float = 1.0 + max_inference_len: int = None + init_discriminator: bool = True + use_spectral_norm_disriminator: bool = False + use_speaker_embedding: bool = False + num_speakers: int = 0 + speakers_file: str = None + d_vector_file: List[str] = None + speaker_embedding_channels: int = 256 + use_d_vector_file: bool = False + d_vector_dim: int = 0 + detach_dp_input: bool = True + use_language_embedding: bool = False + embedded_language_dim: int = 4 + num_languages: int = 0 + language_ids_file: str = None + use_speaker_encoder_as_loss: bool = False + speaker_encoder_config_path: str = "" + speaker_encoder_model_path: str = "" + condition_dp_on_speaker: bool = True + freeze_encoder: bool = False + freeze_DP: bool = False + freeze_PE: bool = False + freeze_flow_decoder: bool = False + freeze_waveform_decoder: bool = False + encoder_sample_rate: int = None + interpolate_z: bool = True + reinit_DP: bool = False + reinit_text_encoder: bool = False +@dataclass +class VitsConfig(BaseTTSConfig): + + model: str = "vits" + # model specific params + model_args: VitsArgs = field(default_factory=VitsArgs) + audio: VitsAudioConfig = field(default_factory=VitsAudioConfig) + + # optimizer + grad_clip: List[float] = field(default_factory=lambda: [1000, 1000]) + lr_gen: float = 0.0002 + lr_disc: float = 0.0002 + lr_scheduler_gen: str = "ExponentialLR" + lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + lr_scheduler_disc: str = "ExponentialLR" + lr_scheduler_disc_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + scheduler_after_epoch: bool = True + optimizer: str = "AdamW" + optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "eps": 1e-9, "weight_decay": 0.01}) + + # loss params + kl_loss_alpha: float = 1.0 + 
disc_loss_alpha: float = 1.0 + gen_loss_alpha: float = 1.0 + feat_loss_alpha: float = 1.0 + mel_loss_alpha: float = 45.0 + dur_loss_alpha: float = 1.0 + speaker_encoder_loss_alpha: float = 1.0 + + # data loader params + return_wav: bool = True + compute_linear_spec: bool = True + + # sampler params + use_weighted_sampler: bool = False # TODO: move it to the base config + weighted_sampler_attrs: dict = field(default_factory=lambda: {}) + weighted_sampler_multipliers: dict = field(default_factory=lambda: {}) + + # overrides + r: int = 1 # DO NOT CHANGE + add_blank: bool = True + + # testing + test_sentences: List[List] = field( + default_factory=lambda: [ + ["It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent."], + ["Be a voice, not an echo."], + ["I'm sorry Dave. I'm afraid I can't do that."], + ["This cake is great. It's so delicious and moist."], + ["Prior to November 22, 1963."], + ] + ) + + # multi-speaker settings + # use speaker embedding layer + num_speakers: int = 0 + use_speaker_embedding: bool = False + speakers_file: str = None + speaker_embedding_channels: int = 256 + language_ids_file: str = None + use_language_embedding: bool = False + + # use d-vectors + use_d_vector_file: bool = False + d_vector_file: List[str] = None + d_vector_dim: int = None + + def __post_init__(self): + pass + # for key, val in self.model_args.items(): + # if hasattr(self, key): + # self[key] = val + + + + + +def parse_symbols(): + return { + "pad": _pad, + "eos": _eos, + "bos": _bos, + "characters": _characters, + "punctuations": _punctuations, + "phonemes": _phonemes, + } + + +# DEFAULT SET OF GRAPHEMES +_pad = "" +_eos = "" +_bos = "" +_blank = "" # TODO: check if we need this alongside with PAD +_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" +_punctuations = "!'(),-.:;? 
" + + +# DEFAULT SET OF IPA PHONEMES +# Phonemes definition (All IPA characters) +_vowels = "iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ" +_non_pulmonic_consonants = "ʘɓǀɗǃʄǂɠǁʛ" +_pulmonic_consonants = "pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ" +_suprasegmentals = "ˈˌːˑ" +_other_symbols = "ʍwɥʜʢʡɕʑɺɧʲ" +_diacrilics = "ɚ˞ɫ" +_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics + + +class BaseVocabulary: + """Base Vocabulary class. + + This class only needs a vocabulary dictionary without specifying the characters. + + Args: + vocab (Dict): A dictionary of characters and their corresponding indices. + """ + + def __init__(self, vocab: Dict, pad: str = None, blank: str = None, bos: str = None, eos: str = None): + self.vocab = vocab + self.pad = pad + self.blank = blank + self.bos = bos + self.eos = eos + + @property + def pad_id(self) -> int: + """Return the index of the padding character. If the padding character is not specified, return the length + of the vocabulary.""" + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + """Return the index of the blank character. If the blank character is not specified, return the length of + the vocabulary.""" + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def bos_id(self) -> int: + """Return the index of the bos character. If the bos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def eos_id(self) -> int: + """Return the index of the eos character. 
If the eos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def vocab(self): + """Return the vocabulary dictionary.""" + return self._vocab + + @vocab.setter + def vocab(self, vocab): + """Set the vocabulary dictionary and character mapping dictionaries.""" + self._vocab, self._char_to_id, self._id_to_char = None, None, None + if vocab is not None: + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self._vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self._vocab) # pylint: disable=unnecessary-comprehension + } + + @staticmethod + def init_from_config(config, **kwargs): + """Initialize from the given config.""" + if config.characters is not None and "vocab_dict" in config.characters and config.characters.vocab_dict: + return ( + BaseVocabulary( + config.characters.vocab_dict, + config.characters.pad, + config.characters.blank, + config.characters.bos, + config.characters.eos, + ), + config, + ) + return BaseVocabulary(**kwargs), config + + def to_config(self): + return CharactersConfig( + vocab_dict=self._vocab, + pad=self.pad, + eos=self.eos, + bos=self.bos, + blank=self.blank, + is_unique=False, + is_sorted=False, + ) + + @property + def num_chars(self): + """Return number of tokens in the vocabulary.""" + return len(self._vocab) + + def char_to_id(self, char: str) -> int: + """Map a character to an token ID.""" + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] 
{repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + """Map an token ID to a character.""" + return self._id_to_char[idx] + + +class BaseCharacters: + + + def __init__( + self, + characters: str = None, + punctuations: str = None, + pad: str = None, + eos: str = None, + bos: str = None, + blank: str = None, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + self._characters = characters + self._punctuations = punctuations + self._pad = pad + self._eos = eos + self._bos = bos + self._blank = blank + self.is_unique = is_unique + self.is_sorted = is_sorted + self._create_vocab() + + @property + def pad_id(self) -> int: + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def eos_id(self) -> int: + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def bos_id(self) -> int: + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, characters): + self._characters = characters + self._create_vocab() + + @property + def punctuations(self): + return self._punctuations + + @punctuations.setter + def punctuations(self, punctuations): + self._punctuations = punctuations + self._create_vocab() + + @property + def pad(self): + return self._pad + + @pad.setter + def pad(self, pad): + self._pad = pad + self._create_vocab() + + @property + def eos(self): + return self._eos + + @eos.setter + def eos(self, eos): + self._eos = eos + self._create_vocab() + + @property + def bos(self): + return self._bos + + @bos.setter + def bos(self, bos): + self._bos = bos + self._create_vocab() + + @property + def blank(self): + return self._blank + + @blank.setter + def blank(self, blank): + self._blank = blank + self._create_vocab() + + @property + 
def vocab(self): + return self._vocab + + @vocab.setter + def vocab(self, vocab): + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self.vocab) # pylint: disable=unnecessary-comprehension + } + + @property + def num_chars(self): + return len(self._vocab) + + def _create_vocab(self): + _vocab = self._characters + if self.is_unique: + _vocab = list(set(_vocab)) + if self.is_sorted: + _vocab = sorted(_vocab) + _vocab = list(_vocab) + _vocab = [self._blank] + _vocab if self._blank is not None and len(self._blank) > 0 else _vocab + _vocab = [self._bos] + _vocab if self._bos is not None and len(self._bos) > 0 else _vocab + _vocab = [self._eos] + _vocab if self._eos is not None and len(self._eos) > 0 else _vocab + _vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab + self.vocab = _vocab + list(self._punctuations) + if self.is_unique: + duplicates = {x for x in self.vocab if self.vocab.count(x) > 1} + assert ( + len(self.vocab) == len(self._char_to_id) == len(self._id_to_char) + ), f" [!] There are duplicate characters in the character set. {duplicates}" + + def char_to_id(self, char: str) -> int: + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + return self._id_to_char[idx] + + def print_log(self, level: int = 0): + """ + Prints the vocabulary in a nice format. 
+ """ + indent = "\t" * level + print(f"{indent}| > Characters: {self._characters}") + print(f"{indent}| > Punctuations: {self._punctuations}") + print(f"{indent}| > Pad: {self._pad}") + print(f"{indent}| > EOS: {self._eos}") + print(f"{indent}| > BOS: {self._bos}") + print(f"{indent}| > Blank: {self._blank}") + print(f"{indent}| > Vocab: {self.vocab}") + print(f"{indent}| > Num chars: {self.num_chars}") + + @staticmethod + def init_from_config(config: "Coqpit"): # pylint: disable=unused-argument + """Init your character class from a config. + + Implement this method for your subclass. + """ + # use character set from config + if config.characters is not None: + return BaseCharacters(**config.characters), config + # return default character set + characters = BaseCharacters() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + punctuations=self._punctuations, + pad=self._pad, + eos=self._eos, + bos=self._bos, + blank=self._blank, + is_unique=self.is_unique, + is_sorted=self.is_sorted, + ) + + +class IPAPhonemes(BaseCharacters): + + + def __init__( + self, + characters: str = _phonemes, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a IPAPhonemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + # band-aid for compatibility with old models + if "characters" in config and config.characters is not None: + if "phonemes" in config.characters and config.characters.phonemes is not None: + config.characters["characters"] = config.characters["phonemes"] + return ( + IPAPhonemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + # use character set from config + if config.characters is not None: + return IPAPhonemes(**config.characters), config + # return default character set + characters = IPAPhonemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +class Graphemes(BaseCharacters): + + + def __init__( + self, + characters: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a Graphemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + if config.characters is not None: + # band-aid for compatibility with old models + if "phonemes" in config.characters: + return ( + Graphemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + return Graphemes(**config.characters), config + characters = Graphemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +if __name__ == "__main__": + gr = Graphemes() + ph = IPAPhonemes() + gr.print_log() + ph.print_log() + + +class VitsCharacters(BaseCharacters): + """Characters class for VITs model for compatibility with pre-trained models""" + + def __init__( + self, + graphemes: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + ipa_characters: str = _phonemes, + ) -> None: + if ipa_characters is not None: + graphemes += ipa_characters + super().__init__(graphemes, punctuations, pad, None, None, "", is_unique=False, is_sorted=True) + + def _create_vocab(self): + self._vocab = [self._pad] + list(self._punctuations) + list(self._characters) + [self._blank] + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + # pylint: disable=unnecessary-comprehension + self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)} + + @staticmethod + def init_from_config(config): + _pad = config.characters.pad + _punctuations = config.characters.punctuations + _letters = config.characters.characters + _letters_ipa = config.characters.phonemes + return ( + VitsCharacters(graphemes=_letters, ipa_characters=_letters_ipa, punctuations=_punctuations, pad=_pad), + config, + ) + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + 
punctuations=self._punctuations, + pad=self._pad, + eos=None, + bos=None, + blank=self._blank, + is_unique=False, + is_sorted=True, + ) + +class TTSTokenizer: + def __init__( + self, + text_cleaner: Callable = None, + characters: "BaseCharacters" = None, + ): + self.text_cleaner = text_cleaner + self.characters = characters + self.not_found_characters = [] + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, new_characters): + self._characters = new_characters + self.pad_id = self.characters.char_to_id(self.characters.pad) if self.characters.pad else None + self.blank_id = self.characters.char_to_id(self.characters.blank) if self.characters.blank else None + + def encode(self, text: str) -> List[int]: + """Encodes a string of text as a sequence of IDs.""" + token_ids = [] + for char in text: + try: + idx = self.characters.char_to_id(char) + token_ids.append(idx) + except KeyError: + # discard but store not found characters + if char not in self.not_found_characters: + self.not_found_characters.append(char) + print(text) + print(f" [!] Character {repr(char)} not found in the vocabulary. Discarding it.") + return token_ids + + def text_to_ids(self, text: str, language: str = None) -> List[int]: # pylint: disable=unused-argument + text = self.text_cleaner(text) + text = self.encode(text) + text = self.intersperse_blank_char(text, True) + return text + + def pad_with_bos_eos(self, char_sequence: List[str]): + """Pads a sequence with the special BOS and EOS characters.""" + return [self.characters.bos_id] + list(char_sequence) + [self.characters.eos_id] + + def intersperse_blank_char(self, char_sequence: List[str], use_blank_char: bool = False): + """Intersperses the blank character between characters in a sequence. + + Use the ```blank``` character if defined else use the ```pad``` character. 
+ """ + char_to_use = self.characters.blank_id if use_blank_char else self.characters.pad + result = [char_to_use] * (len(char_sequence) * 2 + 1) + result[1::2] = char_sequence + return result + + @staticmethod + def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None): + text_cleaner = multilingual_cleaners + CharactersClass = VitsCharacters + characters, new_config = CharactersClass.init_from_config(config) + # new_config.characters.characters_class = get_import_path(characters) + new_config.characters.characters_class = VitsCharacters + return ( + TTSTokenizer(text_cleaner, characters),new_config) + + +def multilingual_cleaners(text): + """Pipeline for multilingual text""" + text = lowercase(text) + text = replace_symbols(text, lang=None) + text = remove_aux_symbols(text) + text = collapse_whitespace(text) + return text + +def lowercase(text): + return text.lower() + +def collapse_whitespace(text): + return re.sub(_whitespace_re, " ", text).strip() + +def replace_symbols(text, lang="en"): + + text = text.replace(";", ",") + text = text.replace("-", " ") if lang != "ca" else text.replace("-", "") + text = text.replace(":", ",") + if lang == "en": + text = text.replace("&", " and ") + elif lang == "fr": + text = text.replace("&", " et ") + elif lang == "pt": + text = text.replace("&", " e ") + elif lang == "ca": + text = text.replace("&", " i ") + text = text.replace("'", "") + return text + +def remove_aux_symbols(text): + text = re.sub(r"[\<\>\(\)\[\]\"]+", "", text) + return text \ No newline at end of file diff --git a/models/hne_female/jit_infer.py b/models/hne_female/jit_infer.py new file mode 100644 index 0000000000000000000000000000000000000000..8e29eb17b03cc9717602070d53ff9f1b733f788a --- /dev/null +++ b/models/hne_female/jit_infer.py @@ -0,0 +1,31 @@ +import os +from extra import TTSTokenizer, VitsConfig, CharactersConfig, VitsCharacters +import torch +import numpy as np + +#ch female 
+letters="खछगचऊुलशौढ़इणज़झैठढजफ़औ्ड़फूेानटॅयव़ऋदप.थअँऑआघहतषरसभउञडएईऐक़ िओ?धी,ॉंख़कोबमृ" +model="ch_female_vits_30hrs.pt" +text = "पेरिविंकल के जड़, उपजी अउ पत्त्ता मन ह बिकट उपयोगी हे" + +config = VitsConfig( + text_cleaner="multilingual_cleaners", + characters=CharactersConfig( + characters_class=VitsCharacters, + pad="", + eos="", + bos="", + blank="", + characters=letters, + punctuations="!¡'(),-.:;¿? ", + phonemes=None) + ) +tokenizer, config = TTSTokenizer.init_from_config(config) + +x = tokenizer.text_to_ids(text) +x = torch.from_numpy(np.array(x)).unsqueeze(0) +net = torch.jit.load(model) +with torch.no_grad(): + out2 = net(x) +import soundfile as sf +sf.write("jit.wav", out2.squeeze().cpu().numpy(), 22050) \ No newline at end of file diff --git a/models/hne_male/.gitattributes b/models/hne_male/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/hne_male/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs 
merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/hne_male/README.md b/models/hne_male/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b187bb7e7d837a367ccd0862441947ad412c77f7 --- /dev/null +++ b/models/hne_male/README.md @@ -0,0 +1,3 @@ +--- +license: cc-by-4.0 +--- diff --git a/models/hne_male/ch_male_vits_30hrs.pt b/models/hne_male/ch_male_vits_30hrs.pt new file mode 100644 index 0000000000000000000000000000000000000000..c2d2e270616ab1f01c4f7721b7b51915a434e668 --- /dev/null +++ b/models/hne_male/ch_male_vits_30hrs.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ef1fb370e1a8bc844aca171316a909461521ce6afa1371d15d5f8c765cda4d9 +size 333250252 diff --git a/models/hne_male/chars.txt b/models/hne_male/chars.txt new file mode 100644 index 0000000000000000000000000000000000000000..5c0bde4eaee0264627222ea932aa833565126fcd --- /dev/null +++ b/models/hne_male/chars.txt @@ -0,0 +1 @@ +खछगचऊुलणशढ़इौज़झठैढजफ़औ्ड़फूेानटॅयव़ऋदप.थअँऑआघहतषरसभउञडएईऐक़ िओ?धी,ॉंख़कोबमृ diff --git a/models/hne_male/extra.py b/models/hne_male/extra.py new file mode 100644 index 0000000000000000000000000000000000000000..c7db561351da270a7c3931bfe0afefa7bc6d4853 --- /dev/null +++ b/models/hne_male/extra.py @@ -0,0 +1,787 @@ +from typing import Callable, Dict, List, Union +from dataclasses import asdict, dataclass, field + + +import re +from dataclasses import replace +from typing import Dict +_whitespace_re = 
re.compile(r"\s+") + +from dataclasses import dataclass, field +from typing import List + +# from TTS.tts.configs.shared_configs import BaseTTSConfig +# from TTS.tts.models.vits import VitsArgs, VitsAudioConfig + +@dataclass +class CharactersConfig(): + + characters_class: str = None + + # using BaseVocabulary + vocab_dict: Dict = None + + # using on BaseCharacters + pad: str = None + eos: str = None + bos: str = None + blank: str = None + characters: str = None + punctuations: str = None + phonemes: str = None + is_unique: bool = True # for backwards compatibility of models trained with char sets with duplicates + is_sorted: bool = True + + +@dataclass +class BaseTTSConfig(): + + # audio: BaseAudioConfig = field(default_factory=BaseAudioConfig) + # phoneme settings + use_phonemes: bool = False + phonemizer: str = None + phoneme_language: str = None + compute_input_seq_cache: bool = False + text_cleaner: str = None + enable_eos_bos_chars: bool = False + test_sentences_file: str = "" + phoneme_cache_path: str = None + # vocabulary parameters + characters: CharactersConfig = None + add_blank: bool = False + # training params + batch_group_size: int = 0 + loss_masking: bool = None + # dataloading + min_audio_len: int = 1 + max_audio_len: int = float("inf") + min_text_len: int = 1 + max_text_len: int = float("inf") + compute_f0: bool = False + compute_energy: bool = False + compute_linear_spec: bool = False + precompute_num_workers: int = 0 + use_noise_augment: bool = False + start_by_longest: bool = False + shuffle: bool = False + drop_last: bool = False + # dataset + datasets: str = None + # optimizer + optimizer: str = "radam" + optimizer_params: dict = None + # scheduler + lr_scheduler: str = None + lr_scheduler_params: dict = field(default_factory=lambda: {}) + # testing + test_sentences: List[str] = field(default_factory=lambda: []) + # evaluation + eval_split_max_size: int = None + eval_split_size: float = 0.01 + # weighted samplers + 
use_speaker_weighted_sampler: bool = False + speaker_weighted_sampler_alpha: float = 1.0 + use_language_weighted_sampler: bool = False + language_weighted_sampler_alpha: float = 1.0 + use_length_weighted_sampler: bool = False + length_weighted_sampler_alpha: float = 1.0 + + +@dataclass +class VitsAudioConfig(): + fft_size: int = 1024 + sample_rate: int = 22050 + win_length: int = 1024 + hop_length: int = 256 + num_mels: int = 80 + mel_fmin: int = 0 + mel_fmax: int = None + +@dataclass +class VitsArgs(): + num_chars: int = 100 + out_channels: int = 513 + spec_segment_size: int = 32 + hidden_channels: int = 192 + hidden_channels_ffn_text_encoder: int = 768 + num_heads_text_encoder: int = 2 + num_layers_text_encoder: int = 6 + kernel_size_text_encoder: int = 3 + dropout_p_text_encoder: float = 0.1 + dropout_p_duration_predictor: float = 0.5 + kernel_size_posterior_encoder: int = 5 + dilation_rate_posterior_encoder: int = 1 + num_layers_posterior_encoder: int = 16 + kernel_size_flow: int = 5 + dilation_rate_flow: int = 1 + num_layers_flow: int = 4 + resblock_type_decoder: str = "1" + resblock_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [3, 7, 11]) + resblock_dilation_sizes_decoder: List[List[int]] = field(default_factory=lambda: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]) + upsample_rates_decoder: List[int] = field(default_factory=lambda: [8, 8, 2, 2]) + upsample_initial_channel_decoder: int = 512 + upsample_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [16, 16, 4, 4]) + periods_multi_period_discriminator: List[int] = field(default_factory=lambda: [2, 3, 5, 7, 11]) + use_sdp: bool = True + noise_scale: float = 1.0 + inference_noise_scale: float = 0.667 + length_scale: float = 1 + noise_scale_dp: float = 1.0 + inference_noise_scale_dp: float = 1.0 + max_inference_len: int = None + init_discriminator: bool = True + use_spectral_norm_disriminator: bool = False + use_speaker_embedding: bool = False + num_speakers: int = 0 + speakers_file: str = 
None + d_vector_file: List[str] = None + speaker_embedding_channels: int = 256 + use_d_vector_file: bool = False + d_vector_dim: int = 0 + detach_dp_input: bool = True + use_language_embedding: bool = False + embedded_language_dim: int = 4 + num_languages: int = 0 + language_ids_file: str = None + use_speaker_encoder_as_loss: bool = False + speaker_encoder_config_path: str = "" + speaker_encoder_model_path: str = "" + condition_dp_on_speaker: bool = True + freeze_encoder: bool = False + freeze_DP: bool = False + freeze_PE: bool = False + freeze_flow_decoder: bool = False + freeze_waveform_decoder: bool = False + encoder_sample_rate: int = None + interpolate_z: bool = True + reinit_DP: bool = False + reinit_text_encoder: bool = False +@dataclass +class VitsConfig(BaseTTSConfig): + + model: str = "vits" + # model specific params + model_args: VitsArgs = field(default_factory=VitsArgs) + audio: VitsAudioConfig = field(default_factory=VitsAudioConfig) + + # optimizer + grad_clip: List[float] = field(default_factory=lambda: [1000, 1000]) + lr_gen: float = 0.0002 + lr_disc: float = 0.0002 + lr_scheduler_gen: str = "ExponentialLR" + lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + lr_scheduler_disc: str = "ExponentialLR" + lr_scheduler_disc_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + scheduler_after_epoch: bool = True + optimizer: str = "AdamW" + optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "eps": 1e-9, "weight_decay": 0.01}) + + # loss params + kl_loss_alpha: float = 1.0 + disc_loss_alpha: float = 1.0 + gen_loss_alpha: float = 1.0 + feat_loss_alpha: float = 1.0 + mel_loss_alpha: float = 45.0 + dur_loss_alpha: float = 1.0 + speaker_encoder_loss_alpha: float = 1.0 + + # data loader params + return_wav: bool = True + compute_linear_spec: bool = True + + # sampler params + use_weighted_sampler: bool = False # TODO: move it to the base config + 
weighted_sampler_attrs: dict = field(default_factory=lambda: {}) + weighted_sampler_multipliers: dict = field(default_factory=lambda: {}) + + # overrides + r: int = 1 # DO NOT CHANGE + add_blank: bool = True + + # testing + test_sentences: List[List] = field( + default_factory=lambda: [ + ["It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent."], + ["Be a voice, not an echo."], + ["I'm sorry Dave. I'm afraid I can't do that."], + ["This cake is great. It's so delicious and moist."], + ["Prior to November 22, 1963."], + ] + ) + + # multi-speaker settings + # use speaker embedding layer + num_speakers: int = 0 + use_speaker_embedding: bool = False + speakers_file: str = None + speaker_embedding_channels: int = 256 + language_ids_file: str = None + use_language_embedding: bool = False + + # use d-vectors + use_d_vector_file: bool = False + d_vector_file: List[str] = None + d_vector_dim: int = None + + def __post_init__(self): + pass + # for key, val in self.model_args.items(): + # if hasattr(self, key): + # self[key] = val + + + + + +def parse_symbols(): + return { + "pad": _pad, + "eos": _eos, + "bos": _bos, + "characters": _characters, + "punctuations": _punctuations, + "phonemes": _phonemes, + } + + +# DEFAULT SET OF GRAPHEMES +_pad = "" +_eos = "" +_bos = "" +_blank = "" # TODO: check if we need this alongside with PAD +_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" +_punctuations = "!'(),-.:;? " + + +# DEFAULT SET OF IPA PHONEMES +# Phonemes definition (All IPA characters) +_vowels = "iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ" +_non_pulmonic_consonants = "ʘɓǀɗǃʄǂɠǁʛ" +_pulmonic_consonants = "pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ" +_suprasegmentals = "ˈˌːˑ" +_other_symbols = "ʍwɥʜʢʡɕʑɺɧʲ" +_diacrilics = "ɚ˞ɫ" +_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics + + +class BaseVocabulary: + """Base Vocabulary class. 
+ + This class only needs a vocabulary dictionary without specifying the characters. + + Args: + vocab (Dict): A dictionary of characters and their corresponding indices. + """ + + def __init__(self, vocab: Dict, pad: str = None, blank: str = None, bos: str = None, eos: str = None): + self.vocab = vocab + self.pad = pad + self.blank = blank + self.bos = bos + self.eos = eos + + @property + def pad_id(self) -> int: + """Return the index of the padding character. If the padding character is not specified, return the length + of the vocabulary.""" + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + """Return the index of the blank character. If the blank character is not specified, return the length of + the vocabulary.""" + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def bos_id(self) -> int: + """Return the index of the bos character. If the bos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def eos_id(self) -> int: + """Return the index of the eos character. 
If the eos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def vocab(self): + """Return the vocabulary dictionary.""" + return self._vocab + + @vocab.setter + def vocab(self, vocab): + """Set the vocabulary dictionary and character mapping dictionaries.""" + self._vocab, self._char_to_id, self._id_to_char = None, None, None + if vocab is not None: + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self._vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self._vocab) # pylint: disable=unnecessary-comprehension + } + + @staticmethod + def init_from_config(config, **kwargs): + """Initialize from the given config.""" + if config.characters is not None and "vocab_dict" in config.characters and config.characters.vocab_dict: + return ( + BaseVocabulary( + config.characters.vocab_dict, + config.characters.pad, + config.characters.blank, + config.characters.bos, + config.characters.eos, + ), + config, + ) + return BaseVocabulary(**kwargs), config + + def to_config(self): + return CharactersConfig( + vocab_dict=self._vocab, + pad=self.pad, + eos=self.eos, + bos=self.bos, + blank=self.blank, + is_unique=False, + is_sorted=False, + ) + + @property + def num_chars(self): + """Return number of tokens in the vocabulary.""" + return len(self._vocab) + + def char_to_id(self, char: str) -> int: + """Map a character to an token ID.""" + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] 
{repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + """Map an token ID to a character.""" + return self._id_to_char[idx] + + +class BaseCharacters: + + + def __init__( + self, + characters: str = None, + punctuations: str = None, + pad: str = None, + eos: str = None, + bos: str = None, + blank: str = None, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + self._characters = characters + self._punctuations = punctuations + self._pad = pad + self._eos = eos + self._bos = bos + self._blank = blank + self.is_unique = is_unique + self.is_sorted = is_sorted + self._create_vocab() + + @property + def pad_id(self) -> int: + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def eos_id(self) -> int: + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def bos_id(self) -> int: + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, characters): + self._characters = characters + self._create_vocab() + + @property + def punctuations(self): + return self._punctuations + + @punctuations.setter + def punctuations(self, punctuations): + self._punctuations = punctuations + self._create_vocab() + + @property + def pad(self): + return self._pad + + @pad.setter + def pad(self, pad): + self._pad = pad + self._create_vocab() + + @property + def eos(self): + return self._eos + + @eos.setter + def eos(self, eos): + self._eos = eos + self._create_vocab() + + @property + def bos(self): + return self._bos + + @bos.setter + def bos(self, bos): + self._bos = bos + self._create_vocab() + + @property + def blank(self): + return self._blank + + @blank.setter + def blank(self, blank): + self._blank = blank + self._create_vocab() + + @property + 
def vocab(self): + return self._vocab + + @vocab.setter + def vocab(self, vocab): + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self.vocab) # pylint: disable=unnecessary-comprehension + } + + @property + def num_chars(self): + return len(self._vocab) + + def _create_vocab(self): + _vocab = self._characters + if self.is_unique: + _vocab = list(set(_vocab)) + if self.is_sorted: + _vocab = sorted(_vocab) + _vocab = list(_vocab) + _vocab = [self._blank] + _vocab if self._blank is not None and len(self._blank) > 0 else _vocab + _vocab = [self._bos] + _vocab if self._bos is not None and len(self._bos) > 0 else _vocab + _vocab = [self._eos] + _vocab if self._eos is not None and len(self._eos) > 0 else _vocab + _vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab + self.vocab = _vocab + list(self._punctuations) + if self.is_unique: + duplicates = {x for x in self.vocab if self.vocab.count(x) > 1} + assert ( + len(self.vocab) == len(self._char_to_id) == len(self._id_to_char) + ), f" [!] There are duplicate characters in the character set. {duplicates}" + + def char_to_id(self, char: str) -> int: + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + return self._id_to_char[idx] + + def print_log(self, level: int = 0): + """ + Prints the vocabulary in a nice format. 
+ """ + indent = "\t" * level + print(f"{indent}| > Characters: {self._characters}") + print(f"{indent}| > Punctuations: {self._punctuations}") + print(f"{indent}| > Pad: {self._pad}") + print(f"{indent}| > EOS: {self._eos}") + print(f"{indent}| > BOS: {self._bos}") + print(f"{indent}| > Blank: {self._blank}") + print(f"{indent}| > Vocab: {self.vocab}") + print(f"{indent}| > Num chars: {self.num_chars}") + + @staticmethod + def init_from_config(config: "Coqpit"): # pylint: disable=unused-argument + """Init your character class from a config. + + Implement this method for your subclass. + """ + # use character set from config + if config.characters is not None: + return BaseCharacters(**config.characters), config + # return default character set + characters = BaseCharacters() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + punctuations=self._punctuations, + pad=self._pad, + eos=self._eos, + bos=self._bos, + blank=self._blank, + is_unique=self.is_unique, + is_sorted=self.is_sorted, + ) + + +class IPAPhonemes(BaseCharacters): + + + def __init__( + self, + characters: str = _phonemes, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a IPAPhonemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + # band-aid for compatibility with old models + if "characters" in config and config.characters is not None: + if "phonemes" in config.characters and config.characters.phonemes is not None: + config.characters["characters"] = config.characters["phonemes"] + return ( + IPAPhonemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + # use character set from config + if config.characters is not None: + return IPAPhonemes(**config.characters), config + # return default character set + characters = IPAPhonemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +class Graphemes(BaseCharacters): + + + def __init__( + self, + characters: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a Graphemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + if config.characters is not None: + # band-aid for compatibility with old models + if "phonemes" in config.characters: + return ( + Graphemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + return Graphemes(**config.characters), config + characters = Graphemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +if __name__ == "__main__": + gr = Graphemes() + ph = IPAPhonemes() + gr.print_log() + ph.print_log() + + +class VitsCharacters(BaseCharacters): + """Characters class for VITs model for compatibility with pre-trained models""" + + def __init__( + self, + graphemes: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + ipa_characters: str = _phonemes, + ) -> None: + if ipa_characters is not None: + graphemes += ipa_characters + super().__init__(graphemes, punctuations, pad, None, None, "", is_unique=False, is_sorted=True) + + def _create_vocab(self): + self._vocab = [self._pad] + list(self._punctuations) + list(self._characters) + [self._blank] + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + # pylint: disable=unnecessary-comprehension + self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)} + + @staticmethod + def init_from_config(config): + _pad = config.characters.pad + _punctuations = config.characters.punctuations + _letters = config.characters.characters + _letters_ipa = config.characters.phonemes + return ( + VitsCharacters(graphemes=_letters, ipa_characters=_letters_ipa, punctuations=_punctuations, pad=_pad), + config, + ) + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + 
punctuations=self._punctuations, + pad=self._pad, + eos=None, + bos=None, + blank=self._blank, + is_unique=False, + is_sorted=True, + ) + +class TTSTokenizer: + def __init__( + self, + text_cleaner: Callable = None, + characters: "BaseCharacters" = None, + ): + self.text_cleaner = text_cleaner + self.characters = characters + self.not_found_characters = [] + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, new_characters): + self._characters = new_characters + self.pad_id = self.characters.char_to_id(self.characters.pad) if self.characters.pad else None + self.blank_id = self.characters.char_to_id(self.characters.blank) if self.characters.blank else None + + def encode(self, text: str) -> List[int]: + """Encodes a string of text as a sequence of IDs.""" + token_ids = [] + for char in text: + try: + idx = self.characters.char_to_id(char) + token_ids.append(idx) + except KeyError: + # discard but store not found characters + if char not in self.not_found_characters: + self.not_found_characters.append(char) + print(text) + print(f" [!] Character {repr(char)} not found in the vocabulary. Discarding it.") + return token_ids + + def text_to_ids(self, text: str, language: str = None) -> List[int]: # pylint: disable=unused-argument + text = self.text_cleaner(text) + text = self.encode(text) + text = self.intersperse_blank_char(text, True) + return text + + def pad_with_bos_eos(self, char_sequence: List[str]): + """Pads a sequence with the special BOS and EOS characters.""" + return [self.characters.bos_id] + list(char_sequence) + [self.characters.eos_id] + + def intersperse_blank_char(self, char_sequence: List[str], use_blank_char: bool = False): + """Intersperses the blank character between characters in a sequence. + + Use the ```blank``` character if defined else use the ```pad``` character. 
+ """ + char_to_use = self.characters.blank_id if use_blank_char else self.characters.pad + result = [char_to_use] * (len(char_sequence) * 2 + 1) + result[1::2] = char_sequence + return result + + @staticmethod + def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None): + text_cleaner = multilingual_cleaners + CharactersClass = VitsCharacters + characters, new_config = CharactersClass.init_from_config(config) + # new_config.characters.characters_class = get_import_path(characters) + new_config.characters.characters_class = VitsCharacters + return ( + TTSTokenizer(text_cleaner, characters),new_config) + + +def multilingual_cleaners(text): + """Pipeline for multilingual text""" + text = lowercase(text) + text = replace_symbols(text, lang=None) + text = remove_aux_symbols(text) + text = collapse_whitespace(text) + return text + +def lowercase(text): + return text.lower() + +def collapse_whitespace(text): + return re.sub(_whitespace_re, " ", text).strip() + +def replace_symbols(text, lang="en"): + + text = text.replace(";", ",") + text = text.replace("-", " ") if lang != "ca" else text.replace("-", "") + text = text.replace(":", ",") + if lang == "en": + text = text.replace("&", " and ") + elif lang == "fr": + text = text.replace("&", " et ") + elif lang == "pt": + text = text.replace("&", " e ") + elif lang == "ca": + text = text.replace("&", " i ") + text = text.replace("'", "") + return text + +def remove_aux_symbols(text): + text = re.sub(r"[\<\>\(\)\[\]\"]+", "", text) + return text \ No newline at end of file diff --git a/models/hne_male/jit_infer.py b/models/hne_male/jit_infer.py new file mode 100644 index 0000000000000000000000000000000000000000..57a4c6d5675abd23d114fffdf5e658b33a934edb --- /dev/null +++ b/models/hne_male/jit_infer.py @@ -0,0 +1,31 @@ +import os +from extra import TTSTokenizer, VitsConfig, CharactersConfig, VitsCharacters +import torch +import numpy as np + +#ch female 
+letters="खछगचऊुलणशढ़इौज़झठैढजफ़औ्ड़फूेानटॅयव़ऋदप.थअँऑआघहतषरसभउञडएईऐक़ िओ?धी,ॉंख़कोबमृ" +model="ch_male_vits_30hrs.pt" +text = "पेरिविंकल के जड़, उपजी अउ पत्त्ता मन ह बिकट उपयोगी हे" + +config = VitsConfig( + text_cleaner="multilingual_cleaners", + characters=CharactersConfig( + characters_class=VitsCharacters, + pad="", + eos="", + bos="", + blank="", + characters=letters, + punctuations="!¡'(),-.:;¿? ", + phonemes=None) + ) +tokenizer, config = TTSTokenizer.init_from_config(config) + +x = tokenizer.text_to_ids(text) +x = torch.from_numpy(np.array(x)).unsqueeze(0) +net = torch.jit.load(model) +with torch.no_grad(): + out2 = net(x) +import soundfile as sf +sf.write("jit.wav", out2.squeeze().cpu().numpy(), 22050) \ No newline at end of file diff --git a/models/kn_female/chars.txt b/models/kn_female/chars.txt new file mode 100644 index 0000000000000000000000000000000000000000..a0730191ac5f596ce9cc1a9942e7cce798a55a7d --- /dev/null +++ b/models/kn_female/chars.txt @@ -0,0 +1 @@ +ುಹಥದೕಜಈಇೂಕಬಎಭಐಯಘಛೊತ್ಖಗಿೃಾಓಷವಉ?ೋಂಞಔಒೆ,ಊಏಳಠಫೇೈ!ಣಪ.'ಡರಚಮಧಝಅಢಸಶ ನಲಆಟೌ"ೀ diff --git a/models/kn_female/extra.py b/models/kn_female/extra.py new file mode 100644 index 0000000000000000000000000000000000000000..c7db561351da270a7c3931bfe0afefa7bc6d4853 --- /dev/null +++ b/models/kn_female/extra.py @@ -0,0 +1,787 @@ +from typing import Callable, Dict, List, Union +from dataclasses import asdict, dataclass, field + + +import re +from dataclasses import replace +from typing import Dict +_whitespace_re = re.compile(r"\s+") + +from dataclasses import dataclass, field +from typing import List + +# from TTS.tts.configs.shared_configs import BaseTTSConfig +# from TTS.tts.models.vits import VitsArgs, VitsAudioConfig + +@dataclass +class CharactersConfig(): + + characters_class: str = None + + # using BaseVocabulary + vocab_dict: Dict = None + + # using on BaseCharacters + pad: str = None + eos: str = None + bos: str = None + blank: str = None + characters: str = None + punctuations: str = None + phonemes: str 
= None + is_unique: bool = True # for backwards compatibility of models trained with char sets with duplicates + is_sorted: bool = True + + +@dataclass +class BaseTTSConfig(): + + # audio: BaseAudioConfig = field(default_factory=BaseAudioConfig) + # phoneme settings + use_phonemes: bool = False + phonemizer: str = None + phoneme_language: str = None + compute_input_seq_cache: bool = False + text_cleaner: str = None + enable_eos_bos_chars: bool = False + test_sentences_file: str = "" + phoneme_cache_path: str = None + # vocabulary parameters + characters: CharactersConfig = None + add_blank: bool = False + # training params + batch_group_size: int = 0 + loss_masking: bool = None + # dataloading + min_audio_len: int = 1 + max_audio_len: int = float("inf") + min_text_len: int = 1 + max_text_len: int = float("inf") + compute_f0: bool = False + compute_energy: bool = False + compute_linear_spec: bool = False + precompute_num_workers: int = 0 + use_noise_augment: bool = False + start_by_longest: bool = False + shuffle: bool = False + drop_last: bool = False + # dataset + datasets: str = None + # optimizer + optimizer: str = "radam" + optimizer_params: dict = None + # scheduler + lr_scheduler: str = None + lr_scheduler_params: dict = field(default_factory=lambda: {}) + # testing + test_sentences: List[str] = field(default_factory=lambda: []) + # evaluation + eval_split_max_size: int = None + eval_split_size: float = 0.01 + # weighted samplers + use_speaker_weighted_sampler: bool = False + speaker_weighted_sampler_alpha: float = 1.0 + use_language_weighted_sampler: bool = False + language_weighted_sampler_alpha: float = 1.0 + use_length_weighted_sampler: bool = False + length_weighted_sampler_alpha: float = 1.0 + + +@dataclass +class VitsAudioConfig(): + fft_size: int = 1024 + sample_rate: int = 22050 + win_length: int = 1024 + hop_length: int = 256 + num_mels: int = 80 + mel_fmin: int = 0 + mel_fmax: int = None + +@dataclass +class VitsArgs(): + num_chars: int = 100 + 
out_channels: int = 513 + spec_segment_size: int = 32 + hidden_channels: int = 192 + hidden_channels_ffn_text_encoder: int = 768 + num_heads_text_encoder: int = 2 + num_layers_text_encoder: int = 6 + kernel_size_text_encoder: int = 3 + dropout_p_text_encoder: float = 0.1 + dropout_p_duration_predictor: float = 0.5 + kernel_size_posterior_encoder: int = 5 + dilation_rate_posterior_encoder: int = 1 + num_layers_posterior_encoder: int = 16 + kernel_size_flow: int = 5 + dilation_rate_flow: int = 1 + num_layers_flow: int = 4 + resblock_type_decoder: str = "1" + resblock_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [3, 7, 11]) + resblock_dilation_sizes_decoder: List[List[int]] = field(default_factory=lambda: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]) + upsample_rates_decoder: List[int] = field(default_factory=lambda: [8, 8, 2, 2]) + upsample_initial_channel_decoder: int = 512 + upsample_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [16, 16, 4, 4]) + periods_multi_period_discriminator: List[int] = field(default_factory=lambda: [2, 3, 5, 7, 11]) + use_sdp: bool = True + noise_scale: float = 1.0 + inference_noise_scale: float = 0.667 + length_scale: float = 1 + noise_scale_dp: float = 1.0 + inference_noise_scale_dp: float = 1.0 + max_inference_len: int = None + init_discriminator: bool = True + use_spectral_norm_disriminator: bool = False + use_speaker_embedding: bool = False + num_speakers: int = 0 + speakers_file: str = None + d_vector_file: List[str] = None + speaker_embedding_channels: int = 256 + use_d_vector_file: bool = False + d_vector_dim: int = 0 + detach_dp_input: bool = True + use_language_embedding: bool = False + embedded_language_dim: int = 4 + num_languages: int = 0 + language_ids_file: str = None + use_speaker_encoder_as_loss: bool = False + speaker_encoder_config_path: str = "" + speaker_encoder_model_path: str = "" + condition_dp_on_speaker: bool = True + freeze_encoder: bool = False + freeze_DP: bool = False + freeze_PE: bool 
@dataclass
class VitsConfig(BaseTTSConfig):
    """Training/inference configuration for the VITS end-to-end TTS model.

    Bundles the architecture arguments (``model_args``), audio feature
    settings (``audio``), optimizer/scheduler hyper-parameters and the loss
    weights used during training.
    """

    model: str = "vits"
    # model specific params
    model_args: VitsArgs = field(default_factory=VitsArgs)
    audio: VitsAudioConfig = field(default_factory=VitsAudioConfig)

    # optimizer
    # VITS trains two networks (generator / discriminator), hence paired values.
    grad_clip: List[float] = field(default_factory=lambda: [1000, 1000])
    lr_gen: float = 0.0002
    lr_disc: float = 0.0002
    lr_scheduler_gen: str = "ExponentialLR"
    lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1})
    lr_scheduler_disc: str = "ExponentialLR"
    lr_scheduler_disc_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1})
    scheduler_after_epoch: bool = True
    optimizer: str = "AdamW"
    optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "eps": 1e-9, "weight_decay": 0.01})

    # loss params
    kl_loss_alpha: float = 1.0
    disc_loss_alpha: float = 1.0
    gen_loss_alpha: float = 1.0
    feat_loss_alpha: float = 1.0
    mel_loss_alpha: float = 45.0
    dur_loss_alpha: float = 1.0
    speaker_encoder_loss_alpha: float = 1.0

    # data loader params
    return_wav: bool = True
    compute_linear_spec: bool = True

    # sampler params
    use_weighted_sampler: bool = False  # TODO: move it to the base config
    weighted_sampler_attrs: dict = field(default_factory=lambda: {})
    weighted_sampler_multipliers: dict = field(default_factory=lambda: {})

    # overrides
    r: int = 1  # DO NOT CHANGE
    add_blank: bool = True

    # testing
    test_sentences: List[List] = field(
        default_factory=lambda: [
            ["It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent."],
            ["Be a voice, not an echo."],
            ["I'm sorry Dave. I'm afraid I can't do that."],
            ["This cake is great. It's so delicious and moist."],
            ["Prior to November 22, 1963."],
        ]
    )

    # multi-speaker settings
    # use speaker embedding layer
    num_speakers: int = 0
    use_speaker_embedding: bool = False
    speakers_file: str = None
    speaker_embedding_channels: int = 256
    language_ids_file: str = None
    use_language_embedding: bool = False

    # use d-vectors
    use_d_vector_file: bool = False
    d_vector_file: List[str] = None
    d_vector_dim: int = None

    def __post_init__(self):
        # Intentionally a no-op; kept so the dataclass hook exists for
        # interface compatibility. NOTE(review): the commented-out code below
        # looks like disabled logic that mirrored ``model_args`` keys onto the
        # top-level config — confirm against the upstream version before
        # re-enabling.
        pass
        # for key, val in self.model_args.items():
        #     if hasattr(self, key):
        #         self[key] = val
If the padding character is not specified, return the length + of the vocabulary.""" + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + """Return the index of the blank character. If the blank character is not specified, return the length of + the vocabulary.""" + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def bos_id(self) -> int: + """Return the index of the bos character. If the bos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def eos_id(self) -> int: + """Return the index of the eos character. If the eos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def vocab(self): + """Return the vocabulary dictionary.""" + return self._vocab + + @vocab.setter + def vocab(self, vocab): + """Set the vocabulary dictionary and character mapping dictionaries.""" + self._vocab, self._char_to_id, self._id_to_char = None, None, None + if vocab is not None: + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self._vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self._vocab) # pylint: disable=unnecessary-comprehension + } + + @staticmethod + def init_from_config(config, **kwargs): + """Initialize from the given config.""" + if config.characters is not None and "vocab_dict" in config.characters and config.characters.vocab_dict: + return ( + BaseVocabulary( + config.characters.vocab_dict, + config.characters.pad, + config.characters.blank, + config.characters.bos, + config.characters.eos, + ), + config, + ) + return BaseVocabulary(**kwargs), config + + def to_config(self): + return CharactersConfig( + vocab_dict=self._vocab, + pad=self.pad, + eos=self.eos, + bos=self.bos, + blank=self.blank, + is_unique=False, + 
class BaseCharacters:
    """Maps a model's characters/symbols to integer ids.

    The vocabulary is assembled from ``characters`` + ``punctuations`` plus
    the optional special tokens (``pad``, ``eos``, ``bos``, ``blank``).

    Args:
        characters (str): Main set of model characters.
        punctuations (str): Punctuation characters appended to the vocabulary.
        pad (str): Padding token (index 0 when defined).
        eos (str): End-of-sequence token.
        bos (str): Beginning-of-sequence token.
        blank (str): Optional blank token.
        is_unique (bool): De-duplicate ``characters`` before building the vocab.
        is_sorted (bool): Sort ``characters`` before building the vocab.
    """

    def __init__(
        self,
        characters: str = None,
        punctuations: str = None,
        pad: str = None,
        eos: str = None,
        bos: str = None,
        blank: str = None,
        is_unique: bool = False,
        is_sorted: bool = True,
    ) -> None:
        self._characters = characters
        self._punctuations = punctuations
        self._pad = pad
        self._eos = eos
        self._bos = bos
        self._blank = blank
        self.is_unique = is_unique
        self.is_sorted = is_sorted
        self._create_vocab()

    @property
    def pad_id(self) -> int:
        """Id of the pad token; falls back to ``len(vocab)`` when unset."""
        return self.char_to_id(self.pad) if self.pad else len(self.vocab)

    @property
    def blank_id(self) -> int:
        """Id of the blank token; falls back to ``len(vocab)`` when unset."""
        return self.char_to_id(self.blank) if self.blank else len(self.vocab)

    @property
    def eos_id(self) -> int:
        """Id of the EOS token; falls back to ``len(vocab)`` when unset."""
        return self.char_to_id(self.eos) if self.eos else len(self.vocab)

    @property
    def bos_id(self) -> int:
        """Id of the BOS token; falls back to ``len(vocab)`` when unset."""
        return self.char_to_id(self.bos) if self.bos else len(self.vocab)

    @property
    def characters(self):
        return self._characters

    @characters.setter
    def characters(self, characters):
        # Any mutation of the character set rebuilds the vocabulary so the
        # char<->id tables stay consistent.
        self._characters = characters
        self._create_vocab()

    @property
    def punctuations(self):
        return self._punctuations

    @punctuations.setter
    def punctuations(self, punctuations):
        self._punctuations = punctuations
        self._create_vocab()

    @property
    def pad(self):
        return self._pad

    @pad.setter
    def pad(self, pad):
        self._pad = pad
        self._create_vocab()

    @property
    def eos(self):
        return self._eos

    @eos.setter
    def eos(self, eos):
        self._eos = eos
        self._create_vocab()

    @property
    def bos(self):
        return self._bos

    @bos.setter
    def bos(self, bos):
        self._bos = bos
        self._create_vocab()

    @property
    def blank(self):
        return self._blank

    @blank.setter
    def blank(self, blank):
        self._blank = blank
        self._create_vocab()

    @property
    def vocab(self):
        return self._vocab

    @vocab.setter
    def vocab(self, vocab):
        self._vocab = vocab
        self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)}
        self._id_to_char = {
            idx: char for idx, char in enumerate(self.vocab)  # pylint: disable=unnecessary-comprehension
        }

    @property
    def num_chars(self):
        """Number of tokens in the vocabulary."""
        return len(self._vocab)

    def _create_vocab(self):
        """(Re)build the vocabulary list and the char<->id lookup tables."""
        _vocab = self._characters
        if self.is_unique:
            _vocab = list(set(_vocab))
        if self.is_sorted:
            _vocab = sorted(_vocab)
        _vocab = list(_vocab)
        # Special tokens are prepended in reverse priority so the final order
        # is [pad][eos][bos][blank] + characters + punctuations; pad (when
        # defined) ends up at index 0. This ordering fixes the token ids.
        _vocab = [self._blank] + _vocab if self._blank is not None and len(self._blank) > 0 else _vocab
        _vocab = [self._bos] + _vocab if self._bos is not None and len(self._bos) > 0 else _vocab
        _vocab = [self._eos] + _vocab if self._eos is not None and len(self._eos) > 0 else _vocab
        _vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab
        self.vocab = _vocab + list(self._punctuations)
        if self.is_unique:
            # Duplicate symbols would silently collapse in the id maps; catch it.
            duplicates = {x for x in self.vocab if self.vocab.count(x) > 1}
            assert (
                len(self.vocab) == len(self._char_to_id) == len(self._id_to_char)
            ), f" [!] There are duplicate characters in the character set. {duplicates}"

    def char_to_id(self, char: str) -> int:
        """Map a character to its token id.

        Raises:
            KeyError: If ``char`` is not in the vocabulary.
        """
        try:
            return self._char_to_id[char]
        except KeyError as e:
            raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e

    def id_to_char(self, idx: int) -> str:
        """Map a token id back to its character."""
        return self._id_to_char[idx]

    def print_log(self, level: int = 0):
        """
        Prints the vocabulary in a nice format.
        """
        indent = "\t" * level
        print(f"{indent}| > Characters: {self._characters}")
        print(f"{indent}| > Punctuations: {self._punctuations}")
        print(f"{indent}| > Pad: {self._pad}")
        print(f"{indent}| > EOS: {self._eos}")
        print(f"{indent}| > BOS: {self._bos}")
        print(f"{indent}| > Blank: {self._blank}")
        print(f"{indent}| > Vocab: {self.vocab}")
        print(f"{indent}| > Num chars: {self.num_chars}")

    @staticmethod
    def init_from_config(config: "Coqpit"):  # pylint: disable=unused-argument
        """Init your character class from a config.

        Implement this method for your subclass.
        """
        # use character set from config
        if config.characters is not None:
            return BaseCharacters(**config.characters), config
        # return default character set
        characters = BaseCharacters()
        new_config = replace(config, characters=characters.to_config())
        return characters, new_config

    def to_config(self) -> "CharactersConfig":
        """Export this character set back into a ``CharactersConfig``."""
        return CharactersConfig(
            characters=self._characters,
            punctuations=self._punctuations,
            pad=self._pad,
            eos=self._eos,
            bos=self._bos,
            blank=self._blank,
            is_unique=self.is_unique,
            is_sorted=self.is_sorted,
        )
+ """ + # band-aid for compatibility with old models + if "characters" in config and config.characters is not None: + if "phonemes" in config.characters and config.characters.phonemes is not None: + config.characters["characters"] = config.characters["phonemes"] + return ( + IPAPhonemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + # use character set from config + if config.characters is not None: + return IPAPhonemes(**config.characters), config + # return default character set + characters = IPAPhonemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +class Graphemes(BaseCharacters): + + + def __init__( + self, + characters: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a Graphemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + if config.characters is not None: + # band-aid for compatibility with old models + if "phonemes" in config.characters: + return ( + Graphemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + return Graphemes(**config.characters), config + characters = Graphemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +if __name__ == "__main__": + gr = Graphemes() + ph = IPAPhonemes() + gr.print_log() + ph.print_log() + + +class VitsCharacters(BaseCharacters): + """Characters class for VITs model for compatibility with pre-trained models""" + + def __init__( + self, + graphemes: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + ipa_characters: str = _phonemes, + ) -> None: + if ipa_characters is not None: + graphemes += ipa_characters + super().__init__(graphemes, punctuations, pad, None, None, "", is_unique=False, is_sorted=True) + + def _create_vocab(self): + self._vocab = [self._pad] + list(self._punctuations) + list(self._characters) + [self._blank] + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + # pylint: disable=unnecessary-comprehension + self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)} + + @staticmethod + def init_from_config(config): + _pad = config.characters.pad + _punctuations = config.characters.punctuations + _letters = config.characters.characters + _letters_ipa = config.characters.phonemes + return ( + VitsCharacters(graphemes=_letters, ipa_characters=_letters_ipa, punctuations=_punctuations, pad=_pad), + config, + ) + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + 
punctuations=self._punctuations, + pad=self._pad, + eos=None, + bos=None, + blank=self._blank, + is_unique=False, + is_sorted=True, + ) + +class TTSTokenizer: + def __init__( + self, + text_cleaner: Callable = None, + characters: "BaseCharacters" = None, + ): + self.text_cleaner = text_cleaner + self.characters = characters + self.not_found_characters = [] + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, new_characters): + self._characters = new_characters + self.pad_id = self.characters.char_to_id(self.characters.pad) if self.characters.pad else None + self.blank_id = self.characters.char_to_id(self.characters.blank) if self.characters.blank else None + + def encode(self, text: str) -> List[int]: + """Encodes a string of text as a sequence of IDs.""" + token_ids = [] + for char in text: + try: + idx = self.characters.char_to_id(char) + token_ids.append(idx) + except KeyError: + # discard but store not found characters + if char not in self.not_found_characters: + self.not_found_characters.append(char) + print(text) + print(f" [!] Character {repr(char)} not found in the vocabulary. Discarding it.") + return token_ids + + def text_to_ids(self, text: str, language: str = None) -> List[int]: # pylint: disable=unused-argument + text = self.text_cleaner(text) + text = self.encode(text) + text = self.intersperse_blank_char(text, True) + return text + + def pad_with_bos_eos(self, char_sequence: List[str]): + """Pads a sequence with the special BOS and EOS characters.""" + return [self.characters.bos_id] + list(char_sequence) + [self.characters.eos_id] + + def intersperse_blank_char(self, char_sequence: List[str], use_blank_char: bool = False): + """Intersperses the blank character between characters in a sequence. + + Use the ```blank``` character if defined else use the ```pad``` character. 
+ """ + char_to_use = self.characters.blank_id if use_blank_char else self.characters.pad + result = [char_to_use] * (len(char_sequence) * 2 + 1) + result[1::2] = char_sequence + return result + + @staticmethod + def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None): + text_cleaner = multilingual_cleaners + CharactersClass = VitsCharacters + characters, new_config = CharactersClass.init_from_config(config) + # new_config.characters.characters_class = get_import_path(characters) + new_config.characters.characters_class = VitsCharacters + return ( + TTSTokenizer(text_cleaner, characters),new_config) + + +def multilingual_cleaners(text): + """Pipeline for multilingual text""" + text = lowercase(text) + text = replace_symbols(text, lang=None) + text = remove_aux_symbols(text) + text = collapse_whitespace(text) + return text + +def lowercase(text): + return text.lower() + +def collapse_whitespace(text): + return re.sub(_whitespace_re, " ", text).strip() + +def replace_symbols(text, lang="en"): + + text = text.replace(";", ",") + text = text.replace("-", " ") if lang != "ca" else text.replace("-", "") + text = text.replace(":", ",") + if lang == "en": + text = text.replace("&", " and ") + elif lang == "fr": + text = text.replace("&", " et ") + elif lang == "pt": + text = text.replace("&", " e ") + elif lang == "ca": + text = text.replace("&", " i ") + text = text.replace("'", "") + return text + +def remove_aux_symbols(text): + text = re.sub(r"[\<\>\(\)\[\]\"]+", "", text) + return text \ No newline at end of file diff --git a/models/kn_female/jit_infer.py b/models/kn_female/jit_infer.py new file mode 100644 index 0000000000000000000000000000000000000000..837cf8312a5c6a97035ed96b08a8a31245ca8c41 --- /dev/null +++ b/models/kn_female/jit_infer.py @@ -0,0 +1,32 @@ +import os +from extra import TTSTokenizer, VitsConfig, CharactersConfig, VitsCharacters +import torch +import numpy as np + +#ch female +with open("chars.txt", 'r') as f: + letters = 
f.read().strip('\n') +model="kn_female_vits_30hrs.pt" +text = "ಬಿಸ್ಫೆನಾಲ್ಎ ಗೆ ಶಿಶುವು ಒಡ್ಡಿಕೊಂಡಾಗ ಅದು, ಲೈಂಗಿಕವಾಗಿ ದ್ವಿರೂಪಿ ಮೆದುಳು ರಚನೆಯ ಮೇಲೆ ಗಾಡ ಪರಿಣಾಮ ಬೀರಬಹುದು ಎಂದು ವರದಿ ಹೇಳುತ್ತದೆ." + +config = VitsConfig( + text_cleaner="multilingual_cleaners", + characters=CharactersConfig( + characters_class=VitsCharacters, + pad="", + eos="", + bos="", + blank="", + characters=letters, + punctuations="!¡'(),-.:;¿? ", + phonemes=None) + ) +tokenizer, config = TTSTokenizer.init_from_config(config) + +x = tokenizer.text_to_ids(text) +x = torch.from_numpy(np.array(x)).unsqueeze(0) +net = torch.jit.load(model) +with torch.no_grad(): + out2 = net(x) +import soundfile as sf +sf.write("jit.wav", out2.squeeze().cpu().numpy(), 22050) \ No newline at end of file diff --git a/models/kn_female/kn_female_vits_30hrs.pt b/models/kn_female/kn_female_vits_30hrs.pt new file mode 100644 index 0000000000000000000000000000000000000000..7124249dd0a6fd2dce870de28c8e47d1d04dba89 --- /dev/null +++ b/models/kn_female/kn_female_vits_30hrs.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49be422a46afc7714a8ea1cab589d986c3bc61939faa5f1d5d6f9f80a263c51c +size 333252998 diff --git a/models/kn_male/.gitattributes b/models/kn_male/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/kn_male/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text 
+*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/kn_male/README.md b/models/kn_male/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b187bb7e7d837a367ccd0862441947ad412c77f7 --- /dev/null +++ b/models/kn_male/README.md @@ -0,0 +1,3 @@ +--- +license: cc-by-4.0 +--- diff --git a/models/kn_male/chars.txt b/models/kn_male/chars.txt new file mode 100644 index 0000000000000000000000000000000000000000..9460740387efca17908ddc0c2e7f1167cea6e0cf --- /dev/null +++ b/models/kn_male/chars.txt @@ -0,0 +1 @@ +ುಹಥದೕಜಈಇೂಕಬಭಎಐಯಘಛೊತ್ಗಖಿೃಾಉಷವಓ?ೋಂಞಔ,ೆಒಊಏಳಠಫೇೈ!ಣ.ಪ'ಡರಚಮಧಆಝಅಢಸಶ ನಲಟೌ"ೀ diff --git a/models/kn_male/extra.py b/models/kn_male/extra.py new file mode 100644 index 0000000000000000000000000000000000000000..c7db561351da270a7c3931bfe0afefa7bc6d4853 --- /dev/null +++ b/models/kn_male/extra.py @@ -0,0 +1,787 @@ +from typing import Callable, Dict, List, Union +from dataclasses import asdict, dataclass, field + + +import re +from dataclasses import replace +from typing import Dict +_whitespace_re = re.compile(r"\s+") + +from 
@dataclass
class CharactersConfig():
    """Defines a model's character set / vocabulary.

    Either provide ``vocab_dict`` (consumed by ``BaseVocabulary``) or the
    character-string fields below (consumed by ``BaseCharacters`` subclasses).
    """

    # class used to instantiate the character set (e.g. VitsCharacters)
    characters_class: str = None

    # using BaseVocabulary
    vocab_dict: Dict = None

    # using on BaseCharacters
    pad: str = None
    eos: str = None
    bos: str = None
    blank: str = None
    characters: str = None
    punctuations: str = None
    phonemes: str = None
    is_unique: bool = True  # for backwards compatibility of models trained with char sets with duplicates
    is_sorted: bool = True
@dataclass
class VitsAudioConfig():
    """Audio feature-extraction settings for VITS (22.05 kHz defaults)."""

    fft_size: int = 1024
    sample_rate: int = 22050
    win_length: int = 1024
    hop_length: int = 256
    num_mels: int = 80
    mel_fmin: int = 0
    mel_fmax: int = None

@dataclass
class VitsArgs():
    """Architecture hyper-parameters for the VITS model components
    (text encoder, posterior encoder, flow, decoder, discriminators),
    plus inference-time knobs and fine-tuning freeze/re-init switches.
    """

    num_chars: int = 100
    out_channels: int = 513
    spec_segment_size: int = 32
    # text encoder
    hidden_channels: int = 192
    hidden_channels_ffn_text_encoder: int = 768
    num_heads_text_encoder: int = 2
    num_layers_text_encoder: int = 6
    kernel_size_text_encoder: int = 3
    dropout_p_text_encoder: float = 0.1
    dropout_p_duration_predictor: float = 0.5
    # posterior encoder
    kernel_size_posterior_encoder: int = 5
    dilation_rate_posterior_encoder: int = 1
    num_layers_posterior_encoder: int = 16
    # flow
    kernel_size_flow: int = 5
    dilation_rate_flow: int = 1
    num_layers_flow: int = 4
    # waveform decoder
    resblock_type_decoder: str = "1"
    resblock_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [3, 7, 11])
    resblock_dilation_sizes_decoder: List[List[int]] = field(default_factory=lambda: [[1, 3, 5], [1, 3, 5], [1, 3, 5]])
    upsample_rates_decoder: List[int] = field(default_factory=lambda: [8, 8, 2, 2])
    upsample_initial_channel_decoder: int = 512
    upsample_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [16, 16, 4, 4])
    periods_multi_period_discriminator: List[int] = field(default_factory=lambda: [2, 3, 5, 7, 11])
    # duration predictor / sampling
    use_sdp: bool = True
    noise_scale: float = 1.0
    inference_noise_scale: float = 0.667
    length_scale: float = 1
    noise_scale_dp: float = 1.0
    inference_noise_scale_dp: float = 1.0
    max_inference_len: int = None
    init_discriminator: bool = True
    # NOTE(review): "disriminator" typo is part of the public field name —
    # renaming it would break existing configs/checkpoints.
    use_spectral_norm_disriminator: bool = False
    # multi-speaker
    use_speaker_embedding: bool = False
    num_speakers: int = 0
    speakers_file: str = None
    d_vector_file: List[str] = None
    speaker_embedding_channels: int = 256
    use_d_vector_file: bool = False
    d_vector_dim: int = 0
    detach_dp_input: bool = True
    # multi-lingual
    use_language_embedding: bool = False
    embedded_language_dim: int = 4
    num_languages: int = 0
    language_ids_file: str = None
    # speaker-consistency loss
    use_speaker_encoder_as_loss: bool = False
    speaker_encoder_config_path: str = ""
    speaker_encoder_model_path: str = ""
    condition_dp_on_speaker: bool = True
    # fine-tuning switches
    freeze_encoder: bool = False
    freeze_DP: bool = False
    freeze_PE: bool = False
    freeze_flow_decoder: bool = False
    freeze_waveform_decoder: bool = False
    encoder_sample_rate: int = None
    interpolate_z: bool = True
    reinit_DP: bool = False
    reinit_text_encoder: bool = False
class BaseVocabulary:
    """Base Vocabulary class.

    Wraps a plain vocabulary (any iterable of tokens) together with the
    optional special tokens and exposes token <-> id lookups.

    Args:
        vocab (Dict): A dictionary of characters and their corresponding indices.
        pad (str): Padding token. Defaults to None.
        blank (str): Blank token. Defaults to None.
        bos (str): Beginning-of-sequence token. Defaults to None.
        eos (str): End-of-sequence token. Defaults to None.
    """

    def __init__(self, vocab: Dict, pad: str = None, blank: str = None, bos: str = None, eos: str = None):
        self.vocab = vocab  # goes through the setter, building the id tables
        self.pad = pad
        self.blank = blank
        self.bos = bos
        self.eos = eos

    def _special_id(self, token) -> int:
        # Every special token falls back to len(vocab) when unset (falsy).
        return self.char_to_id(token) if token else len(self.vocab)

    @property
    def pad_id(self) -> int:
        """Id of the padding token, or ``len(vocab)`` when it is not set."""
        return self._special_id(self.pad)

    @property
    def blank_id(self) -> int:
        """Id of the blank token, or ``len(vocab)`` when it is not set."""
        return self._special_id(self.blank)

    @property
    def bos_id(self) -> int:
        """Id of the BOS token, or ``len(vocab)`` when it is not set."""
        return self._special_id(self.bos)

    @property
    def eos_id(self) -> int:
        """Id of the EOS token, or ``len(vocab)`` when it is not set."""
        return self._special_id(self.eos)

    @property
    def vocab(self):
        """The raw vocabulary container."""
        return self._vocab

    @vocab.setter
    def vocab(self, vocab):
        """Store the vocabulary and rebuild both lookup tables."""
        self._vocab, self._char_to_id, self._id_to_char = None, None, None
        if vocab is not None:
            self._vocab = vocab
            self._char_to_id = {}
            self._id_to_char = {}
            for token_id, token in enumerate(self._vocab):
                self._char_to_id[token] = token_id
                self._id_to_char[token_id] = token

    @staticmethod
    def init_from_config(config, **kwargs):
        """Initialize from the given config."""
        chars = config.characters
        if chars is not None and "vocab_dict" in chars and chars.vocab_dict:
            vocab = BaseVocabulary(chars.vocab_dict, chars.pad, chars.blank, chars.bos, chars.eos)
            return vocab, config
        return BaseVocabulary(**kwargs), config

    def to_config(self):
        """Export this vocabulary back into a ``CharactersConfig``."""
        return CharactersConfig(
            vocab_dict=self._vocab,
            pad=self.pad,
            eos=self.eos,
            bos=self.bos,
            blank=self.blank,
            is_unique=False,
            is_sorted=False,
        )

    @property
    def num_chars(self):
        """Return number of tokens in the vocabulary."""
        return len(self._vocab)

    def char_to_id(self, char: str) -> int:
        """Map a character to a token id.

        Raises:
            KeyError: If ``char`` is not in the vocabulary.
        """
        try:
            return self._char_to_id[char]
        except KeyError as err:
            raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from err

    def id_to_char(self, idx: int) -> str:
        """Map a token id back to its character."""
        return self._id_to_char[idx]
def vocab(self): + return self._vocab + + @vocab.setter + def vocab(self, vocab): + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self.vocab) # pylint: disable=unnecessary-comprehension + } + + @property + def num_chars(self): + return len(self._vocab) + + def _create_vocab(self): + _vocab = self._characters + if self.is_unique: + _vocab = list(set(_vocab)) + if self.is_sorted: + _vocab = sorted(_vocab) + _vocab = list(_vocab) + _vocab = [self._blank] + _vocab if self._blank is not None and len(self._blank) > 0 else _vocab + _vocab = [self._bos] + _vocab if self._bos is not None and len(self._bos) > 0 else _vocab + _vocab = [self._eos] + _vocab if self._eos is not None and len(self._eos) > 0 else _vocab + _vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab + self.vocab = _vocab + list(self._punctuations) + if self.is_unique: + duplicates = {x for x in self.vocab if self.vocab.count(x) > 1} + assert ( + len(self.vocab) == len(self._char_to_id) == len(self._id_to_char) + ), f" [!] There are duplicate characters in the character set. {duplicates}" + + def char_to_id(self, char: str) -> int: + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + return self._id_to_char[idx] + + def print_log(self, level: int = 0): + """ + Prints the vocabulary in a nice format. 
+ """ + indent = "\t" * level + print(f"{indent}| > Characters: {self._characters}") + print(f"{indent}| > Punctuations: {self._punctuations}") + print(f"{indent}| > Pad: {self._pad}") + print(f"{indent}| > EOS: {self._eos}") + print(f"{indent}| > BOS: {self._bos}") + print(f"{indent}| > Blank: {self._blank}") + print(f"{indent}| > Vocab: {self.vocab}") + print(f"{indent}| > Num chars: {self.num_chars}") + + @staticmethod + def init_from_config(config: "Coqpit"): # pylint: disable=unused-argument + """Init your character class from a config. + + Implement this method for your subclass. + """ + # use character set from config + if config.characters is not None: + return BaseCharacters(**config.characters), config + # return default character set + characters = BaseCharacters() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + punctuations=self._punctuations, + pad=self._pad, + eos=self._eos, + bos=self._bos, + blank=self._blank, + is_unique=self.is_unique, + is_sorted=self.is_sorted, + ) + + +class IPAPhonemes(BaseCharacters): + + + def __init__( + self, + characters: str = _phonemes, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a IPAPhonemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + # band-aid for compatibility with old models + if "characters" in config and config.characters is not None: + if "phonemes" in config.characters and config.characters.phonemes is not None: + config.characters["characters"] = config.characters["phonemes"] + return ( + IPAPhonemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + # use character set from config + if config.characters is not None: + return IPAPhonemes(**config.characters), config + # return default character set + characters = IPAPhonemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +class Graphemes(BaseCharacters): + + + def __init__( + self, + characters: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a Graphemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + if config.characters is not None: + # band-aid for compatibility with old models + if "phonemes" in config.characters: + return ( + Graphemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + return Graphemes(**config.characters), config + characters = Graphemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +if __name__ == "__main__": + gr = Graphemes() + ph = IPAPhonemes() + gr.print_log() + ph.print_log() + + +class VitsCharacters(BaseCharacters): + """Characters class for VITs model for compatibility with pre-trained models""" + + def __init__( + self, + graphemes: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + ipa_characters: str = _phonemes, + ) -> None: + if ipa_characters is not None: + graphemes += ipa_characters + super().__init__(graphemes, punctuations, pad, None, None, "", is_unique=False, is_sorted=True) + + def _create_vocab(self): + self._vocab = [self._pad] + list(self._punctuations) + list(self._characters) + [self._blank] + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + # pylint: disable=unnecessary-comprehension + self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)} + + @staticmethod + def init_from_config(config): + _pad = config.characters.pad + _punctuations = config.characters.punctuations + _letters = config.characters.characters + _letters_ipa = config.characters.phonemes + return ( + VitsCharacters(graphemes=_letters, ipa_characters=_letters_ipa, punctuations=_punctuations, pad=_pad), + config, + ) + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + 
class TTSTokenizer:
    """Tokenize raw text into model input IDs.

    The tokenizer runs ``text_cleaner`` over the input, maps each character to
    its vocabulary ID through ``characters`` and intersperses the blank token
    between characters (VITS-style).

    Args:
        text_cleaner (Callable): Function that normalizes raw text.
        characters (BaseCharacters): Character set providing the ID mapping.
    """

    def __init__(
        self,
        text_cleaner: Callable = None,
        characters: "BaseCharacters" = None,
    ):
        self.text_cleaner = text_cleaner
        self.characters = characters
        self.not_found_characters = []

    @property
    def characters(self):
        return self._characters

    @characters.setter
    def characters(self, new_characters):
        # Keep pad/blank IDs in sync with the active character set.
        self._characters = new_characters
        self.pad_id = self.characters.char_to_id(self.characters.pad) if self.characters.pad else None
        self.blank_id = self.characters.char_to_id(self.characters.blank) if self.characters.blank else None

    def encode(self, text: str) -> List[int]:
        """Encode a string of text as a sequence of IDs.

        Unknown characters are discarded; each distinct one is reported once
        so the log is not flooded on repeated inputs.  (The previous version
        also dumped the whole input text via a leftover debug print — removed.)
        """
        token_ids = []
        for char in text:
            try:
                token_ids.append(self.characters.char_to_id(char))
            except KeyError:
                # Discard, but remember characters missing from the vocabulary.
                if char not in self.not_found_characters:
                    self.not_found_characters.append(char)
                    print(f" [!] Character {repr(char)} not found in the vocabulary. Discarding it.")
        return token_ids

    def text_to_ids(self, text: str, language: str = None) -> List[int]:  # pylint: disable=unused-argument
        """Clean ``text``, encode it, and intersperse the blank token."""
        cleaned = self.text_cleaner(text)
        ids = self.encode(cleaned)
        return self.intersperse_blank_char(ids, True)

    def pad_with_bos_eos(self, char_sequence: List[str]):
        """Wrap a sequence with the BOS and EOS token IDs."""
        return [self.characters.bos_id] + list(char_sequence) + [self.characters.eos_id]

    def intersperse_blank_char(self, char_sequence: List[str], use_blank_char: bool = False):
        """Insert the blank token between (and around) every item of the sequence.

        NOTE(review): when ``use_blank_char`` is False this inserts the raw pad
        *character* rather than its ID, mirroring the original implementation;
        every call site here passes True, so that branch is effectively unused
        — confirm before relying on it.
        """
        char_to_use = self.characters.blank_id if use_blank_char else self.characters.pad
        result = [char_to_use] * (len(char_sequence) * 2 + 1)
        result[1::2] = char_sequence
        return result

    @staticmethod
    def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None):
        """Build a tokenizer (and updated config) from a model config.

        Always uses ``multilingual_cleaners`` and ``VitsCharacters``, matching
        the checkpoints shipped with this project.
        """
        text_cleaner = multilingual_cleaners
        characters, new_config = VitsCharacters.init_from_config(config)
        new_config.characters.characters_class = VitsCharacters
        return TTSTokenizer(text_cleaner, characters), new_config


# Local compiled pattern so this section does not depend on a module global
# defined far away in the file.
_WS_PATTERN = re.compile(r"\s+")


def multilingual_cleaners(text):
    """Language-agnostic cleaning pipeline used by the tokenizer."""
    text = lowercase(text)
    text = replace_symbols(text, lang=None)
    text = remove_aux_symbols(text)
    text = collapse_whitespace(text)
    return text


def lowercase(text):
    """Lowercase the whole string."""
    return text.lower()


def collapse_whitespace(text):
    """Squeeze whitespace runs into single spaces and trim the ends."""
    return re.sub(_WS_PATTERN, " ", text).strip()


def replace_symbols(text, lang="en"):
    """Normalize punctuation; '&' expands per language, apostrophes are dropped."""
    text = text.replace(";", ",")
    text = text.replace("-", " ") if lang != "ca" else text.replace("-", "")
    text = text.replace(":", ",")
    if lang == "en":
        text = text.replace("&", " and ")
    elif lang == "fr":
        text = text.replace("&", " et ")
    elif lang == "pt":
        text = text.replace("&", " e ")
    elif lang == "ca":
        text = text.replace("&", " i ")
    text = text.replace("'", "")
    return text


def remove_aux_symbols(text):
    """Strip bracketing and quote symbols that carry no pronunciation."""
    return re.sub(r"[\<\>\(\)\[\]\"]+", "", text)
f.read().strip('\n') +model="kn_male_vits_30hrs.pt" +text = "ಬಿಸ್ಫೆನಾಲ್ಎ ಗೆ ಶಿಶುವು ಒಡ್ಡಿಕೊಂಡಾಗ ಅದು, ಲೈಂಗಿಕವಾಗಿ ದ್ವಿರೂಪಿ ಮೆದುಳು ರಚನೆಯ ಮೇಲೆ ಗಾಡ ಪರಿಣಾಮ ಬೀರಬಹುದು ಎಂದು ವರದಿ ಹೇಳುತ್ತದೆ." + +config = VitsConfig( + text_cleaner="multilingual_cleaners", + characters=CharactersConfig( + characters_class=VitsCharacters, + pad="", + eos="", + bos="", + blank="", + characters=letters, + punctuations="!¡'(),-.:;¿? ", + phonemes=None) + ) +tokenizer, config = TTSTokenizer.init_from_config(config) + +x = tokenizer.text_to_ids(text) +x = torch.from_numpy(np.array(x)).unsqueeze(0) +net = torch.jit.load(model) +with torch.no_grad(): + out2 = net(x) +import soundfile as sf +sf.write("jit.wav", out2.squeeze().cpu().numpy(), 22050) \ No newline at end of file diff --git a/models/kn_male/kn_male_vits_30hrs.pt b/models/kn_male/kn_male_vits_30hrs.pt new file mode 100644 index 0000000000000000000000000000000000000000..85be314c5b0aabbc53801b829141e0a946554063 --- /dev/null +++ b/models/kn_male/kn_male_vits_30hrs.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2a7e16b3509df1be518e0616bcb3ce6eb9c4e59d7fafed075ee57426befdef9 +size 333247564 diff --git a/models/mag_female/.gitattributes b/models/mag_female/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/mag_female/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy 
filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/mag_female/README.md b/models/mag_female/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b187bb7e7d837a367ccd0862441947ad412c77f7 --- /dev/null +++ b/models/mag_female/README.md @@ -0,0 +1,3 @@ +--- +license: cc-by-4.0 +--- diff --git a/models/mag_female/chars.txt b/models/mag_female/chars.txt new file mode 100644 index 0000000000000000000000000000000000000000..533c640c612f628885d974a5f4e66ce327c0391b --- /dev/null +++ b/models/mag_female/chars.txt @@ -0,0 +1 @@ +ओग?ढक़फबथुझ़ख़.तङफ़घआरोऊ'खअणढ़ूईाीनौएषदे"यभडछ, ंटहइवउम्ँठधॉपड़ऋ!ऑिऽकैऐऔशजृलञचसज़ diff --git a/models/mag_female/extra.py b/models/mag_female/extra.py new file mode 100644 index 0000000000000000000000000000000000000000..c7db561351da270a7c3931bfe0afefa7bc6d4853 --- /dev/null +++ b/models/mag_female/extra.py @@ -0,0 +1,787 @@ +from typing import Callable, Dict, List, Union +from dataclasses import asdict, dataclass, field + + +import re +from dataclasses import replace +from typing import Dict 
_whitespace_re = re.compile(r"\s+")

from dataclasses import dataclass, field
from typing import Dict, List


@dataclass
class CharactersConfig:
    """Defines a character (or phoneme) set for a TTS model.

    Either ``vocab_dict`` is given (consumed by BaseVocabulary) or the
    individual token fields below are used (consumed by BaseCharacters).
    """

    characters_class: str = None

    # used with BaseVocabulary
    vocab_dict: Dict = None

    # used with BaseCharacters
    pad: str = None
    eos: str = None
    bos: str = None
    blank: str = None
    characters: str = None
    punctuations: str = None
    phonemes: str = None
    # True keeps backwards compatibility with models trained on duplicated sets.
    is_unique: bool = True
    is_sorted: bool = True


@dataclass
class BaseTTSConfig:
    """Configuration fields shared by all TTS models."""

    # phoneme settings
    use_phonemes: bool = False
    phonemizer: str = None
    phoneme_language: str = None
    compute_input_seq_cache: bool = False
    text_cleaner: str = None
    enable_eos_bos_chars: bool = False
    test_sentences_file: str = ""
    phoneme_cache_path: str = None
    # vocabulary parameters
    characters: CharactersConfig = None
    add_blank: bool = False
    # training params
    batch_group_size: int = 0
    loss_masking: bool = None
    # dataloading
    min_audio_len: int = 1
    max_audio_len: int = float("inf")
    min_text_len: int = 1
    max_text_len: int = float("inf")
    compute_f0: bool = False
    compute_energy: bool = False
    compute_linear_spec: bool = False
    precompute_num_workers: int = 0
    use_noise_augment: bool = False
    start_by_longest: bool = False
    shuffle: bool = False
    drop_last: bool = False
    # dataset
    datasets: str = None
    # optimizer
    optimizer: str = "radam"
    optimizer_params: dict = None
    # scheduler
    lr_scheduler: str = None
    lr_scheduler_params: dict = field(default_factory=lambda: {})
    # testing
    test_sentences: List[str] = field(default_factory=lambda: [])
    # evaluation
    eval_split_max_size: int = None
    eval_split_size: float = 0.01
    # weighted samplers
    use_speaker_weighted_sampler: bool = False
    speaker_weighted_sampler_alpha: float = 1.0
    use_language_weighted_sampler: bool = False
    language_weighted_sampler_alpha: float = 1.0
    use_length_weighted_sampler: bool = False
    length_weighted_sampler_alpha: float = 1.0


@dataclass
class VitsAudioConfig:
    """Audio/STFT parameters for VITS feature extraction."""

    fft_size: int = 1024
    sample_rate: int = 22050
    win_length: int = 1024
    hop_length: int = 256
    num_mels: int = 80
    mel_fmin: int = 0
    mel_fmax: int = None


@dataclass
class VitsArgs:
    """Architecture hyper-parameters of the VITS model."""

    num_chars: int = 100
    out_channels: int = 513
    spec_segment_size: int = 32
    hidden_channels: int = 192
    hidden_channels_ffn_text_encoder: int = 768
    num_heads_text_encoder: int = 2
    num_layers_text_encoder: int = 6
    kernel_size_text_encoder: int = 3
    dropout_p_text_encoder: float = 0.1
    dropout_p_duration_predictor: float = 0.5
    kernel_size_posterior_encoder: int = 5
    dilation_rate_posterior_encoder: int = 1
    num_layers_posterior_encoder: int = 16
    kernel_size_flow: int = 5
    dilation_rate_flow: int = 1
    num_layers_flow: int = 4
    resblock_type_decoder: str = "1"
    resblock_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [3, 7, 11])
    resblock_dilation_sizes_decoder: List[List[int]] = field(default_factory=lambda: [[1, 3, 5], [1, 3, 5], [1, 3, 5]])
    upsample_rates_decoder: List[int] = field(default_factory=lambda: [8, 8, 2, 2])
    upsample_initial_channel_decoder: int = 512
    upsample_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [16, 16, 4, 4])
    periods_multi_period_discriminator: List[int] = field(default_factory=lambda: [2, 3, 5, 7, 11])
    use_sdp: bool = True
    noise_scale: float = 1.0
    inference_noise_scale: float = 0.667
    length_scale: float = 1
    noise_scale_dp: float = 1.0
    inference_noise_scale_dp: float = 1.0
    max_inference_len: int = None
    init_discriminator: bool = True
    use_spectral_norm_disriminator: bool = False
    use_speaker_embedding: bool = False
    num_speakers: int = 0
    speakers_file: str = None
    d_vector_file: List[str] = None
    speaker_embedding_channels: int = 256
    use_d_vector_file: bool = False
    d_vector_dim: int = 0
    detach_dp_input: bool = True
    use_language_embedding: bool = False
    embedded_language_dim: int = 4
    num_languages: int = 0
    language_ids_file: str = None
    use_speaker_encoder_as_loss: bool = False
    speaker_encoder_config_path: str = ""
    speaker_encoder_model_path: str = ""
    condition_dp_on_speaker: bool = True
    freeze_encoder: bool = False
    freeze_DP: bool = False
    freeze_PE: bool = False
    freeze_flow_decoder: bool = False
    freeze_waveform_decoder: bool = False
    encoder_sample_rate: int = None
    interpolate_z: bool = True
    reinit_DP: bool = False
    reinit_text_encoder: bool = False


@dataclass
class VitsConfig(BaseTTSConfig):
    """Full training/inference configuration for the VITS model."""

    model: str = "vits"
    # model specific params
    model_args: VitsArgs = field(default_factory=VitsArgs)
    audio: VitsAudioConfig = field(default_factory=VitsAudioConfig)

    # optimizer
    grad_clip: List[float] = field(default_factory=lambda: [1000, 1000])
    lr_gen: float = 0.0002
    lr_disc: float = 0.0002
    lr_scheduler_gen: str = "ExponentialLR"
    lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1})
    lr_scheduler_disc: str = "ExponentialLR"
    lr_scheduler_disc_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1})
    scheduler_after_epoch: bool = True
    optimizer: str = "AdamW"
    optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "eps": 1e-9, "weight_decay": 0.01})

    # loss params
    kl_loss_alpha: float = 1.0
    disc_loss_alpha: float = 1.0
    gen_loss_alpha: float = 1.0
    feat_loss_alpha: float = 1.0
    mel_loss_alpha: float = 45.0
    dur_loss_alpha: float = 1.0
    speaker_encoder_loss_alpha: float = 1.0

    # data loader params
    return_wav: bool = True
    compute_linear_spec: bool = True

    # sampler params
    use_weighted_sampler: bool = False  # TODO: move it to the base config
    weighted_sampler_attrs: dict = field(default_factory=lambda: {})
    weighted_sampler_multipliers: dict = field(default_factory=lambda: {})

    # overrides
    r: int = 1  # DO NOT CHANGE
    add_blank: bool = True

    # testing
    test_sentences: List[List] = field(
        default_factory=lambda: [
            ["It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent."],
            ["Be a voice, not an echo."],
            ["I'm sorry Dave. I'm afraid I can't do that."],
            ["This cake is great. It's so delicious and moist."],
            ["Prior to November 22, 1963."],
        ]
    )

    # multi-speaker settings: speaker embedding layer
    num_speakers: int = 0
    use_speaker_embedding: bool = False
    speakers_file: str = None
    speaker_embedding_channels: int = 256
    language_ids_file: str = None
    use_language_embedding: bool = False

    # multi-speaker settings: d-vectors
    use_d_vector_file: bool = False
    d_vector_file: List[str] = None
    d_vector_dim: int = None

    def __post_init__(self):
        pass


def parse_symbols():
    """Return the default symbol set as a plain dictionary."""
    return {
        "pad": _pad,
        "eos": _eos,
        "bos": _bos,
        "characters": _characters,
        "punctuations": _punctuations,
        "phonemes": _phonemes,
    }


# DEFAULT SET OF GRAPHEMES
_pad = ""
_eos = ""
_bos = ""
_blank = ""  # TODO: check if we need this alongside with PAD
_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
_punctuations = "!'(),-.:;? "

# DEFAULT SET OF IPA PHONEMES (all IPA characters)
_vowels = "iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ"
_non_pulmonic_consonants = "ʘɓǀɗǃʄǂɠǁʛ"
_pulmonic_consonants = "pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ"
_suprasegmentals = "ˈˌːˑ"
_other_symbols = "ʍwɥʜʢʡɕʑɺɧʲ"
_diacrilics = "ɚ˞ɫ"
_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics
class BaseVocabulary:
    """Vocabulary defined directly by an ordered token collection.

    Unlike ``BaseCharacters`` this class needs no character-set description —
    it is built straight from ``vocab`` (token -> ID by enumeration order).

    Args:
        vocab (Dict): Tokens of the vocabulary; enumeration order fixes the IDs.
        pad (str): Optional padding token.
        blank (str): Optional blank token.
        bos (str): Optional beginning-of-sequence token.
        eos (str): Optional end-of-sequence token.
    """

    def __init__(self, vocab: Dict, pad: str = None, blank: str = None, bos: str = None, eos: str = None):
        self.vocab = vocab
        self.pad = pad
        self.blank = blank
        self.bos = bos
        self.eos = eos

    @property
    def pad_id(self) -> int:
        """ID of the pad token; ``len(vocab)`` when pad is unset."""
        if self.pad:
            return self.char_to_id(self.pad)
        return len(self.vocab)

    @property
    def blank_id(self) -> int:
        """ID of the blank token; ``len(vocab)`` when blank is unset."""
        if self.blank:
            return self.char_to_id(self.blank)
        return len(self.vocab)

    @property
    def bos_id(self) -> int:
        """ID of the BOS token; ``len(vocab)`` when BOS is unset."""
        if self.bos:
            return self.char_to_id(self.bos)
        return len(self.vocab)

    @property
    def eos_id(self) -> int:
        """ID of the EOS token; ``len(vocab)`` when EOS is unset."""
        if self.eos:
            return self.char_to_id(self.eos)
        return len(self.vocab)

    @property
    def vocab(self):
        """The raw vocabulary collection."""
        return self._vocab

    @vocab.setter
    def vocab(self, vocab):
        # Reset, then rebuild both directions of the token <-> ID mapping.
        self._vocab, self._char_to_id, self._id_to_char = None, None, None
        if vocab is not None:
            self._vocab = vocab
            self._char_to_id = {}
            self._id_to_char = {}
            for token_id, token in enumerate(self._vocab):
                self._char_to_id[token] = token_id
                self._id_to_char[token_id] = token

    @staticmethod
    def init_from_config(config, **kwargs):
        """Create a BaseVocabulary from ``config`` when it carries a vocab dict."""
        chars = config.characters
        if chars is not None and "vocab_dict" in chars and chars.vocab_dict:
            return (
                BaseVocabulary(chars.vocab_dict, chars.pad, chars.blank, chars.bos, chars.eos),
                config,
            )
        return BaseVocabulary(**kwargs), config

    def to_config(self):
        """Export the vocabulary as a CharactersConfig."""
        return CharactersConfig(
            vocab_dict=self._vocab,
            pad=self.pad,
            eos=self.eos,
            bos=self.bos,
            blank=self.blank,
            is_unique=False,
            is_sorted=False,
        )

    @property
    def num_chars(self):
        """Number of tokens in the vocabulary."""
        return len(self._vocab)

    def char_to_id(self, char: str) -> int:
        """Map a token to its ID; raises KeyError for unknown tokens."""
        try:
            return self._char_to_id[char]
        except KeyError as err:
            raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from err

    def id_to_char(self, idx: int) -> str:
        """Map an ID back to its token."""
        return self._id_to_char[idx]
def vocab(self): + return self._vocab + + @vocab.setter + def vocab(self, vocab): + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self.vocab) # pylint: disable=unnecessary-comprehension + } + + @property + def num_chars(self): + return len(self._vocab) + + def _create_vocab(self): + _vocab = self._characters + if self.is_unique: + _vocab = list(set(_vocab)) + if self.is_sorted: + _vocab = sorted(_vocab) + _vocab = list(_vocab) + _vocab = [self._blank] + _vocab if self._blank is not None and len(self._blank) > 0 else _vocab + _vocab = [self._bos] + _vocab if self._bos is not None and len(self._bos) > 0 else _vocab + _vocab = [self._eos] + _vocab if self._eos is not None and len(self._eos) > 0 else _vocab + _vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab + self.vocab = _vocab + list(self._punctuations) + if self.is_unique: + duplicates = {x for x in self.vocab if self.vocab.count(x) > 1} + assert ( + len(self.vocab) == len(self._char_to_id) == len(self._id_to_char) + ), f" [!] There are duplicate characters in the character set. {duplicates}" + + def char_to_id(self, char: str) -> int: + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + return self._id_to_char[idx] + + def print_log(self, level: int = 0): + """ + Prints the vocabulary in a nice format. 
+ """ + indent = "\t" * level + print(f"{indent}| > Characters: {self._characters}") + print(f"{indent}| > Punctuations: {self._punctuations}") + print(f"{indent}| > Pad: {self._pad}") + print(f"{indent}| > EOS: {self._eos}") + print(f"{indent}| > BOS: {self._bos}") + print(f"{indent}| > Blank: {self._blank}") + print(f"{indent}| > Vocab: {self.vocab}") + print(f"{indent}| > Num chars: {self.num_chars}") + + @staticmethod + def init_from_config(config: "Coqpit"): # pylint: disable=unused-argument + """Init your character class from a config. + + Implement this method for your subclass. + """ + # use character set from config + if config.characters is not None: + return BaseCharacters(**config.characters), config + # return default character set + characters = BaseCharacters() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + punctuations=self._punctuations, + pad=self._pad, + eos=self._eos, + bos=self._bos, + blank=self._blank, + is_unique=self.is_unique, + is_sorted=self.is_sorted, + ) + + +class IPAPhonemes(BaseCharacters): + + + def __init__( + self, + characters: str = _phonemes, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a IPAPhonemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + # band-aid for compatibility with old models + if "characters" in config and config.characters is not None: + if "phonemes" in config.characters and config.characters.phonemes is not None: + config.characters["characters"] = config.characters["phonemes"] + return ( + IPAPhonemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + # use character set from config + if config.characters is not None: + return IPAPhonemes(**config.characters), config + # return default character set + characters = IPAPhonemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +class Graphemes(BaseCharacters): + + + def __init__( + self, + characters: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a Graphemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + if config.characters is not None: + # band-aid for compatibility with old models + if "phonemes" in config.characters: + return ( + Graphemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + return Graphemes(**config.characters), config + characters = Graphemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +if __name__ == "__main__": + gr = Graphemes() + ph = IPAPhonemes() + gr.print_log() + ph.print_log() + + +class VitsCharacters(BaseCharacters): + """Characters class for VITs model for compatibility with pre-trained models""" + + def __init__( + self, + graphemes: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + ipa_characters: str = _phonemes, + ) -> None: + if ipa_characters is not None: + graphemes += ipa_characters + super().__init__(graphemes, punctuations, pad, None, None, "", is_unique=False, is_sorted=True) + + def _create_vocab(self): + self._vocab = [self._pad] + list(self._punctuations) + list(self._characters) + [self._blank] + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + # pylint: disable=unnecessary-comprehension + self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)} + + @staticmethod + def init_from_config(config): + _pad = config.characters.pad + _punctuations = config.characters.punctuations + _letters = config.characters.characters + _letters_ipa = config.characters.phonemes + return ( + VitsCharacters(graphemes=_letters, ipa_characters=_letters_ipa, punctuations=_punctuations, pad=_pad), + config, + ) + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + 
punctuations=self._punctuations, + pad=self._pad, + eos=None, + bos=None, + blank=self._blank, + is_unique=False, + is_sorted=True, + ) + +class TTSTokenizer: + def __init__( + self, + text_cleaner: Callable = None, + characters: "BaseCharacters" = None, + ): + self.text_cleaner = text_cleaner + self.characters = characters + self.not_found_characters = [] + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, new_characters): + self._characters = new_characters + self.pad_id = self.characters.char_to_id(self.characters.pad) if self.characters.pad else None + self.blank_id = self.characters.char_to_id(self.characters.blank) if self.characters.blank else None + + def encode(self, text: str) -> List[int]: + """Encodes a string of text as a sequence of IDs.""" + token_ids = [] + for char in text: + try: + idx = self.characters.char_to_id(char) + token_ids.append(idx) + except KeyError: + # discard but store not found characters + if char not in self.not_found_characters: + self.not_found_characters.append(char) + print(text) + print(f" [!] Character {repr(char)} not found in the vocabulary. Discarding it.") + return token_ids + + def text_to_ids(self, text: str, language: str = None) -> List[int]: # pylint: disable=unused-argument + text = self.text_cleaner(text) + text = self.encode(text) + text = self.intersperse_blank_char(text, True) + return text + + def pad_with_bos_eos(self, char_sequence: List[str]): + """Pads a sequence with the special BOS and EOS characters.""" + return [self.characters.bos_id] + list(char_sequence) + [self.characters.eos_id] + + def intersperse_blank_char(self, char_sequence: List[str], use_blank_char: bool = False): + """Intersperses the blank character between characters in a sequence. + + Use the ```blank``` character if defined else use the ```pad``` character. 
+ """ + char_to_use = self.characters.blank_id if use_blank_char else self.characters.pad + result = [char_to_use] * (len(char_sequence) * 2 + 1) + result[1::2] = char_sequence + return result + + @staticmethod + def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None): + text_cleaner = multilingual_cleaners + CharactersClass = VitsCharacters + characters, new_config = CharactersClass.init_from_config(config) + # new_config.characters.characters_class = get_import_path(characters) + new_config.characters.characters_class = VitsCharacters + return ( + TTSTokenizer(text_cleaner, characters),new_config) + + +def multilingual_cleaners(text): + """Pipeline for multilingual text""" + text = lowercase(text) + text = replace_symbols(text, lang=None) + text = remove_aux_symbols(text) + text = collapse_whitespace(text) + return text + +def lowercase(text): + return text.lower() + +def collapse_whitespace(text): + return re.sub(_whitespace_re, " ", text).strip() + +def replace_symbols(text, lang="en"): + + text = text.replace(";", ",") + text = text.replace("-", " ") if lang != "ca" else text.replace("-", "") + text = text.replace(":", ",") + if lang == "en": + text = text.replace("&", " and ") + elif lang == "fr": + text = text.replace("&", " et ") + elif lang == "pt": + text = text.replace("&", " e ") + elif lang == "ca": + text = text.replace("&", " i ") + text = text.replace("'", "") + return text + +def remove_aux_symbols(text): + text = re.sub(r"[\<\>\(\)\[\]\"]+", "", text) + return text \ No newline at end of file diff --git a/models/mag_female/jit_infer.py b/models/mag_female/jit_infer.py new file mode 100644 index 0000000000000000000000000000000000000000..1d7c1ad531f9be2a448f0befa3fb9f54fd8b1ddc --- /dev/null +++ b/models/mag_female/jit_infer.py @@ -0,0 +1,33 @@ +import os +from extra import TTSTokenizer, VitsConfig, CharactersConfig, VitsCharacters +import torch +import numpy as np + +#ch female +with open("chars.txt", 'r') as f: + letters = 
f.read().strip('\n') +model="mg_female_vits_30hrs.pt" +# text = " হলেও আমাদের সবার সার্বিক শৃঙ্খলা বোধের উন্নতি হবে" +text = "भेजना चाहते हैं हिंदी में मैसेज लेकिन नहीं आती टाइपिंग?" + +config = VitsConfig( + text_cleaner="multilingual_cleaners", + characters=CharactersConfig( + characters_class=VitsCharacters, + pad="", + eos="", + bos="", + blank="", + characters=letters, + punctuations="!¡'(),-.:;¿? ", + phonemes=None) + ) +tokenizer, config = TTSTokenizer.init_from_config(config) + +x = tokenizer.text_to_ids(text) +x = torch.from_numpy(np.array(x)).unsqueeze(0) +net = torch.jit.load(model) +with torch.no_grad(): + out2 = net(x) +import soundfile as sf +sf.write("jit.wav", out2.squeeze().cpu().numpy(), 22050) \ No newline at end of file diff --git a/models/mag_female/mg_female_vits_30hrs.pt b/models/mag_female/mg_female_vits_30hrs.pt new file mode 100644 index 0000000000000000000000000000000000000000..d9c539eae13ea21c733462502d3f47c8159d1e55 --- /dev/null +++ b/models/mag_female/mg_female_vits_30hrs.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4226ca6b9bc05a27ac4b1c37c53a3f58108bc4c1f1913f9a80d16f805428178 +size 333261318 diff --git a/models/mag_male/.gitattributes b/models/mag_male/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/mag_male/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs 
-text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/mag_male/README.md b/models/mag_male/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b187bb7e7d837a367ccd0862441947ad412c77f7 --- /dev/null +++ b/models/mag_male/README.md @@ -0,0 +1,3 @@ +--- +license: cc-by-4.0 +--- diff --git a/models/mag_male/chars.txt b/models/mag_male/chars.txt new file mode 100644 index 0000000000000000000000000000000000000000..6b97c3e31b3241d62d3752f00dba1d841c3696e2 --- /dev/null +++ b/models/mag_male/chars.txt @@ -0,0 +1 @@ +ओग?ढख़फबुथझ़क़.तङफ़घआरग़ोऊ'खअढ़णूईाीनौएषदेयडभछ, ंटवहइठमउ्धँॉपड़ऋ!औिऽकैऐऑशजृलञचसज़ diff --git a/models/mag_male/extra.py b/models/mag_male/extra.py new file mode 100644 index 0000000000000000000000000000000000000000..c7db561351da270a7c3931bfe0afefa7bc6d4853 --- /dev/null +++ b/models/mag_male/extra.py @@ -0,0 +1,787 @@ +from typing import Callable, Dict, List, Union +from dataclasses import asdict, dataclass, field + + +import re +from dataclasses import replace +from typing import Dict +_whitespace_re 
= re.compile(r"\s+") + +from dataclasses import dataclass, field +from typing import List + +# from TTS.tts.configs.shared_configs import BaseTTSConfig +# from TTS.tts.models.vits import VitsArgs, VitsAudioConfig + +@dataclass +class CharactersConfig(): + + characters_class: str = None + + # using BaseVocabulary + vocab_dict: Dict = None + + # using on BaseCharacters + pad: str = None + eos: str = None + bos: str = None + blank: str = None + characters: str = None + punctuations: str = None + phonemes: str = None + is_unique: bool = True # for backwards compatibility of models trained with char sets with duplicates + is_sorted: bool = True + + +@dataclass +class BaseTTSConfig(): + + # audio: BaseAudioConfig = field(default_factory=BaseAudioConfig) + # phoneme settings + use_phonemes: bool = False + phonemizer: str = None + phoneme_language: str = None + compute_input_seq_cache: bool = False + text_cleaner: str = None + enable_eos_bos_chars: bool = False + test_sentences_file: str = "" + phoneme_cache_path: str = None + # vocabulary parameters + characters: CharactersConfig = None + add_blank: bool = False + # training params + batch_group_size: int = 0 + loss_masking: bool = None + # dataloading + min_audio_len: int = 1 + max_audio_len: int = float("inf") + min_text_len: int = 1 + max_text_len: int = float("inf") + compute_f0: bool = False + compute_energy: bool = False + compute_linear_spec: bool = False + precompute_num_workers: int = 0 + use_noise_augment: bool = False + start_by_longest: bool = False + shuffle: bool = False + drop_last: bool = False + # dataset + datasets: str = None + # optimizer + optimizer: str = "radam" + optimizer_params: dict = None + # scheduler + lr_scheduler: str = None + lr_scheduler_params: dict = field(default_factory=lambda: {}) + # testing + test_sentences: List[str] = field(default_factory=lambda: []) + # evaluation + eval_split_max_size: int = None + eval_split_size: float = 0.01 + # weighted samplers + 
use_speaker_weighted_sampler: bool = False + speaker_weighted_sampler_alpha: float = 1.0 + use_language_weighted_sampler: bool = False + language_weighted_sampler_alpha: float = 1.0 + use_length_weighted_sampler: bool = False + length_weighted_sampler_alpha: float = 1.0 + + +@dataclass +class VitsAudioConfig(): + fft_size: int = 1024 + sample_rate: int = 22050 + win_length: int = 1024 + hop_length: int = 256 + num_mels: int = 80 + mel_fmin: int = 0 + mel_fmax: int = None + +@dataclass +class VitsArgs(): + num_chars: int = 100 + out_channels: int = 513 + spec_segment_size: int = 32 + hidden_channels: int = 192 + hidden_channels_ffn_text_encoder: int = 768 + num_heads_text_encoder: int = 2 + num_layers_text_encoder: int = 6 + kernel_size_text_encoder: int = 3 + dropout_p_text_encoder: float = 0.1 + dropout_p_duration_predictor: float = 0.5 + kernel_size_posterior_encoder: int = 5 + dilation_rate_posterior_encoder: int = 1 + num_layers_posterior_encoder: int = 16 + kernel_size_flow: int = 5 + dilation_rate_flow: int = 1 + num_layers_flow: int = 4 + resblock_type_decoder: str = "1" + resblock_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [3, 7, 11]) + resblock_dilation_sizes_decoder: List[List[int]] = field(default_factory=lambda: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]) + upsample_rates_decoder: List[int] = field(default_factory=lambda: [8, 8, 2, 2]) + upsample_initial_channel_decoder: int = 512 + upsample_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [16, 16, 4, 4]) + periods_multi_period_discriminator: List[int] = field(default_factory=lambda: [2, 3, 5, 7, 11]) + use_sdp: bool = True + noise_scale: float = 1.0 + inference_noise_scale: float = 0.667 + length_scale: float = 1 + noise_scale_dp: float = 1.0 + inference_noise_scale_dp: float = 1.0 + max_inference_len: int = None + init_discriminator: bool = True + use_spectral_norm_disriminator: bool = False + use_speaker_embedding: bool = False + num_speakers: int = 0 + speakers_file: str = 
None + d_vector_file: List[str] = None + speaker_embedding_channels: int = 256 + use_d_vector_file: bool = False + d_vector_dim: int = 0 + detach_dp_input: bool = True + use_language_embedding: bool = False + embedded_language_dim: int = 4 + num_languages: int = 0 + language_ids_file: str = None + use_speaker_encoder_as_loss: bool = False + speaker_encoder_config_path: str = "" + speaker_encoder_model_path: str = "" + condition_dp_on_speaker: bool = True + freeze_encoder: bool = False + freeze_DP: bool = False + freeze_PE: bool = False + freeze_flow_decoder: bool = False + freeze_waveform_decoder: bool = False + encoder_sample_rate: int = None + interpolate_z: bool = True + reinit_DP: bool = False + reinit_text_encoder: bool = False +@dataclass +class VitsConfig(BaseTTSConfig): + + model: str = "vits" + # model specific params + model_args: VitsArgs = field(default_factory=VitsArgs) + audio: VitsAudioConfig = field(default_factory=VitsAudioConfig) + + # optimizer + grad_clip: List[float] = field(default_factory=lambda: [1000, 1000]) + lr_gen: float = 0.0002 + lr_disc: float = 0.0002 + lr_scheduler_gen: str = "ExponentialLR" + lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + lr_scheduler_disc: str = "ExponentialLR" + lr_scheduler_disc_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + scheduler_after_epoch: bool = True + optimizer: str = "AdamW" + optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "eps": 1e-9, "weight_decay": 0.01}) + + # loss params + kl_loss_alpha: float = 1.0 + disc_loss_alpha: float = 1.0 + gen_loss_alpha: float = 1.0 + feat_loss_alpha: float = 1.0 + mel_loss_alpha: float = 45.0 + dur_loss_alpha: float = 1.0 + speaker_encoder_loss_alpha: float = 1.0 + + # data loader params + return_wav: bool = True + compute_linear_spec: bool = True + + # sampler params + use_weighted_sampler: bool = False # TODO: move it to the base config + 
weighted_sampler_attrs: dict = field(default_factory=lambda: {}) + weighted_sampler_multipliers: dict = field(default_factory=lambda: {}) + + # overrides + r: int = 1 # DO NOT CHANGE + add_blank: bool = True + + # testing + test_sentences: List[List] = field( + default_factory=lambda: [ + ["It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent."], + ["Be a voice, not an echo."], + ["I'm sorry Dave. I'm afraid I can't do that."], + ["This cake is great. It's so delicious and moist."], + ["Prior to November 22, 1963."], + ] + ) + + # multi-speaker settings + # use speaker embedding layer + num_speakers: int = 0 + use_speaker_embedding: bool = False + speakers_file: str = None + speaker_embedding_channels: int = 256 + language_ids_file: str = None + use_language_embedding: bool = False + + # use d-vectors + use_d_vector_file: bool = False + d_vector_file: List[str] = None + d_vector_dim: int = None + + def __post_init__(self): + pass + # for key, val in self.model_args.items(): + # if hasattr(self, key): + # self[key] = val + + + + + +def parse_symbols(): + return { + "pad": _pad, + "eos": _eos, + "bos": _bos, + "characters": _characters, + "punctuations": _punctuations, + "phonemes": _phonemes, + } + + +# DEFAULT SET OF GRAPHEMES +_pad = "" +_eos = "" +_bos = "" +_blank = "" # TODO: check if we need this alongside with PAD +_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" +_punctuations = "!'(),-.:;? " + + +# DEFAULT SET OF IPA PHONEMES +# Phonemes definition (All IPA characters) +_vowels = "iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ" +_non_pulmonic_consonants = "ʘɓǀɗǃʄǂɠǁʛ" +_pulmonic_consonants = "pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ" +_suprasegmentals = "ˈˌːˑ" +_other_symbols = "ʍwɥʜʢʡɕʑɺɧʲ" +_diacrilics = "ɚ˞ɫ" +_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics + + +class BaseVocabulary: + """Base Vocabulary class. 
+ + This class only needs a vocabulary dictionary without specifying the characters. + + Args: + vocab (Dict): A dictionary of characters and their corresponding indices. + """ + + def __init__(self, vocab: Dict, pad: str = None, blank: str = None, bos: str = None, eos: str = None): + self.vocab = vocab + self.pad = pad + self.blank = blank + self.bos = bos + self.eos = eos + + @property + def pad_id(self) -> int: + """Return the index of the padding character. If the padding character is not specified, return the length + of the vocabulary.""" + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + """Return the index of the blank character. If the blank character is not specified, return the length of + the vocabulary.""" + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def bos_id(self) -> int: + """Return the index of the bos character. If the bos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def eos_id(self) -> int: + """Return the index of the eos character. 
If the eos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def vocab(self): + """Return the vocabulary dictionary.""" + return self._vocab + + @vocab.setter + def vocab(self, vocab): + """Set the vocabulary dictionary and character mapping dictionaries.""" + self._vocab, self._char_to_id, self._id_to_char = None, None, None + if vocab is not None: + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self._vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self._vocab) # pylint: disable=unnecessary-comprehension + } + + @staticmethod + def init_from_config(config, **kwargs): + """Initialize from the given config.""" + if config.characters is not None and "vocab_dict" in config.characters and config.characters.vocab_dict: + return ( + BaseVocabulary( + config.characters.vocab_dict, + config.characters.pad, + config.characters.blank, + config.characters.bos, + config.characters.eos, + ), + config, + ) + return BaseVocabulary(**kwargs), config + + def to_config(self): + return CharactersConfig( + vocab_dict=self._vocab, + pad=self.pad, + eos=self.eos, + bos=self.bos, + blank=self.blank, + is_unique=False, + is_sorted=False, + ) + + @property + def num_chars(self): + """Return number of tokens in the vocabulary.""" + return len(self._vocab) + + def char_to_id(self, char: str) -> int: + """Map a character to an token ID.""" + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] 
{repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + """Map an token ID to a character.""" + return self._id_to_char[idx] + + +class BaseCharacters: + + + def __init__( + self, + characters: str = None, + punctuations: str = None, + pad: str = None, + eos: str = None, + bos: str = None, + blank: str = None, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + self._characters = characters + self._punctuations = punctuations + self._pad = pad + self._eos = eos + self._bos = bos + self._blank = blank + self.is_unique = is_unique + self.is_sorted = is_sorted + self._create_vocab() + + @property + def pad_id(self) -> int: + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def eos_id(self) -> int: + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def bos_id(self) -> int: + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, characters): + self._characters = characters + self._create_vocab() + + @property + def punctuations(self): + return self._punctuations + + @punctuations.setter + def punctuations(self, punctuations): + self._punctuations = punctuations + self._create_vocab() + + @property + def pad(self): + return self._pad + + @pad.setter + def pad(self, pad): + self._pad = pad + self._create_vocab() + + @property + def eos(self): + return self._eos + + @eos.setter + def eos(self, eos): + self._eos = eos + self._create_vocab() + + @property + def bos(self): + return self._bos + + @bos.setter + def bos(self, bos): + self._bos = bos + self._create_vocab() + + @property + def blank(self): + return self._blank + + @blank.setter + def blank(self, blank): + self._blank = blank + self._create_vocab() + + @property + 
def vocab(self): + return self._vocab + + @vocab.setter + def vocab(self, vocab): + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self.vocab) # pylint: disable=unnecessary-comprehension + } + + @property + def num_chars(self): + return len(self._vocab) + + def _create_vocab(self): + _vocab = self._characters + if self.is_unique: + _vocab = list(set(_vocab)) + if self.is_sorted: + _vocab = sorted(_vocab) + _vocab = list(_vocab) + _vocab = [self._blank] + _vocab if self._blank is not None and len(self._blank) > 0 else _vocab + _vocab = [self._bos] + _vocab if self._bos is not None and len(self._bos) > 0 else _vocab + _vocab = [self._eos] + _vocab if self._eos is not None and len(self._eos) > 0 else _vocab + _vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab + self.vocab = _vocab + list(self._punctuations) + if self.is_unique: + duplicates = {x for x in self.vocab if self.vocab.count(x) > 1} + assert ( + len(self.vocab) == len(self._char_to_id) == len(self._id_to_char) + ), f" [!] There are duplicate characters in the character set. {duplicates}" + + def char_to_id(self, char: str) -> int: + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + return self._id_to_char[idx] + + def print_log(self, level: int = 0): + """ + Prints the vocabulary in a nice format. 
+ """ + indent = "\t" * level + print(f"{indent}| > Characters: {self._characters}") + print(f"{indent}| > Punctuations: {self._punctuations}") + print(f"{indent}| > Pad: {self._pad}") + print(f"{indent}| > EOS: {self._eos}") + print(f"{indent}| > BOS: {self._bos}") + print(f"{indent}| > Blank: {self._blank}") + print(f"{indent}| > Vocab: {self.vocab}") + print(f"{indent}| > Num chars: {self.num_chars}") + + @staticmethod + def init_from_config(config: "Coqpit"): # pylint: disable=unused-argument + """Init your character class from a config. + + Implement this method for your subclass. + """ + # use character set from config + if config.characters is not None: + return BaseCharacters(**config.characters), config + # return default character set + characters = BaseCharacters() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + punctuations=self._punctuations, + pad=self._pad, + eos=self._eos, + bos=self._bos, + blank=self._blank, + is_unique=self.is_unique, + is_sorted=self.is_sorted, + ) + + +class IPAPhonemes(BaseCharacters): + + + def __init__( + self, + characters: str = _phonemes, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a IPAPhonemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + # band-aid for compatibility with old models + if "characters" in config and config.characters is not None: + if "phonemes" in config.characters and config.characters.phonemes is not None: + config.characters["characters"] = config.characters["phonemes"] + return ( + IPAPhonemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + # use character set from config + if config.characters is not None: + return IPAPhonemes(**config.characters), config + # return default character set + characters = IPAPhonemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +class Graphemes(BaseCharacters): + + + def __init__( + self, + characters: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a Graphemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + if config.characters is not None: + # band-aid for compatibility with old models + if "phonemes" in config.characters: + return ( + Graphemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + return Graphemes(**config.characters), config + characters = Graphemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +if __name__ == "__main__": + gr = Graphemes() + ph = IPAPhonemes() + gr.print_log() + ph.print_log() + + +class VitsCharacters(BaseCharacters): + """Characters class for VITs model for compatibility with pre-trained models""" + + def __init__( + self, + graphemes: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + ipa_characters: str = _phonemes, + ) -> None: + if ipa_characters is not None: + graphemes += ipa_characters + super().__init__(graphemes, punctuations, pad, None, None, "", is_unique=False, is_sorted=True) + + def _create_vocab(self): + self._vocab = [self._pad] + list(self._punctuations) + list(self._characters) + [self._blank] + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + # pylint: disable=unnecessary-comprehension + self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)} + + @staticmethod + def init_from_config(config): + _pad = config.characters.pad + _punctuations = config.characters.punctuations + _letters = config.characters.characters + _letters_ipa = config.characters.phonemes + return ( + VitsCharacters(graphemes=_letters, ipa_characters=_letters_ipa, punctuations=_punctuations, pad=_pad), + config, + ) + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + 
punctuations=self._punctuations, + pad=self._pad, + eos=None, + bos=None, + blank=self._blank, + is_unique=False, + is_sorted=True, + ) + +class TTSTokenizer: + def __init__( + self, + text_cleaner: Callable = None, + characters: "BaseCharacters" = None, + ): + self.text_cleaner = text_cleaner + self.characters = characters + self.not_found_characters = [] + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, new_characters): + self._characters = new_characters + self.pad_id = self.characters.char_to_id(self.characters.pad) if self.characters.pad else None + self.blank_id = self.characters.char_to_id(self.characters.blank) if self.characters.blank else None + + def encode(self, text: str) -> List[int]: + """Encodes a string of text as a sequence of IDs.""" + token_ids = [] + for char in text: + try: + idx = self.characters.char_to_id(char) + token_ids.append(idx) + except KeyError: + # discard but store not found characters + if char not in self.not_found_characters: + self.not_found_characters.append(char) + print(text) + print(f" [!] Character {repr(char)} not found in the vocabulary. Discarding it.") + return token_ids + + def text_to_ids(self, text: str, language: str = None) -> List[int]: # pylint: disable=unused-argument + text = self.text_cleaner(text) + text = self.encode(text) + text = self.intersperse_blank_char(text, True) + return text + + def pad_with_bos_eos(self, char_sequence: List[str]): + """Pads a sequence with the special BOS and EOS characters.""" + return [self.characters.bos_id] + list(char_sequence) + [self.characters.eos_id] + + def intersperse_blank_char(self, char_sequence: List[str], use_blank_char: bool = False): + """Intersperses the blank character between characters in a sequence. + + Use the ```blank``` character if defined else use the ```pad``` character. 
+ """ + char_to_use = self.characters.blank_id if use_blank_char else self.characters.pad + result = [char_to_use] * (len(char_sequence) * 2 + 1) + result[1::2] = char_sequence + return result + + @staticmethod + def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None): + text_cleaner = multilingual_cleaners + CharactersClass = VitsCharacters + characters, new_config = CharactersClass.init_from_config(config) + # new_config.characters.characters_class = get_import_path(characters) + new_config.characters.characters_class = VitsCharacters + return ( + TTSTokenizer(text_cleaner, characters),new_config) + + +def multilingual_cleaners(text): + """Pipeline for multilingual text""" + text = lowercase(text) + text = replace_symbols(text, lang=None) + text = remove_aux_symbols(text) + text = collapse_whitespace(text) + return text + +def lowercase(text): + return text.lower() + +def collapse_whitespace(text): + return re.sub(_whitespace_re, " ", text).strip() + +def replace_symbols(text, lang="en"): + + text = text.replace(";", ",") + text = text.replace("-", " ") if lang != "ca" else text.replace("-", "") + text = text.replace(":", ",") + if lang == "en": + text = text.replace("&", " and ") + elif lang == "fr": + text = text.replace("&", " et ") + elif lang == "pt": + text = text.replace("&", " e ") + elif lang == "ca": + text = text.replace("&", " i ") + text = text.replace("'", "") + return text + +def remove_aux_symbols(text): + text = re.sub(r"[\<\>\(\)\[\]\"]+", "", text) + return text \ No newline at end of file diff --git a/models/mag_male/jit_infer.py b/models/mag_male/jit_infer.py new file mode 100644 index 0000000000000000000000000000000000000000..10aea2af3b5cdd72b05e8489ba41239ce40f1b61 --- /dev/null +++ b/models/mag_male/jit_infer.py @@ -0,0 +1,33 @@ +import os +from extra import TTSTokenizer, VitsConfig, CharactersConfig, VitsCharacters +import torch +import numpy as np + +#ch female +with open("chars.txt", 'r') as f: + letters = 
f.read().strip('\n') +model="mg_male_vits_30hrs.pt" +# text = " হলেও আমাদের সবার সার্বিক শৃঙ্খলা বোধের উন্নতি হবে" +text = "भेजना चाहते हैं हिंदी में मैसेज लेकिन नहीं आती टाइपिंग?" + +config = VitsConfig( + text_cleaner="multilingual_cleaners", + characters=CharactersConfig( + characters_class=VitsCharacters, + pad="", + eos="", + bos="", + blank="", + characters=letters, + punctuations="!¡'(),-.:;¿? ", + phonemes=None) + ) +tokenizer, config = TTSTokenizer.init_from_config(config) + +x = tokenizer.text_to_ids(text) +x = torch.from_numpy(np.array(x)).unsqueeze(0) +net = torch.jit.load(model) +with torch.no_grad(): + out2 = net(x) +import soundfile as sf +sf.write("jit.wav", out2.squeeze().cpu().numpy(), 22050) \ No newline at end of file diff --git a/models/mag_male/mg_male_vits_30hrs.pt b/models/mag_male/mg_male_vits_30hrs.pt new file mode 100644 index 0000000000000000000000000000000000000000..3aae249c61bd7858595e06333ac92ae7987a0f35 --- /dev/null +++ b/models/mag_male/mg_male_vits_30hrs.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e4105200532fde9960eb22756d393aacbbb21b04adb89e4eb681f4ff4f4e7ed +size 333255692 diff --git a/models/mai_female/.gitattributes b/models/mai_female/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/mai_female/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text 
+*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/mai_female/README.md b/models/mai_female/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b187bb7e7d837a367ccd0862441947ad412c77f7 --- /dev/null +++ b/models/mai_female/README.md @@ -0,0 +1,3 @@ +--- +license: cc-by-4.0 +--- diff --git a/models/mai_female/chars.txt b/models/mai_female/chars.txt new file mode 100644 index 0000000000000000000000000000000000000000..5d1e3710e0214de271c1902b987c7de333438aaf --- /dev/null +++ b/models/mai_female/chars.txt @@ -0,0 +1 @@ +गफ़ङोऊूौइँउठऋऐऑलञढग़अखणयछ,फ़वमड़औिशचज़ओ?थतॅढ़ाीनदॊभडटधऽैख़क़बुझ.घआर॑'ईएषे ंह्ॠॉप!कजृस diff --git a/models/mai_female/extra.py b/models/mai_female/extra.py new file mode 100644 index 0000000000000000000000000000000000000000..c7db561351da270a7c3931bfe0afefa7bc6d4853 --- /dev/null +++ b/models/mai_female/extra.py @@ -0,0 +1,787 @@ +from typing import Callable, Dict, List, Union +from dataclasses import asdict, dataclass, field + + +import re +from dataclasses import replace +from typing import Dict 
+_whitespace_re = re.compile(r"\s+") + +from dataclasses import dataclass, field +from typing import List + +# from TTS.tts.configs.shared_configs import BaseTTSConfig +# from TTS.tts.models.vits import VitsArgs, VitsAudioConfig + +@dataclass +class CharactersConfig(): + + characters_class: str = None + + # using BaseVocabulary + vocab_dict: Dict = None + + # using on BaseCharacters + pad: str = None + eos: str = None + bos: str = None + blank: str = None + characters: str = None + punctuations: str = None + phonemes: str = None + is_unique: bool = True # for backwards compatibility of models trained with char sets with duplicates + is_sorted: bool = True + + +@dataclass +class BaseTTSConfig(): + + # audio: BaseAudioConfig = field(default_factory=BaseAudioConfig) + # phoneme settings + use_phonemes: bool = False + phonemizer: str = None + phoneme_language: str = None + compute_input_seq_cache: bool = False + text_cleaner: str = None + enable_eos_bos_chars: bool = False + test_sentences_file: str = "" + phoneme_cache_path: str = None + # vocabulary parameters + characters: CharactersConfig = None + add_blank: bool = False + # training params + batch_group_size: int = 0 + loss_masking: bool = None + # dataloading + min_audio_len: int = 1 + max_audio_len: int = float("inf") + min_text_len: int = 1 + max_text_len: int = float("inf") + compute_f0: bool = False + compute_energy: bool = False + compute_linear_spec: bool = False + precompute_num_workers: int = 0 + use_noise_augment: bool = False + start_by_longest: bool = False + shuffle: bool = False + drop_last: bool = False + # dataset + datasets: str = None + # optimizer + optimizer: str = "radam" + optimizer_params: dict = None + # scheduler + lr_scheduler: str = None + lr_scheduler_params: dict = field(default_factory=lambda: {}) + # testing + test_sentences: List[str] = field(default_factory=lambda: []) + # evaluation + eval_split_max_size: int = None + eval_split_size: float = 0.01 + # weighted samplers + 
use_speaker_weighted_sampler: bool = False + speaker_weighted_sampler_alpha: float = 1.0 + use_language_weighted_sampler: bool = False + language_weighted_sampler_alpha: float = 1.0 + use_length_weighted_sampler: bool = False + length_weighted_sampler_alpha: float = 1.0 + + +@dataclass +class VitsAudioConfig(): + fft_size: int = 1024 + sample_rate: int = 22050 + win_length: int = 1024 + hop_length: int = 256 + num_mels: int = 80 + mel_fmin: int = 0 + mel_fmax: int = None + +@dataclass +class VitsArgs(): + num_chars: int = 100 + out_channels: int = 513 + spec_segment_size: int = 32 + hidden_channels: int = 192 + hidden_channels_ffn_text_encoder: int = 768 + num_heads_text_encoder: int = 2 + num_layers_text_encoder: int = 6 + kernel_size_text_encoder: int = 3 + dropout_p_text_encoder: float = 0.1 + dropout_p_duration_predictor: float = 0.5 + kernel_size_posterior_encoder: int = 5 + dilation_rate_posterior_encoder: int = 1 + num_layers_posterior_encoder: int = 16 + kernel_size_flow: int = 5 + dilation_rate_flow: int = 1 + num_layers_flow: int = 4 + resblock_type_decoder: str = "1" + resblock_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [3, 7, 11]) + resblock_dilation_sizes_decoder: List[List[int]] = field(default_factory=lambda: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]) + upsample_rates_decoder: List[int] = field(default_factory=lambda: [8, 8, 2, 2]) + upsample_initial_channel_decoder: int = 512 + upsample_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [16, 16, 4, 4]) + periods_multi_period_discriminator: List[int] = field(default_factory=lambda: [2, 3, 5, 7, 11]) + use_sdp: bool = True + noise_scale: float = 1.0 + inference_noise_scale: float = 0.667 + length_scale: float = 1 + noise_scale_dp: float = 1.0 + inference_noise_scale_dp: float = 1.0 + max_inference_len: int = None + init_discriminator: bool = True + use_spectral_norm_disriminator: bool = False + use_speaker_embedding: bool = False + num_speakers: int = 0 + speakers_file: str = 
None + d_vector_file: List[str] = None + speaker_embedding_channels: int = 256 + use_d_vector_file: bool = False + d_vector_dim: int = 0 + detach_dp_input: bool = True + use_language_embedding: bool = False + embedded_language_dim: int = 4 + num_languages: int = 0 + language_ids_file: str = None + use_speaker_encoder_as_loss: bool = False + speaker_encoder_config_path: str = "" + speaker_encoder_model_path: str = "" + condition_dp_on_speaker: bool = True + freeze_encoder: bool = False + freeze_DP: bool = False + freeze_PE: bool = False + freeze_flow_decoder: bool = False + freeze_waveform_decoder: bool = False + encoder_sample_rate: int = None + interpolate_z: bool = True + reinit_DP: bool = False + reinit_text_encoder: bool = False +@dataclass +class VitsConfig(BaseTTSConfig): + + model: str = "vits" + # model specific params + model_args: VitsArgs = field(default_factory=VitsArgs) + audio: VitsAudioConfig = field(default_factory=VitsAudioConfig) + + # optimizer + grad_clip: List[float] = field(default_factory=lambda: [1000, 1000]) + lr_gen: float = 0.0002 + lr_disc: float = 0.0002 + lr_scheduler_gen: str = "ExponentialLR" + lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + lr_scheduler_disc: str = "ExponentialLR" + lr_scheduler_disc_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + scheduler_after_epoch: bool = True + optimizer: str = "AdamW" + optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "eps": 1e-9, "weight_decay": 0.01}) + + # loss params + kl_loss_alpha: float = 1.0 + disc_loss_alpha: float = 1.0 + gen_loss_alpha: float = 1.0 + feat_loss_alpha: float = 1.0 + mel_loss_alpha: float = 45.0 + dur_loss_alpha: float = 1.0 + speaker_encoder_loss_alpha: float = 1.0 + + # data loader params + return_wav: bool = True + compute_linear_spec: bool = True + + # sampler params + use_weighted_sampler: bool = False # TODO: move it to the base config + 
weighted_sampler_attrs: dict = field(default_factory=lambda: {}) + weighted_sampler_multipliers: dict = field(default_factory=lambda: {}) + + # overrides + r: int = 1 # DO NOT CHANGE + add_blank: bool = True + + # testing + test_sentences: List[List] = field( + default_factory=lambda: [ + ["It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent."], + ["Be a voice, not an echo."], + ["I'm sorry Dave. I'm afraid I can't do that."], + ["This cake is great. It's so delicious and moist."], + ["Prior to November 22, 1963."], + ] + ) + + # multi-speaker settings + # use speaker embedding layer + num_speakers: int = 0 + use_speaker_embedding: bool = False + speakers_file: str = None + speaker_embedding_channels: int = 256 + language_ids_file: str = None + use_language_embedding: bool = False + + # use d-vectors + use_d_vector_file: bool = False + d_vector_file: List[str] = None + d_vector_dim: int = None + + def __post_init__(self): + pass + # for key, val in self.model_args.items(): + # if hasattr(self, key): + # self[key] = val + + + + + +def parse_symbols(): + return { + "pad": _pad, + "eos": _eos, + "bos": _bos, + "characters": _characters, + "punctuations": _punctuations, + "phonemes": _phonemes, + } + + +# DEFAULT SET OF GRAPHEMES +_pad = "" +_eos = "" +_bos = "" +_blank = "" # TODO: check if we need this alongside with PAD +_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" +_punctuations = "!'(),-.:;? " + + +# DEFAULT SET OF IPA PHONEMES +# Phonemes definition (All IPA characters) +_vowels = "iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ" +_non_pulmonic_consonants = "ʘɓǀɗǃʄǂɠǁʛ" +_pulmonic_consonants = "pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ" +_suprasegmentals = "ˈˌːˑ" +_other_symbols = "ʍwɥʜʢʡɕʑɺɧʲ" +_diacrilics = "ɚ˞ɫ" +_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics + + +class BaseVocabulary: + """Base Vocabulary class. 
+ + This class only needs a vocabulary dictionary without specifying the characters. + + Args: + vocab (Dict): A dictionary of characters and their corresponding indices. + """ + + def __init__(self, vocab: Dict, pad: str = None, blank: str = None, bos: str = None, eos: str = None): + self.vocab = vocab + self.pad = pad + self.blank = blank + self.bos = bos + self.eos = eos + + @property + def pad_id(self) -> int: + """Return the index of the padding character. If the padding character is not specified, return the length + of the vocabulary.""" + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + """Return the index of the blank character. If the blank character is not specified, return the length of + the vocabulary.""" + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def bos_id(self) -> int: + """Return the index of the bos character. If the bos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def eos_id(self) -> int: + """Return the index of the eos character. 
If the eos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def vocab(self): + """Return the vocabulary dictionary.""" + return self._vocab + + @vocab.setter + def vocab(self, vocab): + """Set the vocabulary dictionary and character mapping dictionaries.""" + self._vocab, self._char_to_id, self._id_to_char = None, None, None + if vocab is not None: + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self._vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self._vocab) # pylint: disable=unnecessary-comprehension + } + + @staticmethod + def init_from_config(config, **kwargs): + """Initialize from the given config.""" + if config.characters is not None and "vocab_dict" in config.characters and config.characters.vocab_dict: + return ( + BaseVocabulary( + config.characters.vocab_dict, + config.characters.pad, + config.characters.blank, + config.characters.bos, + config.characters.eos, + ), + config, + ) + return BaseVocabulary(**kwargs), config + + def to_config(self): + return CharactersConfig( + vocab_dict=self._vocab, + pad=self.pad, + eos=self.eos, + bos=self.bos, + blank=self.blank, + is_unique=False, + is_sorted=False, + ) + + @property + def num_chars(self): + """Return number of tokens in the vocabulary.""" + return len(self._vocab) + + def char_to_id(self, char: str) -> int: + """Map a character to an token ID.""" + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] 
{repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + """Map an token ID to a character.""" + return self._id_to_char[idx] + + +class BaseCharacters: + + + def __init__( + self, + characters: str = None, + punctuations: str = None, + pad: str = None, + eos: str = None, + bos: str = None, + blank: str = None, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + self._characters = characters + self._punctuations = punctuations + self._pad = pad + self._eos = eos + self._bos = bos + self._blank = blank + self.is_unique = is_unique + self.is_sorted = is_sorted + self._create_vocab() + + @property + def pad_id(self) -> int: + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def eos_id(self) -> int: + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def bos_id(self) -> int: + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, characters): + self._characters = characters + self._create_vocab() + + @property + def punctuations(self): + return self._punctuations + + @punctuations.setter + def punctuations(self, punctuations): + self._punctuations = punctuations + self._create_vocab() + + @property + def pad(self): + return self._pad + + @pad.setter + def pad(self, pad): + self._pad = pad + self._create_vocab() + + @property + def eos(self): + return self._eos + + @eos.setter + def eos(self, eos): + self._eos = eos + self._create_vocab() + + @property + def bos(self): + return self._bos + + @bos.setter + def bos(self, bos): + self._bos = bos + self._create_vocab() + + @property + def blank(self): + return self._blank + + @blank.setter + def blank(self, blank): + self._blank = blank + self._create_vocab() + + @property + 
def vocab(self): + return self._vocab + + @vocab.setter + def vocab(self, vocab): + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self.vocab) # pylint: disable=unnecessary-comprehension + } + + @property + def num_chars(self): + return len(self._vocab) + + def _create_vocab(self): + _vocab = self._characters + if self.is_unique: + _vocab = list(set(_vocab)) + if self.is_sorted: + _vocab = sorted(_vocab) + _vocab = list(_vocab) + _vocab = [self._blank] + _vocab if self._blank is not None and len(self._blank) > 0 else _vocab + _vocab = [self._bos] + _vocab if self._bos is not None and len(self._bos) > 0 else _vocab + _vocab = [self._eos] + _vocab if self._eos is not None and len(self._eos) > 0 else _vocab + _vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab + self.vocab = _vocab + list(self._punctuations) + if self.is_unique: + duplicates = {x for x in self.vocab if self.vocab.count(x) > 1} + assert ( + len(self.vocab) == len(self._char_to_id) == len(self._id_to_char) + ), f" [!] There are duplicate characters in the character set. {duplicates}" + + def char_to_id(self, char: str) -> int: + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + return self._id_to_char[idx] + + def print_log(self, level: int = 0): + """ + Prints the vocabulary in a nice format. 
+ """ + indent = "\t" * level + print(f"{indent}| > Characters: {self._characters}") + print(f"{indent}| > Punctuations: {self._punctuations}") + print(f"{indent}| > Pad: {self._pad}") + print(f"{indent}| > EOS: {self._eos}") + print(f"{indent}| > BOS: {self._bos}") + print(f"{indent}| > Blank: {self._blank}") + print(f"{indent}| > Vocab: {self.vocab}") + print(f"{indent}| > Num chars: {self.num_chars}") + + @staticmethod + def init_from_config(config: "Coqpit"): # pylint: disable=unused-argument + """Init your character class from a config. + + Implement this method for your subclass. + """ + # use character set from config + if config.characters is not None: + return BaseCharacters(**config.characters), config + # return default character set + characters = BaseCharacters() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + punctuations=self._punctuations, + pad=self._pad, + eos=self._eos, + bos=self._bos, + blank=self._blank, + is_unique=self.is_unique, + is_sorted=self.is_sorted, + ) + + +class IPAPhonemes(BaseCharacters): + + + def __init__( + self, + characters: str = _phonemes, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a IPAPhonemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + # band-aid for compatibility with old models + if "characters" in config and config.characters is not None: + if "phonemes" in config.characters and config.characters.phonemes is not None: + config.characters["characters"] = config.characters["phonemes"] + return ( + IPAPhonemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + # use character set from config + if config.characters is not None: + return IPAPhonemes(**config.characters), config + # return default character set + characters = IPAPhonemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +class Graphemes(BaseCharacters): + + + def __init__( + self, + characters: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a Graphemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + if config.characters is not None: + # band-aid for compatibility with old models + if "phonemes" in config.characters: + return ( + Graphemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + return Graphemes(**config.characters), config + characters = Graphemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +if __name__ == "__main__": + gr = Graphemes() + ph = IPAPhonemes() + gr.print_log() + ph.print_log() + + +class VitsCharacters(BaseCharacters): + """Characters class for VITs model for compatibility with pre-trained models""" + + def __init__( + self, + graphemes: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + ipa_characters: str = _phonemes, + ) -> None: + if ipa_characters is not None: + graphemes += ipa_characters + super().__init__(graphemes, punctuations, pad, None, None, "", is_unique=False, is_sorted=True) + + def _create_vocab(self): + self._vocab = [self._pad] + list(self._punctuations) + list(self._characters) + [self._blank] + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + # pylint: disable=unnecessary-comprehension + self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)} + + @staticmethod + def init_from_config(config): + _pad = config.characters.pad + _punctuations = config.characters.punctuations + _letters = config.characters.characters + _letters_ipa = config.characters.phonemes + return ( + VitsCharacters(graphemes=_letters, ipa_characters=_letters_ipa, punctuations=_punctuations, pad=_pad), + config, + ) + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + 
punctuations=self._punctuations, + pad=self._pad, + eos=None, + bos=None, + blank=self._blank, + is_unique=False, + is_sorted=True, + ) + +class TTSTokenizer: + def __init__( + self, + text_cleaner: Callable = None, + characters: "BaseCharacters" = None, + ): + self.text_cleaner = text_cleaner + self.characters = characters + self.not_found_characters = [] + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, new_characters): + self._characters = new_characters + self.pad_id = self.characters.char_to_id(self.characters.pad) if self.characters.pad else None + self.blank_id = self.characters.char_to_id(self.characters.blank) if self.characters.blank else None + + def encode(self, text: str) -> List[int]: + """Encodes a string of text as a sequence of IDs.""" + token_ids = [] + for char in text: + try: + idx = self.characters.char_to_id(char) + token_ids.append(idx) + except KeyError: + # discard but store not found characters + if char not in self.not_found_characters: + self.not_found_characters.append(char) + print(text) + print(f" [!] Character {repr(char)} not found in the vocabulary. Discarding it.") + return token_ids + + def text_to_ids(self, text: str, language: str = None) -> List[int]: # pylint: disable=unused-argument + text = self.text_cleaner(text) + text = self.encode(text) + text = self.intersperse_blank_char(text, True) + return text + + def pad_with_bos_eos(self, char_sequence: List[str]): + """Pads a sequence with the special BOS and EOS characters.""" + return [self.characters.bos_id] + list(char_sequence) + [self.characters.eos_id] + + def intersperse_blank_char(self, char_sequence: List[str], use_blank_char: bool = False): + """Intersperses the blank character between characters in a sequence. + + Use the ```blank``` character if defined else use the ```pad``` character. 
+ """ + char_to_use = self.characters.blank_id if use_blank_char else self.characters.pad + result = [char_to_use] * (len(char_sequence) * 2 + 1) + result[1::2] = char_sequence + return result + + @staticmethod + def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None): + text_cleaner = multilingual_cleaners + CharactersClass = VitsCharacters + characters, new_config = CharactersClass.init_from_config(config) + # new_config.characters.characters_class = get_import_path(characters) + new_config.characters.characters_class = VitsCharacters + return ( + TTSTokenizer(text_cleaner, characters),new_config) + + +def multilingual_cleaners(text): + """Pipeline for multilingual text""" + text = lowercase(text) + text = replace_symbols(text, lang=None) + text = remove_aux_symbols(text) + text = collapse_whitespace(text) + return text + +def lowercase(text): + return text.lower() + +def collapse_whitespace(text): + return re.sub(_whitespace_re, " ", text).strip() + +def replace_symbols(text, lang="en"): + + text = text.replace(";", ",") + text = text.replace("-", " ") if lang != "ca" else text.replace("-", "") + text = text.replace(":", ",") + if lang == "en": + text = text.replace("&", " and ") + elif lang == "fr": + text = text.replace("&", " et ") + elif lang == "pt": + text = text.replace("&", " e ") + elif lang == "ca": + text = text.replace("&", " i ") + text = text.replace("'", "") + return text + +def remove_aux_symbols(text): + text = re.sub(r"[\<\>\(\)\[\]\"]+", "", text) + return text \ No newline at end of file diff --git a/models/mai_female/jit_infer.py b/models/mai_female/jit_infer.py new file mode 100644 index 0000000000000000000000000000000000000000..8ee233d792b9751d0ca9ed91b18cac9d3bfa0543 --- /dev/null +++ b/models/mai_female/jit_infer.py @@ -0,0 +1,34 @@ +import os +from extra import TTSTokenizer, VitsConfig, CharactersConfig, VitsCharacters +import torch +import numpy as np + +#ch female +with open("chars.txt", 'r') as f: + letters = 
f.read().strip('\n') +model="mt_female_vits_30hrs.pt" +# text = " হলেও আমাদের সবার সার্বিক শৃঙ্খলা বোধের উন্নতি হবে" +text = "भेजना चाहते हैं हिंदी में मैसेज लेकिन नहीं आती टाइपिंग?" + + +config = VitsConfig( + text_cleaner="multilingual_cleaners", + characters=CharactersConfig( + characters_class=VitsCharacters, + pad="", + eos="", + bos="", + blank="", + characters=letters, + punctuations="!¡'(),-.:;¿? ", + phonemes=None) + ) +tokenizer, config = TTSTokenizer.init_from_config(config) + +x = tokenizer.text_to_ids(text) +x = torch.from_numpy(np.array(x)).unsqueeze(0) +net = torch.jit.load(model) +with torch.no_grad(): + out2 = net(x) +import soundfile as sf +sf.write("jit.wav", out2.squeeze().cpu().numpy(), 22050) \ No newline at end of file diff --git a/models/mai_female/mt_female_vits_30hrs.pt b/models/mai_female/mt_female_vits_30hrs.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a85de66917240f1e6179b07702eafd86dbf376d --- /dev/null +++ b/models/mai_female/mt_female_vits_30hrs.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e8099493e5e8d2267e5dfea922afeef1c8c680c8372480f3bbd15218f60f7d6 +size 333264262 diff --git a/models/mai_male/.gitattributes b/models/mai_male/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/mai_male/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs 
-text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/mai_male/README.md b/models/mai_male/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b187bb7e7d837a367ccd0862441947ad412c77f7 --- /dev/null +++ b/models/mai_male/README.md @@ -0,0 +1,3 @@ +--- +license: cc-by-4.0 +--- diff --git a/models/mai_male/chars.txt b/models/mai_male/chars.txt new file mode 100644 index 0000000000000000000000000000000000000000..955f58edee34cb606d7818c30ff79d99367f8644 --- /dev/null +++ b/models/mai_male/chars.txt @@ -0,0 +1 @@ +गफ़ङोऊूौइँउठऋऑऐलञढग़खअणयछ,फ़वमड़औिशचज़ओ?थतॅढ़ीानदभडटधऽैख़क़बुझ.घआर॑'ईएषे" ंह्ॠॉप!कजृस diff --git a/models/mai_male/extra.py b/models/mai_male/extra.py new file mode 100644 index 0000000000000000000000000000000000000000..c7db561351da270a7c3931bfe0afefa7bc6d4853 --- /dev/null +++ b/models/mai_male/extra.py @@ -0,0 +1,787 @@ +from typing import Callable, Dict, List, Union +from dataclasses import asdict, dataclass, field + + +import re +from dataclasses import replace +from typing import Dict 
+_whitespace_re = re.compile(r"\s+") + +from dataclasses import dataclass, field +from typing import List + +# from TTS.tts.configs.shared_configs import BaseTTSConfig +# from TTS.tts.models.vits import VitsArgs, VitsAudioConfig + +@dataclass +class CharactersConfig(): + + characters_class: str = None + + # using BaseVocabulary + vocab_dict: Dict = None + + # using on BaseCharacters + pad: str = None + eos: str = None + bos: str = None + blank: str = None + characters: str = None + punctuations: str = None + phonemes: str = None + is_unique: bool = True # for backwards compatibility of models trained with char sets with duplicates + is_sorted: bool = True + + +@dataclass +class BaseTTSConfig(): + + # audio: BaseAudioConfig = field(default_factory=BaseAudioConfig) + # phoneme settings + use_phonemes: bool = False + phonemizer: str = None + phoneme_language: str = None + compute_input_seq_cache: bool = False + text_cleaner: str = None + enable_eos_bos_chars: bool = False + test_sentences_file: str = "" + phoneme_cache_path: str = None + # vocabulary parameters + characters: CharactersConfig = None + add_blank: bool = False + # training params + batch_group_size: int = 0 + loss_masking: bool = None + # dataloading + min_audio_len: int = 1 + max_audio_len: int = float("inf") + min_text_len: int = 1 + max_text_len: int = float("inf") + compute_f0: bool = False + compute_energy: bool = False + compute_linear_spec: bool = False + precompute_num_workers: int = 0 + use_noise_augment: bool = False + start_by_longest: bool = False + shuffle: bool = False + drop_last: bool = False + # dataset + datasets: str = None + # optimizer + optimizer: str = "radam" + optimizer_params: dict = None + # scheduler + lr_scheduler: str = None + lr_scheduler_params: dict = field(default_factory=lambda: {}) + # testing + test_sentences: List[str] = field(default_factory=lambda: []) + # evaluation + eval_split_max_size: int = None + eval_split_size: float = 0.01 + # weighted samplers + 
use_speaker_weighted_sampler: bool = False + speaker_weighted_sampler_alpha: float = 1.0 + use_language_weighted_sampler: bool = False + language_weighted_sampler_alpha: float = 1.0 + use_length_weighted_sampler: bool = False + length_weighted_sampler_alpha: float = 1.0 + + +@dataclass +class VitsAudioConfig(): + fft_size: int = 1024 + sample_rate: int = 22050 + win_length: int = 1024 + hop_length: int = 256 + num_mels: int = 80 + mel_fmin: int = 0 + mel_fmax: int = None + +@dataclass +class VitsArgs(): + num_chars: int = 100 + out_channels: int = 513 + spec_segment_size: int = 32 + hidden_channels: int = 192 + hidden_channels_ffn_text_encoder: int = 768 + num_heads_text_encoder: int = 2 + num_layers_text_encoder: int = 6 + kernel_size_text_encoder: int = 3 + dropout_p_text_encoder: float = 0.1 + dropout_p_duration_predictor: float = 0.5 + kernel_size_posterior_encoder: int = 5 + dilation_rate_posterior_encoder: int = 1 + num_layers_posterior_encoder: int = 16 + kernel_size_flow: int = 5 + dilation_rate_flow: int = 1 + num_layers_flow: int = 4 + resblock_type_decoder: str = "1" + resblock_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [3, 7, 11]) + resblock_dilation_sizes_decoder: List[List[int]] = field(default_factory=lambda: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]) + upsample_rates_decoder: List[int] = field(default_factory=lambda: [8, 8, 2, 2]) + upsample_initial_channel_decoder: int = 512 + upsample_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [16, 16, 4, 4]) + periods_multi_period_discriminator: List[int] = field(default_factory=lambda: [2, 3, 5, 7, 11]) + use_sdp: bool = True + noise_scale: float = 1.0 + inference_noise_scale: float = 0.667 + length_scale: float = 1 + noise_scale_dp: float = 1.0 + inference_noise_scale_dp: float = 1.0 + max_inference_len: int = None + init_discriminator: bool = True + use_spectral_norm_disriminator: bool = False + use_speaker_embedding: bool = False + num_speakers: int = 0 + speakers_file: str = 
None + d_vector_file: List[str] = None + speaker_embedding_channels: int = 256 + use_d_vector_file: bool = False + d_vector_dim: int = 0 + detach_dp_input: bool = True + use_language_embedding: bool = False + embedded_language_dim: int = 4 + num_languages: int = 0 + language_ids_file: str = None + use_speaker_encoder_as_loss: bool = False + speaker_encoder_config_path: str = "" + speaker_encoder_model_path: str = "" + condition_dp_on_speaker: bool = True + freeze_encoder: bool = False + freeze_DP: bool = False + freeze_PE: bool = False + freeze_flow_decoder: bool = False + freeze_waveform_decoder: bool = False + encoder_sample_rate: int = None + interpolate_z: bool = True + reinit_DP: bool = False + reinit_text_encoder: bool = False +@dataclass +class VitsConfig(BaseTTSConfig): + + model: str = "vits" + # model specific params + model_args: VitsArgs = field(default_factory=VitsArgs) + audio: VitsAudioConfig = field(default_factory=VitsAudioConfig) + + # optimizer + grad_clip: List[float] = field(default_factory=lambda: [1000, 1000]) + lr_gen: float = 0.0002 + lr_disc: float = 0.0002 + lr_scheduler_gen: str = "ExponentialLR" + lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + lr_scheduler_disc: str = "ExponentialLR" + lr_scheduler_disc_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + scheduler_after_epoch: bool = True + optimizer: str = "AdamW" + optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "eps": 1e-9, "weight_decay": 0.01}) + + # loss params + kl_loss_alpha: float = 1.0 + disc_loss_alpha: float = 1.0 + gen_loss_alpha: float = 1.0 + feat_loss_alpha: float = 1.0 + mel_loss_alpha: float = 45.0 + dur_loss_alpha: float = 1.0 + speaker_encoder_loss_alpha: float = 1.0 + + # data loader params + return_wav: bool = True + compute_linear_spec: bool = True + + # sampler params + use_weighted_sampler: bool = False # TODO: move it to the base config + 
weighted_sampler_attrs: dict = field(default_factory=lambda: {}) + weighted_sampler_multipliers: dict = field(default_factory=lambda: {}) + + # overrides + r: int = 1 # DO NOT CHANGE + add_blank: bool = True + + # testing + test_sentences: List[List] = field( + default_factory=lambda: [ + ["It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent."], + ["Be a voice, not an echo."], + ["I'm sorry Dave. I'm afraid I can't do that."], + ["This cake is great. It's so delicious and moist."], + ["Prior to November 22, 1963."], + ] + ) + + # multi-speaker settings + # use speaker embedding layer + num_speakers: int = 0 + use_speaker_embedding: bool = False + speakers_file: str = None + speaker_embedding_channels: int = 256 + language_ids_file: str = None + use_language_embedding: bool = False + + # use d-vectors + use_d_vector_file: bool = False + d_vector_file: List[str] = None + d_vector_dim: int = None + + def __post_init__(self): + pass + # for key, val in self.model_args.items(): + # if hasattr(self, key): + # self[key] = val + + + + + +def parse_symbols(): + return { + "pad": _pad, + "eos": _eos, + "bos": _bos, + "characters": _characters, + "punctuations": _punctuations, + "phonemes": _phonemes, + } + + +# DEFAULT SET OF GRAPHEMES +_pad = "" +_eos = "" +_bos = "" +_blank = "" # TODO: check if we need this alongside with PAD +_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" +_punctuations = "!'(),-.:;? " + + +# DEFAULT SET OF IPA PHONEMES +# Phonemes definition (All IPA characters) +_vowels = "iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ" +_non_pulmonic_consonants = "ʘɓǀɗǃʄǂɠǁʛ" +_pulmonic_consonants = "pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ" +_suprasegmentals = "ˈˌːˑ" +_other_symbols = "ʍwɥʜʢʡɕʑɺɧʲ" +_diacrilics = "ɚ˞ɫ" +_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics + + +class BaseVocabulary: + """Base Vocabulary class. 
+ + This class only needs a vocabulary dictionary without specifying the characters. + + Args: + vocab (Dict): A dictionary of characters and their corresponding indices. + """ + + def __init__(self, vocab: Dict, pad: str = None, blank: str = None, bos: str = None, eos: str = None): + self.vocab = vocab + self.pad = pad + self.blank = blank + self.bos = bos + self.eos = eos + + @property + def pad_id(self) -> int: + """Return the index of the padding character. If the padding character is not specified, return the length + of the vocabulary.""" + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + """Return the index of the blank character. If the blank character is not specified, return the length of + the vocabulary.""" + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def bos_id(self) -> int: + """Return the index of the bos character. If the bos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def eos_id(self) -> int: + """Return the index of the eos character. 
If the eos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def vocab(self): + """Return the vocabulary dictionary.""" + return self._vocab + + @vocab.setter + def vocab(self, vocab): + """Set the vocabulary dictionary and character mapping dictionaries.""" + self._vocab, self._char_to_id, self._id_to_char = None, None, None + if vocab is not None: + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self._vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self._vocab) # pylint: disable=unnecessary-comprehension + } + + @staticmethod + def init_from_config(config, **kwargs): + """Initialize from the given config.""" + if config.characters is not None and "vocab_dict" in config.characters and config.characters.vocab_dict: + return ( + BaseVocabulary( + config.characters.vocab_dict, + config.characters.pad, + config.characters.blank, + config.characters.bos, + config.characters.eos, + ), + config, + ) + return BaseVocabulary(**kwargs), config + + def to_config(self): + return CharactersConfig( + vocab_dict=self._vocab, + pad=self.pad, + eos=self.eos, + bos=self.bos, + blank=self.blank, + is_unique=False, + is_sorted=False, + ) + + @property + def num_chars(self): + """Return number of tokens in the vocabulary.""" + return len(self._vocab) + + def char_to_id(self, char: str) -> int: + """Map a character to an token ID.""" + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] 
{repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + """Map an token ID to a character.""" + return self._id_to_char[idx] + + +class BaseCharacters: + + + def __init__( + self, + characters: str = None, + punctuations: str = None, + pad: str = None, + eos: str = None, + bos: str = None, + blank: str = None, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + self._characters = characters + self._punctuations = punctuations + self._pad = pad + self._eos = eos + self._bos = bos + self._blank = blank + self.is_unique = is_unique + self.is_sorted = is_sorted + self._create_vocab() + + @property + def pad_id(self) -> int: + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def eos_id(self) -> int: + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def bos_id(self) -> int: + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, characters): + self._characters = characters + self._create_vocab() + + @property + def punctuations(self): + return self._punctuations + + @punctuations.setter + def punctuations(self, punctuations): + self._punctuations = punctuations + self._create_vocab() + + @property + def pad(self): + return self._pad + + @pad.setter + def pad(self, pad): + self._pad = pad + self._create_vocab() + + @property + def eos(self): + return self._eos + + @eos.setter + def eos(self, eos): + self._eos = eos + self._create_vocab() + + @property + def bos(self): + return self._bos + + @bos.setter + def bos(self, bos): + self._bos = bos + self._create_vocab() + + @property + def blank(self): + return self._blank + + @blank.setter + def blank(self, blank): + self._blank = blank + self._create_vocab() + + @property + 
def vocab(self): + return self._vocab + + @vocab.setter + def vocab(self, vocab): + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self.vocab) # pylint: disable=unnecessary-comprehension + } + + @property + def num_chars(self): + return len(self._vocab) + + def _create_vocab(self): + _vocab = self._characters + if self.is_unique: + _vocab = list(set(_vocab)) + if self.is_sorted: + _vocab = sorted(_vocab) + _vocab = list(_vocab) + _vocab = [self._blank] + _vocab if self._blank is not None and len(self._blank) > 0 else _vocab + _vocab = [self._bos] + _vocab if self._bos is not None and len(self._bos) > 0 else _vocab + _vocab = [self._eos] + _vocab if self._eos is not None and len(self._eos) > 0 else _vocab + _vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab + self.vocab = _vocab + list(self._punctuations) + if self.is_unique: + duplicates = {x for x in self.vocab if self.vocab.count(x) > 1} + assert ( + len(self.vocab) == len(self._char_to_id) == len(self._id_to_char) + ), f" [!] There are duplicate characters in the character set. {duplicates}" + + def char_to_id(self, char: str) -> int: + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + return self._id_to_char[idx] + + def print_log(self, level: int = 0): + """ + Prints the vocabulary in a nice format. 
+ """ + indent = "\t" * level + print(f"{indent}| > Characters: {self._characters}") + print(f"{indent}| > Punctuations: {self._punctuations}") + print(f"{indent}| > Pad: {self._pad}") + print(f"{indent}| > EOS: {self._eos}") + print(f"{indent}| > BOS: {self._bos}") + print(f"{indent}| > Blank: {self._blank}") + print(f"{indent}| > Vocab: {self.vocab}") + print(f"{indent}| > Num chars: {self.num_chars}") + + @staticmethod + def init_from_config(config: "Coqpit"): # pylint: disable=unused-argument + """Init your character class from a config. + + Implement this method for your subclass. + """ + # use character set from config + if config.characters is not None: + return BaseCharacters(**config.characters), config + # return default character set + characters = BaseCharacters() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + punctuations=self._punctuations, + pad=self._pad, + eos=self._eos, + bos=self._bos, + blank=self._blank, + is_unique=self.is_unique, + is_sorted=self.is_sorted, + ) + + +class IPAPhonemes(BaseCharacters): + + + def __init__( + self, + characters: str = _phonemes, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a IPAPhonemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + # band-aid for compatibility with old models + if "characters" in config and config.characters is not None: + if "phonemes" in config.characters and config.characters.phonemes is not None: + config.characters["characters"] = config.characters["phonemes"] + return ( + IPAPhonemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + # use character set from config + if config.characters is not None: + return IPAPhonemes(**config.characters), config + # return default character set + characters = IPAPhonemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +class Graphemes(BaseCharacters): + + + def __init__( + self, + characters: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a Graphemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + if config.characters is not None: + # band-aid for compatibility with old models + if "phonemes" in config.characters: + return ( + Graphemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + return Graphemes(**config.characters), config + characters = Graphemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +if __name__ == "__main__": + gr = Graphemes() + ph = IPAPhonemes() + gr.print_log() + ph.print_log() + + +class VitsCharacters(BaseCharacters): + """Characters class for VITs model for compatibility with pre-trained models""" + + def __init__( + self, + graphemes: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + ipa_characters: str = _phonemes, + ) -> None: + if ipa_characters is not None: + graphemes += ipa_characters + super().__init__(graphemes, punctuations, pad, None, None, "", is_unique=False, is_sorted=True) + + def _create_vocab(self): + self._vocab = [self._pad] + list(self._punctuations) + list(self._characters) + [self._blank] + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + # pylint: disable=unnecessary-comprehension + self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)} + + @staticmethod + def init_from_config(config): + _pad = config.characters.pad + _punctuations = config.characters.punctuations + _letters = config.characters.characters + _letters_ipa = config.characters.phonemes + return ( + VitsCharacters(graphemes=_letters, ipa_characters=_letters_ipa, punctuations=_punctuations, pad=_pad), + config, + ) + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + 
punctuations=self._punctuations, + pad=self._pad, + eos=None, + bos=None, + blank=self._blank, + is_unique=False, + is_sorted=True, + ) + +class TTSTokenizer: + def __init__( + self, + text_cleaner: Callable = None, + characters: "BaseCharacters" = None, + ): + self.text_cleaner = text_cleaner + self.characters = characters + self.not_found_characters = [] + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, new_characters): + self._characters = new_characters + self.pad_id = self.characters.char_to_id(self.characters.pad) if self.characters.pad else None + self.blank_id = self.characters.char_to_id(self.characters.blank) if self.characters.blank else None + + def encode(self, text: str) -> List[int]: + """Encodes a string of text as a sequence of IDs.""" + token_ids = [] + for char in text: + try: + idx = self.characters.char_to_id(char) + token_ids.append(idx) + except KeyError: + # discard but store not found characters + if char not in self.not_found_characters: + self.not_found_characters.append(char) + print(text) + print(f" [!] Character {repr(char)} not found in the vocabulary. Discarding it.") + return token_ids + + def text_to_ids(self, text: str, language: str = None) -> List[int]: # pylint: disable=unused-argument + text = self.text_cleaner(text) + text = self.encode(text) + text = self.intersperse_blank_char(text, True) + return text + + def pad_with_bos_eos(self, char_sequence: List[str]): + """Pads a sequence with the special BOS and EOS characters.""" + return [self.characters.bos_id] + list(char_sequence) + [self.characters.eos_id] + + def intersperse_blank_char(self, char_sequence: List[str], use_blank_char: bool = False): + """Intersperses the blank character between characters in a sequence. + + Use the ```blank``` character if defined else use the ```pad``` character. 
+ """ + char_to_use = self.characters.blank_id if use_blank_char else self.characters.pad + result = [char_to_use] * (len(char_sequence) * 2 + 1) + result[1::2] = char_sequence + return result + + @staticmethod + def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None): + text_cleaner = multilingual_cleaners + CharactersClass = VitsCharacters + characters, new_config = CharactersClass.init_from_config(config) + # new_config.characters.characters_class = get_import_path(characters) + new_config.characters.characters_class = VitsCharacters + return ( + TTSTokenizer(text_cleaner, characters),new_config) + + +def multilingual_cleaners(text): + """Pipeline for multilingual text""" + text = lowercase(text) + text = replace_symbols(text, lang=None) + text = remove_aux_symbols(text) + text = collapse_whitespace(text) + return text + +def lowercase(text): + return text.lower() + +def collapse_whitespace(text): + return re.sub(_whitespace_re, " ", text).strip() + +def replace_symbols(text, lang="en"): + + text = text.replace(";", ",") + text = text.replace("-", " ") if lang != "ca" else text.replace("-", "") + text = text.replace(":", ",") + if lang == "en": + text = text.replace("&", " and ") + elif lang == "fr": + text = text.replace("&", " et ") + elif lang == "pt": + text = text.replace("&", " e ") + elif lang == "ca": + text = text.replace("&", " i ") + text = text.replace("'", "") + return text + +def remove_aux_symbols(text): + text = re.sub(r"[\<\>\(\)\[\]\"]+", "", text) + return text \ No newline at end of file diff --git a/models/mai_male/jit_infer.py b/models/mai_male/jit_infer.py new file mode 100644 index 0000000000000000000000000000000000000000..3e7c6a438d62a2dba4219bcde21ab7da5845e763 --- /dev/null +++ b/models/mai_male/jit_infer.py @@ -0,0 +1,33 @@ +import os +from extra import TTSTokenizer, VitsConfig, CharactersConfig, VitsCharacters +import torch +import numpy as np + +#ch female +with open("chars.txt", 'r') as f: + letters = 
f.read().strip('\n') +model="mt_male_vits_30hrs.pt" +# text = " হলেও আমাদের সবার সার্বিক শৃঙ্খলা বোধের উন্নতি হবে" +text = "भेजना चाहते हैं हिंदी में मैसेज लेकिन नहीं आती टाइपिंग?" + +config = VitsConfig( + text_cleaner="multilingual_cleaners", + characters=CharactersConfig( + characters_class=VitsCharacters, + pad="", + eos="", + bos="", + blank="", + characters=letters, + punctuations="!¡'(),-.:;¿? ", + phonemes=None) + ) +tokenizer, config = TTSTokenizer.init_from_config(config) + +x = tokenizer.text_to_ids(text) +x = torch.from_numpy(np.array(x)).unsqueeze(0) +net = torch.jit.load(model) +with torch.no_grad(): + out2 = net(x) +import soundfile as sf +sf.write("jit.wav", out2.squeeze().cpu().numpy(), 22050) \ No newline at end of file diff --git a/models/mai_male/mt_male_vits_30hrs.pt b/models/mai_male/mt_male_vits_30hrs.pt new file mode 100644 index 0000000000000000000000000000000000000000..817d978ac1368aaac5be6b7b23bcdee07f69fbcc --- /dev/null +++ b/models/mai_male/mt_male_vits_30hrs.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb77b01fea9f06c9ab51cb580509ed513d73954704fb75ef6bb1a71ede919501 +size 333258444 diff --git a/models/mr_female/.gitattributes b/models/mr_female/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/mr_female/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy 
filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/mr_female/README.md b/models/mr_female/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b187bb7e7d837a367ccd0862441947ad412c77f7 --- /dev/null +++ b/models/mr_female/README.md @@ -0,0 +1,3 @@ +--- +license: cc-by-4.0 +--- diff --git a/models/mr_female/chars.txt b/models/mr_female/chars.txt new file mode 100644 index 0000000000000000000000000000000000000000..a73a53e48ffc4d92d743c8041dfdca5c00aa49cf --- /dev/null +++ b/models/mr_female/chars.txt @@ -0,0 +1 @@ +ऴॄछथई४श?अष खठदेणमघूऐवऎगसत!चफ}हयऔरए६ॲॠडोऊधट३पॊढआ,{ॐीभओॅाु२न"७ौब'ळलझिं°ँॉृय़उङ़ञै्क८.ऋऑऍॆ०ः९५इऱज१ diff --git a/models/mr_female/extra.py b/models/mr_female/extra.py new file mode 100644 index 0000000000000000000000000000000000000000..c7db561351da270a7c3931bfe0afefa7bc6d4853 --- /dev/null +++ b/models/mr_female/extra.py @@ -0,0 +1,787 @@ +from typing import Callable, Dict, List, Union +from dataclasses import asdict, dataclass, field + + +import re +from dataclasses import replace +from typing import Dict 
+_whitespace_re = re.compile(r"\s+") + +from dataclasses import dataclass, field +from typing import List + +# from TTS.tts.configs.shared_configs import BaseTTSConfig +# from TTS.tts.models.vits import VitsArgs, VitsAudioConfig + +@dataclass +class CharactersConfig(): + + characters_class: str = None + + # using BaseVocabulary + vocab_dict: Dict = None + + # using on BaseCharacters + pad: str = None + eos: str = None + bos: str = None + blank: str = None + characters: str = None + punctuations: str = None + phonemes: str = None + is_unique: bool = True # for backwards compatibility of models trained with char sets with duplicates + is_sorted: bool = True + + +@dataclass +class BaseTTSConfig(): + + # audio: BaseAudioConfig = field(default_factory=BaseAudioConfig) + # phoneme settings + use_phonemes: bool = False + phonemizer: str = None + phoneme_language: str = None + compute_input_seq_cache: bool = False + text_cleaner: str = None + enable_eos_bos_chars: bool = False + test_sentences_file: str = "" + phoneme_cache_path: str = None + # vocabulary parameters + characters: CharactersConfig = None + add_blank: bool = False + # training params + batch_group_size: int = 0 + loss_masking: bool = None + # dataloading + min_audio_len: int = 1 + max_audio_len: int = float("inf") + min_text_len: int = 1 + max_text_len: int = float("inf") + compute_f0: bool = False + compute_energy: bool = False + compute_linear_spec: bool = False + precompute_num_workers: int = 0 + use_noise_augment: bool = False + start_by_longest: bool = False + shuffle: bool = False + drop_last: bool = False + # dataset + datasets: str = None + # optimizer + optimizer: str = "radam" + optimizer_params: dict = None + # scheduler + lr_scheduler: str = None + lr_scheduler_params: dict = field(default_factory=lambda: {}) + # testing + test_sentences: List[str] = field(default_factory=lambda: []) + # evaluation + eval_split_max_size: int = None + eval_split_size: float = 0.01 + # weighted samplers + 
use_speaker_weighted_sampler: bool = False + speaker_weighted_sampler_alpha: float = 1.0 + use_language_weighted_sampler: bool = False + language_weighted_sampler_alpha: float = 1.0 + use_length_weighted_sampler: bool = False + length_weighted_sampler_alpha: float = 1.0 + + +@dataclass +class VitsAudioConfig(): + fft_size: int = 1024 + sample_rate: int = 22050 + win_length: int = 1024 + hop_length: int = 256 + num_mels: int = 80 + mel_fmin: int = 0 + mel_fmax: int = None + +@dataclass +class VitsArgs(): + num_chars: int = 100 + out_channels: int = 513 + spec_segment_size: int = 32 + hidden_channels: int = 192 + hidden_channels_ffn_text_encoder: int = 768 + num_heads_text_encoder: int = 2 + num_layers_text_encoder: int = 6 + kernel_size_text_encoder: int = 3 + dropout_p_text_encoder: float = 0.1 + dropout_p_duration_predictor: float = 0.5 + kernel_size_posterior_encoder: int = 5 + dilation_rate_posterior_encoder: int = 1 + num_layers_posterior_encoder: int = 16 + kernel_size_flow: int = 5 + dilation_rate_flow: int = 1 + num_layers_flow: int = 4 + resblock_type_decoder: str = "1" + resblock_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [3, 7, 11]) + resblock_dilation_sizes_decoder: List[List[int]] = field(default_factory=lambda: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]) + upsample_rates_decoder: List[int] = field(default_factory=lambda: [8, 8, 2, 2]) + upsample_initial_channel_decoder: int = 512 + upsample_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [16, 16, 4, 4]) + periods_multi_period_discriminator: List[int] = field(default_factory=lambda: [2, 3, 5, 7, 11]) + use_sdp: bool = True + noise_scale: float = 1.0 + inference_noise_scale: float = 0.667 + length_scale: float = 1 + noise_scale_dp: float = 1.0 + inference_noise_scale_dp: float = 1.0 + max_inference_len: int = None + init_discriminator: bool = True + use_spectral_norm_disriminator: bool = False + use_speaker_embedding: bool = False + num_speakers: int = 0 + speakers_file: str = 
None + d_vector_file: List[str] = None + speaker_embedding_channels: int = 256 + use_d_vector_file: bool = False + d_vector_dim: int = 0 + detach_dp_input: bool = True + use_language_embedding: bool = False + embedded_language_dim: int = 4 + num_languages: int = 0 + language_ids_file: str = None + use_speaker_encoder_as_loss: bool = False + speaker_encoder_config_path: str = "" + speaker_encoder_model_path: str = "" + condition_dp_on_speaker: bool = True + freeze_encoder: bool = False + freeze_DP: bool = False + freeze_PE: bool = False + freeze_flow_decoder: bool = False + freeze_waveform_decoder: bool = False + encoder_sample_rate: int = None + interpolate_z: bool = True + reinit_DP: bool = False + reinit_text_encoder: bool = False +@dataclass +class VitsConfig(BaseTTSConfig): + + model: str = "vits" + # model specific params + model_args: VitsArgs = field(default_factory=VitsArgs) + audio: VitsAudioConfig = field(default_factory=VitsAudioConfig) + + # optimizer + grad_clip: List[float] = field(default_factory=lambda: [1000, 1000]) + lr_gen: float = 0.0002 + lr_disc: float = 0.0002 + lr_scheduler_gen: str = "ExponentialLR" + lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + lr_scheduler_disc: str = "ExponentialLR" + lr_scheduler_disc_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + scheduler_after_epoch: bool = True + optimizer: str = "AdamW" + optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "eps": 1e-9, "weight_decay": 0.01}) + + # loss params + kl_loss_alpha: float = 1.0 + disc_loss_alpha: float = 1.0 + gen_loss_alpha: float = 1.0 + feat_loss_alpha: float = 1.0 + mel_loss_alpha: float = 45.0 + dur_loss_alpha: float = 1.0 + speaker_encoder_loss_alpha: float = 1.0 + + # data loader params + return_wav: bool = True + compute_linear_spec: bool = True + + # sampler params + use_weighted_sampler: bool = False # TODO: move it to the base config + 
weighted_sampler_attrs: dict = field(default_factory=lambda: {}) + weighted_sampler_multipliers: dict = field(default_factory=lambda: {}) + + # overrides + r: int = 1 # DO NOT CHANGE + add_blank: bool = True + + # testing + test_sentences: List[List] = field( + default_factory=lambda: [ + ["It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent."], + ["Be a voice, not an echo."], + ["I'm sorry Dave. I'm afraid I can't do that."], + ["This cake is great. It's so delicious and moist."], + ["Prior to November 22, 1963."], + ] + ) + + # multi-speaker settings + # use speaker embedding layer + num_speakers: int = 0 + use_speaker_embedding: bool = False + speakers_file: str = None + speaker_embedding_channels: int = 256 + language_ids_file: str = None + use_language_embedding: bool = False + + # use d-vectors + use_d_vector_file: bool = False + d_vector_file: List[str] = None + d_vector_dim: int = None + + def __post_init__(self): + pass + # for key, val in self.model_args.items(): + # if hasattr(self, key): + # self[key] = val + + + + + +def parse_symbols(): + return { + "pad": _pad, + "eos": _eos, + "bos": _bos, + "characters": _characters, + "punctuations": _punctuations, + "phonemes": _phonemes, + } + + +# DEFAULT SET OF GRAPHEMES +_pad = "" +_eos = "" +_bos = "" +_blank = "" # TODO: check if we need this alongside with PAD +_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" +_punctuations = "!'(),-.:;? " + + +# DEFAULT SET OF IPA PHONEMES +# Phonemes definition (All IPA characters) +_vowels = "iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ" +_non_pulmonic_consonants = "ʘɓǀɗǃʄǂɠǁʛ" +_pulmonic_consonants = "pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ" +_suprasegmentals = "ˈˌːˑ" +_other_symbols = "ʍwɥʜʢʡɕʑɺɧʲ" +_diacrilics = "ɚ˞ɫ" +_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics + + +class BaseVocabulary: + """Base Vocabulary class. 
+ + This class only needs a vocabulary dictionary without specifying the characters. + + Args: + vocab (Dict): A dictionary of characters and their corresponding indices. + """ + + def __init__(self, vocab: Dict, pad: str = None, blank: str = None, bos: str = None, eos: str = None): + self.vocab = vocab + self.pad = pad + self.blank = blank + self.bos = bos + self.eos = eos + + @property + def pad_id(self) -> int: + """Return the index of the padding character. If the padding character is not specified, return the length + of the vocabulary.""" + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + """Return the index of the blank character. If the blank character is not specified, return the length of + the vocabulary.""" + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def bos_id(self) -> int: + """Return the index of the bos character. If the bos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def eos_id(self) -> int: + """Return the index of the eos character. 
If the eos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def vocab(self): + """Return the vocabulary dictionary.""" + return self._vocab + + @vocab.setter + def vocab(self, vocab): + """Set the vocabulary dictionary and character mapping dictionaries.""" + self._vocab, self._char_to_id, self._id_to_char = None, None, None + if vocab is not None: + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self._vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self._vocab) # pylint: disable=unnecessary-comprehension + } + + @staticmethod + def init_from_config(config, **kwargs): + """Initialize from the given config.""" + if config.characters is not None and "vocab_dict" in config.characters and config.characters.vocab_dict: + return ( + BaseVocabulary( + config.characters.vocab_dict, + config.characters.pad, + config.characters.blank, + config.characters.bos, + config.characters.eos, + ), + config, + ) + return BaseVocabulary(**kwargs), config + + def to_config(self): + return CharactersConfig( + vocab_dict=self._vocab, + pad=self.pad, + eos=self.eos, + bos=self.bos, + blank=self.blank, + is_unique=False, + is_sorted=False, + ) + + @property + def num_chars(self): + """Return number of tokens in the vocabulary.""" + return len(self._vocab) + + def char_to_id(self, char: str) -> int: + """Map a character to an token ID.""" + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] 
{repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + """Map an token ID to a character.""" + return self._id_to_char[idx] + + +class BaseCharacters: + + + def __init__( + self, + characters: str = None, + punctuations: str = None, + pad: str = None, + eos: str = None, + bos: str = None, + blank: str = None, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + self._characters = characters + self._punctuations = punctuations + self._pad = pad + self._eos = eos + self._bos = bos + self._blank = blank + self.is_unique = is_unique + self.is_sorted = is_sorted + self._create_vocab() + + @property + def pad_id(self) -> int: + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def eos_id(self) -> int: + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def bos_id(self) -> int: + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, characters): + self._characters = characters + self._create_vocab() + + @property + def punctuations(self): + return self._punctuations + + @punctuations.setter + def punctuations(self, punctuations): + self._punctuations = punctuations + self._create_vocab() + + @property + def pad(self): + return self._pad + + @pad.setter + def pad(self, pad): + self._pad = pad + self._create_vocab() + + @property + def eos(self): + return self._eos + + @eos.setter + def eos(self, eos): + self._eos = eos + self._create_vocab() + + @property + def bos(self): + return self._bos + + @bos.setter + def bos(self, bos): + self._bos = bos + self._create_vocab() + + @property + def blank(self): + return self._blank + + @blank.setter + def blank(self, blank): + self._blank = blank + self._create_vocab() + + @property + 
def vocab(self): + return self._vocab + + @vocab.setter + def vocab(self, vocab): + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self.vocab) # pylint: disable=unnecessary-comprehension + } + + @property + def num_chars(self): + return len(self._vocab) + + def _create_vocab(self): + _vocab = self._characters + if self.is_unique: + _vocab = list(set(_vocab)) + if self.is_sorted: + _vocab = sorted(_vocab) + _vocab = list(_vocab) + _vocab = [self._blank] + _vocab if self._blank is not None and len(self._blank) > 0 else _vocab + _vocab = [self._bos] + _vocab if self._bos is not None and len(self._bos) > 0 else _vocab + _vocab = [self._eos] + _vocab if self._eos is not None and len(self._eos) > 0 else _vocab + _vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab + self.vocab = _vocab + list(self._punctuations) + if self.is_unique: + duplicates = {x for x in self.vocab if self.vocab.count(x) > 1} + assert ( + len(self.vocab) == len(self._char_to_id) == len(self._id_to_char) + ), f" [!] There are duplicate characters in the character set. {duplicates}" + + def char_to_id(self, char: str) -> int: + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + return self._id_to_char[idx] + + def print_log(self, level: int = 0): + """ + Prints the vocabulary in a nice format. 
+ """ + indent = "\t" * level + print(f"{indent}| > Characters: {self._characters}") + print(f"{indent}| > Punctuations: {self._punctuations}") + print(f"{indent}| > Pad: {self._pad}") + print(f"{indent}| > EOS: {self._eos}") + print(f"{indent}| > BOS: {self._bos}") + print(f"{indent}| > Blank: {self._blank}") + print(f"{indent}| > Vocab: {self.vocab}") + print(f"{indent}| > Num chars: {self.num_chars}") + + @staticmethod + def init_from_config(config: "Coqpit"): # pylint: disable=unused-argument + """Init your character class from a config. + + Implement this method for your subclass. + """ + # use character set from config + if config.characters is not None: + return BaseCharacters(**config.characters), config + # return default character set + characters = BaseCharacters() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + punctuations=self._punctuations, + pad=self._pad, + eos=self._eos, + bos=self._bos, + blank=self._blank, + is_unique=self.is_unique, + is_sorted=self.is_sorted, + ) + + +class IPAPhonemes(BaseCharacters): + + + def __init__( + self, + characters: str = _phonemes, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a IPAPhonemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + # band-aid for compatibility with old models + if "characters" in config and config.characters is not None: + if "phonemes" in config.characters and config.characters.phonemes is not None: + config.characters["characters"] = config.characters["phonemes"] + return ( + IPAPhonemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + # use character set from config + if config.characters is not None: + return IPAPhonemes(**config.characters), config + # return default character set + characters = IPAPhonemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +class Graphemes(BaseCharacters): + + + def __init__( + self, + characters: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a Graphemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + if config.characters is not None: + # band-aid for compatibility with old models + if "phonemes" in config.characters: + return ( + Graphemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + return Graphemes(**config.characters), config + characters = Graphemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +if __name__ == "__main__": + gr = Graphemes() + ph = IPAPhonemes() + gr.print_log() + ph.print_log() + + +class VitsCharacters(BaseCharacters): + """Characters class for VITs model for compatibility with pre-trained models""" + + def __init__( + self, + graphemes: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + ipa_characters: str = _phonemes, + ) -> None: + if ipa_characters is not None: + graphemes += ipa_characters + super().__init__(graphemes, punctuations, pad, None, None, "", is_unique=False, is_sorted=True) + + def _create_vocab(self): + self._vocab = [self._pad] + list(self._punctuations) + list(self._characters) + [self._blank] + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + # pylint: disable=unnecessary-comprehension + self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)} + + @staticmethod + def init_from_config(config): + _pad = config.characters.pad + _punctuations = config.characters.punctuations + _letters = config.characters.characters + _letters_ipa = config.characters.phonemes + return ( + VitsCharacters(graphemes=_letters, ipa_characters=_letters_ipa, punctuations=_punctuations, pad=_pad), + config, + ) + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + 
punctuations=self._punctuations, + pad=self._pad, + eos=None, + bos=None, + blank=self._blank, + is_unique=False, + is_sorted=True, + ) + +class TTSTokenizer: + def __init__( + self, + text_cleaner: Callable = None, + characters: "BaseCharacters" = None, + ): + self.text_cleaner = text_cleaner + self.characters = characters + self.not_found_characters = [] + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, new_characters): + self._characters = new_characters + self.pad_id = self.characters.char_to_id(self.characters.pad) if self.characters.pad else None + self.blank_id = self.characters.char_to_id(self.characters.blank) if self.characters.blank else None + + def encode(self, text: str) -> List[int]: + """Encodes a string of text as a sequence of IDs.""" + token_ids = [] + for char in text: + try: + idx = self.characters.char_to_id(char) + token_ids.append(idx) + except KeyError: + # discard but store not found characters + if char not in self.not_found_characters: + self.not_found_characters.append(char) + print(text) + print(f" [!] Character {repr(char)} not found in the vocabulary. Discarding it.") + return token_ids + + def text_to_ids(self, text: str, language: str = None) -> List[int]: # pylint: disable=unused-argument + text = self.text_cleaner(text) + text = self.encode(text) + text = self.intersperse_blank_char(text, True) + return text + + def pad_with_bos_eos(self, char_sequence: List[str]): + """Pads a sequence with the special BOS and EOS characters.""" + return [self.characters.bos_id] + list(char_sequence) + [self.characters.eos_id] + + def intersperse_blank_char(self, char_sequence: List[str], use_blank_char: bool = False): + """Intersperses the blank character between characters in a sequence. + + Use the ```blank``` character if defined else use the ```pad``` character. 
+ """ + char_to_use = self.characters.blank_id if use_blank_char else self.characters.pad + result = [char_to_use] * (len(char_sequence) * 2 + 1) + result[1::2] = char_sequence + return result + + @staticmethod + def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None): + text_cleaner = multilingual_cleaners + CharactersClass = VitsCharacters + characters, new_config = CharactersClass.init_from_config(config) + # new_config.characters.characters_class = get_import_path(characters) + new_config.characters.characters_class = VitsCharacters + return ( + TTSTokenizer(text_cleaner, characters),new_config) + + +def multilingual_cleaners(text): + """Pipeline for multilingual text""" + text = lowercase(text) + text = replace_symbols(text, lang=None) + text = remove_aux_symbols(text) + text = collapse_whitespace(text) + return text + +def lowercase(text): + return text.lower() + +def collapse_whitespace(text): + return re.sub(_whitespace_re, " ", text).strip() + +def replace_symbols(text, lang="en"): + + text = text.replace(";", ",") + text = text.replace("-", " ") if lang != "ca" else text.replace("-", "") + text = text.replace(":", ",") + if lang == "en": + text = text.replace("&", " and ") + elif lang == "fr": + text = text.replace("&", " et ") + elif lang == "pt": + text = text.replace("&", " e ") + elif lang == "ca": + text = text.replace("&", " i ") + text = text.replace("'", "") + return text + +def remove_aux_symbols(text): + text = re.sub(r"[\<\>\(\)\[\]\"]+", "", text) + return text \ No newline at end of file diff --git a/models/mr_female/jit_infer.py b/models/mr_female/jit_infer.py new file mode 100644 index 0000000000000000000000000000000000000000..dff8ceb33c0484f567226ff0d3f185b05d12d75a --- /dev/null +++ b/models/mr_female/jit_infer.py @@ -0,0 +1,32 @@ +import os +from extra import TTSTokenizer, VitsConfig, CharactersConfig, VitsCharacters +import torch +import numpy as np + +#ch female +with open("chars.txt", 'r') as f: + letters = 
f.read().strip('\n') +model="mr_female_vits_30hrs.pt" +text = "फिल्म गर्दिश में अमरीश पुरी के साथ जैकी श्रॉफ, ऐश्वर्या, डिंपल कपाड़िया" + +config = VitsConfig( + text_cleaner="multilingual_cleaners", + characters=CharactersConfig( + characters_class=VitsCharacters, + pad="", + eos="", + bos="", + blank="", + characters=letters, + punctuations="!¡'(),-.:;¿? ", + phonemes=None) + ) +tokenizer, config = TTSTokenizer.init_from_config(config) + +x = tokenizer.text_to_ids(text) +x = torch.from_numpy(np.array(x)).unsqueeze(0) +net = torch.jit.load(model) +with torch.no_grad(): + out2 = net(x) +import soundfile as sf +sf.write("jit.wav", out2.squeeze().cpu().numpy(), 22050) \ No newline at end of file diff --git a/models/mr_female/mr_female_vits_30hrs.pt b/models/mr_female/mr_female_vits_30hrs.pt new file mode 100644 index 0000000000000000000000000000000000000000..951b3484ccbe15851ef292cf74d6d297005be7e1 --- /dev/null +++ b/models/mr_female/mr_female_vits_30hrs.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5da210ece20c171ee09f8969d1755fb475a43c2c3166a4a088f36b9aa828dbb7 +size 333273734 diff --git a/models/mr_male/.gitattributes b/models/mr_male/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/mr_male/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz 
filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/mr_male/README.md b/models/mr_male/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b187bb7e7d837a367ccd0862441947ad412c77f7 --- /dev/null +++ b/models/mr_male/README.md @@ -0,0 +1,3 @@ +--- +license: cc-by-4.0 +--- diff --git a/models/mr_male/chars.txt b/models/mr_male/chars.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc298ad1833e2d2d4e74009ae26353f63fca85ac --- /dev/null +++ b/models/mr_male/chars.txt @@ -0,0 +1 @@ +फ़ॄछथईग़o४श?अष॰ खठदेणमघूऐवऒऎगसत!चफहयऔरएॲ६ॠख़डोढ़ऊधट३ड़पॊढक़आ,ॐीभज़ओऽॅऩाु२न"७ौब'ळलझिंँॉृय़उङ़ञै्कऋ.८ऑऍॆ०ः९५इऱज१ diff --git a/models/mr_male/extra.py b/models/mr_male/extra.py new file mode 100644 index 0000000000000000000000000000000000000000..c7db561351da270a7c3931bfe0afefa7bc6d4853 --- /dev/null +++ b/models/mr_male/extra.py @@ -0,0 +1,787 @@ +from typing import Callable, Dict, List, Union +from dataclasses import asdict, dataclass, field + + +import re +from dataclasses import replace +from typing import Dict +_whitespace_re = re.compile(r"\s+") + +from 
dataclasses import dataclass, field +from typing import List + +# from TTS.tts.configs.shared_configs import BaseTTSConfig +# from TTS.tts.models.vits import VitsArgs, VitsAudioConfig + +@dataclass +class CharactersConfig(): + + characters_class: str = None + + # using BaseVocabulary + vocab_dict: Dict = None + + # using on BaseCharacters + pad: str = None + eos: str = None + bos: str = None + blank: str = None + characters: str = None + punctuations: str = None + phonemes: str = None + is_unique: bool = True # for backwards compatibility of models trained with char sets with duplicates + is_sorted: bool = True + + +@dataclass +class BaseTTSConfig(): + + # audio: BaseAudioConfig = field(default_factory=BaseAudioConfig) + # phoneme settings + use_phonemes: bool = False + phonemizer: str = None + phoneme_language: str = None + compute_input_seq_cache: bool = False + text_cleaner: str = None + enable_eos_bos_chars: bool = False + test_sentences_file: str = "" + phoneme_cache_path: str = None + # vocabulary parameters + characters: CharactersConfig = None + add_blank: bool = False + # training params + batch_group_size: int = 0 + loss_masking: bool = None + # dataloading + min_audio_len: int = 1 + max_audio_len: int = float("inf") + min_text_len: int = 1 + max_text_len: int = float("inf") + compute_f0: bool = False + compute_energy: bool = False + compute_linear_spec: bool = False + precompute_num_workers: int = 0 + use_noise_augment: bool = False + start_by_longest: bool = False + shuffle: bool = False + drop_last: bool = False + # dataset + datasets: str = None + # optimizer + optimizer: str = "radam" + optimizer_params: dict = None + # scheduler + lr_scheduler: str = None + lr_scheduler_params: dict = field(default_factory=lambda: {}) + # testing + test_sentences: List[str] = field(default_factory=lambda: []) + # evaluation + eval_split_max_size: int = None + eval_split_size: float = 0.01 + # weighted samplers + use_speaker_weighted_sampler: bool = False + 
speaker_weighted_sampler_alpha: float = 1.0 + use_language_weighted_sampler: bool = False + language_weighted_sampler_alpha: float = 1.0 + use_length_weighted_sampler: bool = False + length_weighted_sampler_alpha: float = 1.0 + + +@dataclass +class VitsAudioConfig(): + fft_size: int = 1024 + sample_rate: int = 22050 + win_length: int = 1024 + hop_length: int = 256 + num_mels: int = 80 + mel_fmin: int = 0 + mel_fmax: int = None + +@dataclass +class VitsArgs(): + num_chars: int = 100 + out_channels: int = 513 + spec_segment_size: int = 32 + hidden_channels: int = 192 + hidden_channels_ffn_text_encoder: int = 768 + num_heads_text_encoder: int = 2 + num_layers_text_encoder: int = 6 + kernel_size_text_encoder: int = 3 + dropout_p_text_encoder: float = 0.1 + dropout_p_duration_predictor: float = 0.5 + kernel_size_posterior_encoder: int = 5 + dilation_rate_posterior_encoder: int = 1 + num_layers_posterior_encoder: int = 16 + kernel_size_flow: int = 5 + dilation_rate_flow: int = 1 + num_layers_flow: int = 4 + resblock_type_decoder: str = "1" + resblock_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [3, 7, 11]) + resblock_dilation_sizes_decoder: List[List[int]] = field(default_factory=lambda: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]) + upsample_rates_decoder: List[int] = field(default_factory=lambda: [8, 8, 2, 2]) + upsample_initial_channel_decoder: int = 512 + upsample_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [16, 16, 4, 4]) + periods_multi_period_discriminator: List[int] = field(default_factory=lambda: [2, 3, 5, 7, 11]) + use_sdp: bool = True + noise_scale: float = 1.0 + inference_noise_scale: float = 0.667 + length_scale: float = 1 + noise_scale_dp: float = 1.0 + inference_noise_scale_dp: float = 1.0 + max_inference_len: int = None + init_discriminator: bool = True + use_spectral_norm_disriminator: bool = False + use_speaker_embedding: bool = False + num_speakers: int = 0 + speakers_file: str = None + d_vector_file: List[str] = None + 
speaker_embedding_channels: int = 256 + use_d_vector_file: bool = False + d_vector_dim: int = 0 + detach_dp_input: bool = True + use_language_embedding: bool = False + embedded_language_dim: int = 4 + num_languages: int = 0 + language_ids_file: str = None + use_speaker_encoder_as_loss: bool = False + speaker_encoder_config_path: str = "" + speaker_encoder_model_path: str = "" + condition_dp_on_speaker: bool = True + freeze_encoder: bool = False + freeze_DP: bool = False + freeze_PE: bool = False + freeze_flow_decoder: bool = False + freeze_waveform_decoder: bool = False + encoder_sample_rate: int = None + interpolate_z: bool = True + reinit_DP: bool = False + reinit_text_encoder: bool = False +@dataclass +class VitsConfig(BaseTTSConfig): + + model: str = "vits" + # model specific params + model_args: VitsArgs = field(default_factory=VitsArgs) + audio: VitsAudioConfig = field(default_factory=VitsAudioConfig) + + # optimizer + grad_clip: List[float] = field(default_factory=lambda: [1000, 1000]) + lr_gen: float = 0.0002 + lr_disc: float = 0.0002 + lr_scheduler_gen: str = "ExponentialLR" + lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + lr_scheduler_disc: str = "ExponentialLR" + lr_scheduler_disc_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + scheduler_after_epoch: bool = True + optimizer: str = "AdamW" + optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "eps": 1e-9, "weight_decay": 0.01}) + + # loss params + kl_loss_alpha: float = 1.0 + disc_loss_alpha: float = 1.0 + gen_loss_alpha: float = 1.0 + feat_loss_alpha: float = 1.0 + mel_loss_alpha: float = 45.0 + dur_loss_alpha: float = 1.0 + speaker_encoder_loss_alpha: float = 1.0 + + # data loader params + return_wav: bool = True + compute_linear_spec: bool = True + + # sampler params + use_weighted_sampler: bool = False # TODO: move it to the base config + weighted_sampler_attrs: dict = 
field(default_factory=lambda: {}) + weighted_sampler_multipliers: dict = field(default_factory=lambda: {}) + + # overrides + r: int = 1 # DO NOT CHANGE + add_blank: bool = True + + # testing + test_sentences: List[List] = field( + default_factory=lambda: [ + ["It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent."], + ["Be a voice, not an echo."], + ["I'm sorry Dave. I'm afraid I can't do that."], + ["This cake is great. It's so delicious and moist."], + ["Prior to November 22, 1963."], + ] + ) + + # multi-speaker settings + # use speaker embedding layer + num_speakers: int = 0 + use_speaker_embedding: bool = False + speakers_file: str = None + speaker_embedding_channels: int = 256 + language_ids_file: str = None + use_language_embedding: bool = False + + # use d-vectors + use_d_vector_file: bool = False + d_vector_file: List[str] = None + d_vector_dim: int = None + + def __post_init__(self): + pass + # for key, val in self.model_args.items(): + # if hasattr(self, key): + # self[key] = val + + + + + +def parse_symbols(): + return { + "pad": _pad, + "eos": _eos, + "bos": _bos, + "characters": _characters, + "punctuations": _punctuations, + "phonemes": _phonemes, + } + + +# DEFAULT SET OF GRAPHEMES +_pad = "" +_eos = "" +_bos = "" +_blank = "" # TODO: check if we need this alongside with PAD +_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" +_punctuations = "!'(),-.:;? " + + +# DEFAULT SET OF IPA PHONEMES +# Phonemes definition (All IPA characters) +_vowels = "iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ" +_non_pulmonic_consonants = "ʘɓǀɗǃʄǂɠǁʛ" +_pulmonic_consonants = "pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ" +_suprasegmentals = "ˈˌːˑ" +_other_symbols = "ʍwɥʜʢʡɕʑɺɧʲ" +_diacrilics = "ɚ˞ɫ" +_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics + + +class BaseVocabulary: + """Base Vocabulary class. 
+ + This class only needs a vocabulary dictionary without specifying the characters. + + Args: + vocab (Dict): A dictionary of characters and their corresponding indices. + """ + + def __init__(self, vocab: Dict, pad: str = None, blank: str = None, bos: str = None, eos: str = None): + self.vocab = vocab + self.pad = pad + self.blank = blank + self.bos = bos + self.eos = eos + + @property + def pad_id(self) -> int: + """Return the index of the padding character. If the padding character is not specified, return the length + of the vocabulary.""" + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + """Return the index of the blank character. If the blank character is not specified, return the length of + the vocabulary.""" + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def bos_id(self) -> int: + """Return the index of the bos character. If the bos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def eos_id(self) -> int: + """Return the index of the eos character. 
If the eos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def vocab(self): + """Return the vocabulary dictionary.""" + return self._vocab + + @vocab.setter + def vocab(self, vocab): + """Set the vocabulary dictionary and character mapping dictionaries.""" + self._vocab, self._char_to_id, self._id_to_char = None, None, None + if vocab is not None: + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self._vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self._vocab) # pylint: disable=unnecessary-comprehension + } + + @staticmethod + def init_from_config(config, **kwargs): + """Initialize from the given config.""" + if config.characters is not None and "vocab_dict" in config.characters and config.characters.vocab_dict: + return ( + BaseVocabulary( + config.characters.vocab_dict, + config.characters.pad, + config.characters.blank, + config.characters.bos, + config.characters.eos, + ), + config, + ) + return BaseVocabulary(**kwargs), config + + def to_config(self): + return CharactersConfig( + vocab_dict=self._vocab, + pad=self.pad, + eos=self.eos, + bos=self.bos, + blank=self.blank, + is_unique=False, + is_sorted=False, + ) + + @property + def num_chars(self): + """Return number of tokens in the vocabulary.""" + return len(self._vocab) + + def char_to_id(self, char: str) -> int: + """Map a character to an token ID.""" + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] 
{repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + """Map an token ID to a character.""" + return self._id_to_char[idx] + + +class BaseCharacters: + + + def __init__( + self, + characters: str = None, + punctuations: str = None, + pad: str = None, + eos: str = None, + bos: str = None, + blank: str = None, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + self._characters = characters + self._punctuations = punctuations + self._pad = pad + self._eos = eos + self._bos = bos + self._blank = blank + self.is_unique = is_unique + self.is_sorted = is_sorted + self._create_vocab() + + @property + def pad_id(self) -> int: + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def eos_id(self) -> int: + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def bos_id(self) -> int: + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, characters): + self._characters = characters + self._create_vocab() + + @property + def punctuations(self): + return self._punctuations + + @punctuations.setter + def punctuations(self, punctuations): + self._punctuations = punctuations + self._create_vocab() + + @property + def pad(self): + return self._pad + + @pad.setter + def pad(self, pad): + self._pad = pad + self._create_vocab() + + @property + def eos(self): + return self._eos + + @eos.setter + def eos(self, eos): + self._eos = eos + self._create_vocab() + + @property + def bos(self): + return self._bos + + @bos.setter + def bos(self, bos): + self._bos = bos + self._create_vocab() + + @property + def blank(self): + return self._blank + + @blank.setter + def blank(self, blank): + self._blank = blank + self._create_vocab() + + @property + 
def vocab(self): + return self._vocab + + @vocab.setter + def vocab(self, vocab): + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self.vocab) # pylint: disable=unnecessary-comprehension + } + + @property + def num_chars(self): + return len(self._vocab) + + def _create_vocab(self): + _vocab = self._characters + if self.is_unique: + _vocab = list(set(_vocab)) + if self.is_sorted: + _vocab = sorted(_vocab) + _vocab = list(_vocab) + _vocab = [self._blank] + _vocab if self._blank is not None and len(self._blank) > 0 else _vocab + _vocab = [self._bos] + _vocab if self._bos is not None and len(self._bos) > 0 else _vocab + _vocab = [self._eos] + _vocab if self._eos is not None and len(self._eos) > 0 else _vocab + _vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab + self.vocab = _vocab + list(self._punctuations) + if self.is_unique: + duplicates = {x for x in self.vocab if self.vocab.count(x) > 1} + assert ( + len(self.vocab) == len(self._char_to_id) == len(self._id_to_char) + ), f" [!] There are duplicate characters in the character set. {duplicates}" + + def char_to_id(self, char: str) -> int: + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + return self._id_to_char[idx] + + def print_log(self, level: int = 0): + """ + Prints the vocabulary in a nice format. 
+ """ + indent = "\t" * level + print(f"{indent}| > Characters: {self._characters}") + print(f"{indent}| > Punctuations: {self._punctuations}") + print(f"{indent}| > Pad: {self._pad}") + print(f"{indent}| > EOS: {self._eos}") + print(f"{indent}| > BOS: {self._bos}") + print(f"{indent}| > Blank: {self._blank}") + print(f"{indent}| > Vocab: {self.vocab}") + print(f"{indent}| > Num chars: {self.num_chars}") + + @staticmethod + def init_from_config(config: "Coqpit"): # pylint: disable=unused-argument + """Init your character class from a config. + + Implement this method for your subclass. + """ + # use character set from config + if config.characters is not None: + return BaseCharacters(**config.characters), config + # return default character set + characters = BaseCharacters() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + punctuations=self._punctuations, + pad=self._pad, + eos=self._eos, + bos=self._bos, + blank=self._blank, + is_unique=self.is_unique, + is_sorted=self.is_sorted, + ) + + +class IPAPhonemes(BaseCharacters): + + + def __init__( + self, + characters: str = _phonemes, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a IPAPhonemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + # band-aid for compatibility with old models + if "characters" in config and config.characters is not None: + if "phonemes" in config.characters and config.characters.phonemes is not None: + config.characters["characters"] = config.characters["phonemes"] + return ( + IPAPhonemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + # use character set from config + if config.characters is not None: + return IPAPhonemes(**config.characters), config + # return default character set + characters = IPAPhonemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +class Graphemes(BaseCharacters): + + + def __init__( + self, + characters: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a Graphemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + if config.characters is not None: + # band-aid for compatibility with old models + if "phonemes" in config.characters: + return ( + Graphemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + return Graphemes(**config.characters), config + characters = Graphemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +if __name__ == "__main__": + gr = Graphemes() + ph = IPAPhonemes() + gr.print_log() + ph.print_log() + + +class VitsCharacters(BaseCharacters): + """Characters class for VITs model for compatibility with pre-trained models""" + + def __init__( + self, + graphemes: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + ipa_characters: str = _phonemes, + ) -> None: + if ipa_characters is not None: + graphemes += ipa_characters + super().__init__(graphemes, punctuations, pad, None, None, "", is_unique=False, is_sorted=True) + + def _create_vocab(self): + self._vocab = [self._pad] + list(self._punctuations) + list(self._characters) + [self._blank] + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + # pylint: disable=unnecessary-comprehension + self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)} + + @staticmethod + def init_from_config(config): + _pad = config.characters.pad + _punctuations = config.characters.punctuations + _letters = config.characters.characters + _letters_ipa = config.characters.phonemes + return ( + VitsCharacters(graphemes=_letters, ipa_characters=_letters_ipa, punctuations=_punctuations, pad=_pad), + config, + ) + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + 
punctuations=self._punctuations, + pad=self._pad, + eos=None, + bos=None, + blank=self._blank, + is_unique=False, + is_sorted=True, + ) + +class TTSTokenizer: + def __init__( + self, + text_cleaner: Callable = None, + characters: "BaseCharacters" = None, + ): + self.text_cleaner = text_cleaner + self.characters = characters + self.not_found_characters = [] + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, new_characters): + self._characters = new_characters + self.pad_id = self.characters.char_to_id(self.characters.pad) if self.characters.pad else None + self.blank_id = self.characters.char_to_id(self.characters.blank) if self.characters.blank else None + + def encode(self, text: str) -> List[int]: + """Encodes a string of text as a sequence of IDs.""" + token_ids = [] + for char in text: + try: + idx = self.characters.char_to_id(char) + token_ids.append(idx) + except KeyError: + # discard but store not found characters + if char not in self.not_found_characters: + self.not_found_characters.append(char) + print(text) + print(f" [!] Character {repr(char)} not found in the vocabulary. Discarding it.") + return token_ids + + def text_to_ids(self, text: str, language: str = None) -> List[int]: # pylint: disable=unused-argument + text = self.text_cleaner(text) + text = self.encode(text) + text = self.intersperse_blank_char(text, True) + return text + + def pad_with_bos_eos(self, char_sequence: List[str]): + """Pads a sequence with the special BOS and EOS characters.""" + return [self.characters.bos_id] + list(char_sequence) + [self.characters.eos_id] + + def intersperse_blank_char(self, char_sequence: List[str], use_blank_char: bool = False): + """Intersperses the blank character between characters in a sequence. + + Use the ```blank``` character if defined else use the ```pad``` character. 
+ """ + char_to_use = self.characters.blank_id if use_blank_char else self.characters.pad + result = [char_to_use] * (len(char_sequence) * 2 + 1) + result[1::2] = char_sequence + return result + + @staticmethod + def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None): + text_cleaner = multilingual_cleaners + CharactersClass = VitsCharacters + characters, new_config = CharactersClass.init_from_config(config) + # new_config.characters.characters_class = get_import_path(characters) + new_config.characters.characters_class = VitsCharacters + return ( + TTSTokenizer(text_cleaner, characters),new_config) + + +def multilingual_cleaners(text): + """Pipeline for multilingual text""" + text = lowercase(text) + text = replace_symbols(text, lang=None) + text = remove_aux_symbols(text) + text = collapse_whitespace(text) + return text + +def lowercase(text): + return text.lower() + +def collapse_whitespace(text): + return re.sub(_whitespace_re, " ", text).strip() + +def replace_symbols(text, lang="en"): + + text = text.replace(";", ",") + text = text.replace("-", " ") if lang != "ca" else text.replace("-", "") + text = text.replace(":", ",") + if lang == "en": + text = text.replace("&", " and ") + elif lang == "fr": + text = text.replace("&", " et ") + elif lang == "pt": + text = text.replace("&", " e ") + elif lang == "ca": + text = text.replace("&", " i ") + text = text.replace("'", "") + return text + +def remove_aux_symbols(text): + text = re.sub(r"[\<\>\(\)\[\]\"]+", "", text) + return text \ No newline at end of file diff --git a/models/mr_male/jit_infer.py b/models/mr_male/jit_infer.py new file mode 100644 index 0000000000000000000000000000000000000000..15234585333a4aec39bce3c52b1f76e0d19ccc2a --- /dev/null +++ b/models/mr_male/jit_infer.py @@ -0,0 +1,32 @@ +import os +from extra import TTSTokenizer, VitsConfig, CharactersConfig, VitsCharacters +import torch +import numpy as np + +#ch female +with open("chars.txt", 'r') as f: + letters = 
f.read().strip('\n') +model="mr_male_vits_30hrs.pt" +text = "फिल्म गर्दिश में अमरीश पुरी के साथ जैकी श्रॉफ, ऐश्वर्या, डिंपल कपाड़िया" + +config = VitsConfig( + text_cleaner="multilingual_cleaners", + characters=CharactersConfig( + characters_class=VitsCharacters, + pad="", + eos="", + bos="", + blank="", + characters=letters, + punctuations="!¡'(),-.:;¿? ", + phonemes=None) + ) +tokenizer, config = TTSTokenizer.init_from_config(config) + +x = tokenizer.text_to_ids(text) +x = torch.from_numpy(np.array(x)).unsqueeze(0) +net = torch.jit.load(model) +with torch.no_grad(): + out2 = net(x) +import soundfile as sf +sf.write("jit.wav", out2.squeeze().cpu().numpy(), 22050) \ No newline at end of file diff --git a/models/mr_male/mr_male_vits_30hrs.pt b/models/mr_male/mr_male_vits_30hrs.pt new file mode 100644 index 0000000000000000000000000000000000000000..d5417eec77c8e77cf129914adf8cddbfe3da12a3 --- /dev/null +++ b/models/mr_male/mr_male_vits_30hrs.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8de04cf53233c6018c4a1d86cad6bbabc61b14fe8601128a0b6a0ca9573d8d64 +size 333274444 diff --git a/models/te_female/.gitattributes b/models/te_female/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/te_female/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz 
filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/te_female/README.md b/models/te_female/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b187bb7e7d837a367ccd0862441947ad412c77f7 --- /dev/null +++ b/models/te_female/README.md @@ -0,0 +1,3 @@ +--- +license: cc-by-4.0 +--- diff --git a/models/te_female/chars.txt b/models/te_female/chars.txt new file mode 100644 index 0000000000000000000000000000000000000000..9b1108776b56ab920bc67d48446eaa91ff89af72 --- /dev/null +++ b/models/te_female/chars.txt @@ -0,0 +1 @@ +న,ఇగబవహటంభరఖృౌూయీఉఢెఒపఓఠథదజఐఈణఫఛ'మషధేఔై?శిళఞలఘఆతడఊసఎ్ఏోచకు!"ః ొఝా.అ diff --git a/models/te_female/extra.py b/models/te_female/extra.py new file mode 100644 index 0000000000000000000000000000000000000000..c7db561351da270a7c3931bfe0afefa7bc6d4853 --- /dev/null +++ b/models/te_female/extra.py @@ -0,0 +1,787 @@ +from typing import Callable, Dict, List, Union +from dataclasses import asdict, dataclass, field + + +import re +from dataclasses import replace +from typing import Dict +_whitespace_re = re.compile(r"\s+") + +from dataclasses import dataclass, 
field +from typing import List + +# from TTS.tts.configs.shared_configs import BaseTTSConfig +# from TTS.tts.models.vits import VitsArgs, VitsAudioConfig + +@dataclass +class CharactersConfig(): + + characters_class: str = None + + # using BaseVocabulary + vocab_dict: Dict = None + + # using on BaseCharacters + pad: str = None + eos: str = None + bos: str = None + blank: str = None + characters: str = None + punctuations: str = None + phonemes: str = None + is_unique: bool = True # for backwards compatibility of models trained with char sets with duplicates + is_sorted: bool = True + + +@dataclass +class BaseTTSConfig(): + + # audio: BaseAudioConfig = field(default_factory=BaseAudioConfig) + # phoneme settings + use_phonemes: bool = False + phonemizer: str = None + phoneme_language: str = None + compute_input_seq_cache: bool = False + text_cleaner: str = None + enable_eos_bos_chars: bool = False + test_sentences_file: str = "" + phoneme_cache_path: str = None + # vocabulary parameters + characters: CharactersConfig = None + add_blank: bool = False + # training params + batch_group_size: int = 0 + loss_masking: bool = None + # dataloading + min_audio_len: int = 1 + max_audio_len: int = float("inf") + min_text_len: int = 1 + max_text_len: int = float("inf") + compute_f0: bool = False + compute_energy: bool = False + compute_linear_spec: bool = False + precompute_num_workers: int = 0 + use_noise_augment: bool = False + start_by_longest: bool = False + shuffle: bool = False + drop_last: bool = False + # dataset + datasets: str = None + # optimizer + optimizer: str = "radam" + optimizer_params: dict = None + # scheduler + lr_scheduler: str = None + lr_scheduler_params: dict = field(default_factory=lambda: {}) + # testing + test_sentences: List[str] = field(default_factory=lambda: []) + # evaluation + eval_split_max_size: int = None + eval_split_size: float = 0.01 + # weighted samplers + use_speaker_weighted_sampler: bool = False + speaker_weighted_sampler_alpha: float = 
1.0 + use_language_weighted_sampler: bool = False + language_weighted_sampler_alpha: float = 1.0 + use_length_weighted_sampler: bool = False + length_weighted_sampler_alpha: float = 1.0 + + +@dataclass +class VitsAudioConfig(): + fft_size: int = 1024 + sample_rate: int = 22050 + win_length: int = 1024 + hop_length: int = 256 + num_mels: int = 80 + mel_fmin: int = 0 + mel_fmax: int = None + +@dataclass +class VitsArgs(): + num_chars: int = 100 + out_channels: int = 513 + spec_segment_size: int = 32 + hidden_channels: int = 192 + hidden_channels_ffn_text_encoder: int = 768 + num_heads_text_encoder: int = 2 + num_layers_text_encoder: int = 6 + kernel_size_text_encoder: int = 3 + dropout_p_text_encoder: float = 0.1 + dropout_p_duration_predictor: float = 0.5 + kernel_size_posterior_encoder: int = 5 + dilation_rate_posterior_encoder: int = 1 + num_layers_posterior_encoder: int = 16 + kernel_size_flow: int = 5 + dilation_rate_flow: int = 1 + num_layers_flow: int = 4 + resblock_type_decoder: str = "1" + resblock_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [3, 7, 11]) + resblock_dilation_sizes_decoder: List[List[int]] = field(default_factory=lambda: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]) + upsample_rates_decoder: List[int] = field(default_factory=lambda: [8, 8, 2, 2]) + upsample_initial_channel_decoder: int = 512 + upsample_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [16, 16, 4, 4]) + periods_multi_period_discriminator: List[int] = field(default_factory=lambda: [2, 3, 5, 7, 11]) + use_sdp: bool = True + noise_scale: float = 1.0 + inference_noise_scale: float = 0.667 + length_scale: float = 1 + noise_scale_dp: float = 1.0 + inference_noise_scale_dp: float = 1.0 + max_inference_len: int = None + init_discriminator: bool = True + use_spectral_norm_disriminator: bool = False + use_speaker_embedding: bool = False + num_speakers: int = 0 + speakers_file: str = None + d_vector_file: List[str] = None + speaker_embedding_channels: int = 256 + 
use_d_vector_file: bool = False + d_vector_dim: int = 0 + detach_dp_input: bool = True + use_language_embedding: bool = False + embedded_language_dim: int = 4 + num_languages: int = 0 + language_ids_file: str = None + use_speaker_encoder_as_loss: bool = False + speaker_encoder_config_path: str = "" + speaker_encoder_model_path: str = "" + condition_dp_on_speaker: bool = True + freeze_encoder: bool = False + freeze_DP: bool = False + freeze_PE: bool = False + freeze_flow_decoder: bool = False + freeze_waveform_decoder: bool = False + encoder_sample_rate: int = None + interpolate_z: bool = True + reinit_DP: bool = False + reinit_text_encoder: bool = False +@dataclass +class VitsConfig(BaseTTSConfig): + + model: str = "vits" + # model specific params + model_args: VitsArgs = field(default_factory=VitsArgs) + audio: VitsAudioConfig = field(default_factory=VitsAudioConfig) + + # optimizer + grad_clip: List[float] = field(default_factory=lambda: [1000, 1000]) + lr_gen: float = 0.0002 + lr_disc: float = 0.0002 + lr_scheduler_gen: str = "ExponentialLR" + lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + lr_scheduler_disc: str = "ExponentialLR" + lr_scheduler_disc_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + scheduler_after_epoch: bool = True + optimizer: str = "AdamW" + optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "eps": 1e-9, "weight_decay": 0.01}) + + # loss params + kl_loss_alpha: float = 1.0 + disc_loss_alpha: float = 1.0 + gen_loss_alpha: float = 1.0 + feat_loss_alpha: float = 1.0 + mel_loss_alpha: float = 45.0 + dur_loss_alpha: float = 1.0 + speaker_encoder_loss_alpha: float = 1.0 + + # data loader params + return_wav: bool = True + compute_linear_spec: bool = True + + # sampler params + use_weighted_sampler: bool = False # TODO: move it to the base config + weighted_sampler_attrs: dict = field(default_factory=lambda: {}) + 
weighted_sampler_multipliers: dict = field(default_factory=lambda: {}) + + # overrides + r: int = 1 # DO NOT CHANGE + add_blank: bool = True + + # testing + test_sentences: List[List] = field( + default_factory=lambda: [ + ["It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent."], + ["Be a voice, not an echo."], + ["I'm sorry Dave. I'm afraid I can't do that."], + ["This cake is great. It's so delicious and moist."], + ["Prior to November 22, 1963."], + ] + ) + + # multi-speaker settings + # use speaker embedding layer + num_speakers: int = 0 + use_speaker_embedding: bool = False + speakers_file: str = None + speaker_embedding_channels: int = 256 + language_ids_file: str = None + use_language_embedding: bool = False + + # use d-vectors + use_d_vector_file: bool = False + d_vector_file: List[str] = None + d_vector_dim: int = None + + def __post_init__(self): + pass + # for key, val in self.model_args.items(): + # if hasattr(self, key): + # self[key] = val + + + + + +def parse_symbols(): + return { + "pad": _pad, + "eos": _eos, + "bos": _bos, + "characters": _characters, + "punctuations": _punctuations, + "phonemes": _phonemes, + } + + +# DEFAULT SET OF GRAPHEMES +_pad = "" +_eos = "" +_bos = "" +_blank = "" # TODO: check if we need this alongside with PAD +_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" +_punctuations = "!'(),-.:;? " + + +# DEFAULT SET OF IPA PHONEMES +# Phonemes definition (All IPA characters) +_vowels = "iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ" +_non_pulmonic_consonants = "ʘɓǀɗǃʄǂɠǁʛ" +_pulmonic_consonants = "pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ" +_suprasegmentals = "ˈˌːˑ" +_other_symbols = "ʍwɥʜʢʡɕʑɺɧʲ" +_diacrilics = "ɚ˞ɫ" +_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics + + +class BaseVocabulary: + """Base Vocabulary class. 
+ + This class only needs a vocabulary dictionary without specifying the characters. + + Args: + vocab (Dict): A dictionary of characters and their corresponding indices. + """ + + def __init__(self, vocab: Dict, pad: str = None, blank: str = None, bos: str = None, eos: str = None): + self.vocab = vocab + self.pad = pad + self.blank = blank + self.bos = bos + self.eos = eos + + @property + def pad_id(self) -> int: + """Return the index of the padding character. If the padding character is not specified, return the length + of the vocabulary.""" + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + """Return the index of the blank character. If the blank character is not specified, return the length of + the vocabulary.""" + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def bos_id(self) -> int: + """Return the index of the bos character. If the bos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def eos_id(self) -> int: + """Return the index of the eos character. 
If the eos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def vocab(self): + """Return the vocabulary dictionary.""" + return self._vocab + + @vocab.setter + def vocab(self, vocab): + """Set the vocabulary dictionary and character mapping dictionaries.""" + self._vocab, self._char_to_id, self._id_to_char = None, None, None + if vocab is not None: + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self._vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self._vocab) # pylint: disable=unnecessary-comprehension + } + + @staticmethod + def init_from_config(config, **kwargs): + """Initialize from the given config.""" + if config.characters is not None and "vocab_dict" in config.characters and config.characters.vocab_dict: + return ( + BaseVocabulary( + config.characters.vocab_dict, + config.characters.pad, + config.characters.blank, + config.characters.bos, + config.characters.eos, + ), + config, + ) + return BaseVocabulary(**kwargs), config + + def to_config(self): + return CharactersConfig( + vocab_dict=self._vocab, + pad=self.pad, + eos=self.eos, + bos=self.bos, + blank=self.blank, + is_unique=False, + is_sorted=False, + ) + + @property + def num_chars(self): + """Return number of tokens in the vocabulary.""" + return len(self._vocab) + + def char_to_id(self, char: str) -> int: + """Map a character to an token ID.""" + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] 
{repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + """Map an token ID to a character.""" + return self._id_to_char[idx] + + +class BaseCharacters: + + + def __init__( + self, + characters: str = None, + punctuations: str = None, + pad: str = None, + eos: str = None, + bos: str = None, + blank: str = None, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + self._characters = characters + self._punctuations = punctuations + self._pad = pad + self._eos = eos + self._bos = bos + self._blank = blank + self.is_unique = is_unique + self.is_sorted = is_sorted + self._create_vocab() + + @property + def pad_id(self) -> int: + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def eos_id(self) -> int: + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def bos_id(self) -> int: + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, characters): + self._characters = characters + self._create_vocab() + + @property + def punctuations(self): + return self._punctuations + + @punctuations.setter + def punctuations(self, punctuations): + self._punctuations = punctuations + self._create_vocab() + + @property + def pad(self): + return self._pad + + @pad.setter + def pad(self, pad): + self._pad = pad + self._create_vocab() + + @property + def eos(self): + return self._eos + + @eos.setter + def eos(self, eos): + self._eos = eos + self._create_vocab() + + @property + def bos(self): + return self._bos + + @bos.setter + def bos(self, bos): + self._bos = bos + self._create_vocab() + + @property + def blank(self): + return self._blank + + @blank.setter + def blank(self, blank): + self._blank = blank + self._create_vocab() + + @property + 
def vocab(self): + return self._vocab + + @vocab.setter + def vocab(self, vocab): + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self.vocab) # pylint: disable=unnecessary-comprehension + } + + @property + def num_chars(self): + return len(self._vocab) + + def _create_vocab(self): + _vocab = self._characters + if self.is_unique: + _vocab = list(set(_vocab)) + if self.is_sorted: + _vocab = sorted(_vocab) + _vocab = list(_vocab) + _vocab = [self._blank] + _vocab if self._blank is not None and len(self._blank) > 0 else _vocab + _vocab = [self._bos] + _vocab if self._bos is not None and len(self._bos) > 0 else _vocab + _vocab = [self._eos] + _vocab if self._eos is not None and len(self._eos) > 0 else _vocab + _vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab + self.vocab = _vocab + list(self._punctuations) + if self.is_unique: + duplicates = {x for x in self.vocab if self.vocab.count(x) > 1} + assert ( + len(self.vocab) == len(self._char_to_id) == len(self._id_to_char) + ), f" [!] There are duplicate characters in the character set. {duplicates}" + + def char_to_id(self, char: str) -> int: + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + return self._id_to_char[idx] + + def print_log(self, level: int = 0): + """ + Prints the vocabulary in a nice format. 
+ """ + indent = "\t" * level + print(f"{indent}| > Characters: {self._characters}") + print(f"{indent}| > Punctuations: {self._punctuations}") + print(f"{indent}| > Pad: {self._pad}") + print(f"{indent}| > EOS: {self._eos}") + print(f"{indent}| > BOS: {self._bos}") + print(f"{indent}| > Blank: {self._blank}") + print(f"{indent}| > Vocab: {self.vocab}") + print(f"{indent}| > Num chars: {self.num_chars}") + + @staticmethod + def init_from_config(config: "Coqpit"): # pylint: disable=unused-argument + """Init your character class from a config. + + Implement this method for your subclass. + """ + # use character set from config + if config.characters is not None: + return BaseCharacters(**config.characters), config + # return default character set + characters = BaseCharacters() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + punctuations=self._punctuations, + pad=self._pad, + eos=self._eos, + bos=self._bos, + blank=self._blank, + is_unique=self.is_unique, + is_sorted=self.is_sorted, + ) + + +class IPAPhonemes(BaseCharacters): + + + def __init__( + self, + characters: str = _phonemes, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a IPAPhonemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + # band-aid for compatibility with old models + if "characters" in config and config.characters is not None: + if "phonemes" in config.characters and config.characters.phonemes is not None: + config.characters["characters"] = config.characters["phonemes"] + return ( + IPAPhonemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + # use character set from config + if config.characters is not None: + return IPAPhonemes(**config.characters), config + # return default character set + characters = IPAPhonemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +class Graphemes(BaseCharacters): + + + def __init__( + self, + characters: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a Graphemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + if config.characters is not None: + # band-aid for compatibility with old models + if "phonemes" in config.characters: + return ( + Graphemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + return Graphemes(**config.characters), config + characters = Graphemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +if __name__ == "__main__": + gr = Graphemes() + ph = IPAPhonemes() + gr.print_log() + ph.print_log() + + +class VitsCharacters(BaseCharacters): + """Characters class for VITs model for compatibility with pre-trained models""" + + def __init__( + self, + graphemes: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + ipa_characters: str = _phonemes, + ) -> None: + if ipa_characters is not None: + graphemes += ipa_characters + super().__init__(graphemes, punctuations, pad, None, None, "", is_unique=False, is_sorted=True) + + def _create_vocab(self): + self._vocab = [self._pad] + list(self._punctuations) + list(self._characters) + [self._blank] + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + # pylint: disable=unnecessary-comprehension + self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)} + + @staticmethod + def init_from_config(config): + _pad = config.characters.pad + _punctuations = config.characters.punctuations + _letters = config.characters.characters + _letters_ipa = config.characters.phonemes + return ( + VitsCharacters(graphemes=_letters, ipa_characters=_letters_ipa, punctuations=_punctuations, pad=_pad), + config, + ) + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + 
punctuations=self._punctuations, + pad=self._pad, + eos=None, + bos=None, + blank=self._blank, + is_unique=False, + is_sorted=True, + ) + +class TTSTokenizer: + def __init__( + self, + text_cleaner: Callable = None, + characters: "BaseCharacters" = None, + ): + self.text_cleaner = text_cleaner + self.characters = characters + self.not_found_characters = [] + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, new_characters): + self._characters = new_characters + self.pad_id = self.characters.char_to_id(self.characters.pad) if self.characters.pad else None + self.blank_id = self.characters.char_to_id(self.characters.blank) if self.characters.blank else None + + def encode(self, text: str) -> List[int]: + """Encodes a string of text as a sequence of IDs.""" + token_ids = [] + for char in text: + try: + idx = self.characters.char_to_id(char) + token_ids.append(idx) + except KeyError: + # discard but store not found characters + if char not in self.not_found_characters: + self.not_found_characters.append(char) + print(text) + print(f" [!] Character {repr(char)} not found in the vocabulary. Discarding it.") + return token_ids + + def text_to_ids(self, text: str, language: str = None) -> List[int]: # pylint: disable=unused-argument + text = self.text_cleaner(text) + text = self.encode(text) + text = self.intersperse_blank_char(text, True) + return text + + def pad_with_bos_eos(self, char_sequence: List[str]): + """Pads a sequence with the special BOS and EOS characters.""" + return [self.characters.bos_id] + list(char_sequence) + [self.characters.eos_id] + + def intersperse_blank_char(self, char_sequence: List[str], use_blank_char: bool = False): + """Intersperses the blank character between characters in a sequence. + + Use the ```blank``` character if defined else use the ```pad``` character. 
+ """ + char_to_use = self.characters.blank_id if use_blank_char else self.characters.pad + result = [char_to_use] * (len(char_sequence) * 2 + 1) + result[1::2] = char_sequence + return result + + @staticmethod + def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None): + text_cleaner = multilingual_cleaners + CharactersClass = VitsCharacters + characters, new_config = CharactersClass.init_from_config(config) + # new_config.characters.characters_class = get_import_path(characters) + new_config.characters.characters_class = VitsCharacters + return ( + TTSTokenizer(text_cleaner, characters),new_config) + + +def multilingual_cleaners(text): + """Pipeline for multilingual text""" + text = lowercase(text) + text = replace_symbols(text, lang=None) + text = remove_aux_symbols(text) + text = collapse_whitespace(text) + return text + +def lowercase(text): + return text.lower() + +def collapse_whitespace(text): + return re.sub(_whitespace_re, " ", text).strip() + +def replace_symbols(text, lang="en"): + + text = text.replace(";", ",") + text = text.replace("-", " ") if lang != "ca" else text.replace("-", "") + text = text.replace(":", ",") + if lang == "en": + text = text.replace("&", " and ") + elif lang == "fr": + text = text.replace("&", " et ") + elif lang == "pt": + text = text.replace("&", " e ") + elif lang == "ca": + text = text.replace("&", " i ") + text = text.replace("'", "") + return text + +def remove_aux_symbols(text): + text = re.sub(r"[\<\>\(\)\[\]\"]+", "", text) + return text \ No newline at end of file diff --git a/models/te_female/jit_infer.py b/models/te_female/jit_infer.py new file mode 100644 index 0000000000000000000000000000000000000000..de184a532ee10dec8798017c719947ce4b08f9e3 --- /dev/null +++ b/models/te_female/jit_infer.py @@ -0,0 +1,32 @@ +import os +from extra import TTSTokenizer, VitsConfig, CharactersConfig, VitsCharacters +import torch +import numpy as np + +#ch female +with open("chars.txt", 'r') as f: + letters = 
f.read().strip('\n') +model="te_female_vits_30hrs.pt" +text = "ఒక ప్రాంత జనాభాలో ఉదాహరణకు ఒక సంవత్సర కాలంలో మరణాల కంటే జననాలు ఎక్కువ ఉంటే జనాభా పెరుగుతుంది." + +config = VitsConfig( + text_cleaner="multilingual_cleaners", + characters=CharactersConfig( + characters_class=VitsCharacters, + pad="", + eos="", + bos="", + blank="", + characters=letters, + punctuations="!¡'(),-.:;¿? ", + phonemes=None) + ) +tokenizer, config = TTSTokenizer.init_from_config(config) + +x = tokenizer.text_to_ids(text) +x = torch.from_numpy(np.array(x)).unsqueeze(0) +net = torch.jit.load(model) +with torch.no_grad(): + out2 = net(x) +import soundfile as sf +sf.write("jit.wav", out2.squeeze().cpu().numpy(), 22050) \ No newline at end of file diff --git a/models/te_female/te_female_vits_30hrs.pt b/models/te_female/te_female_vits_30hrs.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c1dc677673a9c920819898641b5027a6b9c5d4e --- /dev/null +++ b/models/te_female/te_female_vits_30hrs.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07ed79f7b1bf93d9759e13fbbbe1e333082724f2ee5cb29cdf94f86b45e298b5 +size 333252998 diff --git a/models/te_male/.gitattributes b/models/te_male/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/te_male/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs 
merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/te_male/README.md b/models/te_male/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b187bb7e7d837a367ccd0862441947ad412c77f7 --- /dev/null +++ b/models/te_male/README.md @@ -0,0 +1,3 @@ +--- +license: cc-by-4.0 +--- diff --git a/models/te_male/chars.txt b/models/te_male/chars.txt new file mode 100644 index 0000000000000000000000000000000000000000..fdeed32b85d15e4f4353098430011fddeea19fa1 --- /dev/null +++ b/models/te_male/chars.txt @@ -0,0 +1 @@ +న,ఇగబవహటంభరఖృౌూీయఉఢఒెపఓఠథదజఐఈఫణ'ఛఁమషధేఔై?శిళఊలఘఆతడఞసఎ్ఏోచకు"ఙ ొఋఱఝా.అ diff --git a/models/te_male/extra.py b/models/te_male/extra.py new file mode 100644 index 0000000000000000000000000000000000000000..c7db561351da270a7c3931bfe0afefa7bc6d4853 --- /dev/null +++ b/models/te_male/extra.py @@ -0,0 +1,787 @@ +from typing import Callable, Dict, List, Union +from dataclasses import asdict, dataclass, field + + +import re +from dataclasses import replace +from typing import Dict +_whitespace_re = re.compile(r"\s+") + +from dataclasses import 
dataclass, field +from typing import List + +# from TTS.tts.configs.shared_configs import BaseTTSConfig +# from TTS.tts.models.vits import VitsArgs, VitsAudioConfig + +@dataclass +class CharactersConfig(): + + characters_class: str = None + + # using BaseVocabulary + vocab_dict: Dict = None + + # using on BaseCharacters + pad: str = None + eos: str = None + bos: str = None + blank: str = None + characters: str = None + punctuations: str = None + phonemes: str = None + is_unique: bool = True # for backwards compatibility of models trained with char sets with duplicates + is_sorted: bool = True + + +@dataclass +class BaseTTSConfig(): + + # audio: BaseAudioConfig = field(default_factory=BaseAudioConfig) + # phoneme settings + use_phonemes: bool = False + phonemizer: str = None + phoneme_language: str = None + compute_input_seq_cache: bool = False + text_cleaner: str = None + enable_eos_bos_chars: bool = False + test_sentences_file: str = "" + phoneme_cache_path: str = None + # vocabulary parameters + characters: CharactersConfig = None + add_blank: bool = False + # training params + batch_group_size: int = 0 + loss_masking: bool = None + # dataloading + min_audio_len: int = 1 + max_audio_len: int = float("inf") + min_text_len: int = 1 + max_text_len: int = float("inf") + compute_f0: bool = False + compute_energy: bool = False + compute_linear_spec: bool = False + precompute_num_workers: int = 0 + use_noise_augment: bool = False + start_by_longest: bool = False + shuffle: bool = False + drop_last: bool = False + # dataset + datasets: str = None + # optimizer + optimizer: str = "radam" + optimizer_params: dict = None + # scheduler + lr_scheduler: str = None + lr_scheduler_params: dict = field(default_factory=lambda: {}) + # testing + test_sentences: List[str] = field(default_factory=lambda: []) + # evaluation + eval_split_max_size: int = None + eval_split_size: float = 0.01 + # weighted samplers + use_speaker_weighted_sampler: bool = False + 
speaker_weighted_sampler_alpha: float = 1.0 + use_language_weighted_sampler: bool = False + language_weighted_sampler_alpha: float = 1.0 + use_length_weighted_sampler: bool = False + length_weighted_sampler_alpha: float = 1.0 + + +@dataclass +class VitsAudioConfig(): + fft_size: int = 1024 + sample_rate: int = 22050 + win_length: int = 1024 + hop_length: int = 256 + num_mels: int = 80 + mel_fmin: int = 0 + mel_fmax: int = None + +@dataclass +class VitsArgs(): + num_chars: int = 100 + out_channels: int = 513 + spec_segment_size: int = 32 + hidden_channels: int = 192 + hidden_channels_ffn_text_encoder: int = 768 + num_heads_text_encoder: int = 2 + num_layers_text_encoder: int = 6 + kernel_size_text_encoder: int = 3 + dropout_p_text_encoder: float = 0.1 + dropout_p_duration_predictor: float = 0.5 + kernel_size_posterior_encoder: int = 5 + dilation_rate_posterior_encoder: int = 1 + num_layers_posterior_encoder: int = 16 + kernel_size_flow: int = 5 + dilation_rate_flow: int = 1 + num_layers_flow: int = 4 + resblock_type_decoder: str = "1" + resblock_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [3, 7, 11]) + resblock_dilation_sizes_decoder: List[List[int]] = field(default_factory=lambda: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]) + upsample_rates_decoder: List[int] = field(default_factory=lambda: [8, 8, 2, 2]) + upsample_initial_channel_decoder: int = 512 + upsample_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [16, 16, 4, 4]) + periods_multi_period_discriminator: List[int] = field(default_factory=lambda: [2, 3, 5, 7, 11]) + use_sdp: bool = True + noise_scale: float = 1.0 + inference_noise_scale: float = 0.667 + length_scale: float = 1 + noise_scale_dp: float = 1.0 + inference_noise_scale_dp: float = 1.0 + max_inference_len: int = None + init_discriminator: bool = True + use_spectral_norm_disriminator: bool = False + use_speaker_embedding: bool = False + num_speakers: int = 0 + speakers_file: str = None + d_vector_file: List[str] = None + 
speaker_embedding_channels: int = 256 + use_d_vector_file: bool = False + d_vector_dim: int = 0 + detach_dp_input: bool = True + use_language_embedding: bool = False + embedded_language_dim: int = 4 + num_languages: int = 0 + language_ids_file: str = None + use_speaker_encoder_as_loss: bool = False + speaker_encoder_config_path: str = "" + speaker_encoder_model_path: str = "" + condition_dp_on_speaker: bool = True + freeze_encoder: bool = False + freeze_DP: bool = False + freeze_PE: bool = False + freeze_flow_decoder: bool = False + freeze_waveform_decoder: bool = False + encoder_sample_rate: int = None + interpolate_z: bool = True + reinit_DP: bool = False + reinit_text_encoder: bool = False +@dataclass +class VitsConfig(BaseTTSConfig): + + model: str = "vits" + # model specific params + model_args: VitsArgs = field(default_factory=VitsArgs) + audio: VitsAudioConfig = field(default_factory=VitsAudioConfig) + + # optimizer + grad_clip: List[float] = field(default_factory=lambda: [1000, 1000]) + lr_gen: float = 0.0002 + lr_disc: float = 0.0002 + lr_scheduler_gen: str = "ExponentialLR" + lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + lr_scheduler_disc: str = "ExponentialLR" + lr_scheduler_disc_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1}) + scheduler_after_epoch: bool = True + optimizer: str = "AdamW" + optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "eps": 1e-9, "weight_decay": 0.01}) + + # loss params + kl_loss_alpha: float = 1.0 + disc_loss_alpha: float = 1.0 + gen_loss_alpha: float = 1.0 + feat_loss_alpha: float = 1.0 + mel_loss_alpha: float = 45.0 + dur_loss_alpha: float = 1.0 + speaker_encoder_loss_alpha: float = 1.0 + + # data loader params + return_wav: bool = True + compute_linear_spec: bool = True + + # sampler params + use_weighted_sampler: bool = False # TODO: move it to the base config + weighted_sampler_attrs: dict = 
field(default_factory=lambda: {}) + weighted_sampler_multipliers: dict = field(default_factory=lambda: {}) + + # overrides + r: int = 1 # DO NOT CHANGE + add_blank: bool = True + + # testing + test_sentences: List[List] = field( + default_factory=lambda: [ + ["It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent."], + ["Be a voice, not an echo."], + ["I'm sorry Dave. I'm afraid I can't do that."], + ["This cake is great. It's so delicious and moist."], + ["Prior to November 22, 1963."], + ] + ) + + # multi-speaker settings + # use speaker embedding layer + num_speakers: int = 0 + use_speaker_embedding: bool = False + speakers_file: str = None + speaker_embedding_channels: int = 256 + language_ids_file: str = None + use_language_embedding: bool = False + + # use d-vectors + use_d_vector_file: bool = False + d_vector_file: List[str] = None + d_vector_dim: int = None + + def __post_init__(self): + pass + # for key, val in self.model_args.items(): + # if hasattr(self, key): + # self[key] = val + + + + + +def parse_symbols(): + return { + "pad": _pad, + "eos": _eos, + "bos": _bos, + "characters": _characters, + "punctuations": _punctuations, + "phonemes": _phonemes, + } + + +# DEFAULT SET OF GRAPHEMES +_pad = "" +_eos = "" +_bos = "" +_blank = "" # TODO: check if we need this alongside with PAD +_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" +_punctuations = "!'(),-.:;? " + + +# DEFAULT SET OF IPA PHONEMES +# Phonemes definition (All IPA characters) +_vowels = "iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ" +_non_pulmonic_consonants = "ʘɓǀɗǃʄǂɠǁʛ" +_pulmonic_consonants = "pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ" +_suprasegmentals = "ˈˌːˑ" +_other_symbols = "ʍwɥʜʢʡɕʑɺɧʲ" +_diacrilics = "ɚ˞ɫ" +_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics + + +class BaseVocabulary: + """Base Vocabulary class. 
+ + This class only needs a vocabulary dictionary without specifying the characters. + + Args: + vocab (Dict): A dictionary of characters and their corresponding indices. + """ + + def __init__(self, vocab: Dict, pad: str = None, blank: str = None, bos: str = None, eos: str = None): + self.vocab = vocab + self.pad = pad + self.blank = blank + self.bos = bos + self.eos = eos + + @property + def pad_id(self) -> int: + """Return the index of the padding character. If the padding character is not specified, return the length + of the vocabulary.""" + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + """Return the index of the blank character. If the blank character is not specified, return the length of + the vocabulary.""" + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def bos_id(self) -> int: + """Return the index of the bos character. If the bos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def eos_id(self) -> int: + """Return the index of the eos character. 
If the eos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def vocab(self): + """Return the vocabulary dictionary.""" + return self._vocab + + @vocab.setter + def vocab(self, vocab): + """Set the vocabulary dictionary and character mapping dictionaries.""" + self._vocab, self._char_to_id, self._id_to_char = None, None, None + if vocab is not None: + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self._vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self._vocab) # pylint: disable=unnecessary-comprehension + } + + @staticmethod + def init_from_config(config, **kwargs): + """Initialize from the given config.""" + if config.characters is not None and "vocab_dict" in config.characters and config.characters.vocab_dict: + return ( + BaseVocabulary( + config.characters.vocab_dict, + config.characters.pad, + config.characters.blank, + config.characters.bos, + config.characters.eos, + ), + config, + ) + return BaseVocabulary(**kwargs), config + + def to_config(self): + return CharactersConfig( + vocab_dict=self._vocab, + pad=self.pad, + eos=self.eos, + bos=self.bos, + blank=self.blank, + is_unique=False, + is_sorted=False, + ) + + @property + def num_chars(self): + """Return number of tokens in the vocabulary.""" + return len(self._vocab) + + def char_to_id(self, char: str) -> int: + """Map a character to an token ID.""" + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] 
{repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + """Map an token ID to a character.""" + return self._id_to_char[idx] + + +class BaseCharacters: + + + def __init__( + self, + characters: str = None, + punctuations: str = None, + pad: str = None, + eos: str = None, + bos: str = None, + blank: str = None, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + self._characters = characters + self._punctuations = punctuations + self._pad = pad + self._eos = eos + self._bos = bos + self._blank = blank + self.is_unique = is_unique + self.is_sorted = is_sorted + self._create_vocab() + + @property + def pad_id(self) -> int: + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def eos_id(self) -> int: + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def bos_id(self) -> int: + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, characters): + self._characters = characters + self._create_vocab() + + @property + def punctuations(self): + return self._punctuations + + @punctuations.setter + def punctuations(self, punctuations): + self._punctuations = punctuations + self._create_vocab() + + @property + def pad(self): + return self._pad + + @pad.setter + def pad(self, pad): + self._pad = pad + self._create_vocab() + + @property + def eos(self): + return self._eos + + @eos.setter + def eos(self, eos): + self._eos = eos + self._create_vocab() + + @property + def bos(self): + return self._bos + + @bos.setter + def bos(self, bos): + self._bos = bos + self._create_vocab() + + @property + def blank(self): + return self._blank + + @blank.setter + def blank(self, blank): + self._blank = blank + self._create_vocab() + + @property + 
def vocab(self): + return self._vocab + + @vocab.setter + def vocab(self, vocab): + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self.vocab) # pylint: disable=unnecessary-comprehension + } + + @property + def num_chars(self): + return len(self._vocab) + + def _create_vocab(self): + _vocab = self._characters + if self.is_unique: + _vocab = list(set(_vocab)) + if self.is_sorted: + _vocab = sorted(_vocab) + _vocab = list(_vocab) + _vocab = [self._blank] + _vocab if self._blank is not None and len(self._blank) > 0 else _vocab + _vocab = [self._bos] + _vocab if self._bos is not None and len(self._bos) > 0 else _vocab + _vocab = [self._eos] + _vocab if self._eos is not None and len(self._eos) > 0 else _vocab + _vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab + self.vocab = _vocab + list(self._punctuations) + if self.is_unique: + duplicates = {x for x in self.vocab if self.vocab.count(x) > 1} + assert ( + len(self.vocab) == len(self._char_to_id) == len(self._id_to_char) + ), f" [!] There are duplicate characters in the character set. {duplicates}" + + def char_to_id(self, char: str) -> int: + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + return self._id_to_char[idx] + + def print_log(self, level: int = 0): + """ + Prints the vocabulary in a nice format. 
+ """ + indent = "\t" * level + print(f"{indent}| > Characters: {self._characters}") + print(f"{indent}| > Punctuations: {self._punctuations}") + print(f"{indent}| > Pad: {self._pad}") + print(f"{indent}| > EOS: {self._eos}") + print(f"{indent}| > BOS: {self._bos}") + print(f"{indent}| > Blank: {self._blank}") + print(f"{indent}| > Vocab: {self.vocab}") + print(f"{indent}| > Num chars: {self.num_chars}") + + @staticmethod + def init_from_config(config: "Coqpit"): # pylint: disable=unused-argument + """Init your character class from a config. + + Implement this method for your subclass. + """ + # use character set from config + if config.characters is not None: + return BaseCharacters(**config.characters), config + # return default character set + characters = BaseCharacters() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + punctuations=self._punctuations, + pad=self._pad, + eos=self._eos, + bos=self._bos, + blank=self._blank, + is_unique=self.is_unique, + is_sorted=self.is_sorted, + ) + + +class IPAPhonemes(BaseCharacters): + + + def __init__( + self, + characters: str = _phonemes, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a IPAPhonemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + # band-aid for compatibility with old models + if "characters" in config and config.characters is not None: + if "phonemes" in config.characters and config.characters.phonemes is not None: + config.characters["characters"] = config.characters["phonemes"] + return ( + IPAPhonemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + # use character set from config + if config.characters is not None: + return IPAPhonemes(**config.characters), config + # return default character set + characters = IPAPhonemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +class Graphemes(BaseCharacters): + + + def __init__( + self, + characters: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a Graphemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + if config.characters is not None: + # band-aid for compatibility with old models + if "phonemes" in config.characters: + return ( + Graphemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + return Graphemes(**config.characters), config + characters = Graphemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +if __name__ == "__main__": + gr = Graphemes() + ph = IPAPhonemes() + gr.print_log() + ph.print_log() + + +class VitsCharacters(BaseCharacters): + """Characters class for VITs model for compatibility with pre-trained models""" + + def __init__( + self, + graphemes: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + ipa_characters: str = _phonemes, + ) -> None: + if ipa_characters is not None: + graphemes += ipa_characters + super().__init__(graphemes, punctuations, pad, None, None, "", is_unique=False, is_sorted=True) + + def _create_vocab(self): + self._vocab = [self._pad] + list(self._punctuations) + list(self._characters) + [self._blank] + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + # pylint: disable=unnecessary-comprehension + self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)} + + @staticmethod + def init_from_config(config): + _pad = config.characters.pad + _punctuations = config.characters.punctuations + _letters = config.characters.characters + _letters_ipa = config.characters.phonemes + return ( + VitsCharacters(graphemes=_letters, ipa_characters=_letters_ipa, punctuations=_punctuations, pad=_pad), + config, + ) + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + 
punctuations=self._punctuations, + pad=self._pad, + eos=None, + bos=None, + blank=self._blank, + is_unique=False, + is_sorted=True, + ) + +class TTSTokenizer: + def __init__( + self, + text_cleaner: Callable = None, + characters: "BaseCharacters" = None, + ): + self.text_cleaner = text_cleaner + self.characters = characters + self.not_found_characters = [] + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, new_characters): + self._characters = new_characters + self.pad_id = self.characters.char_to_id(self.characters.pad) if self.characters.pad else None + self.blank_id = self.characters.char_to_id(self.characters.blank) if self.characters.blank else None + + def encode(self, text: str) -> List[int]: + """Encodes a string of text as a sequence of IDs.""" + token_ids = [] + for char in text: + try: + idx = self.characters.char_to_id(char) + token_ids.append(idx) + except KeyError: + # discard but store not found characters + if char not in self.not_found_characters: + self.not_found_characters.append(char) + print(text) + print(f" [!] Character {repr(char)} not found in the vocabulary. Discarding it.") + return token_ids + + def text_to_ids(self, text: str, language: str = None) -> List[int]: # pylint: disable=unused-argument + text = self.text_cleaner(text) + text = self.encode(text) + text = self.intersperse_blank_char(text, True) + return text + + def pad_with_bos_eos(self, char_sequence: List[str]): + """Pads a sequence with the special BOS and EOS characters.""" + return [self.characters.bos_id] + list(char_sequence) + [self.characters.eos_id] + + def intersperse_blank_char(self, char_sequence: List[str], use_blank_char: bool = False): + """Intersperses the blank character between characters in a sequence. + + Use the ```blank``` character if defined else use the ```pad``` character. 
+ """ + char_to_use = self.characters.blank_id if use_blank_char else self.characters.pad + result = [char_to_use] * (len(char_sequence) * 2 + 1) + result[1::2] = char_sequence + return result + + @staticmethod + def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None): + text_cleaner = multilingual_cleaners + CharactersClass = VitsCharacters + characters, new_config = CharactersClass.init_from_config(config) + # new_config.characters.characters_class = get_import_path(characters) + new_config.characters.characters_class = VitsCharacters + return ( + TTSTokenizer(text_cleaner, characters),new_config) + + +def multilingual_cleaners(text): + """Pipeline for multilingual text""" + text = lowercase(text) + text = replace_symbols(text, lang=None) + text = remove_aux_symbols(text) + text = collapse_whitespace(text) + return text + +def lowercase(text): + return text.lower() + +def collapse_whitespace(text): + return re.sub(_whitespace_re, " ", text).strip() + +def replace_symbols(text, lang="en"): + + text = text.replace(";", ",") + text = text.replace("-", " ") if lang != "ca" else text.replace("-", "") + text = text.replace(":", ",") + if lang == "en": + text = text.replace("&", " and ") + elif lang == "fr": + text = text.replace("&", " et ") + elif lang == "pt": + text = text.replace("&", " e ") + elif lang == "ca": + text = text.replace("&", " i ") + text = text.replace("'", "") + return text + +def remove_aux_symbols(text): + text = re.sub(r"[\<\>\(\)\[\]\"]+", "", text) + return text \ No newline at end of file diff --git a/models/te_male/jit_infer.py b/models/te_male/jit_infer.py new file mode 100644 index 0000000000000000000000000000000000000000..4973b2789ddbf4cd1aea808ca107cbc9cf306765 --- /dev/null +++ b/models/te_male/jit_infer.py @@ -0,0 +1,32 @@ +import os +from extra import TTSTokenizer, VitsConfig, CharactersConfig, VitsCharacters +import torch +import numpy as np + +#ch female +with open("chars.txt", 'r') as f: + letters = 
f.read().strip('\n')
model="te_male_vits_30hrs.pt"
text = "ఒక ప్రాంత జనాభాలో ఉదాహరణకు ఒక సంవత్సర కాలంలో మరణాల కంటే జననాలు ఎక్కువ ఉంటే జనాభా పెరుగుతుంది."

# Rebuild the exact character configuration the checkpoint was trained with:
# empty pad/eos/bos/blank strings, the chars.txt letters, and this punctuation set.
config = VitsConfig(
    text_cleaner="multilingual_cleaners",
    characters=CharactersConfig(
        characters_class=VitsCharacters,
        pad="",
        eos="",
        bos="",
        blank="",
        characters=letters,
        punctuations="!¡'(),-.:;¿? ",
        phonemes=None)
    )
tokenizer, config = TTSTokenizer.init_from_config(config)

# Tokenize and add the batch dimension expected by the traced model.
x = tokenizer.text_to_ids(text)
x = torch.from_numpy(np.array(x)).unsqueeze(0)
net = torch.jit.load(model)
with torch.no_grad():
    out2 = net(x)
# soundfile imported late so the heavy imports above fail fast on their own.
import soundfile as sf
# NOTE(review): 22050 Hz sample rate is hard-coded — presumably matches the
# checkpoint's training config; confirm against the model card.
sf.write("jit.wav", out2.squeeze().cpu().numpy(), 22050)
\ No newline at end of file
diff --git a/models/te_male/te_male_vits_30hrs.pt b/models/te_male/te_male_vits_30hrs.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cb5dfe73ea9fc27db84496ac3d215e0bfe80b4f8
--- /dev/null
+++ b/models/te_male/te_male_vits_30hrs.pt
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1204c5f1296cd606625cefe977409405e7201e2c87b9c7c50535b2966216cfe0
size 333249100
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..738f7ae1f65f729cf6cadebaf098ab15347f251a
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,27 @@
# Voice Tech for All - Multi-lingual TTS System
# Requirements for the TTS system

# Core ML/Audio
torch
numpy
scipy
soundfile
librosa # For pitch shifting and time stretching

# TTS Models
TTS # Coqui TTS - required for Bhojpuri .pth models
huggingface-hub
transformers # Required for MMS models (Gujarati)

# API Server
fastapi
uvicorn[standard]
python-multipart # Required for file uploads
pydantic

# Utilities
tqdm
requests

# Development (optional)
pytest
diff --git a/runtime.txt b/runtime.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6be87ab790e255b1a8c08b0d654c2bc05df2fdbd
--- /dev/null
+++ b/runtime.txt @@ -0,0 +1 @@ +3.10.19 \ No newline at end of file diff --git a/src/.DS_Store b/src/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..c9c570bef6279de9b7c09acec44c33d18aa26306 Binary files /dev/null and b/src/.DS_Store differ diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4003ee0b4c1c5c9817982211ad1c91feee1a3eff --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,2 @@ +# Multi-lingual TTS System +# Voice Tech for All Hackathon diff --git a/src/__pycache__/__init__.cpython-310.pyc b/src/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c741dad0c0bbcb1c4c201e8d0cbcbb9c71ded808 Binary files /dev/null and b/src/__pycache__/__init__.cpython-310.pyc differ diff --git a/src/__pycache__/api.cpython-310.pyc b/src/__pycache__/api.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..16b156234aca0b274df3cb03bc38b8ed29d62362 Binary files /dev/null and b/src/__pycache__/api.cpython-310.pyc differ diff --git a/src/__pycache__/cli.cpython-310.pyc b/src/__pycache__/cli.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..739944251a5c1c0faa329c4023d4c803d6cb21b2 Binary files /dev/null and b/src/__pycache__/cli.cpython-310.pyc differ diff --git a/src/__pycache__/config.cpython-310.pyc b/src/__pycache__/config.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c071e0e22629ee3915c885d0fb27d4dad8aa61a0 Binary files /dev/null and b/src/__pycache__/config.cpython-310.pyc differ diff --git a/src/__pycache__/downloader.cpython-310.pyc b/src/__pycache__/downloader.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc29ab7aeb2a518f9a8dd3d546457d4561bd98f1 Binary files /dev/null and b/src/__pycache__/downloader.cpython-310.pyc differ diff --git a/src/__pycache__/engine.cpython-310.pyc 
b/src/__pycache__/engine.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3e9cf87f8143f705952fef7dfd048e8565bc535c Binary files /dev/null and b/src/__pycache__/engine.cpython-310.pyc differ diff --git a/src/__pycache__/tokenizer.cpython-310.pyc b/src/__pycache__/tokenizer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..21df1c989bc9de32f821c5550c2b0be923af6583 Binary files /dev/null and b/src/__pycache__/tokenizer.cpython-310.pyc differ diff --git a/src/api.py b/src/api.py new file mode 100644 index 0000000000000000000000000000000000000000..ef858b197d4eeb470ace0d58c1153d0f3ce46688 --- /dev/null +++ b/src/api.py @@ -0,0 +1,541 @@ +""" +REST API Server for Multi-lingual TTS +FastAPI-based server with OpenAPI documentation + +Hackathon API Specification: +- GET /Get_Inference with text, lang, speaker_wav parameters +""" + +import os +import io +import time +import logging +import tempfile +from typing import Optional, List +from pathlib import Path +import numpy as np + +from fastapi import ( + FastAPI, + HTTPException, + Query, + Response, + BackgroundTasks, + UploadFile, + File, +) +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import StreamingResponse, FileResponse, JSONResponse +from pydantic import BaseModel, Field +import soundfile as sf + +from .engine import TTSEngine, TTSOutput +from .config import ( + LANGUAGE_CONFIGS, + get_available_languages, + get_available_voices, + STYLE_PRESETS, +) + +# Language name to voice key mapping (for hackathon API) +LANG_TO_VOICE = { + "hindi": "hi_female", + "bengali": "bn_female", + "marathi": "mr_female", + "telugu": "te_female", + "kannada": "kn_female", + "bhojpuri": "bho_female", + "chhattisgarhi": "hne_female", + "maithili": "mai_female", + "magahi": "mag_female", + "english": "en_female", + "gujarati": "gu_mms", +} + +# Setup logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# 
Initialize FastAPI app +app = FastAPI( + title="Voice Tech for All - Multi-lingual TTS API", + description=""" + A multi-lingual Text-to-Speech API supporting 10+ Indian languages. + + ## Features + - 10 Indian languages with male/female voices + - Real-time speech synthesis + - Text normalization for Indian languages + - Speed control + - Multiple audio formats (WAV, MP3) + + ## Supported Languages + Hindi, Bengali, Marathi, Telugu, Kannada, Bhojpuri, + Chhattisgarhi, Maithili, Magahi, English + + ## Use Case + Built for an LLM-based healthcare assistant for pregnant mothers + in low-income communities. + """, + version="1.0.0", + contact={ + "name": "Voice Tech for All Hackathon", + "url": "https://huggingface.co/SYSPIN", + }, + license_info={ + "name": "CC BY 4.0", + "url": "https://creativecommons.org/licenses/by/4.0/", + }, +) + +# CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Initialize TTS Engine (lazy loading) +_engine: Optional[TTSEngine] = None + + +def get_engine() -> TTSEngine: + """Get or create TTS engine instance""" + global _engine + if _engine is None: + _engine = TTSEngine(device="auto") + return _engine + + +# Request/Response Models +class SynthesizeRequest(BaseModel): + """Request body for text synthesis""" + + text: str = Field( + ..., description="Text to synthesize", min_length=1, max_length=5000 + ) + voice: str = Field( + "hi_male", description="Voice key (e.g., hi_male, bn_female, gu_mms)" + ) + speed: float = Field(1.0, description="Speech speed (0.5-2.0)", ge=0.5, le=2.0) + pitch: float = Field(1.0, description="Pitch multiplier (0.5-2.0)", ge=0.5, le=2.0) + energy: float = Field(1.0, description="Energy/volume (0.5-2.0)", ge=0.5, le=2.0) + style: Optional[str] = Field( + None, description="Style preset (happy, sad, calm, excited, etc.)" + ) + normalize: bool = Field(True, description="Apply text normalization") + + class 
Config: + schema_extra = { + "example": { + "text": "નમસ્તે, હું તમારી કેવી રીતે મદદ કરી શકું?", + "voice": "gu_mms", + "speed": 1.0, + "pitch": 1.0, + "energy": 1.0, + "style": "calm", + "normalize": True, + } + } + + +class SynthesizeResponse(BaseModel): + """Response metadata for synthesis""" + + success: bool + duration: float + sample_rate: int + voice: str + text: str + inference_time: float + + +class VoiceInfo(BaseModel): + """Information about a voice""" + + key: str + name: str + language_code: str + gender: str + loaded: bool + downloaded: bool + model_type: str = "vits" + + +class HealthResponse(BaseModel): + """Health check response""" + + status: str + device: str + loaded_voices: List[str] + available_voices: int + style_presets: List[str] + + +# API Endpoints +@app.get("/", response_class=JSONResponse) +async def root(): + """API root - welcome message""" + return { + "message": "Voice Tech for All - Multi-lingual TTS API", + "docs": "/docs", + "health": "/health", + "synthesize": "/synthesize", + } + + +@app.get("/health", response_model=HealthResponse) +async def health_check(): + """Health check endpoint""" + engine = get_engine() + return HealthResponse( + status="healthy", + device=str(engine.device), + loaded_voices=engine.get_loaded_voices(), + available_voices=len(LANGUAGE_CONFIGS), + style_presets=list(STYLE_PRESETS.keys()), + ) + + +@app.get("/voices", response_model=List[VoiceInfo]) +async def list_voices(): + """List all available voices""" + engine = get_engine() + voices = engine.get_available_voices() + + return [ + VoiceInfo( + key=key, + name=info["name"], + language_code=info["code"], + gender=info["gender"], + loaded=info["loaded"], + downloaded=info["downloaded"], + model_type=info.get("type", "vits"), + ) + for key, info in voices.items() + ] + + +@app.get("/styles") +async def list_styles(): + """List available style presets for prosody control""" + return { + "presets": STYLE_PRESETS, + "description": { + "speed": "Speech rate 
multiplier (0.5-2.0)", + "pitch": "Pitch multiplier (0.5-2.0), >1 = higher", + "energy": "Volume/energy multiplier (0.5-2.0)", + }, + } + + +@app.get("/languages") +async def list_languages(): + """List supported languages""" + return get_available_languages() + + +@app.post("/synthesize", response_class=Response) +async def synthesize_audio(request: SynthesizeRequest): + """ + Synthesize speech from text + + Returns WAV audio file directly + """ + engine = get_engine() + + # Validate voice + if request.voice not in LANGUAGE_CONFIGS: + raise HTTPException( + status_code=400, + detail=f"Unknown voice: {request.voice}. Use /voices to see available options.", + ) + + try: + start_time = time.time() + + # Synthesize + output = engine.synthesize( + text=request.text, + voice=request.voice, + speed=request.speed, + pitch=request.pitch, + energy=request.energy, + style=request.style, + normalize_text=request.normalize, + ) + + inference_time = time.time() - start_time + + # Convert to WAV bytes + buffer = io.BytesIO() + sf.write(buffer, output.audio, output.sample_rate, format="WAV") + buffer.seek(0) + + # Return audio with metadata headers + return Response( + content=buffer.read(), + media_type="audio/wav", + headers={ + "X-Duration": str(output.duration), + "X-Sample-Rate": str(output.sample_rate), + "X-Voice": output.voice, + "X-Style": output.style or "default", + "X-Inference-Time": str(inference_time), + }, + ) + + except Exception as e: + logger.error(f"Synthesis error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/synthesize/stream") +async def synthesize_stream(request: SynthesizeRequest): + """ + Synthesize speech and stream the audio + + Returns streaming WAV audio + """ + engine = get_engine() + + if request.voice not in LANGUAGE_CONFIGS: + raise HTTPException(status_code=400, detail=f"Unknown voice: {request.voice}") + + try: + output = engine.synthesize( + text=request.text, + voice=request.voice, + speed=request.speed, + 
pitch=request.pitch, + energy=request.energy, + style=request.style, + normalize_text=request.normalize, + ) + + # Create streaming response + buffer = io.BytesIO() + sf.write(buffer, output.audio, output.sample_rate, format="WAV") + buffer.seek(0) + + return StreamingResponse( + buffer, + media_type="audio/wav", + headers={"Content-Disposition": "attachment; filename=speech.wav"}, + ) + + except Exception as e: + logger.error(f"Streaming error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/synthesize/get") +async def synthesize_get( + text: str = Query( + ..., description="Text to synthesize", min_length=1, max_length=1000 + ), + voice: str = Query("hi_male", description="Voice key"), + speed: float = Query(1.0, description="Speech speed", ge=0.5, le=2.0), + pitch: float = Query(1.0, description="Pitch", ge=0.5, le=2.0), + energy: float = Query(1.0, description="Energy", ge=0.5, le=2.0), + style: Optional[str] = Query(None, description="Style preset"), +): + """ + GET endpoint for simple synthesis + + Useful for testing and simple integrations + """ + request = SynthesizeRequest( + text=text, voice=voice, speed=speed, pitch=pitch, energy=energy, style=style + ) + return await synthesize_audio(request) + + +@app.api_route("/Get_Inference", methods=["GET", "POST"]) +async def get_inference( + text: str = Query( + ..., + description="The input text to be converted into speech. For English, text must be lowercase.", + ), + lang: str = Query( + ..., + description="Language of input text. Supported: bhojpuri, bengali, english, gujarati, hindi, chhattisgarhi, kannada, magahi, maithili, marathi, telugu", + ), + speaker_wav: UploadFile = File( + ..., + description="A reference WAV file representing the speaker's voice (mandatory per hackathon spec).", + ), +): + """ + Hackathon API - Generate speech audio from text + + This endpoint follows the Voice Tech for All hackathon specification. 
+ + Supports both GET and POST methods with multipart form data. + + Parameters: + - text: Input text to synthesize (query param) + - lang: Language (query param) - bhojpuri, bengali, english, gujarati, hindi, chhattisgarhi, kannada, magahi, maithili, marathi, telugu + - speaker_wav: Reference WAV file (multipart file upload, mandatory) + + Returns: + - 200 OK: WAV audio file as streaming response + """ + engine = get_engine() + + # Normalize language name + lang_lower = lang.lower().strip() + + # Enforce lowercase for English text (per spec) + if lang_lower == "english": + text = text.lower() + + # Map language to voice + if lang_lower not in LANG_TO_VOICE: + supported = list(LANG_TO_VOICE.keys()) + raise HTTPException( + status_code=400, + detail=f"Unsupported language: {lang}. Supported languages: {', '.join(supported)}", + ) + + voice = LANG_TO_VOICE[lang_lower] + + # Read speaker_wav (mandatory per spec) + # Note: Current VITS models don't support voice cloning, but we accept the file + # for API compatibility and validation. In future, this could be used for voice adaptation. 
+ try: + speaker_audio_bytes = await speaker_wav.read() + logger.info( + f"Received speaker reference WAV: {len(speaker_audio_bytes)} bytes, filename: {speaker_wav.filename}" + ) + # Validate it's a valid audio file (basic check) + if len(speaker_audio_bytes) < 44: # Minimum WAV header size + raise HTTPException( + status_code=400, + detail="Invalid speaker_wav: file too small to be a valid WAV", + ) + except HTTPException: + raise + except Exception as e: + logger.error(f"Could not read speaker_wav: {e}") + raise HTTPException( + status_code=400, detail=f"Failed to read speaker_wav file: {str(e)}" + ) + + try: + # Synthesize audio + output = engine.synthesize( + text=text, + voice=voice, + speed=1.0, + normalize_text=True, + ) + + # Convert to WAV bytes + buffer = io.BytesIO() + sf.write(buffer, output.audio, output.sample_rate, format="WAV") + buffer.seek(0) + + # Return as streaming response (per spec) + return StreamingResponse( + buffer, + media_type="audio/wav", + headers={ + "Content-Disposition": "attachment; filename=output.wav", + "X-Duration": str(output.duration), + "X-Sample-Rate": str(output.sample_rate), + "X-Language": lang, + "X-Voice": voice, + }, + ) + + except Exception as e: + logger.error(f"Synthesis error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/preload") +async def preload_voice(voice: str): + """Preload a voice model into memory""" + engine = get_engine() + + if voice not in LANGUAGE_CONFIGS: + raise HTTPException(status_code=400, detail=f"Unknown voice: {voice}") + + try: + engine.load_voice(voice) + return {"message": f"Voice {voice} loaded successfully"} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/unload") +async def unload_voice(voice: str): + """Unload a voice model from memory""" + engine = get_engine() + engine.unload_voice(voice) + return {"message": f"Voice {voice} unloaded"} + + +@app.post("/batch") +async def batch_synthesize( + texts: 
List[str], voice: str = "hi_male", speed: float = 1.0 +): + """ + Synthesize multiple texts + + Returns a list of base64-encoded audio + """ + import base64 + + engine = get_engine() + + if voice not in LANGUAGE_CONFIGS: + raise HTTPException(status_code=400, detail=f"Unknown voice: {voice}") + + results = [] + for text in texts: + output = engine.synthesize(text, voice, speed) + + buffer = io.BytesIO() + sf.write(buffer, output.audio, output.sample_rate, format="WAV") + buffer.seek(0) + + results.append( + { + "text": text, + "audio_base64": base64.b64encode(buffer.read()).decode(), + "duration": output.duration, + } + ) + + return results + + +# Startup/Shutdown events +@app.on_event("startup") +async def startup_event(): + """Initialize on startup""" + logger.info("Starting TTS API server...") + # Optionally preload default voice + # get_engine().load_voice("hi_male") + + +@app.on_event("shutdown") +async def shutdown_event(): + """Cleanup on shutdown""" + logger.info("Shutting down TTS API server...") + + +def start_server(host: str = "0.0.0.0", port: int = 8000, reload: bool = False): + """Start the API server""" + import uvicorn + + uvicorn.run("src.api:app", host=host, port=port, reload=reload, log_level="info") + + +if __name__ == "__main__": + start_server() diff --git a/src/cli.py b/src/cli.py new file mode 100644 index 0000000000000000000000000000000000000000..de24368addf29293771dc899f1a8ba1c8e0dc8d3 --- /dev/null +++ b/src/cli.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python +""" +CLI for Voice Tech for All TTS System +""" +import argparse +import sys +import os + + +def main(): + parser = argparse.ArgumentParser( + description="Voice Tech for All - Multi-lingual TTS System", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Download Hindi models + python -m src.cli download --lang hi + + # Download all models + python -m src.cli download --all + + # Synthesize text + python -m src.cli synthesize --text "नमस्ते" --voice 
hi_male --output hello.wav + + # Start API server + python -m src.cli serve --port 8000 + + # List available voices + python -m src.cli list + """, + ) + + subparsers = parser.add_subparsers(dest="command", help="Commands") + + # Download command + download_parser = subparsers.add_parser("download", help="Download TTS models") + download_parser.add_argument("--voice", type=str, help="Specific voice to download") + download_parser.add_argument( + "--lang", type=str, help="Download all voices for a language" + ) + download_parser.add_argument( + "--all", action="store_true", help="Download all models" + ) + download_parser.add_argument( + "--force", action="store_true", help="Force re-download" + ) + + # Synthesize command + synth_parser = subparsers.add_parser("synthesize", help="Synthesize text to speech") + synth_parser.add_argument( + "--text", "-t", type=str, required=True, help="Text to synthesize" + ) + synth_parser.add_argument( + "--voice", "-v", type=str, default="hi_male", help="Voice to use" + ) + synth_parser.add_argument( + "--output", "-o", type=str, default="output.wav", help="Output file" + ) + synth_parser.add_argument( + "--speed", "-s", type=float, default=1.0, help="Speech speed" + ) + + # Serve command + serve_parser = subparsers.add_parser("serve", help="Start API server") + serve_parser.add_argument( + "--host", type=str, default="0.0.0.0", help="Host to bind" + ) + serve_parser.add_argument( + "--port", "-p", type=int, default=8000, help="Port to bind" + ) + serve_parser.add_argument( + "--reload", action="store_true", help="Enable auto-reload" + ) + + # List command + list_parser = subparsers.add_parser("list", help="List available voices") + + args = parser.parse_args() + + if args.command == "download": + from src.downloader import ModelDownloader + + downloader = ModelDownloader() + + if args.voice: + downloader.download_model(args.voice, force=args.force) + elif args.lang: + downloader.download_language(args.lang, force=args.force) + 
elif args.all: + downloader.download_all_models(force=args.force) + else: + download_parser.print_help() + + elif args.command == "synthesize": + from src.engine import TTSEngine + + engine = TTSEngine() + + print(f"Synthesizing: {args.text}") + print(f"Voice: {args.voice}") + + output_path = engine.synthesize_to_file( + text=args.text, output_path=args.output, voice=args.voice, speed=args.speed + ) + print(f"Saved to: {output_path}") + + elif args.command == "serve": + from src.api import start_server + + print(f"Starting server on {args.host}:{args.port}") + start_server(host=args.host, port=args.port, reload=args.reload) + + elif args.command == "list": + from src.config import LANGUAGE_CONFIGS + from src.downloader import ModelDownloader + + downloader = ModelDownloader() + + print("\n📢 Available TTS Voices:\n") + print(f"{'Voice Key':<15} {'Language':<15} {'Gender':<10} {'Downloaded':<12}") + print("-" * 55) + + for key, config in LANGUAGE_CONFIGS.items(): + downloaded = "✓" if downloader.get_model_path(key) else "✗" + gender = "Male" if "male" in key else "Female" + print(f"{key:<15} {config.name:<15} {gender:<10} {downloaded:<12}") + + print(f"\nTotal: {len(LANGUAGE_CONFIGS)} voices") + + else: + parser.print_help() + + +if __name__ == "__main__": + main() diff --git a/src/config.py b/src/config.py new file mode 100644 index 0000000000000000000000000000000000000000..6de328486f419795bc0ff6df2a783c99204c87b4 --- /dev/null +++ b/src/config.py @@ -0,0 +1,211 @@ +""" +Configuration for SYSPIN Multi-lingual TTS System +""" + +from dataclasses import dataclass +from typing import Dict, Optional +import os + +# Base path for models +MODELS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "models") + + +@dataclass +class LanguageConfig: + """Configuration for each language""" + + name: str + code: str + hf_model_id: str + model_filename: str + chars_filename: str = "chars.txt" + sample_rate: int = 22050 + + +# All SYSPIN models available +# JIT traced 
format (.pt + chars.txt): Hindi, Bengali, Marathi, Telugu, Kannada, etc. +# Coqui TTS checkpoints (.pth + config.json): Bhojpuri +LANGUAGE_CONFIGS: Dict[str, LanguageConfig] = { + # Hindi + "hi_male": LanguageConfig( + name="Hindi", + code="hi", + hf_model_id="SYSPIN/tts_vits_coquiai_HindiMale", + model_filename="hi_male_vits_30hrs.pt", + ), + "hi_female": LanguageConfig( + name="Hindi", + code="hi", + hf_model_id="SYSPIN/tts_vits_coquiai_HindiFemale", + model_filename="hi_female_vits_30hrs.pt", + ), + # Bengali + "bn_male": LanguageConfig( + name="Bengali", + code="bn", + hf_model_id="SYSPIN/tts_vits_coquiai_BengaliMale", + model_filename="bn_male_vits_30hrs.pt", + ), + "bn_female": LanguageConfig( + name="Bengali", + code="bn", + hf_model_id="SYSPIN/tts_vits_coquiai_BengaliFemale", + model_filename="bn_female_vits_30hrs.pt", + ), + # Marathi + "mr_male": LanguageConfig( + name="Marathi", + code="mr", + hf_model_id="SYSPIN/tts_vits_coquiai_MarathiMale", + model_filename="mr_male_vits_30hrs.pt", + ), + "mr_female": LanguageConfig( + name="Marathi", + code="mr", + hf_model_id="SYSPIN/tts_vits_coquiai_MarathiFemale", + model_filename="mr_female_vits_30hrs.pt", + ), + # Telugu + "te_male": LanguageConfig( + name="Telugu", + code="te", + hf_model_id="SYSPIN/tts_vits_coquiai_TeluguMale", + model_filename="te_male_vits_30hrs.pt", + ), + "te_female": LanguageConfig( + name="Telugu", + code="te", + hf_model_id="SYSPIN/tts_vits_coquiai_TeluguFemale", + model_filename="te_female_vits_30hrs.pt", + ), + # Kannada + "kn_male": LanguageConfig( + name="Kannada", + code="kn", + hf_model_id="SYSPIN/tts_vits_coquiai_KannadaMale", + model_filename="kn_male_vits_30hrs.pt", + ), + "kn_female": LanguageConfig( + name="Kannada", + code="kn", + hf_model_id="SYSPIN/tts_vits_coquiai_KannadaFemale", + model_filename="kn_female_vits_30hrs.pt", + ), + # Bhojpuri (Coqui TTS checkpoint format) + "bho_male": LanguageConfig( + name="Bhojpuri", + code="bho", + 
hf_model_id="SYSPIN/tts_vits_coquiai_BhojpuriMale", + model_filename="checkpoint_200000.pth", + ), + "bho_female": LanguageConfig( + name="Bhojpuri", + code="bho", + hf_model_id="SYSPIN/tts_vits_coquiai_BhojpuriFemale", + model_filename="checkpoint_340000.pth", + ), + # Chhattisgarhi (ISO 639-3: hne) + "hne_male": LanguageConfig( + name="Chhattisgarhi", + code="hne", + hf_model_id="SYSPIN/tts_vits_coquiai_ChhattisgarhiMale", + model_filename="ch_male_vits_30hrs.pt", + ), + "hne_female": LanguageConfig( + name="Chhattisgarhi", + code="hne", + hf_model_id="SYSPIN/tts_vits_coquiai_ChhattisgarhiFemale", + model_filename="ch_female_vits_30hrs.pt", + ), + # Maithili (ISO 639-3: mai) + "mai_male": LanguageConfig( + name="Maithili", + code="mai", + hf_model_id="SYSPIN/tts_vits_coquiai_MaithiliMale", + model_filename="mt_male_vits_30hrs.pt", + ), + "mai_female": LanguageConfig( + name="Maithili", + code="mai", + hf_model_id="SYSPIN/tts_vits_coquiai_MaithiliFemale", + model_filename="mt_female_vits_30hrs.pt", + ), + # Magahi (ISO 639-3: mag) + "mag_male": LanguageConfig( + name="Magahi", + code="mag", + hf_model_id="SYSPIN/tts_vits_coquiai_MagahiMale", + model_filename="mg_male_vits_30hrs.pt", + ), + "mag_female": LanguageConfig( + name="Magahi", + code="mag", + hf_model_id="SYSPIN/tts_vits_coquiai_MagahiFemale", + model_filename="mg_female_vits_30hrs.pt", + ), + # English + "en_male": LanguageConfig( + name="English", + code="en", + hf_model_id="SYSPIN/tts_vits_coquiai_EnglishMale", + model_filename="en_male_vits_30hrs.pt", + ), + "en_female": LanguageConfig( + name="English", + code="en", + hf_model_id="SYSPIN/tts_vits_coquiai_EnglishFemale", + model_filename="en_female_vits_30hrs.pt", + ), + # Gujarati - Using Facebook MMS model (1100+ languages) + "gu_mms": LanguageConfig( + name="Gujarati", + code="gu", + hf_model_id="facebook/mms-tts-guj", + model_filename="mms_guj.pt", + sample_rate=16000, # MMS uses 16kHz + ), +} + + +# Style presets for prosody control 
def get_available_voices() -> Dict[str, Dict]:
    """Return metadata for every configured voice.

    Returns:
        Mapping of voice key (e.g. "hi_male") to a dict with the language
        ``name``, language ``code`` and detected ``gender``.
    """
    return {
        key: {
            "name": config.name,
            "code": config.code,
            # Check "female" before "male": "male" is a substring of
            # "female", so the previous `"male" in key` test misclassified
            # every *_female voice as male.
            "gender": (
                "female"
                if "female" in key
                else ("male" if "male" in key else "neutral")
            ),
        }
        for key, config in LANGUAGE_CONFIGS.items()
    }
self.models_dir.mkdir(parents=True, exist_ok=True) + + def download_model(self, voice_key: str, force: bool = False) -> Path: + """ + Download a specific voice model + + Args: + voice_key: Key from LANGUAGE_CONFIGS (e.g., 'hi_male', 'bn_female') + force: Re-download even if exists + + Returns: + Path to downloaded model directory + """ + if voice_key not in LANGUAGE_CONFIGS: + raise ValueError( + f"Unknown voice: {voice_key}. Available: {list(LANGUAGE_CONFIGS.keys())}" + ) + + config = LANGUAGE_CONFIGS[voice_key] + model_dir = self.models_dir / voice_key + + # Check if already downloaded + model_path = model_dir / config.model_filename + chars_path = model_dir / config.chars_filename + extra_path = model_dir / "extra.py" + + if not force and model_path.exists() and chars_path.exists(): + logger.info(f"Model {voice_key} already downloaded at {model_dir}") + return model_dir + + logger.info(f"Downloading {voice_key} from {config.hf_model_id}...") + + # Create model directory + model_dir.mkdir(parents=True, exist_ok=True) + + try: + # Download all files from the repo + snapshot_download( + repo_id=config.hf_model_id, + local_dir=str(model_dir), + local_dir_use_symlinks=False, + allow_patterns=["*.pt", "*.pth", "*.txt", "*.py", "*.json"], + ) + logger.info(f"Successfully downloaded {voice_key} to {model_dir}") + + except Exception as e: + logger.error(f"Failed to download {voice_key}: {e}") + raise + + return model_dir + + def download_all_models(self, force: bool = False) -> List[Path]: + """Download all available models""" + downloaded = [] + + for voice_key in tqdm(LANGUAGE_CONFIGS.keys(), desc="Downloading models"): + try: + path = self.download_model(voice_key, force=force) + downloaded.append(path) + except Exception as e: + logger.warning(f"Failed to download {voice_key}: {e}") + + return downloaded + + def download_language(self, lang_code: str, force: bool = False) -> List[Path]: + """Download all voices for a specific language""" + downloaded = [] + + for 
voice_key, config in LANGUAGE_CONFIGS.items(): + if config.code == lang_code: + try: + path = self.download_model(voice_key, force=force) + downloaded.append(path) + except Exception as e: + logger.warning(f"Failed to download {voice_key}: {e}") + + return downloaded + + def get_model_path(self, voice_key: str) -> Optional[Path]: + """Get path to a downloaded model""" + if voice_key not in LANGUAGE_CONFIGS: + return None + + config = LANGUAGE_CONFIGS[voice_key] + model_path = self.models_dir / voice_key / config.model_filename + + if model_path.exists(): + return model_path.parent + return None + + def list_downloaded_models(self) -> List[str]: + """List all downloaded models""" + downloaded = [] + + for voice_key, config in LANGUAGE_CONFIGS.items(): + model_path = self.models_dir / voice_key / config.model_filename + if model_path.exists(): + downloaded.append(voice_key) + + return downloaded + + def get_model_size(self, voice_key: str) -> Optional[int]: + """Get size of downloaded model in bytes""" + model_path = self.get_model_path(voice_key) + if not model_path: + return None + + total_size = 0 + for f in model_path.iterdir(): + if f.is_file(): + total_size += f.stat().st_size + + return total_size + + +def download_models_cli(): + """CLI entry point for downloading models""" + import argparse + + parser = argparse.ArgumentParser(description="Download SYSPIN TTS models") + parser.add_argument( + "--voice", type=str, help="Specific voice to download (e.g., hi_male)" + ) + parser.add_argument( + "--lang", type=str, help="Download all voices for a language (e.g., hi)" + ) + parser.add_argument("--all", action="store_true", help="Download all models") + parser.add_argument("--list", action="store_true", help="List available models") + parser.add_argument("--force", action="store_true", help="Force re-download") + + args = parser.parse_args() + + downloader = ModelDownloader() + + if args.list: + print("Available voices:") + for key, config in 
"""
Main TTS Engine for SYSPIN Multi-lingual TTS
Loads and runs VITS models for inference
Supports:
- JIT traced models (.pt) - Hindi, Bengali, Kannada, etc.
- Coqui TTS checkpoints (.pth) - Bhojpuri, etc.
- Facebook MMS models - Gujarati
Includes style/prosody control
"""

import logging
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np
import torch

from .config import LANGUAGE_CONFIGS, LanguageConfig, MODELS_DIR, STYLE_PRESETS
from .downloader import ModelDownloader
from .tokenizer import CharactersConfig, TextNormalizer, TTSTokenizer

# Module-level logger. The original assigned this twice in a row; the
# duplicate statement has been removed.
logger = logging.getLogger(__name__)
pitch + """ + if pitch_factor == 1.0: + return audio + + try: + import librosa + + # Pitch shift in semitones + semitones = 12 * np.log2(pitch_factor) + shifted = librosa.effects.pitch_shift( + audio.astype(np.float32), sr=sample_rate, n_steps=semitones + ) + return shifted + except ImportError: + # Fallback: simple resampling-based pitch shift (changes duration slightly) + from scipy import signal + + # Resample to change pitch, then resample back to original length + stretched = signal.resample(audio, int(len(audio) / pitch_factor)) + return signal.resample(stretched, len(audio)) + + @staticmethod + def apply_speed_change( + audio: np.ndarray, sample_rate: int, speed_factor: float + ) -> np.ndarray: + """ + Change speed/tempo without changing pitch + speed_factor > 1.0 = faster, < 1.0 = slower + """ + if speed_factor == 1.0: + return audio + + try: + import librosa + + # Time stretch + stretched = librosa.effects.time_stretch( + audio.astype(np.float32), rate=speed_factor + ) + return stretched + except ImportError: + # Fallback: simple resampling (will also change pitch) + from scipy import signal + + target_length = int(len(audio) / speed_factor) + return signal.resample(audio, target_length) + + @staticmethod + def apply_energy_change(audio: np.ndarray, energy_factor: float) -> np.ndarray: + """ + Modify audio energy/volume + energy_factor > 1.0 = louder, < 1.0 = softer + """ + if energy_factor == 1.0: + return audio + + # Apply gain with soft clipping to avoid distortion + modified = audio * energy_factor + + # Soft clip using tanh for natural sound + if energy_factor > 1.0: + max_val = np.max(np.abs(modified)) + if max_val > 0.95: + modified = np.tanh(modified * 2) * 0.95 + + return modified + + @staticmethod + def apply_style( + audio: np.ndarray, + sample_rate: int, + speed: float = 1.0, + pitch: float = 1.0, + energy: float = 1.0, + ) -> np.ndarray: + """Apply all style modifications""" + result = audio + + # Apply in order: pitch -> speed -> energy + if 
pitch != 1.0: + result = StyleProcessor.apply_pitch_shift(result, sample_rate, pitch) + + if speed != 1.0: + result = StyleProcessor.apply_speed_change(result, sample_rate, speed) + + if energy != 1.0: + result = StyleProcessor.apply_energy_change(result, energy) + + return result + + @staticmethod + def get_preset(preset_name: str) -> Dict[str, float]: + """Get style parameters from preset name""" + return STYLE_PRESETS.get(preset_name, STYLE_PRESETS["default"]) + + +class TTSEngine: + """ + Multi-lingual TTS Engine using SYSPIN VITS models + + Supports 11 Indian languages with male/female voices: + - Hindi, Bengali, Marathi, Telugu, Kannada + - Bhojpuri, Chhattisgarhi, Maithili, Magahi, English + - Gujarati (via Facebook MMS) + + Features: + - Style/prosody control (pitch, speed, energy) + - Preset styles (happy, sad, calm, excited, etc.) + - JIT traced models (.pt) and Coqui TTS checkpoints (.pth) + """ + + def __init__( + self, + models_dir: str = MODELS_DIR, + device: str = "auto", + preload_voices: Optional[List[str]] = None, + ): + """ + Initialize TTS Engine + + Args: + models_dir: Directory containing downloaded models + device: Device to run inference on ('cpu', 'cuda', 'mps', or 'auto') + preload_voices: List of voice keys to preload into memory + """ + self.models_dir = Path(models_dir) + self.device = self._get_device(device) + + # Model cache - JIT traced models (.pt) + self._models: Dict[str, torch.jit.ScriptModule] = {} + self._tokenizers: Dict[str, TTSTokenizer] = {} + + # Coqui TTS models cache (.pth checkpoints) + self._coqui_models: Dict[str, Any] = {} # Stores Synthesizer objects + + # MMS models cache (separate handling) + self._mms_models: Dict[str, Any] = {} + self._mms_tokenizers: Dict[str, Any] = {} + + # Downloader + self.downloader = ModelDownloader(models_dir) + + # Text normalizer + self.normalizer = TextNormalizer() + + # Style processor + self.style_processor = StyleProcessor() + + # Preload specified voices + if preload_voices: + 
for voice in preload_voices: + self.load_voice(voice) + + logger.info(f"TTS Engine initialized on device: {self.device}") + + def _get_device(self, device: str) -> torch.device: + """Determine the best device for inference""" + if device == "auto": + if torch.cuda.is_available(): + return torch.device("cuda") + # MPS has compatibility issues with some TorchScript models + # Using CPU for now - still fast on Apple Silicon + # elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available(): + # return torch.device("mps") + else: + return torch.device("cpu") + return torch.device(device) + + def load_voice(self, voice_key: str, download_if_missing: bool = True) -> bool: + """ + Load a voice model into memory + + Args: + voice_key: Key from LANGUAGE_CONFIGS (e.g., 'hi_male') + download_if_missing: Download model if not found locally + + Returns: + True if loaded successfully + """ + # Check if already loaded + if voice_key in self._models or voice_key in self._coqui_models: + return True + + if voice_key not in LANGUAGE_CONFIGS: + raise ValueError(f"Unknown voice: {voice_key}") + + config = LANGUAGE_CONFIGS[voice_key] + model_dir = self.models_dir / voice_key + + # Check if model exists, download if needed + if not model_dir.exists(): + if download_if_missing: + logger.info(f"Model not found, downloading {voice_key}...") + self.downloader.download_model(voice_key) + else: + raise FileNotFoundError(f"Model directory not found: {model_dir}") + + # Check for Coqui TTS checkpoint (.pth) vs JIT traced model (.pt) + pth_files = list(model_dir.glob("*.pth")) + pt_files = list(model_dir.glob("*.pt")) + + if pth_files: + # Load as Coqui TTS checkpoint + return self._load_coqui_voice(voice_key, model_dir, pth_files[0]) + elif pt_files: + # Load as JIT traced model + return self._load_jit_voice(voice_key, model_dir, pt_files[0]) + else: + raise FileNotFoundError(f"No .pt or .pth model file found in {model_dir}") + + def _load_jit_voice( + self, voice_key: str, 
model_dir: Path, model_path: Path + ) -> bool: + """ + Load a JIT traced VITS model (.pt file) + """ + # Load tokenizer + chars_path = model_dir / "chars.txt" + if chars_path.exists(): + tokenizer = TTSTokenizer.from_chars_file(str(chars_path)) + else: + # Try to find chars file + chars_files = list(model_dir.glob("*chars*.txt")) + if chars_files: + tokenizer = TTSTokenizer.from_chars_file(str(chars_files[0])) + else: + raise FileNotFoundError(f"No chars.txt found in {model_dir}") + + # Load model + logger.info(f"Loading JIT model from {model_path}") + model = torch.jit.load(str(model_path), map_location=self.device) + model.eval() + + # Cache model and tokenizer + self._models[voice_key] = model + self._tokenizers[voice_key] = tokenizer + + logger.info(f"Loaded JIT voice: {voice_key}") + return True + + def _load_coqui_voice( + self, voice_key: str, model_dir: Path, checkpoint_path: Path + ) -> bool: + """ + Load a Coqui TTS checkpoint model (.pth file) + """ + config_path = model_dir / "config.json" + if not config_path.exists(): + raise FileNotFoundError(f"No config.json found in {model_dir}") + + try: + from TTS.utils.synthesizer import Synthesizer + + logger.info(f"Loading Coqui TTS checkpoint from {checkpoint_path}") + + # Create synthesizer with checkpoint and config + use_cuda = self.device.type == "cuda" + synthesizer = Synthesizer( + tts_checkpoint=str(checkpoint_path), + tts_config_path=str(config_path), + use_cuda=use_cuda, + ) + + # Cache synthesizer + self._coqui_models[voice_key] = synthesizer + + logger.info(f"Loaded Coqui voice: {voice_key}") + return True + + except ImportError: + raise ImportError( + "Coqui TTS library not installed. " "Install it with: pip install TTS" + ) + + def _synthesize_coqui(self, text: str, voice_key: str) -> Tuple[np.ndarray, int]: + """ + Synthesize using Coqui TTS model (for Bhojpuri etc.) 
+ """ + if voice_key not in self._coqui_models: + self.load_voice(voice_key) + + synthesizer = self._coqui_models[voice_key] + config = LANGUAGE_CONFIGS[voice_key] + + # Generate audio + wav = synthesizer.tts(text) + + # Convert to numpy array + audio_np = np.array(wav, dtype=np.float32) + sample_rate = synthesizer.output_sample_rate + + return audio_np, sample_rate + + def _load_mms_voice(self, voice_key: str) -> bool: + """ + Load Facebook MMS model for Gujarati + """ + if voice_key in self._mms_models: + return True + + config = LANGUAGE_CONFIGS[voice_key] + logger.info(f"Loading MMS model: {config.hf_model_id}") + + try: + from transformers import VitsModel, AutoTokenizer + + # Load model and tokenizer from HuggingFace + model = VitsModel.from_pretrained(config.hf_model_id) + tokenizer = AutoTokenizer.from_pretrained(config.hf_model_id) + + model = model.to(self.device) + model.eval() + + self._mms_models[voice_key] = model + self._mms_tokenizers[voice_key] = tokenizer + + logger.info(f"Loaded MMS voice: {voice_key}") + return True + + except Exception as e: + logger.error(f"Failed to load MMS model: {e}") + raise + + def _synthesize_mms(self, text: str, voice_key: str) -> Tuple[np.ndarray, int]: + """ + Synthesize using Facebook MMS model (for Gujarati) + """ + if voice_key not in self._mms_models: + self._load_mms_voice(voice_key) + + model = self._mms_models[voice_key] + tokenizer = self._mms_tokenizers[voice_key] + config = LANGUAGE_CONFIGS[voice_key] + + # Tokenize + inputs = tokenizer(text, return_tensors="pt") + inputs = {k: v.to(self.device) for k, v in inputs.items()} + + # Generate + with torch.no_grad(): + output = model(**inputs) + + # Get audio + audio = output.waveform.squeeze().cpu().numpy() + return audio, config.sample_rate + + def unload_voice(self, voice_key: str): + """Unload a voice to free memory""" + if voice_key in self._models: + del self._models[voice_key] + del self._tokenizers[voice_key] + if voice_key in self._coqui_models: + del 
    def synthesize(
        self,
        text: str,
        voice: str = "hi_male",
        speed: float = 1.0,
        pitch: float = 1.0,
        energy: float = 1.0,
        style: Optional[str] = None,
        normalize_text: bool = True,
    ) -> TTSOutput:
        """
        Synthesize speech from text with style control.

        Dispatches to one of three backends based on the voice key: MMS
        (key contains "mms"), Coqui checkpoint, or JIT-traced VITS.

        Args:
            text: Input text to synthesize
            voice: Voice key (e.g., 'hi_male', 'bn_female', 'gu_mms')
            speed: Speech speed multiplier (0.5-2.0 suggested; not enforced)
            pitch: Pitch multiplier (0.5-2.0 suggested), >1 = higher
            energy: Energy/volume multiplier (0.5-2.0 suggested)
            style: Style preset name (e.g., 'happy', 'sad', 'calm');
                preset values MULTIPLY the explicit speed/pitch/energy args
            normalize_text: Whether to apply text normalization

        Returns:
            TTSOutput with audio array and metadata

        Raises:
            KeyError: if ``voice`` is not a key of LANGUAGE_CONFIGS.
        """
        # Apply style preset if specified (unknown preset names are
        # silently ignored and leave the explicit multipliers untouched).
        if style and style in STYLE_PRESETS:
            preset = STYLE_PRESETS[style]
            speed = speed * preset["speed"]
            pitch = pitch * preset["pitch"]
            energy = energy * preset["energy"]

        config = LANGUAGE_CONFIGS[voice]

        # Normalize text (numbers, punctuation, whitespace).
        if normalize_text:
            text = self.normalizer.clean_text(text, config.code)

        # Check if this is an MMS model (Gujarati)
        if "mms" in voice:
            audio_np, sample_rate = self._synthesize_mms(text, voice)
        # Check if this is a Coqui TTS model (Bhojpuri etc.)
        elif voice in self._coqui_models:
            audio_np, sample_rate = self._synthesize_coqui(text, voice)
        else:
            # Try to load the voice (will determine JIT vs Coqui)
            if voice not in self._models and voice not in self._coqui_models:
                self.load_voice(voice)

            # Check again after loading — load_voice may have registered the
            # voice as a Coqui checkpoint rather than a JIT model.
            if voice in self._coqui_models:
                audio_np, sample_rate = self._synthesize_coqui(text, voice)
            else:
                # Use JIT model (SYSPIN models)
                model = self._models[voice]
                tokenizer = self._tokenizers[voice]

                # Tokenize to id sequence (with interspersed blank tokens),
                # shape (1, seq_len) for the model.
                token_ids = tokenizer.text_to_ids(text)
                x = torch.from_numpy(np.array(token_ids)).unsqueeze(0).to(self.device)

                # Generate audio
                with torch.no_grad():
                    audio = model(x)

                audio_np = audio.squeeze().cpu().numpy()
                sample_rate = config.sample_rate

        # Apply style modifications (pitch, speed, energy) as audio
        # post-processing, regardless of which backend produced the audio.
        audio_np = self.style_processor.apply_style(
            audio_np, sample_rate, speed=speed, pitch=pitch, energy=energy
        )

        # Calculate duration AFTER style processing (speed changes length).
        duration = len(audio_np) / sample_rate

        return TTSOutput(
            audio=audio_np,
            sample_rate=sample_rate,
            duration=duration,
            voice=voice,
            text=text,
            style=style,
        )
output_path + + def get_loaded_voices(self) -> List[str]: + """Get list of currently loaded voices""" + return ( + list(self._models.keys()) + + list(self._coqui_models.keys()) + + list(self._mms_models.keys()) + ) + + def get_available_voices(self) -> Dict[str, Dict]: + """Get all available voices with their status""" + voices = {} + for key, config in LANGUAGE_CONFIGS.items(): + is_mms = "mms" in key + model_dir = self.models_dir / key + + # Determine model type + if is_mms: + model_type = "mms" + elif model_dir.exists() and list(model_dir.glob("*.pth")): + model_type = "coqui" + else: + model_type = "vits" + + voices[key] = { + "name": config.name, + "code": config.code, + "gender": ( + "male" + if "male" in key + else ("female" if "female" in key else "neutral") + ), + "loaded": key in self._models + or key in self._coqui_models + or key in self._mms_models, + "downloaded": is_mms or self.downloader.get_model_path(key) is not None, + "type": model_type, + } + return voices + + def get_style_presets(self) -> Dict[str, Dict]: + """Get available style presets""" + return STYLE_PRESETS + + def batch_synthesize( + self, texts: List[str], voice: str = "hi_male", speed: float = 1.0 + ) -> List[TTSOutput]: + """Synthesize multiple texts""" + return [self.synthesize(text, voice, speed) for text in texts] + + +# Convenience function +def synthesize( + text: str, voice: str = "hi_male", output_path: Optional[str] = None +) -> Union[TTSOutput, str]: + """ + Quick synthesis function + + Args: + text: Text to synthesize + voice: Voice key + output_path: If provided, saves to file and returns path + + Returns: + TTSOutput if no output_path, else path to saved file + """ + engine = TTSEngine() + + if output_path: + return engine.synthesize_to_file(text, output_path, voice) + return engine.synthesize(text, voice) diff --git a/src/tokenizer.py b/src/tokenizer.py new file mode 100644 index 0000000000000000000000000000000000000000..2409ca8d686d0c5cb3ea5f7a74995822491a433e --- 
@dataclass
class CharactersConfig:
    """Character configuration for tokenizer.

    NOTE(review): TTSTokenizer builds its vocabulary from plain strings and
    does not read this dataclass directly — it appears to exist for config
    compatibility; confirm before removing.
    """

    # Raw character set (typically the content of chars.txt).
    characters: str = ""
    # Punctuation characters prepended to the vocabulary.
    punctuations: str = VITS_PUNCTUATIONS
    # Padding token; empty string matches the SYSPIN JIT models.
    pad: str = ""
    # End-of-sequence token (not used by the vocab construction here).
    eos: Optional[str] = None
    # Beginning-of-sequence token (not used by the vocab construction here).
    bos: Optional[str] = None
    # Blank token interspersed between characters at encode time.
    blank: str = ""
    # Optional phoneme inventory (not used by SYSPIN voices).
    phonemes: Optional[str] = None
    def _build_vocab(self):
        """
        Build vocabulary EXACTLY matching VitsCharacters._create_vocab():
        self._vocab = [self._pad] + list(self._punctuations) + list(self._characters) + [self._blank]

        The id mappings are built by simple overwriting, so if a symbol
        occurs more than once in the vocab (e.g. pad and blank are both ""
        by default, or a char appears in both punctuations and characters),
        char_to_id resolves to the LAST occurrence. In particular, with the
        default pad="" and blank="", pad_id == blank_id == vocab_size - 1.
        """
        self.vocab: List[str] = []
        self.char_to_id: Dict[str, int] = {}
        self.id_to_char: Dict[int, str] = {}

        # Build vocab in exact order — model weights depend on these ids.
        # 1. PAD token
        self.vocab.append(self.pad)

        # 2. Punctuations
        for char in self.punctuations:
            self.vocab.append(char)

        # 3. Characters from chars.txt
        for char in self.characters:
            self.vocab.append(char)

        # 4. BLANK token
        self.vocab.append(self.blank)

        # Build mappings (later duplicates overwrite earlier ids).
        for idx, char in enumerate(self.vocab):
            self.char_to_id[char] = idx
            self.id_to_char[idx] = char

        self.vocab_size = len(self.vocab)
        self.pad_id = self.char_to_id[self.pad]
        self.blank_id = self.char_to_id[self.blank]
Intersperse blank tokens + """ + # Apply multilingual_cleaners + text = self._clean_text(text) + + # Encode characters to IDs + char_ids = [] + for char in text: + if char in self.char_to_id: + char_ids.append(self.char_to_id[char]) + # Skip unknown characters (matching original behavior) + + # Intersperse blank tokens + if add_blank: + result = [self.blank_id] * (len(char_ids) * 2 + 1) + result[1::2] = char_ids + return result + + return char_ids + + def ids_to_text(self, ids: List[int]) -> str: + """Convert token IDs back to text""" + chars = [] + for idx in ids: + if idx in self.id_to_char: + char = self.id_to_char[idx] + if char not in [self.pad, self.blank]: + chars.append(char) + return "".join(chars) + + def _clean_text(self, text: str) -> str: + """ + Text cleaning matching multilingual_cleaners from extra.py: + 1. lowercase + 2. replace_symbols + 3. remove_aux_symbols + 4. collapse_whitespace + """ + text = text.lower() + text = self._replace_symbols(text) + text = self._remove_aux_symbols(text) + text = re.sub(r"\s+", " ", text).strip() + return text + + def _replace_symbols(self, text: str) -> str: + """Replace symbols matching extra.py replace_symbols()""" + text = text.replace(";", ",") + text = text.replace("-", " ") + text = text.replace(":", ",") + return text + + def _remove_aux_symbols(self, text: str) -> str: + """Remove auxiliary symbols matching extra.py remove_aux_symbols()""" + text = re.sub(r"[\<\>\(\)\[\]\"]+", "", text) + return text + + @classmethod + def from_chars_file(cls, chars_file: str) -> "TTSTokenizer": + """ + Create tokenizer from chars.txt file. + + This matches the jit_infer.py setup: + - characters = content of chars.txt + - punctuations = "!¡'(),-.:;¿? 
" (standard VITS punctuations) + + Vocab will be: [] + punctuations + characters + [] + """ + with open(chars_file, "r", encoding="utf-8") as f: + characters = f.read().strip("\n") + + return cls( + characters=characters, + punctuations=VITS_PUNCTUATIONS, + pad="", + blank="", + ) + + +class TextNormalizer: + """Text normalizer for Indian languages""" + + @staticmethod + def normalize_numbers(text: str, lang: str = "hi") -> str: + """Convert numbers to words""" + pattern = r"\{(\d+)\}\{([^}]+)\}" + text = re.sub(pattern, r"\2", text) + return text + + @staticmethod + def normalize_punctuation(text: str) -> str: + """Normalize punctuation marks""" + text = re.sub(r'["""]', '"', text) + text = re.sub(r"[''']", "'", text) + text = re.sub(r"[–—]", "-", text) + return text + + @staticmethod + def clean_text(text: str, lang: str = "hi") -> str: + """Full text cleaning pipeline""" + text = TextNormalizer.normalize_numbers(text, lang) + text = TextNormalizer.normalize_punctuation(text) + text = re.sub(r"\s+", " ", text).strip() + return text diff --git a/technical_report.md b/technical_report.md new file mode 100644 index 0000000000000000000000000000000000000000..2a061cb7b3c39bc594e47cabc97468832d036009 --- /dev/null +++ b/technical_report.md @@ -0,0 +1,410 @@ +# Voice Tech for All: Technical Report + +## Multi-lingual Text-to-Speech System with Style Transfer + +**Hackathon**: Voice Tech for All +**Date**: December 2025 + +--- + +## Executive Summary + +We present a **multi-lingual Text-to-Speech (TTS) system** supporting **11 Indian languages** with **style/prosody control** capabilities. The system is designed for deployment as a healthcare assistant for pregnant mothers in low-income communities, making health information accessible in native languages. 
+ +### Key Achievements + +| Metric | Value | +| ---------------------- | ----------------------------------------------------------------------------------------------------------- | +| Languages Supported | 11 (Hindi, Bengali, Marathi, Telugu, Kannada, Bhojpuri, Chhattisgarhi, Maithili, Magahi, English, Gujarati) | +| Voice Variants | 21 (male + female for each language) | +| Style Presets | 9 (default, slow, fast, soft, loud, happy, sad, calm, excited) | +| Average Inference Time | ~0.3s (CPU, Apple M2) | +| Model Size | ~300MB per voice (VITS), ~145MB (MMS) | +| API Latency | <500ms for typical sentences | + +--- + +## 1. System Architecture + +### 1.1 Overview + +``` +┌─────────────────────────────────────────────────────────────┐ +│ REST API Server (FastAPI) │ +├─────────────────────────────────────────────────────────────┤ +│ ┌──────────┐ ┌──────────────┐ ┌─────────────────────────┐│ +│ │/synthesize│ │ /voices │ │ /styles ││ +│ │ /stream │ │ /languages │ │ /health ││ +│ └──────────┘ └──────────────┘ └─────────────────────────┘│ +├─────────────────────────────────────────────────────────────┤ +│ TTS Engine │ +│ ┌─────────────────┐ ┌─────────────────┐ ┌──────────────┐ │ +│ │ Text Normalizer │→ │ Tokenizer │→ │ VITS/MMS │ │ +│ │ (Indian scripts)│ │ (char-to-ID) │ │ Inference │ │ +│ └─────────────────┘ └─────────────────┘ └──────────────┘ │ +│ ↓ │ +│ ┌─────────────────────────────────────────────────────────┐│ +│ │ Style Processor (Prosody Control) ││ +│ │ • Pitch Shifting (librosa) ││ +│ │ • Time Stretching (speed control) ││ +│ │ • Energy/Volume Modification ││ +│ └─────────────────────────────────────────────────────────┘│ +├─────────────────────────────────────────────────────────────┤ +│ Model Repository │ +│ ┌────────────────────┐ ┌────────────────────────────────┐ │ +│ │ SYSPIN VITS Models │ │ Facebook MMS Models │ │ +│ │ (10 languages) │ │ (Gujarati) │ │ +│ └────────────────────┘ └────────────────────────────────┘ │ 
+└─────────────────────────────────────────────────────────────┘ +``` + +### 1.2 Component Details + +#### Text Normalizer + +- Handles Indian script peculiarities +- Converts number notations: `{100}{एकसो}` → `एकसो` +- Normalizes punctuation across scripts +- Handles code-switching (Hindi in English text) + +#### VITS Models (SYSPIN) + +- **Architecture**: Conditional Variational Autoencoder with Adversarial Learning +- **Training Data**: 20-30 hours per speaker from IISc Bangalore +- **Output**: 22050 Hz, 16-bit PCM +- **Languages**: Hindi, Bengali, Marathi, Telugu, Kannada, Bhojpuri, Chhattisgarhi, Maithili, Magahi, English + +#### MMS Model (Facebook) + +- **Architecture**: VITS-based, trained on MMS corpus +- **Output**: 16000 Hz +- **Languages**: Gujarati (and 1100+ others available) +- **Model Size**: 145MB + +#### Style Processor + +- **Pitch Shifting**: Using librosa phase vocoder +- **Time Stretching**: WSOLA algorithm via librosa +- **Energy Control**: Soft clipping with tanh for natural sound + +--- + +## 2. 
API Specification + +### 2.1 Endpoints + +| Endpoint | Method | Description | +| -------------------- | ------ | -------------------------------- | +| `/` | GET | API info and documentation links | +| `/health` | GET | System health and loaded models | +| `/voices` | GET | List all available voices | +| `/languages` | GET | List supported languages | +| `/styles` | GET | List style presets | +| `/synthesize` | POST | Generate speech from text | +| `/synthesize/get` | GET | Simple synthesis (for testing) | +| `/synthesize/stream` | POST | Streaming audio response | +| `/preload` | POST | Preload voice into memory | +| `/batch` | POST | Batch synthesis | + +### 2.2 Synthesis Request + +```json +{ + "text": "નમસ્તે, હું તમારી કેવી રીતે મદદ કરી શકું?", + "voice": "gu_mms", + "speed": 1.0, + "pitch": 1.0, + "energy": 1.0, + "style": "calm", + "normalize": true +} +``` + +### 2.3 Style Presets + +| Preset | Speed | Pitch | Energy | Use Case | +| ------- | ----- | ----- | ------ | ---------------------- | +| default | 1.0 | 1.0 | 1.0 | Normal speech | +| slow | 0.75 | 1.0 | 1.0 | Elderly users, clarity | +| fast | 1.25 | 1.0 | 1.0 | Quick information | +| soft | 0.9 | 0.95 | 0.7 | Calming content | +| loud | 1.0 | 1.05 | 1.3 | Alerts, emphasis | +| happy | 1.1 | 1.1 | 1.2 | Positive messages | +| sad | 0.85 | 0.9 | 0.8 | Empathetic responses | +| calm | 0.9 | 0.95 | 0.85 | Healthcare guidance | +| excited | 1.2 | 1.15 | 1.3 | Celebrations | + +--- + +## 3. 
Supported Languages + +| Language | Code | Voices | Model Type | Sample Rate | +| ------------- | ---- | ------------ | ------------ | ----------- | +| Hindi | hi | Male, Female | SYSPIN VITS | 22050 Hz | +| Bengali | bn | Male, Female | SYSPIN VITS | 22050 Hz | +| Marathi | mr | Male, Female | SYSPIN VITS | 22050 Hz | +| Telugu | te | Male, Female | SYSPIN VITS | 22050 Hz | +| Kannada | kn | Male, Female | SYSPIN VITS | 22050 Hz | +| Bhojpuri | bho | Male, Female | SYSPIN VITS | 22050 Hz | +| Chhattisgarhi | hne | Male, Female | SYSPIN VITS | 22050 Hz | +| Maithili | mai | Male, Female | SYSPIN VITS | 22050 Hz | +| Magahi | mag | Male, Female | SYSPIN VITS | 22050 Hz | +| English | en | Male, Female | SYSPIN VITS | 22050 Hz | +| Gujarati | gu | Neutral | Facebook MMS | 16000 Hz | + +--- + +## 4. Implementation Details + +### 4.1 Technology Stack + +| Component | Technology | +| ----------------- | ---------------------------------------- | +| Backend Framework | FastAPI | +| ML Framework | PyTorch | +| TTS Models | VITS (Coqui AI / SYSPIN), MMS (Facebook) | +| Audio Processing | librosa, soundfile, scipy | +| Model Hub | Hugging Face Hub | +| API Documentation | OpenAPI/Swagger | + +### 4.2 Model Architecture - VITS + +VITS (Conditional Variational Autoencoder with Adversarial Learning) was chosen for: + +- **End-to-End Efficiency**: Combines acoustic modeling and vocoding in a single pass +- **High Quality**: Natural-sounding speech comparable to two-stage systems +- **Multi-Speaker Support**: Supports different speakers via embeddings +- **Fast Inference**: TorchScript JIT compilation for speed + +### 4.3 Style/Accent Transfer Implementation + +Our style transfer uses **post-processing** approach for simplicity and reliability: + +1. **Pitch Shifting**: Phase vocoder via librosa + + ```python + semitones = 12 * np.log2(pitch_factor) + shifted = librosa.effects.pitch_shift(audio, sr=sr, n_steps=semitones) + ``` + +2. 
**Time Stretching**: WSOLA algorithm + + ```python + stretched = librosa.effects.time_stretch(audio, rate=speed_factor) + ``` + +3. **Energy Control**: Soft clipping for natural sound + ```python + modified = audio * energy_factor + if energy_factor > 1.0: + modified = np.tanh(modified * 2) * 0.95 # Soft clip + ``` + +### 4.4 Key Design Decisions + +1. **TorchScript Models**: JIT-compiled for faster inference +2. **Lazy Loading**: Models loaded on-demand to minimize memory +3. **CPU Fallback**: Apple Silicon MPS compatibility issues handled +4. **Streaming Support**: Progressive audio delivery for real-time apps + +--- + +## 5. Usage Examples + +### 5.1 Python API + +```python +from src.engine import TTSEngine + +# Initialize engine +engine = TTSEngine(device="auto") + +# Basic synthesis +output = engine.synthesize( + text="गर्भावस्था में स्वस्थ आहार बहुत महत्वपूर्ण है", + voice="hi_female" +) + +# With style control +output = engine.synthesize( + text="आपका दिन शुभ हो", + voice="hi_male", + style="happy", + pitch=1.1 +) + +# Gujarati +output = engine.synthesize( + text="સ્વસ્થ રહો, ખુશ રહો", + voice="gu_mms", + style="calm" +) +``` + +### 5.2 REST API + +```bash +# Basic synthesis +curl -X POST "http://localhost:8000/synthesize" \ + -H "Content-Type: application/json" \ + -d '{"text": "नमस्ते", "voice": "hi_male"}' \ + --output speech.wav + +# With style +curl -X POST "http://localhost:8000/synthesize" \ + -H "Content-Type: application/json" \ + -d '{"text": "आपका स्वागत है", "voice": "hi_female", "style": "happy"}' \ + --output welcome.wav + +# Gujarati +curl -X POST "http://localhost:8000/synthesize" \ + -H "Content-Type: application/json" \ + -d '{"text": "નમસ્તે", "voice": "gu_mms"}' \ + --output gujarati.wav +``` + +### 5.3 Command Line + +```bash +# Download models +python -m src.cli download --voice hi_male +python -m src.cli download --lang hi # All Hindi voices + +# Synthesize +python -m src.cli synthesize --text "नमस्ते" --voice hi_male --output 
hello.wav + +# Start server +python -m src.cli serve --port 8000 +``` + +--- + +## 6. Healthcare Use Case + +### 6.1 Target Application + +The TTS system is designed for integration with an **LLM-based healthcare assistant** for pregnant mothers in low-income communities. + +### 6.2 Key Features for Healthcare + +1. **Multi-lingual Support**: Information in native languages +2. **Calm Style Preset**: Reassuring tone for medical guidance +3. **Slow Speed Option**: Clear pronunciation for instructions +4. **Low Latency**: Real-time conversational responses + +### 6.3 Example Healthcare Dialogue + +``` +User: "ગર્ભાવસ્થામાં શું ખાવું જોઈએ?" + +System Response (TTS with calm style in Gujarati): +"ગર્ભાવસ્થામાં તમારે પ્રોટીન, આયર્ન અને ફોલિક એસિડથી ભરપૂર +ખોરાક લેવો જોઈએ. દાળ, પાલક, ઈંડા અને દૂધ સારા વિકલ્પો છે." +``` + +--- + +## 7. Performance Benchmarks + +| Test | Time | Notes | +| ----------------------- | ----- | ---------------------------------- | +| Hindi synthesis (short) | 0.25s | "नमस्ते" | +| Hindi synthesis (long) | 0.45s | 50-word sentence | +| Gujarati MMS | 0.35s | First load includes model download | +| Style processing | +0.1s | Pitch + speed adjustment | +| API round-trip | 0.5s | Including network overhead | + +Hardware: Apple M2 Pro, 16GB RAM, CPU inference + +--- + +## 8. Deployment + +### 8.1 Quick Start + +```bash +# Clone repository +git clone https://github.com/harshil748/VoiceAPI +cd VoiceAPI + +# Setup environment +python3 -m venv tts +source tts/bin/activate +pip install -r requirements.txt + +# Download a model +python -m src.cli download --voice hi_male + +# Start server +python -m src.cli serve --port 8000 +``` + +### 8.2 Docker + +```dockerfile +FROM python:3.10-slim +WORKDIR /app +COPY . . +RUN pip install -r requirements.txt +RUN python -m src.cli download --lang hi +EXPOSE 8000 +CMD ["python", "-m", "src.cli", "serve"] +``` + +--- + +## 9. Limitations and Future Work + +### 9.1 Current Limitations + +1. 
**Model Size**: Each VITS model is ~300MB +2. **MPS Compatibility**: Apple Silicon MPS not fully supported +3. **Real-time Streaming**: Limited to sentence-level +4. **Gujarati Gender**: MMS has only neutral voice + +### 9.2 Future Improvements + +1. **Model Quantization**: INT8 for smaller size +2. **Voice Cloning**: Reference audio-based synthesis +3. **SSML Support**: Markup language for fine control +4. **More Languages**: Odia, Assamese, Punjabi +5. **Fine-tuning**: Custom voice training on SPICOR data + +--- + +## 10. Credits + +### Model Sources + +| Source | Models | License | +| ----------------------- | --------------------- | ------------ | +| SYSPIN (IISc Bangalore) | VITS for 10 languages | CC BY 4.0 | +| Facebook MMS | Gujarati VITS | CC BY-NC 4.0 | + +### Dataset + +- **SPICOR TTS Project**: IISc SPIRE Lab, Bangalore +- **Audio Quality**: 48kHz, 24-bit, mono + +### Frameworks + +- Coqui TTS, Hugging Face Transformers, FastAPI, librosa + +--- + +## 11. Conclusion + +We have developed a comprehensive multi-lingual TTS system that: + +✅ Supports **11 Indian languages** with 21 voice variants +✅ Provides **9 style presets** for prosody control +✅ Offers a **REST API** with OpenAPI documentation +✅ Achieves **<500ms latency** for typical sentences +✅ Is **production-ready** with proper error handling + +The system is well-suited for the healthcare assistant use case, providing clear, natural-sounding speech in native languages to help pregnant mothers access healthcare information. 
+ +--- + +**Repository**: https://github.com/harshil748/VoiceAPI +**API Documentation**: http://localhost:8000/docs diff --git a/tests/test_basic.py b/tests/test_basic.py new file mode 100644 index 0000000000000000000000000000000000000000..7f64d250cbc4a17489fe8b6a2c4aabdb75b9c352 --- /dev/null +++ b/tests/test_basic.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python +""" +Quick test script to verify the TTS system works +""" +import sys +import os + +# Add src to path +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +def test_basic(): + """Basic functionality test""" + print("=" * 50) + print("🧪 Testing Voice Tech for All TTS System") + print("=" * 50) + + # Test 1: Import modules + print("\n1. Testing imports...") + try: + from src.config import LANGUAGE_CONFIGS, get_available_voices + from src.tokenizer import TTSTokenizer, CharactersConfig, TextNormalizer + from src.downloader import ModelDownloader + from src.engine import TTSEngine + + print(" ✅ All imports successful") + except ImportError as e: + print(f" ❌ Import error: {e}") + return False + + # Test 2: Configuration + print("\n2. Testing configuration...") + voices = get_available_voices() + print(f" ✅ Found {len(voices)} voice configurations") + print(f" Languages: {set(v['code'] for v in voices.values())}") + + # Test 3: Tokenizer + print("\n3. Testing tokenizer...") + config = CharactersConfig( + characters="abcdefghijklmnopqrstuvwxyz", punctuations="!.,? " + ) + tokenizer = TTSTokenizer(config) + ids = tokenizer.text_to_ids("hello world") + text_back = tokenizer.ids_to_text(ids) + print(f" ✅ Tokenizer works: 'hello world' -> {len(ids)} tokens") + + # Test 4: Text normalizer + print("\n4. Testing text normalizer...") + normalizer = TextNormalizer() + test_text = "Price is {100}{एकसो} rupees" + normalized = normalizer.clean_text(test_text) + print(f" ✅ Normalized: '{test_text}' -> '{normalized}'") + + # Test 5: Model downloader + print("\n5. 
Testing model downloader...")
    # ModelDownloader is a project class (src.downloader); listing downloaded
    # models is assumed to be a cheap local-filesystem check — TODO confirm.
    downloader = ModelDownloader()
    downloaded = downloader.list_downloaded_models()
    print(f" ✅ Downloaded models: {downloaded if downloaded else 'None yet'}")

    # Test 6: Engine initialization
    print("\n6. Testing TTS engine...")
    try:
        engine = TTSEngine()
        print(f" ✅ Engine initialized on device: {engine.device}")
    except Exception as e:
        # Engine init failures are reported as warnings, not test failures,
        # so the smoke test still passes on machines without models/GPU.
        print(f" ⚠️ Engine init warning: {e}")

    print("\n" + "=" * 50)
    print("✅ All basic tests passed!")
    print("=" * 50)

    print("\n📋 Next steps:")
    print(" 1. Download a model: python -m src.cli download --voice hi_male")
    print(
        " 2. Synthesize: python -m src.cli synthesize --text 'नमस्ते' --voice hi_male"
    )
    print(" 3. Start server: python -m src.cli serve")

    return True


def test_synthesis():
    """Test actual synthesis (requires downloaded model).

    Returns early (skips) when no model has been downloaded yet.  Otherwise
    picks the first downloaded voice, synthesizes a sample sentence in that
    voice's language, and writes the audio to ``test_output.wav``.
    """
    from src.engine import TTSEngine
    from src.downloader import ModelDownloader

    downloader = ModelDownloader()
    downloaded = downloader.list_downloaded_models()

    if not downloaded:
        # Soft skip: synthesis cannot be exercised without a local model.
        print("\n⚠️ No models downloaded yet.")
        print("Run: python -m src.cli download --voice hi_male")
        return

    # Use whichever voice happens to be first in the downloaded list.
    voice = downloaded[0]
    print(f"\n🎤 Testing synthesis with voice: {voice}")

    engine = TTSEngine()

    # Test synthesis
    test_texts = {
        "hi": "नमस्ते, मैं आपकी कैसे मदद कर सकता हूं?",
        "en": "Hello, how can I help you today?",
        "bn": "নমস্কার, আজ আমি আপনাকে কীভাবে সাহায্য করতে পারি?",
    }

    # Get language for this voice
    from src.config import LANGUAGE_CONFIGS

    # NOTE(review): assumes LANGUAGE_CONFIGS values expose a `.code`
    # attribute keyed by voice name — confirm against src/config.py.
    lang = LANGUAGE_CONFIGS[voice].code

    # Fall back to the English sample for languages without a sample text.
    text = test_texts.get(lang, test_texts["en"])

    print(f" Text: {text}")
    output = engine.synthesize(text, voice)
    print(f" ✅ Generated {output.duration:.2f}s of audio")

    # Save test file
    test_output = "test_output.wav"
    engine.synthesize_to_file(text, test_output, voice)
    print(f" ✅ Saved to: {test_output}")


if __name__ == "__main__":
    # --full additionally runs the model-dependent synthesis test.
    if len(sys.argv) > 1 and sys.argv[1] == "--full":
        test_basic()
test_synthesis() + else: + test_basic()