diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..d7b2fdea400155c17901eda4ced5644319e6bfe4 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,25 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +reference_audio/bengali_female.wav filter=lfs diff=lfs merge=lfs -text +reference_audio/bengali_male.wav filter=lfs diff=lfs merge=lfs -text +reference_audio/bhojpuri_female.wav filter=lfs diff=lfs merge=lfs -text +reference_audio/bhojpuri_male.wav filter=lfs diff=lfs merge=lfs -text +reference_audio/chhattisgarhi_female.wav filter=lfs diff=lfs merge=lfs -text +reference_audio/chhattisgarhi_male.wav filter=lfs diff=lfs merge=lfs -text +reference_audio/english_female.wav filter=lfs diff=lfs merge=lfs -text +reference_audio/english_male.wav filter=lfs diff=lfs merge=lfs -text +reference_audio/gujarati_female.wav filter=lfs diff=lfs merge=lfs -text +reference_audio/gujarati_male.wav filter=lfs diff=lfs merge=lfs -text +reference_audio/hindi_female.wav filter=lfs diff=lfs merge=lfs -text +reference_audio/hindi_male.wav filter=lfs diff=lfs merge=lfs -text +reference_audio/kannada_female.wav filter=lfs diff=lfs merge=lfs -text +reference_audio/kannada_male.wav filter=lfs diff=lfs merge=lfs -text +reference_audio/magahi_female.wav filter=lfs diff=lfs merge=lfs -text +reference_audio/magahi_male.wav filter=lfs diff=lfs merge=lfs -text +reference_audio/maithili_female.wav filter=lfs diff=lfs merge=lfs -text +reference_audio/maithili_male.wav filter=lfs diff=lfs merge=lfs -text +reference_audio/marathi_female.wav filter=lfs diff=lfs merge=lfs -text +reference_audio/marathi_male.wav filter=lfs diff=lfs merge=lfs -text +reference_audio/telugu_female.wav filter=lfs diff=lfs merge=lfs -text +reference_audio/telugu_male.wav filter=lfs diff=lfs merge=lfs -text diff --git a/API_Main.py b/API_Main.py 
new file mode 100644
index 0000000000000000000000000000000000000000..35cadb6e5d05739e02f07391aca88e9fa84feff0
--- /dev/null
+++ b/API_Main.py
@@ -0,0 +1,148 @@
+import torch
+import string
+import random
+import uvicorn
+import numpy as np
+from io import BytesIO
+from TTS.api import TTS
+from fastapi import FastAPI
+from scipy.io.wavfile import write
+from fastapi.responses import Response, JSONResponse
+
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using device: {device}")
+
+MODEL_PATH = "models/best_model.pth"
+CONFIG_PATH = "models/config.json"
+
+print("Loading model")
+tts = TTS(
+    model_path=MODEL_PATH,
+    config_path=CONFIG_PATH,
+    progress_bar=False,
+).to(device)
+
+# Sample rate written into the WAV header of every response.
+sample_rate = 22050
+
+# Speaker id -> reference recording passed to the model as `speaker_wav`.
+ref_path = {
+    "chhattisgarhi_male" : "reference_audio/chhattisgarhi_male.wav",
+    "chhattisgarhi_female" : "reference_audio/chhattisgarhi_female.wav",
+    "kannada_male" : "reference_audio/kannada_male.wav",
+    "kannada_female" : "reference_audio/kannada_female.wav",
+    "maithili_male" : "reference_audio/maithili_male.wav",
+    "maithili_female" : "reference_audio/maithili_female.wav",
+    "telugu_male" : "reference_audio/telugu_male.wav",
+    "telugu_female" : "reference_audio/telugu_female.wav",
+    "bengali_male" : "reference_audio/bengali_male.wav",
+    "bengali_female" : "reference_audio/bengali_female.wav",
+    "bhojpuri_male" : "reference_audio/bhojpuri_male.wav",
+    "bhojpuri_female" : "reference_audio/bhojpuri_female.wav",
+    "marathi_female" : "reference_audio/marathi_female.wav",
+    "marathi_male" : "reference_audio/marathi_male.wav",
+    "gujarati_male" : "reference_audio/gujarati_male.wav",
+    "gujarati_female" : "reference_audio/gujarati_female.wav",
+    "hindi_male" : "reference_audio/hindi_male.wav",
+    "hindi_female" : "reference_audio/hindi_female.wav",
+    "magahi_female" : "reference_audio/magahi_female.wav",
+    "magahi_male" : "reference_audio/magahi_male.wav",
+    "english_female" : "reference_audio/english_female.wav",
+    "english_male" : "reference_audio/english_male.wav",
+}
+
+# Language name -> language code passed to the model as `language`.
+languageCODE = {
+    "bhojpuri": "bho",
+    "bengali": "bn",
+    "english": "en",
+    "gujarati": "gu",
+    "hindi": "hi",
+    "chhattisgarhi": "hne",
+    "kannada": "kn",
+    "magahi": "mag",
+    "maithili": "mai",
+    "marathi": "mr",
+    "telugu": "te"
+}
+
+# Alphabet for the random suffix of the returned file name.
+_FILENAME_ALPHABET = string.ascii_uppercase + string.digits + string.ascii_lowercase
+
+
+def _resolve_language(lang):
+    """Return the language code for a language name or code, or None if unsupported."""
+    lan = lang.lower()
+    if lan in languageCODE:
+        return languageCODE[lan]
+    if lan in languageCODE.values():
+        return lan
+    return None
+
+
+app = FastAPI()
+
+
+@app.get("/Get_Inference")
+async def Inference(text : str, lang : str, speaker : str):
+    """Synthesize `text` and return it as 16-bit PCM WAV audio.
+
+    Args:
+        text: Text to speak.
+        lang: Language name (e.g. "hindi") or language code (e.g. "hi"), case-insensitive.
+        speaker: Key of `ref_path` (e.g. "hindi_male"), case-insensitive.
+
+    Returns:
+        A 200 response with the WAV bytes, or a 422 JSON response with a
+        "comment" field when a parameter is empty or not supported.
+    """
+    if not text or not lang or not speaker:
+        return JSONResponse({"comment" : "Missing Field."}, status_code = 422)
+
+    spk = speaker.lower()
+    if spk not in ref_path:
+        return JSONResponse({"comment" : "Speaker not present in the system."}, status_code = 422)
+
+    lang_code = _resolve_language(lang)
+    if lang_code is None:
+        return JSONResponse({"comment" : "Language not present in the system."}, status_code = 422)
+
+    # Index with the lower-cased key that was validated above, not the raw query value.
+    wav = np.array(tts.tts(text=text, speaker_wav=ref_path[spk], language=lang_code))
+    # Scale the peak to the int16 range; the 0.01 floor caps the gain for near-silent output.
+    wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav))))
+    wav_norm = wav_norm.astype(np.int16)
+
+    wav_buffer = BytesIO()
+    write(wav_buffer, sample_rate, wav_norm)
+
+    # Build the file name from validated values only, so it is safe to place in a header.
+    suffix = "".join(random.choice(_FILENAME_ALPHABET) for _ in range(7))
+    filename = f"{lang_code}_{spk}_{suffix}.wav"
+    return Response(
+        wav_buffer.getvalue(),
+        media_type="audio/wav",
+        headers={"Content-Disposition": f'inline; filename="{filename}"'},
+    )
+
+
+def start_server():
+    """Serve `app` with uvicorn on 0.0.0.0:8080; blocks until the server exits."""
+    print('Starting Server...')
+
+    # Pass the app object instead of the "API_Main:app" import string: when this file
+    # is run as a script, the import string makes uvicorn import it a second time
+    # under the name API_Main, which loads the model twice.
+    uvicorn.run(
+        app,
+        host = "0.0.0.0",
+        port = 8080,
+        workers = 1,
+        log_level="debug",
+        reload=False,
+    )
+
+
+if __name__ == "__main__":
+    start_server()
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..02e9db565512d5398f300239f4dc535ff24091a9
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,27 @@
+FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime
+
+WORKDIR /app
+
+RUN python --version
+
+RUN apt-get update && apt-get install -y \
+    git \
+    libsndfile1 \
+
build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN pip install --no-cache-dir Cython packaging
+
+RUN pip install --no-cache-dir fastapi python-multipart uvicorn
+
+RUN git clone https://github.com/PranavDBhat/LIMMITS-24-Coquiai.git /app/LIMMITS-24-Coquiai
+
+RUN cd /app/LIMMITS-24-Coquiai && \
+    pip install --no-cache-dir -r requirements.txt
+
+RUN cd /app/LIMMITS-24-Coquiai && \
+    pip install -e .
+
+COPY ./ ./
+
+CMD [ "python", "API_Main.py" ]
diff --git a/README.md b/README.md
index 7be5fc7f47d5db027d120b8024982df93db95b74..b0551d6a198d875f26429eb828a0426e73cf1509 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,169 @@
----
-license: mit
----
+# SYSPIN Hackathon TTS API Documentation
+
+## Overview
+
+This API provides a Text-to-Speech (TTS) service that converts input text into speech audio. It supports multiple Indian languages and offers voice customization through predefined male and female speaker references.
+
+---
+
+## Endpoint: `/Get_Inference`
+
+* **Method**: `GET`
+* **Description**: Generates speech audio from the provided text using the specified language and speaker.
+
+### Query Parameters
+
+| Parameter | Type | Required | Description |
+| --------- | ------ | -------- | ----------- |
+| `text` | string | Yes | The input text to be converted into speech. |
+| `lang` | string | Yes | The language of the input text. Acceptable values include: `bhojpuri`, `bengali`, `english`, `gujarati`, `hindi`, `chhattisgarhi`, `kannada`, `magahi`, `maithili`, `marathi`, `telugu`. |
+| `speaker` | string | Yes | The desired speaker's voice. Format: `<language>_<gender>`. For example: `hindi_male`, `english_female`. Refer to the available speakers below.
|
+
+### Available Speakers
+
+| Language | Language codes | Male Speaker | Female Speaker |
+| ------------- | -------------- | ------------------- | --------------------- |
+| chhattisgarhi | hne | chhattisgarhi\_male | chhattisgarhi\_female |
+| kannada | kn | kannada\_male | kannada\_female |
+| maithili | mai | maithili\_male | maithili\_female |
+| telugu | te | telugu\_male | telugu\_female |
+| bengali | bn | bengali\_male | bengali\_female |
+| bhojpuri | bho | bhojpuri\_male | bhojpuri\_female |
+| marathi | mr | marathi\_male | marathi\_female |
+| gujarati | gu | gujarati\_male | gujarati\_female |
+| hindi | hi | hindi\_male | hindi\_female |
+| magahi | mag | magahi\_male | magahi\_female |
+| english | en | english\_male | english\_female |
+
+### Responses
+
+* **200 OK**: Returns the synthesized speech as a WAV audio file in the response body.
+* **422 Unprocessable Entity**: Returned when:
+
+  * Any of the required query parameters (`text`, `lang`, `speaker`) are missing.
+  * The specified `lang` is not supported.
+  * The specified `speaker` is not available.
+
+
+
+## Running the Server
+
+To start the FastAPI server:
+
+```bash
+docker build -t your_image_name ./
+docker run -d -p 8080:8080 your_image_name
+```
+
+## Hosting on a GPU
+
+To run your FastAPI-based Text-to-Speech (TTS) server inside a Docker container with GPU support, follow these steps:
+
+---
+
+## Prerequisites
+
+1. **NVIDIA GPU**: Ensure your system has an NVIDIA GPU installed.
+
+2. **NVIDIA Drivers**: Install the appropriate NVIDIA drivers for your GPU.
+
+3. **Docker**: Install Docker on your system.
+
+4. **NVIDIA Container Toolkit**: Install the NVIDIA Container Toolkit to enable GPU support in Docker containers.
+
+---
+
+## Installation Steps
+
+### 1.
Install NVIDIA Drivers + +Ensure that the NVIDIA drivers compatible with your GPU are installed on your system. + +### 2. Install Docker + +If Docker is not already installed, you can install it by following the official Docker installation guide for your operating system. + +### 3. Install NVIDIA Container Toolkit + +The NVIDIA Container Toolkit allows Docker containers to utilize the GPU. + +**For Ubuntu:** + +```bash +# Add the package repositories +distribution=$(. /etc/os-release;echo $ID$VERSION_ID) +curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - +curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | \ + sudo tee /etc/apt/sources.list.d/nvidia-docker.list + +# Update the package lists +sudo apt-get update + +# Install the NVIDIA Container Toolkit +sudo apt-get install -y nvidia-container-toolkit + +# Restart the Docker daemon to apply changes +sudo systemctl restart docker +``` + +**For other operating systems:** Refer to the [NVIDIA Container Toolkit installation guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) for detailed instructions. + +### 4. Verify GPU Access in Docker + +To confirm that Docker can access your GPU, run the following command: + +```bash +docker run --rm --gpus all nvidia/cuda:12.4.0-base-ubuntu22.04 nvidia-smi +``` + + +## Running Your FastAPI TTS Server with GPU Support + +Assuming your FastAPI TTS application is containerized and ready to run: + +1. **Build Your Docker Image** + +Navigate to the directory containing your `Dockerfile` and build the Docker image: + +```bash +docker build -t your_image_name . +``` + + +2. 
**Run the Docker Container with GPU Support** + +Start the container with GPU access enabled: + +```bash +docker run --gpus all -p 8080:8080 your_image_name +``` + +## Example API Call + +```python +import requests + +# Define the base URL of your API +base_url = 'http://localhost:8080/Get_Inference' + +# Set up the query parameters +params = { + 'text': 'Hello world', + 'lang': 'english', + 'speaker': 'english_female' +} + +# Send the GET request +response = requests.get(base_url, params=params) + +# Check if the request was successful +if response.status_code == 200: + # Save the audio content to a file + with open('output.wav', 'wb') as f: + f.write(response.content) + print("Audio saved as 'output.wav'") +else: + # Print the error message + print(f"Request failed with status code {response.status_code}") + print("Response:", response.text) +``` diff --git a/model_related/Bengali_Female/speakers.pth b/model_related/Bengali_Female/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..c4fa3e8fd621abdb4d20d2f8b80b92bb36da6d4b --- /dev/null +++ b/model_related/Bengali_Female/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e148bf409293b45ae4fd27d516232fc7f66068f57ffdab78eeb46fcc56fb843 +size 134 diff --git a/model_related/Bengali_Male/speakers.pth b/model_related/Bengali_Male/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..32116b6c43bfdc541274735b425efbc557448216 --- /dev/null +++ b/model_related/Bengali_Male/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eab41b51845880320ed1fc60145d91f489cd34e2c62623a113ef3c4f56c3bc70 +size 134 diff --git a/model_related/Bhojpuri_Female/speakers.pth b/model_related/Bhojpuri_Female/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..8b72305276ecbe010f546e2f6558d7dba1e071a9 --- /dev/null +++ b/model_related/Bhojpuri_Female/speakers.pth @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:0758d5c58c0c3b2375b876bd3fa319bc9678af9b295d676065b67f93ffd870b8 +size 134 diff --git a/model_related/Bhojpuri_Male/speakers.pth b/model_related/Bhojpuri_Male/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..705352b36d355c38d86d50307cc774fdde3fccb5 --- /dev/null +++ b/model_related/Bhojpuri_Male/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd0b455b4f26bb30aa785aa82cb767268df7ecd7687eebedeb5143ff0c45736e +size 134 diff --git a/model_related/Chhattisgarhi_Female/speakers.pth b/model_related/Chhattisgarhi_Female/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..f46cb8d38a98bf9cd7082c961e6c24cc344dbfa9 --- /dev/null +++ b/model_related/Chhattisgarhi_Female/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abf3fe4309d65f0f0fbbc2f2b3aff3928482818e32ff69575f50049f27ee6b3d +size 134 diff --git a/model_related/Chhattisgarhi_Male/speakers.pth b/model_related/Chhattisgarhi_Male/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..3f224471be629d05eb6e2fdc28336881d25313ac --- /dev/null +++ b/model_related/Chhattisgarhi_Male/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9593a0c877baea24a42f7ab59b49c7a1911e21ad5ba1a5bd1f9f1d736b8ede79 +size 134 diff --git a/model_related/English_Female/speakers.pth b/model_related/English_Female/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..f71f6f46c934e8af80fe0efaaef58676837f5cca --- /dev/null +++ b/model_related/English_Female/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d54951c45b4bc40ec0d4aef4adfdd74c3569b1b8ddb7f278f36cf7bb1f0ebfaa +size 134 diff --git a/model_related/English_Male/speakers.pth b/model_related/English_Male/speakers.pth new file mode 100644 index 
0000000000000000000000000000000000000000..9144022dd0b7eff86c5dd88df5fa7d7b50d6e4cd --- /dev/null +++ b/model_related/English_Male/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3977b874607e6e96b95870e9abdf92b1729ca913c29142f9a12bf048cf156bda +size 134 diff --git a/model_related/Gujarati_Female/speakers.pth b/model_related/Gujarati_Female/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..905514f25196f72a39a9d53f953f2e76ac877b93 --- /dev/null +++ b/model_related/Gujarati_Female/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fcfb056e543d6cacebc2fbeee09c7b7dce47073f90b364d2bca4bb74b9c5af9 +size 133 diff --git a/model_related/Gujarati_Male/speakers.pth b/model_related/Gujarati_Male/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..fee28c71a6483aee610c3724f9c2bce098912f0b --- /dev/null +++ b/model_related/Gujarati_Male/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7175678de9331b704704942edb94b0047d7389594ca8b474e61cc7d9b9340081 +size 133 diff --git a/model_related/Hindi_Female/speakers.pth b/model_related/Hindi_Female/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..84327b8c9ac2f4707e920bff1ae6d8807f93e216 --- /dev/null +++ b/model_related/Hindi_Female/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0830857099353182c42e709b2fe7df2ee590cec4a1fc57549fdc1550ce6a7108 +size 134 diff --git a/model_related/Hindi_Male/speakers.pth b/model_related/Hindi_Male/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..58a54993f315e85a236246858f81451cd2fa3b4e --- /dev/null +++ b/model_related/Hindi_Male/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221cd9b955f2fc8adb79675929fe0cc6a49479e774f2e4f160587934fcf3c2e3 +size 134 diff --git a/model_related/Kannada_Female/speakers.pth 
b/model_related/Kannada_Female/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..e536e9cc6c1966d0641a42b8352ba02b911eb170 --- /dev/null +++ b/model_related/Kannada_Female/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14b146984304ef7a2d653ccaa75cfac547075dee2ee8678074a4a1609f050090 +size 133 diff --git a/model_related/Kannada_Male/speakers.pth b/model_related/Kannada_Male/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..42fb113d38a13e851f8a84e79d60f1a0bfa76eac --- /dev/null +++ b/model_related/Kannada_Male/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e30be0891dea3f5ddd51b2a76c76d5b1e83a6957c21ae1cfd0ada5f9895e12c8 +size 133 diff --git a/model_related/Magahi_Female/speakers.pth b/model_related/Magahi_Female/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..71e08e8e9569047dba4c0908aa7282158847514f --- /dev/null +++ b/model_related/Magahi_Female/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bc6765fa7972cd5afaee8f60e38c9413797dbb25b240453a04d29fcf87170ef +size 134 diff --git a/model_related/Magahi_Male/speakers.pth b/model_related/Magahi_Male/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..d0c0aef625b1f109d27059eaf945a0409c108a62 --- /dev/null +++ b/model_related/Magahi_Male/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd4c2775d591ee6d50519c9456cf62026082ed2574fb2321fa47db3a0844b6c4 +size 134 diff --git a/model_related/Maithili_Female/speakers.pth b/model_related/Maithili_Female/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..26699c4f05972bd5d05f193397c9bada91483334 --- /dev/null +++ b/model_related/Maithili_Female/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:aa059452f7ff378eb48c086458f6a89bc09d3575ecb4d9b08dd013feff8aa961 +size 134 diff --git a/model_related/Maithili_Male/speakers.pth b/model_related/Maithili_Male/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..5b743667b0bd96a97c3639836122900d4fee5602 --- /dev/null +++ b/model_related/Maithili_Male/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57519e60ae79d12606e4856facadbdf83d89fc2473127298ba7f7d22b4a0aad7 +size 134 diff --git a/model_related/Marathi_Female/speakers.pth b/model_related/Marathi_Female/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..e3453334df830a43b3b3f8148a141df90762fcd9 --- /dev/null +++ b/model_related/Marathi_Female/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f5af64238bfd96d9d451eead5b119fdfdfee524ef636724e94835b295f9a54c +size 134 diff --git a/model_related/Marathi_Male/speakers.pth b/model_related/Marathi_Male/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..efa57bf2338cfff28433892a9c71b96659f148fe --- /dev/null +++ b/model_related/Marathi_Male/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb72a8ee86e5fb9ecda480028e586e11d856c9cd33b168b479c542df581b4c00 +size 133 diff --git a/model_related/Telugu_Female/speakers.pth b/model_related/Telugu_Female/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..526267e7e5b57fe8a27d12783ac1b7ccd87293e0 --- /dev/null +++ b/model_related/Telugu_Female/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8c2542da9e6ff71aa439e85d795fefbc02543fda1f6ac92e4f2a1a1e61bbe21 +size 134 diff --git a/model_related/Telugu_Male/speakers.pth b/model_related/Telugu_Male/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..8d20d7ba63d8b5ee82b7368c856bb48ac0c96f40 --- /dev/null +++ 
b/model_related/Telugu_Male/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:510e82da22f608832af21a289709f3b801d6659d7cbd12962bb1743319b145f7 +size 134 diff --git a/models/best_model.pth b/models/best_model.pth new file mode 100644 index 0000000000000000000000000000000000000000..8f58f9729e13bb653fcde895eeadfb9bf4eebe08 --- /dev/null +++ b/models/best_model.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96f100ad6435878949e41d458c6c49b401326a8bf4d020fd9cb5aa41b09e4f5d +size 135 diff --git a/models/config.json b/models/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e527058530cffdeb0abb3a72ea55343c51eca27 --- /dev/null +++ b/models/config.json @@ -0,0 +1,662 @@ +{ + "output_path": "/home1/jesuraj/speechlm/espnet/egs2/LIMMITS_25/speechlm1/downloads", + "logger_uri": null, + "run_name": "yourtts_syspin_baseline", + "project_name": "YourTTS", + "run_description": "\n - Original YourTTS trained using VCTK dataset\n ", + "print_step": 50, + "plot_step": 100, + "model_param_stats": false, + "wandb_entity": null, + "dashboard_logger": "tensorboard", + "save_on_interrupt": true, + "log_model_step": 1000, + "save_step": 10000, + "save_n_checkpoints": 10, + "save_checkpoints": true, + "save_all_best": false, + "save_best_after": 0, + "target_loss": "loss_1", + "print_eval": false, + "test_delay_epochs": 0, + "run_eval": true, + "run_eval_steps": null, + "distributed_backend": "nccl", + "distributed_url": "tcp://localhost:54321", + "mixed_precision": false, + "precision": "fp16", + "epochs": 1000, + "batch_size": 16, + "eval_batch_size": 16, + "grad_clip": [ + 1000.0, + 1000.0 + ], + "scheduler_after_epoch": true, + "lr": 0.001, + "optimizer": "AdamW", + "optimizer_params": { + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "weight_decay": 0.01 + }, + "lr_scheduler": null, + "lr_scheduler_params": {}, + "use_grad_scaler": false, + "allow_tf32": false, + "cudnn_enable": true, + 
"cudnn_deterministic": false, + "cudnn_benchmark": false, + "training_seed": 54321, + "model": "vits", + "num_loader_workers": 8, + "num_eval_loader_workers": 0, + "use_noise_augment": false, + "audio": { + "fft_size": 1024, + "sample_rate": 22050, + "win_length": 1024, + "hop_length": 256, + "num_mels": 80, + "mel_fmin": 0, + "mel_fmax": null + }, + "use_phonemes": false, + "phonemizer": "espeak", + "phoneme_language": "en", + "compute_input_seq_cache": true, + "text_cleaner": "multilingual_cleaners", + "enable_eos_bos_chars": false, + "test_sentences_file": "", + "phoneme_cache_path": null, + "characters": { + "characters_class": "TTS.tts.models.vits.VitsCharacters", + "vocab_dict": null, + "pad": "_", + "eos": "&", + "bos": "*", + "blank": null, + "characters": "\u0cc8\u092f\u2013\u0951\u0905\u0c1f\u0c2c\u0a8f\ufe0f\u0a82\u099dH\u0c9bM\u2026\u0997\u0926\u0cb5\u099f\u00e8\u0c89\u0917\u0c32\u0914\u09b9\u0c4cY\u0aa2\u0a8d\u0c02\u0c4b\u0c13\u09b0\u09a8\u0aac\u09a5\u0a88\u0c93\u0c47\u0936\u0a9b\u0c09\u0919\u0c2a\u0c17\u099e\u0c0b\u098aD\u0986\u0aa1 
\u0ccd\u0a95\u0c12\u0aa0RI\u090a\u0cd5\u091f\u0c97\u0c36\u0cb9\u0ab3\u09b7\u0a9e\u0aaa\u09aa\u0c37\u0cb8j\u0a86Wk\u0a81\u0958\u0937\u0cab\u09be\u095eqn\\\u0ca8\u0ce0\u091b\u09aesz\u09a3\u0995\u0c2b\u0aa3\u0aa6\u00bb1\u09bfA5\u0ca5\u092c\u091d\u09c8\u09a2\u0c9e'\u0ac9\u0c16\u0acc\u0c9c\u0ccc\u0924\u0cc0E\u00e0J,\u0c8bu\u0ca2c]\u0cca\u0caa\u099aT\u0a89\u09ce\u0a98\u0ca6\u0987\u0aa5\u0a97\u0cbf\u0c98\u09c7v\u0ca7\u0901\u0c28\u09dc\u0ac2o\u0ab2\u0c40\u0c25\u0c21\u0c82\u095a\u0c95\u09cd\u0ca0\u0c1e#\u095b\u00e9\u0cb2\u0927\u0948\u0cad\u0959\u0ab6\u093e\u0cac\u0abe\u0acdl\u09df\u0ca3\u0abf\u0ab9\u0988\u09ac\u0a96\u095d\u0909\u090b\u0c31\u0993\u0945\u0983\u0c14\u0ab7\u09f0\u0a8a\u0907\u0c19\u0989\u0c48\u09c0\u0955\u094b\u09cb\u0c1d\u0cc1\u0999\u0c99\u0c2f\u0a93\u0931\u099b\u00e2\"9\u0c92\u0c30\u0998\u0c9aZ\u09a0\u0a90\u00ef\u0c07\u092b\u0c1a\u0cbd\u0cc3(\u0cdeOB2\u09b2\u0ac1\u0c3f\u0ac5\u0922\u0c8a\u0ac8\u09af\u091e\u0ac7\u0c06we\u09f7\u0c35\u0c26\u0932hQ\u0c18a\u0c8fi\u0911\u0943\u092a\u0939\u0c4d\u0c41\u09adP3C4\u098f\u0ca4\u09ab\u0c69\u09c1\u0a9f\u0972\u0970\u0a9a\u0c2e\u0941\u0a83\u0c1bx\u09b8\u0c10r\u0cb6\u092e\u0923\u0925\u0c24\u0c33\u0c38\u0cc2\u0930[\u0942\u0949\u0964\u0a85\u0ccb\u0908\u0cbe\u0cbc*\u0caf\u0aab\u0c42\u0a8b\u0a87\u0c27\u0c3e\u09a1X`\u0c0e\u0954\u0aa8\u0ab8\u093f\u0ac3\u09c3N\u0cb3\u0cb0\u0c96\u0981\u095c\u0a9cL\u0c90\u0cb1\u0aad\u0990\u0910\u0920\u0960\u0947\u09c2\u0c66U\u091aV\u09dd\u0c15\u098b\u0c46\u0ac0G\u0921\u0c9d\u0c88\u09bc\u0c22\u093c\u0c9f\u099c\u092df\u0916\u0c05\u0cc7K\u0a9d\u090f\u09a6?\u0c4a\u0902\u0c94\u0ae0\u0c87\u094a\u0a94\u0c0f\u0985\u0c08\u0abc\u0cc67\u0933\u0918\u00f4\u0928@\u0c8e\u091c\u0915\u0c238\u0996\u0cb7t\u0982\u0c43m\u0c03\u0994\u09cc\u0c39\u0940\u0ab5\u093d\u0a91\u090d\u0c2d\u09b6\u0cae\u0938bF\u0c83\u09a7\u0aa4y\u0935p\u09030{&\u0c0a\u0ca1\u0906\u0ab0\u0acb\u0c86\u094dg\u09a4\u0aae\u0950\u0c1c\u094c\u0aa7\u0934\u0aaf\u0c20\u0c85\u095fSd\u0913", + "punctuations": "!'(),-.:;? 
", + "phonemes": "", + "is_unique": true, + "is_sorted": true + }, + "add_blank": true, + "batch_group_size": 32, + "loss_masking": null, + "min_audio_len": 1, + "max_audio_len": Infinity, + "min_text_len": 1, + "max_text_len": Infinity, + "compute_f0": false, + "compute_energy": false, + "compute_linear_spec": true, + "precompute_num_workers": 12, + "start_by_longest": true, + "shuffle": false, + "drop_last": false, + "datasets": [ + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Bengali_Female", + "meta_file_train": "SyspinSpeakers/Bengali_Female.tsv", + "ignored_speakers": null, + "language": "bn", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Chhattisgarhi_Male", + "meta_file_train": "SyspinSpeakers/Chhattisgarhi_Male.tsv", + "ignored_speakers": null, + "language": "hne", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Magahi_Male", + "meta_file_train": "SyspinSpeakers/Magahi_Male.tsv", + "ignored_speakers": null, + "language": "mag", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Marathi_Male", + "meta_file_train": "SyspinSpeakers/Marathi_Male.tsv", + "ignored_speakers": null, + "language": "mr", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Maithili_Female", + "meta_file_train": "SyspinSpeakers/Maithili_Female.tsv", + "ignored_speakers": null, + "language": "mai", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/English_Male", + "meta_file_train": "SyspinSpeakers/English_Male.tsv", + 
"ignored_speakers": null, + "language": "en", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Chhattisgarhi_Female", + "meta_file_train": "SyspinSpeakers/Chhattisgarhi_Female.tsv", + "ignored_speakers": null, + "language": "hne", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Kannada_Male", + "meta_file_train": "SyspinSpeakers/Kannada_Male.tsv", + "ignored_speakers": null, + "language": "kn", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Bhojpuri_Female", + "meta_file_train": "SyspinSpeakers/Bhojpuri_Female.tsv", + "ignored_speakers": null, + "language": "bho", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Kannada_Female", + "meta_file_train": "SyspinSpeakers/Kannada_Female.tsv", + "ignored_speakers": null, + "language": "kn", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Maithili_Male", + "meta_file_train": "SyspinSpeakers/Maithili_Male.tsv", + "ignored_speakers": null, + "language": "mai", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Telugu_Male", + "meta_file_train": "SyspinSpeakers/Telugu_Male.tsv", + "ignored_speakers": null, + "language": "te", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Telugu_Female", + "meta_file_train": "SyspinSpeakers/Telugu_Female.tsv", + "ignored_speakers": null, + "language": "te", + 
"phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Bengali_Male", + "meta_file_train": "SyspinSpeakers/Bengali_Male.tsv", + "ignored_speakers": null, + "language": "bn", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Bhojpuri_Male", + "meta_file_train": "SyspinSpeakers/Bhojpuri_Male.tsv", + "ignored_speakers": null, + "language": "bho", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Gujarati_Female", + "meta_file_train": "SyspinSpeakers/Gujarati_Female.tsv", + "ignored_speakers": null, + "language": "gu", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Marathi_Female", + "meta_file_train": "SyspinSpeakers/Marathi_Female.tsv", + "ignored_speakers": null, + "language": "mr", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Hindi_Female", + "meta_file_train": "SyspinSpeakers/Hindi_Female.tsv", + "ignored_speakers": null, + "language": "hi", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Gujarati_Male", + "meta_file_train": "SyspinSpeakers/Gujarati_Male.tsv", + "ignored_speakers": null, + "language": "gu", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Hindi_Male", + "meta_file_train": "SyspinSpeakers/Hindi_Male.tsv", + "ignored_speakers": null, + "language": "hi", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + 
}, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/Magahi_Female", + "meta_file_train": "SyspinSpeakers/Magahi_Female.tsv", + "ignored_speakers": null, + "language": "mag", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + }, + { + "formatter": "syspin_ml", + "dataset_name": "", + "path": "model_related/English_Female", + "meta_file_train": "SyspinSpeakers/English_Female.tsv", + "ignored_speakers": null, + "language": "en", + "phonemizer": "", + "meta_file_val": "", + "meta_file_attn_mask": "" + } + ], + "test_sentences": [ + [ + "\u091c\u093e\u092f\u0915\u0935\u093e\u0921\u0940 \u0927\u0930\u0923\u093e\u0924\u0942\u0928 \u0924\u092c\u094d\u092c\u0932 \u0905\u0921\u0940\u091a \u0924\u0947 \u0924\u0940\u0928 \u0932\u093e\u0916 \u0939\u0947\u0915\u094d\u091f\u0930 \u0936\u0947\u0924\u0940\u091a\u094d\u092f\u093e \u0938\u093f\u0902\u091a\u0928\u093e\u0938\u093e\u0920\u0940 \u092a\u093e\u0923\u0940 \u0938\u094b\u0921\u0932\u0902 \u091c\u093e\u0924\u0902", + "Marathi_Male", + null, + "mr" + ], + [ + "\u091c\u093e\u092f\u0915\u0935\u093e\u0921\u0940 \u0927\u0930\u0923\u093e\u0924\u0942\u0928 \u0924\u092c\u094d\u092c\u0932 \u0905\u0921\u0940\u091a \u0924\u0947 \u0924\u0940\u0928 \u0932\u093e\u0916 \u0939\u0947\u0915\u094d\u091f\u0930 \u0936\u0947\u0924\u0940\u091a\u094d\u092f\u093e \u0938\u093f\u0902\u091a\u0928\u093e\u0938\u093e\u0920\u0940 \u092a\u093e\u0923\u0940 \u0938\u094b\u0921\u0932\u0902 \u091c\u093e\u0924\u0902", + "Marathi_Female", + null, + "mr" + ], + [ + "\u0915\u0941\u091b \u0938\u092e\u092f \u092c\u093e\u0926 \u0935\u0947 \u0905\u0938\u0939\u093e\u092c\u0947 \u0938\u0941\u095e\u094d\u095e\u093e\u0939 \u0915\u0947 \u0928\u093e\u092e \u0938\u0947 \u092a\u094d\u0930\u0938\u093f\u0926\u094d\u0927 \u0939\u094b \u0917\u090f\u0964", + "Hindi_Male", + null, + "hi" + ], + [ + "\u0915\u0941\u091b \u0938\u092e\u092f \u092c\u093e\u0926 \u0935\u0947 \u0905\u0938\u0939\u093e\u092c\u0947 
\u0938\u0941\u095e\u094d\u095e\u093e\u0939 \u0915\u0947 \u0928\u093e\u092e \u0938\u0947 \u092a\u094d\u0930\u0938\u093f\u0926\u094d\u0927 \u0939\u094b \u0917\u090f\u0964", + "Hindi_Female", + null, + "hi" + ], + [ + "\u0c35\u0c21\u0c4d\u0c30\u0c02\u0c17\u0c3f, \u0c15\u0c4d\u0c37\u0c41\u0c30\u0c15 \u0c35\u0c43\u0c24\u0c4d\u0c24\u0c41\u0c32\u0c32\u0c4b \u0c2a\u0c46\u0c1f\u0c4d\u0c1f\u0c41\u0c2c\u0c21\u0c3f \u0c2a\u0c4d\u0c30\u0c27\u0c3e\u0c28\u0c2e\u0c48 \u0c07\u0c24\u0c30\u0c41\u0c32\u0c41 \u0c15\u0c42\u0c21\u0c3e \u0c08 \u0c35\u0c43\u0c24\u0c4d\u0c24\u0c3f\u0c32\u0c4b \u0c2a\u0c4d\u0c30\u0c35\u0c47\u0c36\u0c3f\u0c02\u0c1a\u0c3f \u0c35\u0c4d\u0c2f\u0c3e\u0c2a\u0c3e\u0c30\u0c02\u0c17\u0c3e \u0c2e\u0c3e\u0c30\u0c4d\u0c1a\u0c47\u0c38\u0c3e\u0c30", + "Telugu_Female", + null, + "te" + ], + [ + "\u0c35\u0c21\u0c4d\u0c30\u0c02\u0c17\u0c3f, \u0c15\u0c4d\u0c37\u0c41\u0c30\u0c15 \u0c35\u0c43\u0c24\u0c4d\u0c24\u0c41\u0c32\u0c32\u0c4b \u0c2a\u0c46\u0c1f\u0c4d\u0c1f\u0c41\u0c2c\u0c21\u0c3f \u0c2a\u0c4d\u0c30\u0c27\u0c3e\u0c28\u0c2e\u0c48 \u0c07\u0c24\u0c30\u0c41\u0c32\u0c41 \u0c15\u0c42\u0c21\u0c3e \u0c08 \u0c35\u0c43\u0c24\u0c4d\u0c24\u0c3f\u0c32\u0c4b \u0c2a\u0c4d\u0c30\u0c35\u0c47\u0c36\u0c3f\u0c02\u0c1a\u0c3f \u0c35\u0c4d\u0c2f\u0c3e\u0c2a\u0c3e\u0c30\u0c02\u0c17\u0c3e \u0c2e\u0c3e\u0c30\u0c4d\u0c1a\u0c47\u0c38\u0c3e\u0c30", + "Telugu_Male", + null, + "te" + ], + [ + "\u0915\u0932\u0915\u0924\u094d\u0924\u093e \u091c\u093e\u092f \u0916\u093e\u0924\u093f\u0930 \u092a\u0941\u0937\u094d\u092a\u093e \u0939 \u0911\u0928\u0932\u093e\u0907\u0928 \u091f\u093f\u0915\u093f\u091f \u0915\u0930\u0935\u093e\u092f \u0930\u093f\u0939\u093f\u0938 \u0939\u0935\u092f", + "Chhattisgarhi_Female", + null, + "hne" + ], + [ + "\u0915\u0932\u0915\u0924\u094d\u0924\u093e \u091c\u093e\u092f \u0916\u093e\u0924\u093f\u0930 \u092a\u0941\u0937\u094d\u092a\u093e \u0939 \u0911\u0928\u0932\u093e\u0907\u0928 \u091f\u093f\u0915\u093f\u091f \u0915\u0930\u0935\u093e\u092f \u0930\u093f\u0939\u093f\u0938 
\u0939\u0935\u092f", + "Chhattisgarhi_Male", + null, + "hne" + ], + [ + "\u09a8\u09bf\u09ae\u09cd\u09a8\u09cb\u0995\u09cd\u09a4 \u09b8\u09be\u09b0\u09a3\u09bf \u0985\u09ac\u09b2\u09ae\u09cd\u09ac\u09a8\u09c7 \u09ad\u09be\u09b0\u09a4\u09c0\u09df \u09ac\u09cd\u09af\u09ac\u09b8\u09cd\u09a5\u09be\u09df \u099c\u09c0\u09ac\u09bf\u0995\u09be\u09b0 \u0995\u09be\u09a0\u09be\u09ae\u09cb\u09b0 \u098f\u0995\u099f\u09bf \u09aa\u09be\u0987 \u099a\u09bf\u09a4\u09cd\u09b0 \u09a4\u09c8\u09b0\u09bf \u0995\u09b0\u09cb \u0995\u09c3\u09b7\u09bf \u09ad\u09b0\u09cd\u09a4\u09c1\u0995\u09bf\u09b0 \u09aa\u0995\u09cd\u09b7\u09c7 \u098f\u09ac\u0982 \u09ac\u09bf\u09aa\u0995\u09cd\u09b7\u09c7\u09b0 \u09af\u09c1\u0995\u09cd\u09a4\u09bf\u0997\u09c1\u09b2\u09cb \u09aa\u09b0\u09cd\u09af\u09be\u09b2\u09cb\u099a\u09a8\u09be \u0995\u09b0\u09cb\u0964", + "Bengali_Male", + null, + "bn" + ], + [ + "\u09a8\u09bf\u09ae\u09cd\u09a8\u09cb\u0995\u09cd\u09a4 \u09b8\u09be\u09b0\u09a3\u09bf \u0985\u09ac\u09b2\u09ae\u09cd\u09ac\u09a8\u09c7 \u09ad\u09be\u09b0\u09a4\u09c0\u09df \u09ac\u09cd\u09af\u09ac\u09b8\u09cd\u09a5\u09be\u09df \u099c\u09c0\u09ac\u09bf\u0995\u09be\u09b0 \u0995\u09be\u09a0\u09be\u09ae\u09cb\u09b0 \u098f\u0995\u099f\u09bf \u09aa\u09be\u0987 \u099a\u09bf\u09a4\u09cd\u09b0 \u09a4\u09c8\u09b0\u09bf \u0995\u09b0\u09cb \u0995\u09c3\u09b7\u09bf \u09ad\u09b0\u09cd\u09a4\u09c1\u0995\u09bf\u09b0 \u09aa\u0995\u09cd\u09b7\u09c7 \u098f\u09ac\u0982 \u09ac\u09bf\u09aa\u0995\u09cd\u09b7\u09c7\u09b0 \u09af\u09c1\u0995\u09cd\u09a4\u09bf\u0997\u09c1\u09b2\u09cb \u09aa\u09b0\u09cd\u09af\u09be\u09b2\u09cb\u099a\u09a8\u09be \u0995\u09b0\u09cb\u0964", + "Bengali_Female", + null, + "bn" + ], + [ + "\u0cb9\u0cb8\u0ccd\u0ca6\u0cc7\u0cb5\u0ccd \u0ca8\u0ca6\u0cbf, \u0cb0\u0cbf\u0cb9\u0c82\u0ca1\u0ccd \u0ca8\u0ca6\u0cbf \u0cae\u0ca4\u0ccd\u0ca4\u0cc1 \u0c95\u0ca8\u0ccd\u0cb9\u0cb0\u0ccd \u0ca8\u0ca6\u0cbf\u0c97\u0cb3\u0cc1 \u0cb8\u0cc1\u0cb0\u0ccd\u0c97\u0cc1\u0c9c\u0cbe\u0ca6 \u0cae\u0cc1\u0c96\u0c9c 
\u0cad\u0cc2\u0cae\u0cbf\u0caf\u0cb2\u0ccd\u0cb2\u0cbf \u0cb9\u0cb0\u0cbf\u0caf\u0cc1\u0ca4\u0ccd\u0ca4\u0cb5\u0cc6.", + "Kannada_Female", + null, + "kn" + ], + [ + "\u0cb9\u0cb8\u0ccd\u0ca6\u0cc7\u0cb5\u0ccd \u0ca8\u0ca6\u0cbf, \u0cb0\u0cbf\u0cb9\u0c82\u0ca1\u0ccd \u0ca8\u0ca6\u0cbf \u0cae\u0ca4\u0ccd\u0ca4\u0cc1 \u0c95\u0ca8\u0ccd\u0cb9\u0cb0\u0ccd \u0ca8\u0ca6\u0cbf\u0c97\u0cb3\u0cc1 \u0cb8\u0cc1\u0cb0\u0ccd\u0c97\u0cc1\u0c9c\u0cbe\u0ca6 \u0cae\u0cc1\u0c96\u0c9c \u0cad\u0cc2\u0cae\u0cbf\u0caf\u0cb2\u0ccd\u0cb2\u0cbf \u0cb9\u0cb0\u0cbf\u0caf\u0cc1\u0ca4\u0ccd\u0ca4\u0cb5\u0cc6.", + "Kannada_Male", + null, + "kn" + ], + [ + "the russian leader added that the united states and russia have a common interest in preventing nuclear proliferation, in north korea and elsewhere", + "English_Male", + null, + "en" + ], + [ + "the russian leader added that the united states and russia have a common interest in preventing nuclear proliferation, in north korea and elsewhere", + "English_Female", + null, + "en" + ], + [ + "\u0915\u093e\u0928\u0942\u0928 \u0915\u0947 \u0936\u093e\u0938\u0928 \u0915\u0947 \u0938\u0916\u094d\u0924 \u0905\u0928\u0941\u092a\u093e\u0932\u0928 \u0932\u094b\u0915\u0924\u0902\u0924\u094d\u0930 \u0915\u0947 \u092a\u094d\u0930\u092e\u0941\u0916 \u0906\u0932\u094b\u091a\u0928\u093e \u092e\u0947\u0902 \u0938\u0947 \u090f\u0917\u094b \u0939\u0908", + "Magahi_Male", + null, + "mag" + ], + [ + "\u0915\u093e\u0928\u0942\u0928 \u0915\u0947 \u0936\u093e\u0938\u0928 \u0915\u0947 \u0938\u0916\u094d\u0924 \u0905\u0928\u0941\u092a\u093e\u0932\u0928 \u0932\u094b\u0915\u0924\u0902\u0924\u094d\u0930 \u0915\u0947 \u092a\u094d\u0930\u092e\u0941\u0916 \u0906\u0932\u094b\u091a\u0928\u093e \u092e\u0947\u0902 \u0938\u0947 \u090f\u0917\u094b \u0939\u0908", + "Magahi_Female", + null, + "mag" + ], + [ + "\u092a\u093e\u0915\u0936\u093e\u0932\u093e \u0938\u0902\u092c\u0902\u0927\u0940 \u0915\u0932\u093e \u092e\u0947\u0902 \u092d\u094b\u091c\u0928 \u092a\u0915\u093e\u092c\u0948 
\u0938\u093d \u0932\u092f \u0915\u093d \u0913\u0915\u0930 \u0915\u0941\u0936\u0932 \u092a\u094d\u0930\u092c\u0902\u0927\u0928 \u0914\u0930 \u092d\u093e\u0928\u0938 \u0918\u0930\u093d\u0915 \u0909\u091a\u093f\u0924 \u0930\u0916 \u0930\u0916\u093e\u0935 \u0938\u0947\u0939\u094b \u0936\u093e\u092e\u093f\u0932 \u091b\u0948\u0915", + "Maithili_Female", + null, + "mai" + ], + [ + "\u092a\u093e\u0915\u0936\u093e\u0932\u093e \u0938\u0902\u092c\u0902\u0927\u0940 \u0915\u0932\u093e \u092e\u0947\u0902 \u092d\u094b\u091c\u0928 \u092a\u0915\u093e\u092c\u0948 \u0938\u093d \u0932\u092f \u0915\u093d \u0913\u0915\u0930 \u0915\u0941\u0936\u0932 \u092a\u094d\u0930\u092c\u0902\u0927\u0928 \u0914\u0930 \u092d\u093e\u0928\u0938 \u0918\u0930\u093d\u0915 \u0909\u091a\u093f\u0924 \u0930\u0916 \u0930\u0916\u093e\u0935 \u0938\u0947\u0939\u094b \u0936\u093e\u092e\u093f\u0932 \u091b\u0948\u0915", + "Maithili_Male", + null, + "mai" + ], + [ + "\u0aa4\u0acd\u0aaf\u0abe\u0ab0\u0aac\u0abe\u0aa6 \u0aaa\u0abe\u0a9f\u0ac0\u0aa6\u0abe\u0ab0\u0acb \u0aa6\u0acd\u0ab5\u0abe\u0ab0\u0abe \u0a85\u0aa8\u0ac7\u0a95 \u0aa8\u0abe\u0aa8\u0abe\u0aae\u0acb\u0a9f\u0abe \u0a95\u0abe\u0ab0\u0acd\u0aaf\u0a95\u0acd\u0ab0\u0aae\u0acb \u0aaf\u0acb\u0a9c\u0ab5\u0abe\u0aae\u0abe\u0a82 \u0a86\u0ab5\u0aa4\u0abe \u0ab9\u0aa4\u0abe \u0a9c\u0acb\u0a95\u0ac7 \u0a8f\u0a95 \u0aaa\u0aa3 \u0a95\u0abe\u0ab0\u0acd\u0aaf\u0a95\u0acd\u0ab0\u0aae\u0aa8\u0ac7 \u0aa4\u0a82\u0aa4\u0acd\u0ab0 \u0aa6\u0acd\u0ab5\u0abe\u0ab0\u0abe \u0aae\u0a82\u0a9c\u0ac2\u0ab0\u0ac0 \u0a86\u0aaa\u0ab5\u0abe\u0aae\u0abe\u0a82 \u0a86\u0ab5\u0aa4\u0ac0 \u0aa8\u0ab9\u0acb\u0aa4", + "Gujarati_Female", + null, + "gu" + ], + [ + "\u0aa4\u0acd\u0aaf\u0abe\u0ab0\u0aac\u0abe\u0aa6 \u0aaa\u0abe\u0a9f\u0ac0\u0aa6\u0abe\u0ab0\u0acb \u0aa6\u0acd\u0ab5\u0abe\u0ab0\u0abe \u0a85\u0aa8\u0ac7\u0a95 \u0aa8\u0abe\u0aa8\u0abe\u0aae\u0acb\u0a9f\u0abe \u0a95\u0abe\u0ab0\u0acd\u0aaf\u0a95\u0acd\u0ab0\u0aae\u0acb \u0aaf\u0acb\u0a9c\u0ab5\u0abe\u0aae\u0abe\u0a82 
\u0a86\u0ab5\u0aa4\u0abe \u0ab9\u0aa4\u0abe \u0a9c\u0acb\u0a95\u0ac7 \u0a8f\u0a95 \u0aaa\u0aa3 \u0a95\u0abe\u0ab0\u0acd\u0aaf\u0a95\u0acd\u0ab0\u0aae\u0aa8\u0ac7 \u0aa4\u0a82\u0aa4\u0acd\u0ab0 \u0aa6\u0acd\u0ab5\u0abe\u0ab0\u0abe \u0aae\u0a82\u0a9c\u0ac2\u0ab0\u0ac0 \u0a86\u0aaa\u0ab5\u0abe\u0aae\u0abe\u0a82 \u0a86\u0ab5\u0aa4\u0ac0 \u0aa8\u0ab9\u0acb\u0aa4", + "Gujarati_Male", + null, + "gu" + ], + [ + "\u090f\u0928\u094d\u091f\u094d\u0930\u093e\u092a\u0940 \u0915\u0902\u092a\u094d\u092f\u0942\u091f\u093f\u0902\u0917 \u092e\u0947\u0902 \u090f\u0928\u094d\u091f\u094d\u0930\u094b\u092a\u0940 \u090a \u0911\u092a\u0930\u0947\u091f\u093f\u0902\u0917 \u0938\u093f\u0938\u094d\u091f\u092e \u0939 \u091c\u0947 \u092a\u0947 \u0938\u0930\u093e \u0915\u094d\u0930\u093f\u092a\u094d\u091f\u094b\u0917\u094d\u0930\u093e\u092b\u093f\u0915 \u092b\u0902\u0915\u094d\u0936\u0928 \u0938\u092c \u0915\u093e\u092e \u0915\u0930\u0947 \u0932\u0947\u0902", + "Bhojpuri_Male", + null, + "bho" + ], + [ + "\u090f\u0928\u094d\u091f\u094d\u0930\u093e\u092a\u0940 \u0915\u0902\u092a\u094d\u092f\u0942\u091f\u093f\u0902\u0917 \u092e\u0947\u0902 \u090f\u0928\u094d\u091f\u094d\u0930\u094b\u092a\u0940 \u090a \u0911\u092a\u0930\u0947\u091f\u093f\u0902\u0917 \u0938\u093f\u0938\u094d\u091f\u092e \u0939 \u091c\u0947 \u092a\u0947 \u0938\u0930\u093e \u0915\u094d\u0930\u093f\u092a\u094d\u091f\u094b\u0917\u094d\u0930\u093e\u092b\u093f\u0915 \u092b\u0902\u0915\u094d\u0936\u0928 \u0938\u092c \u0915\u093e\u092e \u0915\u0930\u0947 \u0932\u0947\u0902", + "Bhojpuri_Female", + null, + "bho" + ] + ], + "eval_split_max_size": 256, + "eval_split_size": 0.01, + "use_speaker_weighted_sampler": false, + "speaker_weighted_sampler_alpha": 1.0, + "use_language_weighted_sampler": false, + "language_weighted_sampler_alpha": 1.0, + "use_length_weighted_sampler": false, + "length_weighted_sampler_alpha": 1.0, + "model_args": { + "num_chars": 444, + "out_channels": 513, + "spec_segment_size": 32, + "hidden_channels": 192, + 
"hidden_channels_ffn_text_encoder": 768, + "num_heads_text_encoder": 2, + "num_layers_text_encoder": 10, + "kernel_size_text_encoder": 3, + "dropout_p_text_encoder": 0.1, + "dropout_p_duration_predictor": 0.5, + "kernel_size_posterior_encoder": 5, + "dilation_rate_posterior_encoder": 1, + "num_layers_posterior_encoder": 16, + "kernel_size_flow": 5, + "dilation_rate_flow": 1, + "num_layers_flow": 4, + "resblock_type_decoder": "2", + "resblock_kernel_sizes_decoder": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes_decoder": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates_decoder": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel_decoder": 512, + "upsample_kernel_sizes_decoder": [ + 16, + 16, + 4, + 4 + ], + "periods_multi_period_discriminator": [ + 2, + 3, + 5, + 7, + 11 + ], + "use_sdp": true, + "noise_scale": 1.0, + "inference_noise_scale": 0.667, + "length_scale": 1.0, + "noise_scale_dp": 1.0, + "inference_noise_scale_dp": 1.0, + "max_inference_len": null, + "init_discriminator": true, + "use_spectral_norm_disriminator": false, + "use_speaker_embedding": false, + "num_speakers": 0, + "speakers_file": "/app/models/speakers.pth", + "d_vector_file": [ + "model_related/Bengali_Female/speakers.pth", + "model_related/Chhattisgarhi_Male/speakers.pth", + "model_related/Magahi_Male/speakers.pth", + "model_related/Marathi_Male/speakers.pth", + "model_related/Maithili_Female/speakers.pth", + "model_related/English_Male/speakers.pth", + "model_related/Chhattisgarhi_Female/speakers.pth", + "model_related/Kannada_Male/speakers.pth", + "model_related/Bhojpuri_Female/speakers.pth", + "model_related/Kannada_Female/speakers.pth", + "model_related/Maithili_Male/speakers.pth", + "model_related/Telugu_Male/speakers.pth", + "model_related/Telugu_Female/speakers.pth", + "model_related/Bengali_Male/speakers.pth", + "model_related/Bhojpuri_Male/speakers.pth", + "model_related/Gujarati_Female/speakers.pth", + 
"model_related/Marathi_Female/speakers.pth", + "model_related/Hindi_Female/speakers.pth", + "model_related/Gujarati_Male/speakers.pth", + "model_related/Hindi_Male/speakers.pth", + "model_related/Magahi_Female/speakers.pth", + "model_related/English_Female/speakers.pth" + ], + "speaker_embedding_channels": 256, + "use_d_vector_file": true, + "d_vector_dim": 512, + "detach_dp_input": true, + "use_language_embedding": true, + "embedded_language_dim": 4, + "num_languages": 11, + "language_ids_file": "/app/models/language_ids.json", + "use_speaker_encoder_as_loss": false, + "speaker_encoder_config_path": "https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/config_se.json", + "speaker_encoder_model_path": "https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/model_se.pth.tar", + "condition_dp_on_speaker": true, + "freeze_encoder": false, + "freeze_DP": false, + "freeze_PE": false, + "freeze_flow_decoder": false, + "freeze_waveform_decoder": false, + "encoder_sample_rate": null, + "interpolate_z": true, + "reinit_DP": false, + "reinit_text_encoder": false + }, + "lr_gen": 0.0002, + "lr_disc": 0.0002, + "lr_scheduler_gen": "ExponentialLR", + "lr_scheduler_gen_params": { + "gamma": 0.999875, + "last_epoch": -1 + }, + "lr_scheduler_disc": "ExponentialLR", + "lr_scheduler_disc_params": { + "gamma": 0.999875, + "last_epoch": -1 + }, + "kl_loss_alpha": 1.0, + "disc_loss_alpha": 1.0, + "gen_loss_alpha": 1.0, + "feat_loss_alpha": 1.0, + "mel_loss_alpha": 45.0, + "dur_loss_alpha": 1.0, + "speaker_encoder_loss_alpha": 9.0, + "return_wav": true, + "use_weighted_sampler": true, + "weighted_sampler_attrs": {}, + "weighted_sampler_multipliers": {}, + "r": 1, + "num_speakers": 0, + "use_speaker_embedding": false, + "speakers_file": "/app/models/speakers.pth", + "speaker_embedding_channels": 256, + "language_ids_file": "/app/models/language_ids.json", + "use_language_embedding": true, + "use_d_vector_file": true, + "d_vector_file": [ + 
"model_related/Bengali_Female/speakers.pth", + "model_related/Chhattisgarhi_Male/speakers.pth", + "model_related/Magahi_Male/speakers.pth", + "model_related/Marathi_Male/speakers.pth", + "model_related/Maithili_Female/speakers.pth", + "model_related/English_Male/speakers.pth", + "model_related/Chhattisgarhi_Female/speakers.pth", + "model_related/Kannada_Male/speakers.pth", + "model_related/Bhojpuri_Female/speakers.pth", + "model_related/Kannada_Female/speakers.pth", + "model_related/Maithili_Male/speakers.pth", + "model_related/Telugu_Male/speakers.pth", + "model_related/Telugu_Female/speakers.pth", + "model_related/Bengali_Male/speakers.pth", + "model_related/Bhojpuri_Male/speakers.pth", + "model_related/Gujarati_Female/speakers.pth", + "model_related/Marathi_Female/speakers.pth", + "model_related/Hindi_Female/speakers.pth", + "model_related/Gujarati_Male/speakers.pth", + "model_related/Hindi_Male/speakers.pth", + "model_related/Magahi_Female/speakers.pth", + "model_related/English_Female/speakers.pth" + ], + "d_vector_dim": 512 +} \ No newline at end of file diff --git a/models/language_ids.json b/models/language_ids.json new file mode 100644 index 0000000000000000000000000000000000000000..0508531c0f7c0eb580fddf43f74151156927f1af --- /dev/null +++ b/models/language_ids.json @@ -0,0 +1,13 @@ +{ + "bho": 0, + "bn": 1, + "en": 2, + "gu": 3, + "hi": 4, + "hne": 5, + "kn": 6, + "mag": 7, + "mai": 8, + "mr": 9, + "te": 10 +} \ No newline at end of file diff --git a/models/speakers.pth b/models/speakers.pth new file mode 100644 index 0000000000000000000000000000000000000000..20cde27ed33b492d39bc46e725dbb88f34d868d6 --- /dev/null +++ b/models/speakers.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d56ede16a2fa6a1575002ce54919618748a8280c7ae529ebaf2767505016128 +size 129 diff --git a/reference_audio/bengali_female.wav b/reference_audio/bengali_female.wav new file mode 100644 index 
0000000000000000000000000000000000000000..fd8886fe661d44ef2505a81b38c60ef146140254 --- /dev/null +++ b/reference_audio/bengali_female.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02f84af0a054bc5ba740d34a1e66bef87351f44ab1f3d512cec8f810a99b7ab6 +size 297516 diff --git a/reference_audio/bengali_male.wav b/reference_audio/bengali_male.wav new file mode 100644 index 0000000000000000000000000000000000000000..88b9bc02492000dec9533bc27e6a198eaec256ba --- /dev/null +++ b/reference_audio/bengali_male.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:930dc0c17d6af6fa1985918e6e0bf56f88905cab46c3ec186c11af4f1096cae1 +size 268332 diff --git a/reference_audio/bhojpuri_female.wav b/reference_audio/bhojpuri_female.wav new file mode 100644 index 0000000000000000000000000000000000000000..eb4436718f092f3d4181e2375eb38d61991cec5e --- /dev/null +++ b/reference_audio/bhojpuri_female.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01dabe9e3e1b3dbbf41d84c7c4d6e9fe852ae14e585d4d5fe969669681963e90 +size 248108 diff --git a/reference_audio/bhojpuri_male.wav b/reference_audio/bhojpuri_male.wav new file mode 100644 index 0000000000000000000000000000000000000000..6faa8d707da2b6071d73f031da3a9c7c1e21ae47 --- /dev/null +++ b/reference_audio/bhojpuri_male.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd2aa4e3b4b29fb21c5327abbbf469afab8bc0d3844407d20142f0d00dd4a2a1 +size 291372 diff --git a/reference_audio/chhattisgarhi_female.wav b/reference_audio/chhattisgarhi_female.wav new file mode 100644 index 0000000000000000000000000000000000000000..3865124ba47e6d56d9278a6803b9b06d1857a9e7 --- /dev/null +++ b/reference_audio/chhattisgarhi_female.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2c3745dd76f07c36bcc4787e1fde6616bf206d79b5802e74743ea240cab8705 +size 271916 diff --git a/reference_audio/chhattisgarhi_male.wav b/reference_audio/chhattisgarhi_male.wav new 
file mode 100644 index 0000000000000000000000000000000000000000..10ea72f2f040fc56f6762af3f74e90cc0124adba --- /dev/null +++ b/reference_audio/chhattisgarhi_male.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e701dcc6ac9438b40009e3f5ae5514c29724b5ef65fe4401358e8bddc25c7bf +size 259884 diff --git a/reference_audio/english_female.wav b/reference_audio/english_female.wav new file mode 100644 index 0000000000000000000000000000000000000000..ab295ec9ec750f28359c68ec4df2eeaa6befc9ce --- /dev/null +++ b/reference_audio/english_female.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f6aa5f72c8810df348f05b63561a94e65b3f56211b571680be0aa2379502a8e +size 291884 diff --git a/reference_audio/english_male.wav b/reference_audio/english_male.wav new file mode 100644 index 0000000000000000000000000000000000000000..4307e6ab7fea7c1a1892dcfbc15a35552f858177 --- /dev/null +++ b/reference_audio/english_male.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293fa875e14b220dcfa2f634801a814c78a306205c398a4b5f06fd1291a8cf59 +size 114732 diff --git a/reference_audio/gujarati_female.wav b/reference_audio/gujarati_female.wav new file mode 100644 index 0000000000000000000000000000000000000000..913a8888bd4e1c84393f70ec49e0d515a703862c --- /dev/null +++ b/reference_audio/gujarati_female.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed3f3dcdb4b8a593c42bee2b85ac5c37fb2f182b91ec2dbef7efcef3af42b7c2 +size 1033260 diff --git a/reference_audio/gujarati_male.wav b/reference_audio/gujarati_male.wav new file mode 100644 index 0000000000000000000000000000000000000000..bc759ff7c65d3ce227d5f1b643110af1059cef78 --- /dev/null +++ b/reference_audio/gujarati_male.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0de01b4153576afdb1b7de4e72a299737f44c8ce67f73882e0278afaec90bcb +size 206892 diff --git a/reference_audio/hindi_female.wav b/reference_audio/hindi_female.wav new file 
mode 100644 index 0000000000000000000000000000000000000000..29eba840c59b4987b88d1281d7ba6d9b38726710 --- /dev/null +++ b/reference_audio/hindi_female.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b629d66b52a63c3c9aeafeb1b19abf701969ce3d40ad0fb3019cee3d5b6cd167 +size 252460 diff --git a/reference_audio/hindi_male.wav b/reference_audio/hindi_male.wav new file mode 100644 index 0000000000000000000000000000000000000000..45d34423a4af31dd7afc9fae54b5aa85c381a673 --- /dev/null +++ b/reference_audio/hindi_male.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9907ea9933f7700a72a852109c9e958ef57555f3d49de80ac8b4e0fe35a0338 +size 215084 diff --git a/reference_audio/kannada_female.wav b/reference_audio/kannada_female.wav new file mode 100644 index 0000000000000000000000000000000000000000..0dbfd42f9ab898f6a385d650238a868ed78c486c --- /dev/null +++ b/reference_audio/kannada_female.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8a2c9c19255e246b78aa968c3c3dcc01942e42f0e4bb10cfe31265b4a0ab698 +size 638508 diff --git a/reference_audio/kannada_male.wav b/reference_audio/kannada_male.wav new file mode 100644 index 0000000000000000000000000000000000000000..06ecacb2cebde00d14d6c069f8be85190b7d7841 --- /dev/null +++ b/reference_audio/kannada_male.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0fada49154dae650adb4fbea0c83f16222eecd83591d11af55e8d68de91fba0 +size 203308 diff --git a/reference_audio/magahi_female.wav b/reference_audio/magahi_female.wav new file mode 100644 index 0000000000000000000000000000000000000000..f18a623b2ac3fb9cc69e1acb861ed47b60aef618 --- /dev/null +++ b/reference_audio/magahi_female.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1cbb361ed678f9e37d7f54d763014c98d2935d3e371e7a8e327684029af7cea +size 212524 diff --git a/reference_audio/magahi_male.wav b/reference_audio/magahi_male.wav new file mode 100644 index 
0000000000000000000000000000000000000000..f02eb90739eb2b1b8fb13158ce51d50372185205 --- /dev/null +++ b/reference_audio/magahi_male.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12d7a67d7379cb4c565e26b8c3b3b02e27c6c4df3577209684a732ac3c1ec1e7 +size 229932 diff --git a/reference_audio/maithili_female.wav b/reference_audio/maithili_female.wav new file mode 100644 index 0000000000000000000000000000000000000000..103ab12fb2210e8ebe571eda2cc410c6502e7969 --- /dev/null +++ b/reference_audio/maithili_female.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4abbc686a785b50fe403cde53abc0993e0af0ccce899eb2bc5721807a20b08d0 +size 124460 diff --git a/reference_audio/maithili_male.wav b/reference_audio/maithili_male.wav new file mode 100644 index 0000000000000000000000000000000000000000..be2e7d3b246b6c723c142aebba1c07eb51566dd0 --- /dev/null +++ b/reference_audio/maithili_male.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0de0229bb6358008f6cafc02019d00fd820240a5b8c1314e8b67985ccd3215d1 +size 198700 diff --git a/reference_audio/marathi_female.wav b/reference_audio/marathi_female.wav new file mode 100644 index 0000000000000000000000000000000000000000..d5c968801bf1b51672a79e8439f03cdb7aa37d87 --- /dev/null +++ b/reference_audio/marathi_female.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4252210496fced3d6c744ec938b3bb235ba8d4ea7ead9174e883cf7b23d251b0 +size 401452 diff --git a/reference_audio/marathi_male.wav b/reference_audio/marathi_male.wav new file mode 100644 index 0000000000000000000000000000000000000000..8737d184d6ece7eb8a799e084526cb0540cc4bb6 --- /dev/null +++ b/reference_audio/marathi_male.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2091c5bdeafdba6166db62a64f2468453bc9249d1a1076b693c90c456baf03b +size 437292 diff --git a/reference_audio/telugu_female.wav b/reference_audio/telugu_female.wav new file mode 100644 index 
0000000000000000000000000000000000000000..556f345cdfbe406296d24ac2b15d2123457fa356 --- /dev/null +++ b/reference_audio/telugu_female.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94a643e9a758077a33a7bd2540e6616f8a8a7b0301850307b94cf73808ac46f9 +size 464940 diff --git a/reference_audio/telugu_male.wav b/reference_audio/telugu_male.wav new file mode 100644 index 0000000000000000000000000000000000000000..e27618a99be3b86245e7067a402639a5893e964a --- /dev/null +++ b/reference_audio/telugu_male.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66d3fbd86eb4adb51e7fe561a7d65b26f468c0b3158380042dadd1708e448ea7 +size 422956 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..1363737a1a47a6620a0750efd9a3a448095ad893 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +fastapi +uvicorn +python-multipart