Update app.py
Browse files
app.py
CHANGED
|
@@ -144,34 +144,28 @@ EXAMPLES = [
|
|
| 144 |
"audio_name": "KAN_F (Happy)",
|
| 145 |
"audio_url": "https://github.com/AI4Bharat/IndicF5/raw/refs/heads/main/prompts/KAN_F_HAPPY_00001.wav",
|
| 146 |
"ref_text": "ನಮ್ ಫ್ರಿಜ್ಜಲ್ಲಿ ಕೂಲಿಂಗ್ ಸಮಸ್ಯೆ ಆಗಿ ನಾನ್ ಭಾಳ ದಿನದಿಂದ ಒದ್ದಾಡ್ತಿದ್ದೆ, ಆದ್ರೆ ಅದ್ನೀಗ ಮೆಕಾನಿಕ್ ಆಗಿರೋ ನಿಮ್ ಸಹಾಯ್ದಿಂದ ಬಗೆಹರಿಸ್ಕೋಬೋದು ಅಂತಾಗಿ ನಿರಾಳ ಆಯ್ತು ನಂಗೆ.",
|
| 147 |
-
"synth_text": "ଦାକ୍ତର ଔଷଧ ଲେଖିଦେଲେ ମଧ୍ୟ। ଆପଣ କିଣିଦେଲେ ମଧ୍ୟ,ଘରକୁ ବି ଆଣିଦେଲେ।
|
| 148 |
},
|
| 149 |
]
|
| 150 |
|
| 151 |
-
#
|
| 152 |
-
for
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
# build list – never contains None
|
| 160 |
-
examples = []
|
| 161 |
-
for ex in EXAMPLES:
|
| 162 |
-
if ex.get("audio_data") is not None:
|
| 163 |
-
examples.append([ex["synth_text"], (ex["sample_rate"], ex["audio_data"]), ex["ref_text"]])
|
| 164 |
-
|
| 165 |
-
# ---------- GRADIO UI ----------
|
| 166 |
with gr.Blocks() as iface:
|
| 167 |
gr.Markdown(
|
| 168 |
"""
|
| 169 |
-
# **IndicF5: High-Quality Text-to-Speech for Indian Languages
|
| 170 |
[IndicF5 on Hugging Face](https://huggingface.co/ai4bharat/IndicF5)
|
| 171 |
We release **IndicF5**, a **near-human polyglot** **Text-to-Speech (TTS)** model trained on **1417 hours** of high-quality speech from **[Rasa](https://huggingface.co/datasets/ai4bharat/Rasa), [IndicTTS](https://www.iitm.ac.in/donlab/indictts/database), [LIMMITS](https://sites.google.com/view/limmits24/), and [IndicVoices-R](https://huggingface.co/datasets/ai4bharat/indicvoices_r)**.
|
| 172 |
IndicF5 supports **11 Indian languages**:
|
| 173 |
**Assamese, Bengali, Gujarati, Hindi, Kannada, Malayalam, Marathi, Odia, Punjabi, Tamil, Telugu.**
|
| 174 |
-
|
|
|
|
| 175 |
"""
|
| 176 |
)
|
| 177 |
|
|
@@ -185,10 +179,18 @@ with gr.Blocks() as iface:
|
|
| 185 |
with gr.Column():
|
| 186 |
output_audio = gr.Audio(label="Generated Speech", type="numpy")
|
| 187 |
|
| 188 |
-
#
|
| 189 |
-
|
| 190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
|
| 192 |
submit_btn.click(synthesize_speech, inputs=[text_input, ref_audio_input, ref_text_input], outputs=[output_audio])
|
| 193 |
|
|
|
|
| 194 |
iface.launch(share=True)
|
|
|
|
| 144 |
"audio_name": "KAN_F (Happy)",
|
| 145 |
"audio_url": "https://github.com/AI4Bharat/IndicF5/raw/refs/heads/main/prompts/KAN_F_HAPPY_00001.wav",
|
| 146 |
"ref_text": "ನಮ್ ಫ್ರಿಜ್ಜಲ್ಲಿ ಕೂಲಿಂಗ್ ಸಮಸ್ಯೆ ಆಗಿ ನಾನ್ ಭಾಳ ದಿನದಿಂದ ಒದ್ದಾಡ್ತಿದ್ದೆ, ಆದ್ರೆ ಅದ್ನೀಗ ಮೆಕಾನಿಕ್ ಆಗಿರೋ ನಿಮ್ ಸಹಾಯ್ದಿಂದ ಬಗೆಹರಿಸ್ಕೋಬೋದು ಅಂತಾಗಿ ನಿರಾಳ ಆಯ್ತು ನಂಗೆ.",
|
| 147 |
+
"synth_text": "ଦାକ୍ତର ଔଷଧ ଲେଖିଦେଲେ ମଧ୍ୟ। ଆପଣ କିଣିଦେଲେ ମଧ୍ୟ,ଘରକୁ ବି ଆଣିଦେଲେ।"
|
| 148 |
},
|
| 149 |
]
|
| 150 |
|
| 151 |
+
# Preload all example audios
|
| 152 |
+
for example in EXAMPLES:
|
| 153 |
+
sample_rate, audio_data = load_audio_from_url(example["audio_url"])
|
| 154 |
+
example["sample_rate"] = sample_rate
|
| 155 |
+
example["audio_data"] = audio_data
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
# Define Gradio interface with layout adjustments
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
with gr.Blocks() as iface:
|
| 160 |
gr.Markdown(
|
| 161 |
"""
|
| 162 |
+
# **IndicF5: High-Quality Text-to-Speech for Indian Languages**
|
| 163 |
[IndicF5 on Hugging Face](https://huggingface.co/ai4bharat/IndicF5)
|
| 164 |
We release **IndicF5**, a **near-human polyglot** **Text-to-Speech (TTS)** model trained on **1417 hours** of high-quality speech from **[Rasa](https://huggingface.co/datasets/ai4bharat/Rasa), [IndicTTS](https://www.iitm.ac.in/donlab/indictts/database), [LIMMITS](https://sites.google.com/view/limmits24/), and [IndicVoices-R](https://huggingface.co/datasets/ai4bharat/indicvoices_r)**.
|
| 165 |
IndicF5 supports **11 Indian languages**:
|
| 166 |
**Assamese, Bengali, Gujarati, Hindi, Kannada, Malayalam, Marathi, Odia, Punjabi, Tamil, Telugu.**
|
| 167 |
+
|
| 168 |
+
Generate speech using a reference prompt audio and its corresponding text.
|
| 169 |
"""
|
| 170 |
)
|
| 171 |
|
|
|
|
| 179 |
with gr.Column():
|
| 180 |
output_audio = gr.Audio(label="Generated Speech", type="numpy")
|
| 181 |
|
| 182 |
+
# Add multiple examples
|
| 183 |
+
examples = [
|
| 184 |
+
[ex["synth_text"], (ex["sample_rate"], ex["audio_data"]), ex["ref_text"]] for ex in EXAMPLES
|
| 185 |
+
]
|
| 186 |
+
|
| 187 |
+
gr.Examples(
|
| 188 |
+
examples=examples,
|
| 189 |
+
inputs=[text_input, ref_audio_input, ref_text_input],
|
| 190 |
+
label="Choose an example:"
|
| 191 |
+
)
|
| 192 |
|
| 193 |
submit_btn.click(synthesize_speech, inputs=[text_input, ref_audio_input, ref_text_input], outputs=[output_audio])
|
| 194 |
|
| 195 |
+
|
| 196 |
iface.launch(share=True)
|