sathvikt committed on
Commit
e8bdfd6
·
verified ·
1 Parent(s): 0f7ed95

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +82 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ import torch
4
+ import soundfile as sf
5
+ import tempfile
6
+ from parler_tts import ParlerTTSForConditionalGeneration
7
+ from transformers import AutoTokenizer
8
+ import os
9
+ from huggingface_hub import login
10
# Authenticate with the Hugging Face Hub only when a token is configured.
# Calling login(token=None) falls back to an interactive prompt, which cannot
# be answered when the app runs unattended.
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(token=hf_token)

MODEL_NAME = "ai4bharat/indic-parler-tts"
# Prefer the GPU when one is visible to torch; otherwise run on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

print("🚀 Using device:", device)
print("⏳ Loading Kannada TTS model...")

# The model is loaded once at import time and shared by every request.
model = ParlerTTSForConditionalGeneration.from_pretrained(MODEL_NAME).to(device)

# Tokenizer for the text that should be spoken.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Tokenizer for the voice description; it is loaded from the checkpoint name
# recorded on the model's text-encoder config.
description_tokenizer = AutoTokenizer.from_pretrained(
    model.config.text_encoder._name_or_path
)

print("✅ Model loaded successfully")
29
+
30
+
31
+ # =========================================================
32
+ # TTS FUNCTION
33
+ # =========================================================
34
def generate_kannada_tts(prompt_text):
    """Synthesize speech for ``prompt_text`` and return the path of a WAV file.

    Args:
        prompt_text: Text to speak. It is coerced with ``str()`` and stripped
            of surrounding whitespace before use.

    Returns:
        Path of a temporary ``.wav`` file containing the generated audio, or
        ``None`` when ``prompt_text`` is ``None`` or blank. The file is
        created with ``delete=False``, so it is not removed automatically.
    """
    # A missing value must not be turned into the literal text "None" by the
    # str() coercion below and then synthesized.
    if prompt_text is None:
        return None

    prompt_text = str(prompt_text).strip()
    if not prompt_text:
        return None

    # Fixed voice description that conditions the generated speaker/style.
    description = (
        "A calm Kannada male speaker with natural pronunciation, "
        "clear studio quality audio, smooth narration, "
        "and no background noise."
    )

    description_inputs = description_tokenizer(
        description,
        return_tensors="pt",
    ).to(device)
    prompt_inputs = tokenizer(
        prompt_text,
        return_tensors="pt",
    ).to(device)

    # Inference only: no gradients are needed. The attention masks are passed
    # explicitly alongside the token ids, as in the model's usage example.
    with torch.no_grad():
        generation = model.generate(
            input_ids=description_inputs.input_ids,
            attention_mask=description_inputs.attention_mask,
            prompt_input_ids=prompt_inputs.input_ids,
            prompt_attention_mask=prompt_inputs.attention_mask,
        )

    audio = generation.cpu().numpy().squeeze()

    # Reserve a unique file name, then close the handle before writing so no
    # open file descriptor is left behind on each request.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav:
        wav_path = temp_wav.name
    sf.write(wav_path, audio, model.config.sampling_rate)

    return wav_path
68
+
69
+
70
+
71
# Single text input for the sentence to synthesize.
kannada_text_input = gr.Textbox(
    label="Enter Kannada Text",
    placeholder="ನಮಸ್ಕಾರ, ನನ್ನ ಹೆಸರು ಅಥ್ಮಿಕ",
)

# Audio player that receives the WAV path returned by the TTS function.
speech_output = gr.Audio(label="Generated Kannada Speech")

# Wire the TTS function to the input/output components.
demo = gr.Interface(
    fn=generate_kannada_tts,
    inputs=kannada_text_input,
    outputs=speech_output,
    title="Kannada Text To Speech using AI4Bharat",
    description="Deep Learning based Kannada TTS model for project presentation",
)

# Start the web UI as soon as the script runs.
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ git+https://github.com/huggingface/parler-tts.git
3
+ soundfile
4
+ transformers
5
+ accelerate
6
+ sentencepiece
7
+ torch