sathvikt committed on
Commit
02c124a
·
verified ·
1 Parent(s): e8bdfd6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -51
app.py CHANGED
@@ -1,82 +1,80 @@
1
-
2
  import gradio as gr
3
  import torch
4
  import soundfile as sf
5
  import tempfile
6
- from parler_tts import ParlerTTSForConditionalGeneration
7
- from transformers import AutoTokenizer
8
- import os
9
- from huggingface_hub import login
10
# Log in to the Hugging Face Hub with the token read from the HF_TOKEN
# environment variable (os.getenv returns None when it is unset).
login(token=os.getenv("HF_TOKEN"))

# Hub repository id used for both the TTS model and its prompt tokenizer.
MODEL_NAME = "ai4bharat/indic-parler-tts"

# Run on the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print("🚀 Using device:", device)
print("⏳ Loading Kannada TTS model...")

# Load the weights first, then move the model to the selected device.
model = ParlerTTSForConditionalGeneration.from_pretrained(MODEL_NAME)
model = model.to(device)

# Tokenizer for the text that will be spoken.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Tokenizer for the voice description; its checkpoint name is read from the
# model's text-encoder config.
description_tokenizer = AutoTokenizer.from_pretrained(
    model.config.text_encoder._name_or_path
)

print("✅ Model loaded successfully")
29
 
30
 
31
  # =========================================================
32
  # TTS FUNCTION
33
  # =========================================================
34
def generate_kannada_tts(prompt_text):
    """Synthesize Kannada speech for ``prompt_text`` and save it as a WAV file.

    Uses the module-level ``model``, ``tokenizer``, ``description_tokenizer``
    and ``device``.

    Args:
        prompt_text: Text to speak. It is converted with ``str()`` and
            stripped; ``None`` and blank input are treated as "nothing to do".

    Returns:
        Path of a newly created ``.wav`` file containing the generated audio,
        or ``None`` when there is no text to synthesize.
    """
    # str(None) would be the truthy string "None" and get synthesized as a
    # word, so reject a missing value before converting.
    if prompt_text is None:
        return None

    prompt_text = str(prompt_text).strip()
    if not prompt_text:
        return None

    # Fixed description of the voice, passed to the model as `input_ids`.
    description = (
        "A calm Kannada male speaker with natural pronunciation, "
        "clear studio quality audio, smooth narration, "
        "and no background noise."
    )

    description_inputs = description_tokenizer(
        description,
        return_tensors="pt",
    ).to(device)
    prompt_inputs = tokenizer(
        prompt_text,
        return_tensors="pt",
    ).to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        generation = model.generate(
            input_ids=description_inputs.input_ids,
            prompt_input_ids=prompt_inputs.input_ids,
        )

    audio = generation.cpu().numpy().squeeze()

    # Reserve a unique path, then close the handle before writing so the file
    # descriptor is not leaked and the path can be re-opened on every
    # platform. delete=False keeps the file on disk for the caller.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav:
        wav_path = temp_wav.name

    sf.write(wav_path, audio, model.config.sampling_rate)

    return wav_path
68
 
69
 
70
-
 
 
71
# Single text input holding the Kannada sentence to speak.
_kannada_text_input = gr.Textbox(
    label="Enter Kannada Text",
    placeholder="ನಮಸ್ಕಾರ, ನನ್ನ ಹೆಸರು ಅಥ್ಮಿಕ",
)

# Audio component that receives the value returned by the TTS function.
_speech_output = gr.Audio(label="Generated Kannada Speech")

# Wire the TTS function to the input and output components.
demo = gr.Interface(
    fn=generate_kannada_tts,
    inputs=_kannada_text_input,
    outputs=_speech_output,
    title="Kannada Text To Speech using AI4Bharat",
    description="Deep Learning based Kannada TTS model for project presentation",
)

# Start the web UI.
demo.launch()
 
1
+ import os
2
  import gradio as gr
3
  import torch
4
  import soundfile as sf
5
  import tempfile
6
+ from transformers import AutoModel
 
 
 
 
7
 
8
+ # =========================================================
9
+ # CONFIG
10
+ # =========================================================
11
# Hub repository id of the TTS model.
MODEL_NAME = "ai4bharat/IndicF5"

# Access token read from the environment; None when HF_TOKEN is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Report the GPU when torch sees one, otherwise the CPU.
# NOTE(review): `device` is only printed in this block; it is not passed to
# the model here — confirm the model's own code selects the device.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print("🚀 Using device:", device)
print("⏳ Loading IndicF5 model...")

# trust_remote_code=True executes Python code shipped in the model repository,
# so MODEL_NAME must point at a repository you trust.
model = AutoModel.from_pretrained(MODEL_NAME, trust_remote_code=True, token=HF_TOKEN)

print("✅ IndicF5 model loaded")
25
 
26
 
27
  # =========================================================
28
  # TTS FUNCTION
29
  # =========================================================
30
def generate_indicf5_tts(text, ref_audio, ref_text):
    """Synthesize speech for ``text`` with the module-level IndicF5 ``model``.

    Args:
        text: Text to synthesize. ``None``, empty and whitespace-only values
            are treated as "nothing to do".
        ref_audio: Path of the reference prompt audio (the UI's audio input
            uses ``type="filepath"``), or ``None``/empty when none was given.
        ref_text: Transcript of the reference audio; forwarded to the model
            unchanged.

    Returns:
        Path of a newly created ``.wav`` file containing the generated audio,
        or ``None`` when ``text`` is blank or ``ref_audio`` is missing.
    """
    # Function-scope import keeps this block self-contained; numpy is only
    # needed for the sample-format conversion below.
    import numpy as np

    # Guard clauses: `not text` also covers None, which would otherwise raise
    # AttributeError on .strip().
    if not text or not text.strip():
        return None
    if not ref_audio:
        return None

    audio = model(
        text,
        ref_audio_path=ref_audio,
        ref_text=ref_text,
    )

    # Scale 16-bit integer samples to float32 by dividing by 32768; any other
    # dtype is written as-is.
    samples = np.array(audio)
    if samples.dtype == np.int16:
        samples = samples.astype(np.float32) / 32768.0

    # Reserve a unique path, then close the handle before writing so the file
    # descriptor is not leaked and the path can be re-opened on every
    # platform. delete=False keeps the file on disk for the caller.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav:
        wav_path = temp_wav.name

    # NOTE(review): 24000 Hz is hard-coded; presumably the model's output
    # sample rate — confirm against the IndicF5 model card.
    sf.write(wav_path, samples, 24000)

    return wav_path
54
 
55
 
56
+ # =========================================================
57
+ # UI
58
+ # =========================================================
59
# Text that should be spoken.
_synthesis_text_input = gr.Textbox(
    label="Text to Synthesize (Kannada)",
    placeholder="ನಮಸ್ಕಾರ, ಇದು ನನ್ನ ಕನ್ನಡ TTS ಪ್ರಾಜೆಕ್ಟ್",
)

# Reference recording; type="filepath" hands the function a path string.
_reference_audio_input = gr.Audio(
    type="filepath",
    label="Reference Prompt Audio",
)

# Transcript of what is said in the reference recording.
_reference_text_input = gr.Textbox(
    label="Reference Audio Transcript",
    placeholder="Reference audio spoken text",
)

# Audio component that receives the value returned by the TTS function.
_speech_output = gr.Audio(label="Generated Kannada Speech")

# Input order matches the (text, ref_audio, ref_text) parameters of the
# TTS function.
demo = gr.Interface(
    fn=generate_indicf5_tts,
    inputs=[
        _synthesis_text_input,
        _reference_audio_input,
        _reference_text_input,
    ],
    outputs=_speech_output,
    title="IndicF5 Kannada Text To Speech",
    description="Near-human Kannada TTS using AI4Bharat IndicF5",
)

# Start the web UI.
demo.launch()