aishitdharwal committed on
Commit
cd3586a
·
1 Parent(s): f8ab240
Files changed (2) hide show
  1. app.py +19 -5
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,17 +1,31 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
  import numpy as np
4
 
5
- # Initialize the model
 
6
  pipe = pipeline(model="suno/bark-small")
7
 
8
  def text_to_speech(text):
 
 
 
 
 
 
 
 
9
  # Generate audio from text
10
- output = pipe(text)
 
 
 
11
 
12
- # Normalize audio to prevent clipping
13
  audio = output["audio"]
 
14
  audio = audio / np.max(np.abs(audio)) # Normalize to [-1, 1]
 
15
 
16
  return (output["sampling_rate"], audio)
17
 
@@ -22,7 +36,7 @@ demo = gr.Interface(
22
  label="Text to speak",
23
  placeholder="Enter the text you want to convert to speech...",
24
  ),
25
- outputs=gr.Audio(label="Generated Speech", type="numpy"), # Specify numpy type
26
  title="Text to Speech with Bark-small",
27
  description="Convert text to speech using the Suno Bark-small model",
28
  examples=[
 
1
  import gradio as gr
2
+ from transformers import pipeline, AutoProcessor
3
  import numpy as np
4
 
5
+ # Initialize the model and processor
6
+ processor = AutoProcessor.from_pretrained("suno/bark-small")
7
  pipe = pipeline(model="suno/bark-small")
8
 
9
  def text_to_speech(text):
10
+ # Prepare the input with proper attention mask
11
+ inputs = processor(
12
+ text,
13
+ return_tensors="pt",
14
+ padding=True,
15
+ return_attention_mask=True
16
+ )
17
+
18
  # Generate audio from text
19
+ output = pipe(
20
+ text,
21
+ attention_mask=inputs.attention_mask
22
+ )
23
 
24
+ # Normalize and scale audio to int16 range
25
  audio = output["audio"]
26
+ audio = np.float32(audio) # Ensure float32 type
27
  audio = audio / np.max(np.abs(audio)) # Normalize to [-1, 1]
28
+ audio = (audio * 32767).astype(np.int16) # Convert to int16 range
29
 
30
  return (output["sampling_rate"], audio)
31
 
 
36
  label="Text to speak",
37
  placeholder="Enter the text you want to convert to speech...",
38
  ),
39
+ outputs=gr.Audio(label="Generated Speech"),
40
  title="Text to Speech with Bark-small",
41
  description="Convert text to speech using the Suno Bark-small model",
42
  examples=[
requirements.txt CHANGED
@@ -2,4 +2,5 @@ gradio
2
  transformers
3
  torch
4
  accelerate
5
- numpy
 
 
2
  transformers
3
  torch
4
  accelerate
5
+ numpy
6
+ scipy