Spaces:

aishitdharwal
/

tta

Sleeping

aishitdharwal committed on Feb 16, 2025

Commit

cd3586a

1 Parent(s): f8ab240

add app

Files changed (2) hide show

app.py CHANGED Viewed

@@ -1,17 +1,31 @@
 import gradio as gr
-from transformers import pipeline
 import numpy as np
-# Initialize the model
 pipe = pipeline(model="suno/bark-small")
 def text_to_speech(text):
     # Generate audio from text
-    output = pipe(text)
-    # Normalize audio to prevent clipping
     audio = output["audio"]
     audio = audio / np.max(np.abs(audio))  # Normalize to [-1, 1]
     return (output["sampling_rate"], audio)
@@ -22,7 +36,7 @@ demo = gr.Interface(
         label="Text to speak",
         placeholder="Enter the text you want to convert to speech...",
     ),
-    outputs=gr.Audio(label="Generated Speech", type="numpy"),  # Specify numpy type
     title="Text to Speech with Bark-small",
     description="Convert text to speech using the Suno Bark-small model",
     examples=[

 import gradio as gr
+from transformers import pipeline, AutoProcessor
 import numpy as np
+# Initialize the model and processor
+processor = AutoProcessor.from_pretrained("suno/bark-small")
 pipe = pipeline(model="suno/bark-small")
 def text_to_speech(text):
+    # Prepare the input with proper attention mask
+    inputs = processor(
+        text,
+        return_tensors="pt",
+        padding=True,
+        return_attention_mask=True
+    )
     # Generate audio from text
+    output = pipe(
+        text,
+        attention_mask=inputs.attention_mask
+    )
+    # Normalize and scale audio to int16 range
     audio = output["audio"]
+    audio = np.float32(audio)  # Ensure float32 type
     audio = audio / np.max(np.abs(audio))  # Normalize to [-1, 1]
+    audio = (audio * 32767).astype(np.int16)  # Convert to int16 range
     return (output["sampling_rate"], audio)
         label="Text to speak",
         placeholder="Enter the text you want to convert to speech...",
     ),
+    outputs=gr.Audio(label="Generated Speech"),
     title="Text to Speech with Bark-small",
     description="Convert text to speech using the Suno Bark-small model",
     examples=[

requirements.txt CHANGED Viewed

@@ -2,4 +2,5 @@ gradio
 transformers
 torch
 accelerate
-numpy

 transformers
 torch
 accelerate
+numpy
+scipy