import gradio as gr
import tensorflow as tf
import numpy as np
import nltk
from nltk.corpus import cmudict
from scipy.io.wavfile import write
# Download required NLTK data at import time (no-op if already present):
# - 'averaged_perceptron_tagger': POS tagger model; not used by the visible
#   code below — presumably needed elsewhere, TODO confirm it is required
# - 'cmudict': CMU Pronouncing Dictionary, consumed by preprocess_text()
nltk.download('averaged_perceptron_tagger')
nltk.download('cmudict')
# Load the trained Keras model from alongside this script. Its expected input
# shape should match the (1, seq_len, 13) arrays produced by
# preprocess_text() — NOTE(review): verify against the saved model.
model = tf.keras.models.load_model('audio_model.h5')
# Preprocess input text
def preprocess_text(text):
    """
    Convert input text into a feature array suitable for the model.

    Looks up each whitespace-separated word in the CMU Pronouncing
    Dictionary to obtain its phoneme sequence (unknown words map to a
    single 'UNKNOWN' placeholder), then builds a feature array with one
    13-dimensional vector per phoneme.

    Args:
        text: Input string; lowercased and split on whitespace.

    Returns:
        numpy array of shape (1, sequence_length, 13) — batch dimension
        included. sequence_length is at least 1 even for empty input.
    """
    # cmudict.dict() parses the whole ~130k-entry lexicon, which is far too
    # slow to redo per request — cache it on the function object instead.
    if not hasattr(preprocess_text, "_cmudict"):
        preprocess_text._cmudict = cmudict.dict()
    d = preprocess_text._cmudict
    words = text.lower().split()
    phonemes = []
    for word in words:
        if word in d:
            # d[word] is a list of pronunciations; take the first.
            phonemes.append(d[word][0])
        else:
            # Use a placeholder for words not found in cmudict
            phonemes.append(['UNKNOWN'])
    # Flatten the list of phonemes
    flattened_phonemes = [p for sublist in phonemes for p in sublist]
    # Create dummy feature vectors (this should be replaced with actual
    # feature extraction). NOTE(review): random features make the output
    # non-deterministic — placeholder only.
    num_features = 13
    # Guard against empty input: a zero-length sequence, shape (1, 0, 13),
    # would break model.predict downstream.
    sequence_length = max(len(flattened_phonemes), 1)
    input_data = np.random.rand(sequence_length, num_features)  # Placeholder
    # Add batch dimension
    input_data = np.expand_dims(input_data, axis=0)
    return input_data
# Convert model output to an audio file
def convert_to_audio(model_output, filename="output.wav", sample_rate=22050):
    """
    Normalize model output to [-1, 1] and write it as a float32 WAV file.

    Args:
        model_output: Array-like of audio samples; flattened to mono 1-D.
        filename: Destination path for the WAV file.
        sample_rate: Sample rate in Hz written into the WAV header.

    Returns:
        The filename the audio was written to.

    Raises:
        ValueError: If model_output is None or empty.
    """
    # Check if model_output is empty
    if model_output is None or len(model_output) == 0:
        raise ValueError("Model output is empty.")
    samples = np.asarray(model_output, dtype=np.float64).flatten()
    lo, hi = samples.min(), samples.max()
    if hi > lo:
        # Linearly map [min, max] onto the full [-1, 1] range.
        normalized_output = np.interp(samples, (lo, hi), (-1, 1))
    else:
        # Constant signal: np.interp requires a strictly increasing xp, so
        # the original mapping is ill-defined here — emit silence instead.
        normalized_output = np.zeros_like(samples)
    # Cast to float32: scipy writes the dtype verbatim, and 64-bit float
    # WAV files are poorly supported by most players/tools.
    write(filename, sample_rate, normalized_output.astype(np.float32))
    return filename
# Generate sound effect
def generate_sfx(text):
    """
    End-to-end pipeline: text -> phoneme features -> model -> WAV file.

    Args:
        text: Word or sentence to turn into a sound effect.

    Returns:
        Path of the generated audio file ("output.wav").
    """
    features = preprocess_text(text)
    # Run inference on the single-item batch.
    prediction = model.predict(features)
    # Collapse a (1, n) prediction matrix into a flat 1-D sample stream
    # before handing it to the WAV writer.
    if prediction.ndim == 2 and prediction.shape[1] > 1:
        prediction = prediction.flatten()
    return convert_to_audio(prediction, filename="output.wav")
# Define the Gradio interface: one textbox in, one playable/downloadable
# audio file out. type="filepath" matches generate_sfx returning a path
# rather than raw audio data; live=False means inference runs only on
# explicit submit, not on every keystroke.
interface = gr.Interface(
    fn=generate_sfx,
    inputs=gr.Textbox(label="Enter a Word", placeholder="Write a Word To Convert it into SFX Sound"),
    outputs=gr.Audio(label="Generated SFX", type="filepath"),
    live=False,
    title="SFX Generator from Text",
    description="Enter a word or sentence, and the model will generate an SFX sound.",
)
# Run the interface only when executed as a script (not when imported),
# starting Gradio's local web server.
if __name__ == "__main__":
    interface.launch()