Spaces:
Runtime error
Runtime error
File size: 2,204 Bytes
6c20eaa 38b530f 6c20eaa 38b530f 6c20eaa 38b530f 6c20eaa 38b530f 6c20eaa 38b530f 6c20eaa 38b530f 6c20eaa 38b530f 6c20eaa 38b530f 6c20eaa 38b530f 6c20eaa 38b530f 6c20eaa 38b530f 6c20eaa 38b530f 6c20eaa 38b530f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import functools
import zlib

import gradio as gr
import nltk
import numpy as np
import tensorflow as tf
from nltk.corpus import cmudict
# Fetch the NLTK resources this script asks for (same order as before).
for _nltk_resource in ('averaged_perceptron_tagger', 'cmudict'):
    nltk.download(_nltk_resource)

# Load the trained Keras model from the HDF5 file next to this script.
model = tf.keras.models.load_model('audio_model.h5')
# Preprocess input text
# Symbol emitted for a word that has no entry in the pronunciation lexicon.
_UNKNOWN_PHONEME = 'UNKNOWN'
# Number of integer buckets phoneme symbols are folded into.
_PHONEME_BUCKETS = 1000


@functools.lru_cache(maxsize=1)
def _load_pronunciations():
    """Return the CMU Pronouncing Dictionary, building it only once.

    ``cmudict.dict()`` constructs a fresh dictionary from the corpus on every
    call, so the result is cached instead of being rebuilt per request.
    """
    return cmudict.dict()


def _phoneme_id(phoneme):
    """Map a phoneme symbol to a stable integer in ``range(_PHONEME_BUCKETS)``.

    The built-in ``hash`` is salted per process for strings, so it would
    assign different ids to the same phoneme on every interpreter start.
    CRC-32 depends only on the symbol's bytes.
    """
    return zlib.crc32(phoneme.encode('utf-8')) % _PHONEME_BUCKETS


def preprocess_text(text, pronunciations=None):
    """Convert text into a 1-D integer array of phoneme ids.

    The text is lower-cased and split on whitespace. Each word is replaced by
    the phonemes of its first listed pronunciation; a word missing from the
    lexicon contributes a single ``'UNKNOWN'`` symbol. Every symbol is then
    folded into an integer bucket.

    NOTE(review): the bucket encoding is a placeholder carried over from the
    original code; adapt it to whatever input encoding the model was actually
    trained with.

    Args:
        text: Input string; may be empty or whitespace-only.
        pronunciations: Optional mapping of lower-case word to a list of
            pronunciations, each a list of phoneme strings. Defaults to the
            CMU Pronouncing Dictionary.

    Returns:
        ``numpy.ndarray`` of dtype int64 with one id per phoneme, in order.
        The array is empty (but still int64) when the text has no words.
    """
    lexicon = _load_pronunciations() if pronunciations is None else pronunciations

    phoneme_ids = []
    for word in text.lower().split():
        variants = lexicon.get(word)
        # Only the first pronunciation variant is used.
        symbols = variants[0] if variants else [_UNKNOWN_PHONEME]
        phoneme_ids.extend(_phoneme_id(symbol) for symbol in symbols)

    # Explicit dtype keeps empty input integer-typed like non-empty input.
    return np.array(phoneme_ids, dtype=np.int64)
# Define function to generate sound
def generate_sfx(text):
    """Encode the text and return the model's raw prediction for it.

    The text is passed through ``preprocess_text``, wrapped into a batch of
    one, and handed to ``model.predict``. No audio post-processing happens
    yet: the prediction array itself is returned as a placeholder until the
    model output is turned into a sound (e.g. a WAV file).
    """
    features = preprocess_text(text)
    # A leading axis is added so the model sees a batch holding one sample.
    batch = np.expand_dims(features, axis=0)
    return model.predict(batch)
# Build the Gradio UI: one text box in, the model's numeric output out.
_text_input = gr.Textbox(
    label="Enter a Word",
    placeholder="Write a Word To Convert it into SFX Sound",
)
interface = gr.Interface(
    title="SFX Generator from Text",
    description="Enter a word or sentence, and the model will generate an SFX sound.",
    fn=generate_sfx,
    inputs=_text_input,
    # Assumes the model output is numerical; switch to an audio (or other)
    # output type once the prediction is converted into sound.
    outputs="numpy",
    live=False,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()
|