Spaces:
Runtime error
Runtime error
import functools
import zlib

import gradio as gr
import nltk
import numpy as np
import tensorflow as tf
from nltk.corpus import cmudict
# Download the NLTK resources requested by this app at start-up.
for _resource in ("averaged_perceptron_tagger", "cmudict"):
    nltk.download(_resource)

# Load the trained Keras model (path is relative to the working directory).
model = tf.keras.models.load_model("audio_model.h5")
# Number of integer buckets phoneme strings are hashed into.
_NUM_PHONEME_BUCKETS = 1000


@functools.lru_cache(maxsize=1)
def _pronouncing_dict():
    """Return the CMU pronouncing dictionary, building it only once."""
    # cmudict.dict() re-reads the whole corpus, so cache the result instead
    # of paying that cost on every request.
    return cmudict.dict()


def _phoneme_id(phoneme):
    """Map a phoneme string to a stable bucket in [0, _NUM_PHONEME_BUCKETS)."""
    # The built-in hash() of a str is salted per process, so it would assign
    # a different id to the same phoneme after every restart. CRC32 is
    # deterministic across runs and machines.
    return zlib.crc32(phoneme.encode("utf-8")) % _NUM_PHONEME_BUCKETS


# Preprocess input text
def preprocess_text(text):
    """Convert input text into a 1-D integer array of phoneme ids.

    The text is lower-cased and split on whitespace. Each word is replaced by
    the phonemes of its first pronunciation in the CMU pronouncing
    dictionary; a word that is not in the dictionary contributes the single
    placeholder phoneme ``'UNKNOWN'``. Every phoneme is then mapped to a
    deterministic integer id.

    NOTE(review): the ids must match the encoding used when the model was
    trained -- confirm against the training pipeline.

    Args:
        text: The raw user-supplied string.

    Returns:
        A 1-D integer ``numpy.ndarray`` with one id per phoneme. Empty or
        whitespace-only input yields an empty integer array.
    """
    words = text.lower().split()
    if not words:
        # Nothing to look up: skip loading the dictionary and keep the dtype
        # consistent with the non-empty case.
        return np.array([], dtype=int)

    pronunciations = _pronouncing_dict()
    ids = []
    for word in words:
        entries = pronunciations.get(word)
        # Use the first listed pronunciation; fall back to a placeholder
        # phoneme for out-of-vocabulary words.
        phonemes = entries[0] if entries else ["UNKNOWN"]
        ids.extend(_phoneme_id(p) for p in phonemes)

    return np.array(ids, dtype=int)
# Define function to generate sound
def generate_sfx(text):
    """Run the model on the given text and return its raw prediction.

    The text is converted to a numeric sequence by ``preprocess_text``, given
    a leading batch dimension, and passed to ``model.predict``.

    Args:
        text: The word or sentence entered by the user.

    Returns:
        The array returned by ``model.predict``. No audio post-processing is
        done yet; the prediction is returned as-is as a placeholder.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    # Reject blank input up front: it would otherwise reach the model as a
    # zero-length sequence instead of producing a readable message in the UI.
    if not text or not text.strip():
        raise gr.Error("Please enter a word or sentence to convert.")

    features = preprocess_text(text)
    # The model expects a batch, so wrap the single sequence: (n,) -> (1, n).
    batch = np.expand_dims(features, axis=0)
    return model.predict(batch)
# Text box in which the user types the word or sentence to convert.
text_input = gr.Textbox(
    label="Enter a Word",
    placeholder="Write a Word To Convert it into SFX Sound",
)

# Gradio UI wiring the text box to generate_sfx.
interface = gr.Interface(
    fn=generate_sfx,
    inputs=text_input,
    # The prediction is displayed as a numeric array for now; change this to
    # an audio (or other) component once the output is post-processed.
    outputs="numpy",
    title="SFX Generator from Text",
    description="Enter a word or sentence, and the model will generate an SFX sound.",
    live=False,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()