# Spaces status: Runtime error
import os
import tempfile

import gradio as gr
import numpy as np
import soundfile as sf
from g2p_en import G2p
from tensorflow.keras.models import load_model
# Location of the pre-trained Keras model, relative to the working directory.
model_path = './model.h5'

# Deserialize the model once at import time; generate_audio_features() reads
# this module-level object on every call.
model = load_model(model_path)

# Shared grapheme-to-phoneme converter used by text_to_phonemes().
g2p = G2p()
def text_to_phonemes(text):
    """Return the phoneme sequence the shared ``g2p`` converter yields for *text*."""
    return g2p(text)
def generate_audio_features(phonemes, duration, sample_rate=22050):
    """Run the pre-trained model on a phoneme sequence and fit the output to a duration.

    Args:
        phonemes: Iterable of phoneme symbols (strings), e.g. the output of
            ``text_to_phonemes``.
        duration: Desired clip length in seconds.
        sample_rate: Samples per second used to convert ``duration`` into a
            sample count. Defaults to 22050.

    Returns:
        A 1-D NumPy array holding ``int(duration * sample_rate)`` values taken
        from the model prediction.
    """
    # g2p_en emits multi-character symbols such as "HH" or "AH0"; calling
    # ord() on a whole symbol raises TypeError, so encode character by
    # character instead. Single-character symbols encode exactly as before.
    # NOTE(review): this is still a placeholder encoding -- replace it with the
    # phoneme vocabulary the model was actually trained on.
    codes = [ord(ch) for symbol in phonemes for ch in symbol]

    # Add a leading batch axis before handing the sequence to the model.
    phoneme_features = np.expand_dims(np.array(codes), axis=0)
    audio_features = model.predict(phoneme_features)

    # np.resize flattens the prediction and repeats or truncates it so the
    # result has exactly num_samples entries.
    num_samples = int(duration * sample_rate)
    return np.resize(audio_features, (num_samples,))
def features_to_audio(features):
    """Rescale generated features linearly into the audio range [-1, 1].

    Args:
        features: Array-like of numeric feature values.

    Returns:
        A float NumPy array of the same shape as ``features``. The minimum maps
        to -1 and the maximum to 1. A constant input maps to all zeros
        (silence) and an empty input is returned as an empty array.
    """
    features = np.asarray(features, dtype=float)

    # min()/max() raise ValueError on an empty array; there is nothing to scale.
    if features.size == 0:
        return features

    low, high = features.min(), features.max()

    # np.interp needs strictly increasing x-coordinates. A flat signal would
    # give it (c, c) and an undefined result, so emit silence instead.
    if low == high:
        return np.zeros_like(features)

    return np.interp(features, (low, high), (-1, 1))
def generate_audio(text, duration):
    """Convert text into a WAV file and return the file's path.

    Pipeline: text -> phonemes -> model features -> normalized samples -> WAV.

    Args:
        text: Input text to convert.
        duration: Desired clip length in seconds.

    Returns:
        Path of the newly written WAV file. Each call writes to its own
        temporary file; the file is not deleted here, so cleanup is left to
        the caller or the operating system.
    """
    sample_rate = 22050  # Sample rate written into the WAV header

    # Step 1: Convert text to phonemes
    phonemes = text_to_phonemes(text)
    # Step 2: Generate audio features using the pre-trained model and duration
    audio_features = generate_audio_features(phonemes, duration)
    # Step 3: Convert features to actual audio
    audio_data = features_to_audio(audio_features)

    # Step 4: Save the generated audio. A single fixed file name would be
    # overwritten whenever two requests overlap, so reserve a unique path per
    # call and close the handle before soundfile reopens it for writing.
    with tempfile.NamedTemporaryFile(
        prefix='generated_audio_', suffix='.wav', delete=False
    ) as handle:
        audio_file = handle.name
    sf.write(audio_file, audio_data, sample_rate)
    return audio_file
# Gradio interface
def text_to_audio_interface(text, duration):
    """Gradio callback: synthesize audio for *text* and return the file path.

    Delegates directly to ``generate_audio`` with the text box contents and
    the duration chosen on the slider.
    """
    return generate_audio(text, duration)
# Build and launch the Gradio UI: a text box and a duration slider feed
# text_to_audio_interface, and the returned file is shown in an audio player.
#
# The gr.inputs / gr.outputs namespaces and the `default=` keyword were
# deprecated in Gradio 3 and removed in Gradio 4, where referencing them raises
# AttributeError at startup. The top-level component classes with `value=` are
# the supported equivalents.
#
# NOTE(review): live=True re-runs the function on every input change and hides
# the submit button, although the description tells users to press 'Generate'.
# Confirm which behavior is intended.
gr.Interface(
    fn=text_to_audio_interface,
    inputs=[
        gr.Textbox(label="Enter a Word", placeholder="Write a Word To Convert it into Sfx Sound"),
        gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Audio Duration (seconds)"),
    ],
    outputs=gr.Audio(label="Generated Audio Preview"),
    title="Text-to-Audio Generator",
    description="Write a Word, set the duration, and press 'Generate' to convert the word into an audio effect!",
    live=True,
).launch()