import os

import gradio as gr
import numpy as np
import soundfile as sf
from g2p_en import G2p
from tensorflow.keras.models import load_model

# Sample rate (Hz) used both to size the generated signal and to write the
# WAV file; kept in one place so the two can never drift apart.
SAMPLE_RATE = 22050

# Load the pre-trained model from the local directory
model_path = './model.h5'
model = load_model(model_path)

# Initialize the g2p model for converting text to phonemes
g2p = G2p()


def text_to_phonemes(text):
    """Convert input text to phonemes.

    Args:
        text: The text to convert.

    Returns:
        Whatever the module-level ``g2p`` converter returns for ``text``.
    """
    return g2p(text)


def generate_audio_features(phonemes, duration):
    """Generate audio features from phonemes using the trained model.

    Args:
        phonemes: Iterable of phoneme strings (or a plain string).
        duration: Desired length of the output in seconds.

    Returns:
        A 1-D array holding ``int(duration * SAMPLE_RATE)`` values.

    Raises:
        ValueError: If ``phonemes`` contains no characters to encode.
    """
    # Encode every character of every phoneme.  Calling ``ord`` on a whole
    # phoneme raises TypeError as soon as a symbol is longer than one
    # character; flattening gives the same result for single-character
    # symbols and keeps working for longer ones.
    # NOTE(review): this encoding is a placeholder -- it must match whatever
    # numeric representation the model was trained on; confirm.
    codes = [ord(char) for phoneme in phonemes for char in phoneme]
    if not codes:
        raise ValueError("No phonemes to synthesize; input text is empty.")

    # Add a leading axis of size 1 before handing the sequence to the model.
    phoneme_features = np.expand_dims(np.array(codes), axis=0)

    # Generate audio features using the model
    audio_features = model.predict(phoneme_features)

    # Fit the output to the requested duration.  ``np.resize`` flattens the
    # array, then truncates it or repeats it until it has ``num_samples``
    # entries.
    num_samples = int(duration * SAMPLE_RATE)
    return np.resize(audio_features, (num_samples,))


def features_to_audio(features):
    """Convert generated features back to audio.

    Linearly rescales ``features`` so that its minimum maps to -1 and its
    maximum maps to 1.

    Args:
        features: Array of feature values.

    Returns:
        A float array with the same shape as ``features``.  A constant input
        has no range to rescale and is returned as all zeros (silence).
    """
    features = np.asarray(features, dtype=float)
    low, high = features.min(), features.max()
    if low == high:
        # ``np.interp`` needs increasing x-coordinates; a flat signal would
        # give it a degenerate (low, low) range.
        return np.zeros_like(features)
    return np.interp(features, (low, high), (-1, 1))


def generate_audio(text, duration):
    """Main function to handle text-to-audio conversion.

    Args:
        text: The text to convert.
        duration: Desired length of the audio in seconds.

    Returns:
        Path of the WAV file that was written (``'generated_audio.wav'``).
    """
    # Step 1: Convert text to phonemes
    phonemes = text_to_phonemes(text)

    # Step 2: Generate audio features using the pre-trained model and duration
    audio_features = generate_audio_features(phonemes, duration)

    # Step 3: Convert features to actual audio
    audio_data = features_to_audio(audio_features)

    # Step 4: Save the generated audio.  The file name is fixed, so each call
    # overwrites the previous result.
    audio_file = 'generated_audio.wav'
    sf.write(audio_file, audio_data, SAMPLE_RATE)

    return audio_file
# Gradio interface
def text_to_audio_interface(text, duration):
    """Gradio callback: generate audio for ``text`` and return its file path.

    Args:
        text: Contents of the text box.
        duration: Value of the duration slider, in seconds.

    Returns:
        Path to the generated audio file, or ``None`` when ``text`` is empty
        or only whitespace.
    """
    # The interface is created with live=True, so this callback can be invoked
    # while the text box is still empty; skip generation instead of running
    # the pipeline on blank input.
    if not text or not text.strip():
        return None

    return generate_audio(text, duration)


# Create the Gradio interface: a text box and a duration slider as inputs and
# an audio preview as output.
# NOTE(review): `gr.inputs` / `gr.outputs` (and the Slider `default=` keyword)
# are the legacy Gradio namespaces; newer releases expose `gr.Textbox`,
# `gr.Slider(value=...)` and `gr.Audio` instead -- confirm against the
# installed Gradio version.
gr.Interface(
    fn=text_to_audio_interface,
    inputs=[
        gr.inputs.Textbox(
            label="Enter a Word",
            placeholder="Write a Word To Convert it into Sfx Sound",
        ),
        gr.inputs.Slider(
            minimum=1,
            maximum=20,
            default=5,
            step=1,
            label="Audio Duration (seconds)",
        ),
    ],
    outputs=gr.outputs.Audio(label="Generated Audio Preview"),
    title="Text-to-Audio Generator",
    description="Write a Word, set the duration, and press 'Generate' to convert the word into an audio effect!",
    live=True,
).launch()