Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import soundfile as sf
|
| 3 |
+
import gradio as gr
|
| 4 |
+
from g2p_en import G2p
|
| 5 |
+
from tensorflow.keras.models import load_model
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
# Load the pre-trained Keras model stored next to this script. The path is anchored to the
# script's own directory so loading does not depend on the process's current working directory.
model_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'model.h5')
model = load_model(model_path)

# Grapheme-to-phoneme converter used to turn input text into phonemes
g2p = G2p()
|
| 14 |
+
|
| 15 |
+
def text_to_phonemes(text):
    """Run the module-level ``g2p`` converter on *text* and return its result unchanged."""
    return g2p(text)
|
| 19 |
+
|
| 20 |
+
def generate_audio_features(phonemes, duration, sample_rate=22050):
    """Run the model on encoded phonemes and fit its output to the requested length.

    Args:
        phonemes: Iterable of phoneme strings (e.g. the output of ``text_to_phonemes``).
        duration: Desired clip length in seconds.
        sample_rate: Samples per second used to convert ``duration`` into a sample count.

    Returns:
        A 1-D array holding ``int(duration * sample_rate)`` values.
    """
    # A phoneme token can be several characters long (e.g. "HH", "AH0"), and ord() only
    # accepts a single character, so every character of every token is encoded.
    # NOTE(review): this is a placeholder encoding -- confirm it matches the model's training input.
    codes = [ord(ch) for token in phonemes for ch in token]
    model_input = np.expand_dims(np.array(codes), axis=0)  # add a leading batch axis

    prediction = model.predict(model_input)

    # np.resize flattens the prediction and repeats or truncates it to exactly num_samples values.
    num_samples = int(duration * sample_rate)
    return np.resize(prediction, (num_samples,))
|
| 35 |
+
|
| 36 |
+
def features_to_audio(features):
    """Linearly rescale *features* so its minimum maps to -1 and its maximum to 1.

    Args:
        features: Array-like of numeric values.

    Returns:
        A float array with the same shape as the input. An empty input is returned
        as an empty array, and a constant input (no dynamic range) is returned as
        silence (all zeros) rather than a full-scale DC offset.
    """
    values = np.asarray(features, dtype=float)
    if values.size == 0:
        return values

    lowest, highest = values.min(), values.max()
    if lowest == highest:
        # np.interp needs an increasing x-range; a flat signal has none to stretch.
        return np.zeros_like(values)

    return np.interp(values, (lowest, highest), (-1, 1))
|
| 42 |
+
|
| 43 |
+
def generate_audio(text, duration):
    """Turn *text* into a WAV file on disk and return that file's path.

    Pipeline: text -> phonemes -> model features -> normalised waveform -> WAV file.
    The output file name is fixed, so each call overwrites the previous result.
    """
    wav_path = 'generated_audio.wav'
    rate_hz = 22050  # sample rate written into the WAV file

    waveform = features_to_audio(
        generate_audio_features(text_to_phonemes(text), duration)
    )
    sf.write(wav_path, waveform, rate_hz)
    return wav_path
|
| 60 |
+
|
| 61 |
+
# Gradio interface
|
| 62 |
+
def text_to_audio_interface(text, duration):
    """Gradio callback: pass both inputs to ``generate_audio`` and return the audio file path."""
    return generate_audio(text, duration)
|
| 69 |
+
|
| 70 |
+
# Build and launch the Gradio UI: a text box and a duration slider as inputs, an audio player
# as output. Components are taken from the top-level ``gr`` namespace and the slider's initial
# value is passed as ``value=``; the older ``gr.inputs`` / ``gr.outputs`` namespaces and the
# ``default=`` keyword are not available in current Gradio releases.
demo = gr.Interface(
    fn=text_to_audio_interface,
    inputs=[
        gr.Textbox(label="Enter a Word", placeholder="Write a Word To Convert it into Sfx Sound"),
        gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Audio Duration (seconds)"),
    ],
    outputs=gr.Audio(label="Generated Audio Preview"),
    title="Text-to-Audio Generator",
    description="Write a Word, set the duration, and press 'Generate' to convert the word into an audio effect!",
    # NOTE(review): live=True re-runs the callback whenever an input changes, which seems at
    # odds with the description's "press 'Generate'" -- confirm which behaviour is intended.
    live=True,
)
demo.launch()
|