szili2011 committed on
Commit
6c20eaa
·
verified ·
1 Parent(s): 8b27d4a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -0
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import soundfile as sf
3
+ import gradio as gr
4
+ from g2p_en import G2p
5
+ from tensorflow.keras.models import load_model
6
+ import os
7
+
8
# Load the pre-trained model that ships next to this script.
# The path is anchored to this file's directory instead of the current
# working directory, so the app also starts when launched from elsewhere.
model_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'model.h5')
model = load_model(model_path)

# Initialize the g2p model for converting text to phonemes
g2p = G2p()
14
+
15
def text_to_phonemes(text):
    """Return whatever the module-level ``g2p`` converter produces for *text*."""
    return g2p(text)
19
+
20
def generate_audio_features(phonemes, duration, sample_rate=22050):
    """Generate audio features from phonemes using the trained model.

    Args:
        phonemes: Iterable of phoneme tokens (strings). Tokens may be longer
            than one character (g2p_en emits ARPAbet symbols such as
            ``"AH0"``), so every character of every token is encoded.
        duration: Desired output length in seconds.
        sample_rate: Samples per second used to turn ``duration`` into a
            sample count. Defaults to 22050.

    Returns:
        A 1-D NumPy array holding ``int(duration * sample_rate)`` values.
    """
    # Placeholder encoding: one code point per character. Calling ord() on a
    # whole multi-character token would raise TypeError, so tokens are
    # flattened first. Replace this with the encoding the model was trained on.
    codes = [ord(ch) for token in phonemes for ch in token]
    phoneme_features = np.expand_dims(np.array(codes), axis=0)  # add batch axis

    # Generate audio features using the model
    audio_features = model.predict(phoneme_features)

    # np.resize flattens the prediction and repeats or truncates it so the
    # result has exactly the requested number of samples.
    num_samples = int(duration * sample_rate)
    return np.resize(audio_features, (num_samples,))
35
+
36
def features_to_audio(features):
    """Convert generated features back to audio in the range [-1, 1].

    The values are rescaled linearly so that the smallest feature maps to -1
    and the largest to 1.

    Args:
        features: Array-like of numeric feature values.

    Returns:
        A float NumPy array with the same shape as ``features``. Empty input
        is returned as an empty array, and constant input yields silence
        (all zeros) because there is no range to stretch.
    """
    features = np.asarray(features, dtype=float)

    # min()/max() raise on empty arrays, so return early.
    if features.size == 0:
        return features

    low, high = features.min(), features.max()

    # np.interp needs increasing sample points; with low == high it would
    # emit a full-scale DC signal instead of silence.
    if low == high:
        return np.zeros_like(features)

    return np.interp(features, (low, high), (-1, 1))
42
+
43
def generate_audio(text, duration):
    """Turn *text* into a WAV file and return the path that was written.

    Pipeline: text -> phonemes -> model features -> normalized waveform,
    which is then written to ``generated_audio.wav`` at 22050 Hz.
    """
    out_path = 'generated_audio.wav'
    rate = 22050  # sample rate passed to the WAV writer

    # Chain the three processing stages in order.
    waveform = features_to_audio(
        generate_audio_features(text_to_phonemes(text), duration)
    )

    sf.write(out_path, waveform, rate)
    return out_path
60
+
61
+ # Gradio interface
62
def text_to_audio_interface(text, duration):
    """Gradio callback: delegate to ``generate_audio`` and return its file path."""
    return generate_audio(text, duration)
69
+
70
# Create the Gradio interface with a text box and a slider for duration.
# Components are taken from the top-level gradio namespace with `value=`:
# the legacy gr.inputs / gr.outputs modules and the `default=` keyword were
# deprecated in Gradio 3 and are unavailable in later releases.
gr.Interface(
    fn=text_to_audio_interface,
    inputs=[
        gr.Textbox(label="Enter a Word", placeholder="Write a Word To Convert it into Sfx Sound"),
        gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Audio Duration (seconds)"),
    ],
    outputs=gr.Audio(label="Generated Audio Preview"),
    title="Text-to-Audio Generator",
    description="Write a Word, set the duration, and press 'Generate' to convert the word into an audio effect!",
    live=True,
).launch()