Spaces:

szili2011
/

FNaF-Audio-Generation

Runtime error

App Files Community

szili2011 committed on Sep 24, 2024

Commit

85d2702

verified ·

1 Parent(s): 09a58b6

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -23

app.py CHANGED Viewed

@@ -14,10 +14,6 @@ model = tf.keras.models.load_model('audio_model.h5')
 # Preprocess input text
 def preprocess_text(text):
-    """
-    Process the input text to prepare it for the model.
-    This includes tokenization and phoneme extraction.
-    """
     d = cmudict.dict()
     words = text.lower().split()
     phonemes = []
@@ -30,7 +26,7 @@ def preprocess_text(text):
     flattened_phonemes = [p for sublist in phonemes for p in sublist]
-    # Create dummy 13-feature vectors for each phoneme (you need to implement your own feature extraction)
     num_features = 13
     sequence_length = len(flattened_phonemes)
     input_data = np.random.rand(sequence_length, num_features)
@@ -42,32 +38,18 @@ def preprocess_text(text):
 # Convert model output to an audio file
 def convert_to_audio(model_output, filename="output.wav", sample_rate=22050):
-    """
-    Convert the model output into a .wav file.
-    """
-    # Normalize the audio output
     normalized_output = np.interp(model_output, (model_output.min(), model_output.max()), (-1, 1))
-    # Write the audio data to a file
-    write(filename, sample_rate, normalized_output.astype(np.float32))  # Ensure the output is of type float32
     return filename
 # Define function to generate sound effect
 def generate_sfx(text, duration=30):
-    """
-    Takes input text, preprocesses it, runs it through the model,
-    and generates a downloadable audio file for the specified duration.
-    """
     input_data = preprocess_text(text)
-    # Generate prediction
     prediction = model.predict(input_data)
-    # Generate a longer output by repeating or padding
-    audio_data = np.tile(prediction.flatten(), (duration * 22050 // len(prediction.flatten()) + 1))[:duration * 22050]
-    # Convert the prediction to an audio file
     audio_file = convert_to_audio(audio_data, filename="output.wav")
     return audio_file
@@ -77,7 +59,7 @@ interface = gr.Interface(
     fn=generate_sfx,
     inputs=[
         gr.Textbox(label="Enter a Word", placeholder="Write a Word To Convert it into SFX Sound"),
-        gr.Slider(minimum=2, maximum=20, default=30, label="Duration (seconds)")
     ],
     outputs=gr.Audio(label="Generated SFX", type="filepath"),
     title="SFX Generator from Text",

 # Preprocess input text
 def preprocess_text(text):
     d = cmudict.dict()
     words = text.lower().split()
     phonemes = []
     flattened_phonemes = [p for sublist in phonemes for p in sublist]
+    # Create dummy 13-feature vectors for each phoneme (implement your own feature extraction)
     num_features = 13
     sequence_length = len(flattened_phonemes)
     input_data = np.random.rand(sequence_length, num_features)
 # Convert model output to an audio file
 def convert_to_audio(model_output, filename="output.wav", sample_rate=22050):
     normalized_output = np.interp(model_output, (model_output.min(), model_output.max()), (-1, 1))
+    write(filename, sample_rate, normalized_output.astype(np.float32))
     return filename
 # Define function to generate sound effect
 def generate_sfx(text, duration=30):
     input_data = preprocess_text(text)
     prediction = model.predict(input_data)
+    # Generate longer output by repeating or padding
+    audio_data = np.tile(prediction.flatten(), (duration * sample_rate // len(prediction.flatten()) + 1))[:duration * sample_rate]
     audio_file = convert_to_audio(audio_data, filename="output.wav")
     return audio_file
     fn=generate_sfx,
     inputs=[
         gr.Textbox(label="Enter a Word", placeholder="Write a Word To Convert it into SFX Sound"),
+        gr.Slider(minimum=2, maximum=20, label="Duration (seconds)", value=30)
     ],
     outputs=gr.Audio(label="Generated SFX", type="filepath"),
     title="SFX Generator from Text",