Spaces:

szili2011
/

FNaF-Audio-Generation

Runtime error

App Files Files Community

szili2011 committed on Sep 24, 2024

Commit

809a47f

verified ·

1 Parent(s): 1798665

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -13

app.py CHANGED Viewed

@@ -2,7 +2,6 @@ import gradio as gr
 import tensorflow as tf
 import numpy as np
 import nltk
-import soundfile as sf
 from nltk.corpus import cmudict
 from scipy.io.wavfile import write
@@ -17,8 +16,6 @@ model = tf.keras.models.load_model('audio_model.h5')
 def preprocess_text(text):
     """
     Process the input text to prepare it for the model.
-    This could include tokenization, phoneme extraction, etc.
-    The model expects input of shape (batch_size, sequence_length, 13).
     """
     d = cmudict.dict()
     words = text.lower().split()
@@ -28,19 +25,19 @@ def preprocess_text(text):
         if word in d:
             phonemes.append(d[word][0])
         else:
-            # If word not found in cmudict, use a placeholder or skip
             phonemes.append(['UNKNOWN'])
     # Flatten the list of phonemes
     flattened_phonemes = [p for sublist in phonemes for p in sublist]
-    # Create dummy 13-feature vectors for each phoneme (you need to implement your own feature extraction)
     num_features = 13
     sequence_length = len(flattened_phonemes)
-    input_data = np.random.rand(sequence_length, num_features)  # Placeholder, replace with actual feature extraction
     # Add batch dimension
-    input_data = np.expand_dims(input_data, axis=0)  # Shape (1, sequence_length, 13)
     return input_data
@@ -48,17 +45,20 @@ def preprocess_text(text):
 def convert_to_audio(model_output, filename="output.wav", sample_rate=22050):
     """
     Convert the model output into a .wav file.
-    Model output is expected to be a numpy array.
     """
-    # Normalize the audio output (optional, based on your model)
     normalized_output = np.interp(model_output, (model_output.min(), model_output.max()), (-1, 1))
-    # Write the audio data to a file (assuming model_output is a waveform)
     write(filename, sample_rate, normalized_output)
     return filename
-# Define function to generate sound effect
 def generate_sfx(text):
     """
     Takes input text, preprocesses it, runs it through the model,
@@ -69,8 +69,12 @@ def generate_sfx(text):
     # Generate prediction
     prediction = model.predict(input_data)
     # Convert the prediction to an audio file
-    audio_file = convert_to_audio(prediction.flatten(), filename="output.wav")
     return audio_file
@@ -78,7 +82,7 @@ def generate_sfx(text):
 interface = gr.Interface(
     fn=generate_sfx,
     inputs=gr.Textbox(label="Enter a Word", placeholder="Write a Word To Convert it into SFX Sound"),
-    outputs=gr.Audio(label="Generated SFX", type="filepath"),  # Changed here
     live=False,
     title="SFX Generator from Text",
     description="Enter a word or sentence, and the model will generate an SFX sound.",

 import tensorflow as tf
 import numpy as np
 import nltk
 from nltk.corpus import cmudict
 from scipy.io.wavfile import write
 def preprocess_text(text):
     """
     Process the input text to prepare it for the model.
     """
     d = cmudict.dict()
     words = text.lower().split()
         if word in d:
             phonemes.append(d[word][0])
         else:
+            # Use a placeholder for words not found in cmudict
             phonemes.append(['UNKNOWN'])
     # Flatten the list of phonemes
     flattened_phonemes = [p for sublist in phonemes for p in sublist]
+    # Create dummy feature vectors (this should be replaced with actual feature extraction)
     num_features = 13
     sequence_length = len(flattened_phonemes)
+    input_data = np.random.rand(sequence_length, num_features)  # Placeholder
     # Add batch dimension
+    input_data = np.expand_dims(input_data, axis=0)
     return input_data
 def convert_to_audio(model_output, filename="output.wav", sample_rate=22050):
     """
     Convert the model output into a .wav file.

     The samples are rescaled linearly so that their minimum maps to -1 and
     their maximum maps to 1, then written as 32-bit float audio.

     Args:
         model_output: Array-like of numeric samples (NumPy array or list).
         filename: Path of the .wav file to write.
         sample_rate: Sample rate passed to the WAV writer.

     Returns:
         The ``filename`` that was written.

     Raises:
         ValueError: If ``model_output`` is None or contains no samples.
     """
     if model_output is None:
         raise ValueError("Model output is empty.")
     # Accept plain lists as well as arrays; work in float64 for the rescale.
     samples = np.asarray(model_output, dtype=np.float64)
     # Use .size rather than len(): an array shaped (1, 0) has len() == 1 but
     # holds no samples, and min()/max() below would fail on it.
     if samples.size == 0:
         raise ValueError("Model output is empty.")
     lowest, highest = samples.min(), samples.max()
     if highest > lowest:
         normalized_output = np.interp(samples, (lowest, highest), (-1, 1))
     else:
         # A constant signal has no range to stretch, and np.interp requires
         # increasing x-coordinates, so emit silence instead.
         normalized_output = np.zeros_like(samples)
     # Cast to float32 so the file is a 32-bit float WAV; float64 data would
     # be stored as 64-bit float, which is less widely supported.
     write(filename, sample_rate, normalized_output.astype(np.float32))
     return filename
+# Generate sound effect
 def generate_sfx(text):
     """
     Takes input text, preprocesses it, runs it through the model,
     # Generate prediction
     prediction = model.predict(input_data)
+    # Ensure prediction shape is correct
+    if prediction.ndim == 2 and prediction.shape[1] > 1:
+        prediction = prediction.flatten()  # Flatten if necessary
     # Convert the prediction to an audio file
+    audio_file = convert_to_audio(prediction, filename="output.wav")
     return audio_file
 interface = gr.Interface(
     fn=generate_sfx,
     inputs=gr.Textbox(label="Enter a Word", placeholder="Write a Word To Convert it into SFX Sound"),
+    outputs=gr.Audio(label="Generated SFX", type="filepath"),
     live=False,
     title="SFX Generator from Text",
     description="Enter a word or sentence, and the model will generate an SFX sound.",