szili2011 committed
Commit 543c357 · verified · 1 Parent(s): c3f5e81

Update app.py

Files changed (1)
  1. app.py +16 -27
app.py CHANGED
@@ -5,6 +5,9 @@ import nltk
 from nltk.corpus import cmudict
 from scipy.io.wavfile import write
 
+# Ensure TensorFlow uses CPU only
+tf.config.set_visible_devices([], 'GPU')
+
 # Download required NLTK data
 nltk.download('averaged_perceptron_tagger')
 nltk.download('cmudict')
@@ -52,7 +55,6 @@ def convert_to_audio(model_output, sample_rate=22050):
     # Normalize the audio output
     normalized_output = np.interp(model_output, (model_output.min(), model_output.max()), (-1, 1))
 
-    # Return normalized output for further processing
     return normalized_output
 
 # Generate sound effect with specified duration
@@ -63,34 +65,21 @@ def generate_sfx(text, duration=30):
     """
     input_data = preprocess_text(text)
 
-    # Initialize an empty list to hold audio segments
-    audio_segments = []
-    total_samples = duration * 22050  # Calculate total samples for 30 seconds
-    generated_samples = 0
-
-    while generated_samples < total_samples:
-        # Generate prediction
-        prediction = model.predict(input_data)
-
-        # Ensure prediction shape is correct
-        if prediction.ndim == 2 and prediction.shape[1] > 1:
-            prediction = prediction.flatten()  # Flatten if necessary
-
-        # Convert the prediction to audio data
-        audio_segment = convert_to_audio(prediction)
-
-        # Append the generated segment to the list
-        audio_segments.append(audio_segment)
-
-        # Increment the total samples generated
-        generated_samples += len(audio_segment)
+    # Calculate total samples for the specified duration
+    total_samples = duration * 22050  # Samples for 30 seconds
+    # Generate audio samples
+    generated_samples = model.predict(input_data)
+
+    # Check the length of generated samples and ensure it meets the required duration
+    if len(generated_samples) < total_samples:
+        raise ValueError(f"Generated audio is shorter than {duration} seconds.")
 
-    # Concatenate all segments to form the final audio output
-    final_audio = np.concatenate(audio_segments)[:total_samples]  # Ensure we cut to the correct length
+    # Convert the prediction to audio data
+    audio_data = convert_to_audio(generated_samples)
 
-    # Write the audio data to a file
+    # Write the audio data to a file, limiting to the specified duration
     output_filename = "output.wav"
-    write(output_filename, 22050, final_audio)
+    write(output_filename, 22050, audio_data[:total_samples])  # Limit to total_samples
 
     return output_filename
 
@@ -99,7 +88,7 @@ interface = gr.Interface(
     fn=generate_sfx,
     inputs=[
         gr.Textbox(label="Enter a Word", placeholder="Write a Word To Convert it into SFX Sound"),
-        gr.Slider(label="Duration (seconds)", minimum=1, maximum=60, value=30)  # Added duration slider
+        gr.Slider(label="Duration (seconds)", minimum=30, maximum=120, value=30)  # Set duration options
     ],
     outputs=gr.Audio(label="Generated SFX", type="filepath"),
     live=False,
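
For reference, the reworked generate_sfx now makes a single model call instead of accumulating segments in a loop. The sketch below mirrors that flow but swaps in a stand-in predict callable and a trivial normalizer so it can run without the trained model; apart from the pieces taken from the diff (convert_to_audio's np.interp normalization, the 22050 Hz rate, the output.wav target), every name here is an assumption for illustration, not the app's actual code.

import numpy as np
from scipy.io.wavfile import write

SAMPLE_RATE = 22050  # rate hard-coded in app.py

def convert_to_audio(model_output):
    # Normalize to the [-1, 1] range, as in the diffed helper
    return np.interp(model_output, (model_output.min(), model_output.max()), (-1, 1))

def generate_sfx_sketch(text, duration=30, predict=None):
    # Single-pass flow mirroring the updated generate_sfx; `predict` stands in
    # for model.predict on the real network, which this sketch does not load.
    total_samples = int(duration * SAMPLE_RATE)  # int() so slicing works if duration arrives as a float
    samples = predict(text)
    if len(samples) < total_samples:
        raise ValueError(f"Generated audio is shorter than {duration} seconds.")
    audio = convert_to_audio(samples)
    write("output.wav", SAMPLE_RATE, audio[:total_samples].astype(np.float32))
    return "output.wav"

if __name__ == "__main__":
    # Noise "model" so the sketch runs end to end without the trained network
    fake_predict = lambda text: np.random.uniform(-1.0, 1.0, 60 * SAMPLE_RATE)
    print(generate_sfx_sketch("boom", duration=30, predict=fake_predict))

Trimming the write to audio[:total_samples] matches the diff's behaviour of cutting whatever the model returns down to the requested duration.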