Spaces:

szili2011
/

FNaF-Audio-Generation

Runtime error

App Files Files Community

szili2011 committed on Sep 24, 2024

Commit

3ffb926

verified ·

1 Parent(s): 2b6bee1

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -84

app.py CHANGED Viewed

@@ -1,104 +1,57 @@
 import gradio as gr
 import tensorflow as tf
-import numpy as np
 import nltk
 from nltk.corpus import cmudict
-from scipy.io.wavfile import write
 # Ensure TensorFlow uses CPU only
-tf.config.set_visible_devices([], 'GPU')
-# Download required NLTK data
-nltk.download('averaged_perceptron_tagger')
-nltk.download('cmudict')
-# Load your model
-model = tf.keras.models.load_model('audio_model.h5')
-# Preprocess input text
-def preprocess_text(text):
-    """
-    Process the input text to prepare it for the model.
-    """
-    d = cmudict.dict()
-    words = text.lower().split()
-    phonemes = []
-    for word in words:
-        if word in d:
-            phonemes.append(d[word][0])
-        else:
-            phonemes.append(['UNKNOWN'])
-    # Flatten the list of phonemes
-    flattened_phonemes = [p for sublist in phonemes for p in sublist]
-    # Create dummy feature vectors (this should be replaced with actual feature extraction)
-    num_features = 13
-    sequence_length = len(flattened_phonemes)
-    input_data = np.random.rand(sequence_length, num_features)  # Placeholder
-    # Add batch dimension
-    input_data = np.expand_dims(input_data, axis=0)
-    return input_data
-# Convert model output to an audio file
-def convert_to_audio(model_output, sample_rate=22050):
-    """
-    Convert the model output into a .wav file.
-    """
-    # Check if model_output is empty
-    if model_output is None or len(model_output) == 0:
-        raise ValueError("Model output is empty.")
-    # Normalize the audio output
-    normalized_output = np.interp(model_output, (model_output.min(), model_output.max()), (-1, 1))
-    return normalized_output
-# Generate sound effect with specified duration
-def generate_sfx(text, duration=30):
-    """
-    Takes input text, preprocesses it, runs it through the model,
-    and generates a downloadable audio file for a specified duration.
-    """
-    input_data = preprocess_text(text)
-    # Calculate total samples for the specified duration
-    total_samples = duration * 22050  # Samples for 30 seconds
-    # Generate audio samples
-    generated_samples = model.predict(input_data)
-    # Ensure generated samples meet the required duration
-    if generated_samples.shape[1] < total_samples:
-        # Pad with zeros if not enough audio is generated
-        padding = np.zeros((1, total_samples - generated_samples.shape[1]))
-        generated_samples = np.concatenate([generated_samples, padding], axis=1)
-    # Convert the prediction to audio data
-    audio_data = convert_to_audio(generated_samples)
-    # Write the audio data to a file, limiting to the specified duration
     output_filename = "output.wav"
-    write(output_filename, 22050, audio_data[:total_samples])  # Limit to total_samples
     return output_filename
-# Define the Gradio interface with updated slider settings
-interface = gr.Interface(
-    fn=generate_sfx,
-    inputs=[
-        gr.Textbox(label="Enter a Word", placeholder="Write a Word To Convert it into SFX Sound"),
-        gr.Slider(label="Duration (seconds)", minimum=2, maximum=20, value=5)  # Set duration options
-    ],
-    outputs=gr.Audio(label="Generated SFX", type="filepath"),
-    live=False,
-    title="SFX Generator from Text",
-    description="Enter a word or sentence, and the model will generate an SFX sound for the specified duration.",
-)
-# Run the interface
 if __name__ == "__main__":
-    interface.launch()

+import os
+import numpy as np
 import gradio as gr
+from scipy.io.wavfile import write
 import tensorflow as tf
 import nltk
 from nltk.corpus import cmudict
+# Fetch the CMU Pronouncing Dictionary corpus if it is not already present; quiet=True keeps the downloader from printing progress output
+nltk.download('cmudict', quiet=True)
 # Ensure TensorFlow uses CPU only
+os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # Hide every CUDA device. NOTE(review): this runs after `import tensorflow` above - confirm it still takes effect
+# Load the pronunciation dictionary once at import time (the placeholder functions in this file do not read it yet)
+cmu_dict = cmudict.dict()
+# TODO: load the pre-trained model here; no model is loaded yet, so only a placeholder tone is generated.
+# For a Keras model the call would look like this:
+# model = tf.keras.models.load_model('path_to_your_model')
+# Replace the example path above with the real model location.
+# model = ...
+def generate_audio(text, duration):
+    sample_rate = 22050  # Sample rate in Hz
+    t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)
+    # Placeholder: Generate a simple sine wave audio signal
+    frequency = 440  # Frequency in Hz (A4 note)
+    audio_data = 0.5 * np.sin(2 * np.pi * frequency * t)  # Generate sine wave
+    return audio_data
+def generate_sfx(duration):
+    text = "Sample text for audio generation"  # Replace with actual input text if needed
+    audio_data = generate_audio(text, duration)
+    audio_data = (audio_data * 32767).astype(np.int16)  # Scale to 16-bit PCM
+    total_samples = duration * 22050  # Adjust based on sample rate
+    if len(audio_data) < total_samples:
+        raise ValueError(f"Generated audio is shorter than {duration} seconds.")
     output_filename = "output.wav"
+    write(output_filename, 22050, audio_data[:total_samples])  # Write to WAV file
     return output_filename
+duration_slider = gr.Slider(minimum=2, maximum=20, label="Duration (seconds)", value=10)
+app = gr.Interface(fn=generate_sfx,
+                   inputs=duration_slider,
+                   outputs="audio",
+                   title="Sound Effect Generator",
+                   description="Generate sound effects for a specified duration.")
 if __name__ == "__main__":
+    app.launch()