File size: 2,204 Bytes
6c20eaa
38b530f
 
 
 
6c20eaa
38b530f
 
 
6c20eaa
38b530f
 
6c20eaa
38b530f
 
 
 
 
 
 
 
 
6c20eaa
38b530f
 
 
 
 
 
6c20eaa
38b530f
 
 
 
6c20eaa
38b530f
6c20eaa
38b530f
 
 
 
 
 
 
6c20eaa
38b530f
 
6c20eaa
38b530f
 
6c20eaa
38b530f
 
6c20eaa
38b530f
 
 
 
 
 
 
 
 
 
 
 
6c20eaa
38b530f
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import zlib

import gradio as gr
import nltk
import numpy as np
import tensorflow as tf
from nltk.corpus import cmudict

# Download required NLTK data
# Fetched at import time so the cmudict corpus is available before the
# first request hits preprocess_text below.
# NOTE(review): 'averaged_perceptron_tagger' is downloaded but never used
# anywhere in this file — presumably planned for POS-aware preprocessing;
# verify before removing.
nltk.download('averaged_perceptron_tagger')
nltk.download('cmudict')

# Load your model
# Loaded once at module import and reused for every Gradio request.
# Requires 'audio_model.h5' to exist in the working directory.
model = tf.keras.models.load_model('audio_model.h5')

# Preprocess input text
def preprocess_text(text):
    """
    Convert input text into a 1-D numeric array of phoneme codes.

    Each whitespace-separated word is lowercased and looked up in the CMU
    pronouncing dictionary; words not found get the single placeholder
    phoneme 'UNKNOWN'. Every phoneme is then mapped to an integer in
    [0, 1000) via a stable CRC32 hash.

    Parameters
    ----------
    text : str
        Raw input text; case-insensitive, whitespace-tokenized.

    Returns
    -------
    numpy.ndarray
        1-D integer array with one entry per phoneme (empty for empty text).
    """
    # cmudict.dict() parses the whole corpus and is expensive — cache it on
    # the function object so repeated calls don't rebuild it every time.
    d = getattr(preprocess_text, "_cmudict_cache", None)
    if d is None:
        d = cmudict.dict()
        preprocess_text._cmudict_cache = d

    words = text.lower().split()

    # First listed pronunciation per word; placeholder when the word is
    # missing from the dictionary.
    phonemes = [d[word][0] if word in d else ['UNKNOWN'] for word in words]

    # Flatten the per-word phoneme lists into one sequence.
    flattened_phonemes = [p for sublist in phonemes for p in sublist]

    # BUG FIX: the original used built-in hash(), which is salted per
    # process for strings (PYTHONHASHSEED), so identical text produced
    # different model inputs on every run. zlib.crc32 is deterministic
    # across runs and platforms.
    numeric_input = np.array(
        [zlib.crc32(p.encode("utf-8")) % 1000 for p in flattened_phonemes]
    )

    return numeric_input

# Define function to generate sound
def generate_sfx(text):
    """
    Run the input text through the loaded model and return its raw output.

    The text is encoded into the model's numeric input format via
    preprocess_text, given a leading batch axis, and passed to the Keras
    model. The raw prediction array is returned as-is; converting it into
    an actual audio file is still a placeholder step.
    """
    # Encode the text and prepend the batch dimension the model expects.
    batch = preprocess_text(text)[np.newaxis, ...]

    # Raw model output; postprocessing to a WAV (or similar) is TODO.
    return model.predict(batch)

# Define the Gradio interface
# Single-textbox UI wired straight to generate_sfx. Note the output is the
# raw prediction array ("numpy"), not playable audio yet — switch the
# outputs component once generate_sfx produces an actual sound file.
interface = gr.Interface(
    fn=generate_sfx,
    inputs=gr.Textbox(label="Enter a Word", placeholder="Write a Word To Convert it into SFX Sound"),
    outputs="numpy",  # Assuming the model output is numerical, you can change this to audio or any other type as needed.
    live=False,  # run only on explicit submit, not on every keystroke
    title="SFX Generator from Text",
    description="Enter a word or sentence, and the model will generate an SFX sound.",
)

# Run the interface
# Guarded so importing this module (e.g. for testing) doesn't start a server.
if __name__ == "__main__":
    interface.launch()