Spaces:

szili2011
/

FNaF-Audio-Generation

Runtime error

App Files Files Community

FNaF-Audio-Generation / app.py

szili2011

Update app.py

13a6289 verified 5 months ago

raw

history blame contribute delete

2.65 kB

	import functools

	import gradio as gr
	import nltk
	import numpy as np
	import tensorflow as tf
	from nltk.corpus import cmudict
	from scipy.io.wavfile import write

	# Sample rate, in samples per second, passed to the WAV writer and used to
	# turn a duration in seconds into a number of samples.
	SAMPLE_RATE = 22050

	# Fetch the NLTK resources at import time; quiet=True is passed to both calls.
	# NOTE(review): only `cmudict` is referenced by the code visible in this file;
	# the tagger download looks unused -- confirm before removing it.
	nltk.download('averaged_perceptron_tagger', quiet=True)
	nltk.download('cmudict', quiet=True)

	# Load the Keras model once at import time so every request reuses it.
	# 'audio_model.h5' is a relative path, so it is resolved against the current
	# working directory. compile=False is passed because the model is only used
	# for prediction here (no optimizer/loss is needed).
	# NOTE(review): this load runs before the `tf.config.set_visible_devices`
	# call in the `__main__` guard; TensorFlow documents that visible devices
	# cannot be changed once the runtime is initialized -- confirm the ordering.
	model = tf.keras.models.load_model('audio_model.h5', compile=False)

	@functools.lru_cache(maxsize=1)
	def _load_pronunciations():
	    """Return the CMU Pronouncing Dictionary mapping, built once per process."""
	    # cmudict.dict() builds a fresh mapping from the corpus on every call,
	    # which is far too slow to repeat for each request.
	    return cmudict.dict()


	# Preprocess input text
	def preprocess_text(text):
	    """Turn *text* into a model input array of shape (1, steps, 13).

	    The text is lower-cased and split on whitespace. Each word found in the
	    CMU Pronouncing Dictionary contributes one step per phoneme of its first
	    listed pronunciation; any other word contributes a single 'UNKNOWN' step.

	    NOTE(review): the phonemes only determine the number of steps. The
	    feature values themselves are uniform random noise from
	    ``np.random.rand``, so two calls with the same text return different
	    arrays -- confirm this placeholder is intended.

	    Args:
	        text: Free-form input text. ``None``, empty and whitespace-only
	            values are all treated as "no words".

	    Returns:
	        A float array of shape ``(1, steps, 13)``. When there are no words
	        (or no phonemes) an all-zero array of shape ``(1, 1, 13)`` is
	        returned instead.
	    """
	    num_features = 13

	    words = (text or "").lower().split()
	    if not words:
	        # Nothing to look up: answer without loading the dictionary at all.
	        return np.zeros((1, 1, num_features))

	    pronunciations = _load_pronunciations()
	    # The fallback mirrors the dictionary's value shape (a list of
	    # pronunciations), so `[0]` picks the first pronunciation either way.
	    flattened_phonemes = [
	        phoneme
	        for word in words
	        for phoneme in pronunciations.get(word, [['UNKNOWN']])[0]
	    ]

	    sequence_length = len(flattened_phonemes)
	    if sequence_length == 0:
	        return np.zeros((1, 1, num_features))

	    # Leading axis of size 1 is the batch dimension.
	    return np.random.rand(1, sequence_length, num_features)

	# Convert model output to an audio file
	def convert_to_audio(model_output, filename="output.wav"):
	    """Rescale *model_output* to [-1, 1] and write it as a float32 WAV file.

	    Args:
	        model_output: Array-like of samples. Anything ``np.asarray`` accepts
	            is allowed.
	        filename: Destination path for the WAV file.

	    Returns:
	        ``filename`` once the file has been written, or ``None`` when
	        ``model_output`` contains no samples (nothing is written then).
	    """
	    samples = np.asarray(model_output)
	    if samples.size == 0:
	        return None

	    lowest, highest = samples.min(), samples.max()
	    if highest > lowest:
	        # Map the observed range linearly onto the full [-1, 1] scale.
	        normalized_output = np.interp(samples, (lowest, highest), (-1, 1))
	    else:
	        # A constant signal (or a NaN range, for which the comparison is
	        # False) has no usable span: np.interp requires increasing
	        # x-coordinates, so write silence instead of a full-scale DC level.
	        normalized_output = np.zeros(samples.shape)

	    write(filename, SAMPLE_RATE, normalized_output.astype(np.float32))
	    return filename

	# Define function to generate sound effect
	def generate_sfx(text, duration):
	    """Generate a sound effect for *text* lasting *duration* seconds.

	    The text is converted to a model input, the model's prediction is
	    flattened to one dimension, and that signal is repeated cyclically (or
	    truncated) to exactly ``duration * SAMPLE_RATE`` samples before being
	    written to a WAV file.

	    Args:
	        text: Input text passed to ``preprocess_text``.
	        duration: Clip length in seconds. May be an int or a float.

	    Returns:
	        The path of the written WAV file, or ``None`` when there is nothing
	        to write (empty model input, empty prediction, or a duration that
	        yields zero samples).
	    """
	    input_data = preprocess_text(text)

	    # Defensive guard: skip the model if the input has no time steps.
	    if input_data.shape[1] == 0:
	        return None

	    flat_prediction = model.predict(input_data).flatten()
	    if flat_prediction.size == 0:
	        return None

	    # A float duration would make the sample count a float, which NumPy
	    # rejects as an array size; coerce once and never go negative.
	    total_samples = max(int(duration * SAMPLE_RATE), 0)

	    # np.resize fills the requested length with repeated copies of the
	    # prediction, truncating the last copy as needed.
	    audio_data = np.resize(flat_prediction, total_samples)

	    # NOTE(review): every call writes the same "output.wav" path, so
	    # overlapping requests would overwrite each other's file -- consider a
	    # per-request temporary file.
	    return convert_to_audio(audio_data, filename="output.wav")

	# Define the Gradio interface
	# Input widgets, in the order generate_sfx receives them: text, then duration.
	_sfx_inputs = [
	    gr.Textbox(
	        label="Enter a Word",
	        placeholder="Write a Word To Convert it into SFX Sound",
	    ),
	    gr.Slider(
	        minimum=1,
	        maximum=20,
	        value=3,
	        step=1,
	        label="Duration (seconds)",
	    ),
	]

	# generate_sfx returns a file path, so the audio widget is configured to
	# take one.
	_sfx_output = gr.Audio(label="Generated SFX", type="filepath")

	# Wire the widgets to the generator function.
	interface = gr.Interface(
	    fn=generate_sfx,
	    inputs=_sfx_inputs,
	    outputs=_sfx_output,
	    title="SFX Generator from Text",
	    description="Enter a word or sentence, and the model will generate an SFX sound.",
	)

	# Run the interface
	if __name__ == "__main__":
	    # Hide all GPUs so inference runs on the CPU. TensorFlow raises
	    # RuntimeError when visible devices are changed after its runtime has
	    # been initialized (the model is already loaded by this point), so
	    # treat that as non-fatal and keep the default device placement.
	    try:
	        tf.config.set_visible_devices([], 'GPU')
	    except RuntimeError as exc:
	        print(f"Could not hide GPU devices; continuing with defaults: {exc}")

	    # launch() is called with its default arguments, so no share link is
	    # requested here.
	    # NOTE(review): an earlier comment claimed share=True was required for
	    # this environment, which contradicts this call -- confirm which is
	    # intended for the deployment target.
	    interface.launch()