# NOTE: this file was scraped from a Hugging Face Spaces page (Space status: "Sleeping");
# the lines below are the app's source code.
import gradio as gr
from keras.models import load_model
from tensorflow.keras.utils import img_to_array
from numpy import expand_dims
from PIL import Image
import librosa
import numpy as np
import soundfile as sf
import os
import random
import tempfile
import matplotlib.pyplot as plt
import time  # To generate unique filenames
# Load your Pix2Pix model (make sure the path is correct).
# compile=False: we only need the weights for inference, not the training config.
model = load_model('./model_022600.h5', compile=False)
def shift_frequencies(spectrogram, shift):
    """Circularly shift the frequency axis (rows) of a spectrogram by `shift` bins."""
    # np.roll wraps around: content pushed past one edge re-enters on the other.
    shifted = np.roll(spectrogram, shift, axis=0)
    return shifted
def apply_filter(spectrogram, low_cut, high_cut):
    """Band-pass a spectrogram: zero rows below `low_cut` and from `high_cut` up.

    Operates on a copy; the input array is left untouched.
    """
    band_passed = np.copy(spectrogram)
    band_passed[:low_cut] = 0   # attenuate low frequencies
    band_passed[high_cut:] = 0  # attenuate high frequencies
    return band_passed
def add_harmonics(spectrogram, harmonic_shift):
    """Overlay a half-strength, frequency-shifted copy of the spectrogram.

    The result is clipped back into [0, 1].
    """
    # 0.5 weakens the harmonic layer relative to the fundamental content.
    overlay = 0.5 * np.roll(spectrogram, harmonic_shift, axis=0)
    return np.clip(spectrogram + overlay, 0, 1)
def modulate_amplitude(spectrogram, factor):
    """Scale spectrogram intensities by `factor`, clamping back into [0, 1]."""
    # factor > 1 amplifies the bright (white) areas, factor < 1 attenuates them.
    scaled = spectrogram * factor
    return np.clip(scaled, 0, 1)
def modify_spectrogram(spectrogram):
    """Apply a random subset of four augmentations to a spectrogram.

    Each transform — frequency shift, band filter, added harmonics, amplitude
    modulation — is independently enabled with probability 1/2, and its
    parameter is drawn at random only when it is enabled.
    """
    # Draw all four on/off flags first (keeps the sequence of RNG calls
    # identical regardless of which transforms end up enabled).
    do_shift, do_filter, do_harmonics, do_modulate = (
        random.choice([True, False]) for _ in range(4)
    )
    if do_shift:
        spectrogram = shift_frequencies(spectrogram, shift=random.randint(-15, 15))
    if do_filter:
        spectrogram = apply_filter(
            spectrogram,
            low_cut=random.randint(10, 50),
            high_cut=random.randint(300, 600),
        )
    if do_harmonics:
        spectrogram = add_harmonics(spectrogram, harmonic_shift=random.randint(2, 10))
    if do_modulate:
        spectrogram = modulate_amplitude(spectrogram, factor=random.uniform(0.8, 2.0))
    return spectrogram
def save_spectrogram_image(spectrogram, name):
    """Render a spectrogram as a grayscale PNG named `<name>_spectrogram.png`.

    Returns the path of the written image file.
    """
    out_path = f"{name}_spectrogram.png"
    plt.figure(figsize=(10, 4))
    plt.imshow(spectrogram, aspect='auto', origin='lower', cmap='gray')
    plt.axis('off')
    # tight bbox + zero padding: save just the spectrogram, no axes/margins.
    plt.savefig(out_path, bbox_inches='tight', pad_inches=0)
    plt.close()
    return out_path
def save_uploaded_image(input_image, name):
    """Persist the user's uploaded PIL image as `<name>_uploaded_image.png`.

    Returns the path the image was saved to.
    """
    destination = f"{name}_uploaded_image.png"
    input_image.save(destination)
    return destination
def process_image(input_image):
    """Run the Pix2Pix model on an uploaded image and synthesize audio from it.

    Pipeline: save the upload, predict a spectrogram-like image, upscale it,
    randomly augment it, save it as a PNG, then invert it to a waveform.

    Returns a tuple (uploaded_image_path, spectrogram_image_path, audio_file_path).
    """
    # Unique basename shared by all three output files.
    stamp = f"image_{int(time.time())}"

    def _to_model_input(image, size=(256, 256)):
        # Resize, map pixels from [0, 255] to [-1, 1], and add a batch axis.
        arr = img_to_array(image.resize(size))
        arr = (arr - 127.5) / 127.5
        return expand_dims(arr, 0)

    # Keep a copy of the upload under the same timestamped name.
    uploaded_image_path = save_uploaded_image(input_image, stamp)

    # Generate with Pix2Pix, then rescale the tanh output back to [0, 1].
    prediction = model.predict(_to_model_input(input_image))
    prediction = (prediction + 1) / 2.0

    # Upscale to the original spectrogram resolution as 32-bit float ('F').
    # PIL size is (width, height), so the resulting array is 512 x 1293.
    orig_size = (1293, 512)
    resized = (
        Image.fromarray((prediction[0] * 255).astype('uint8'))
        .resize(orig_size)
        .convert('F')
    )

    spectrogram = np.array(resized)
    spectrogram = modify_spectrogram(spectrogram)
    spectrogram_image_path = save_spectrogram_image(spectrogram, stamp)

    # Invert the mel spectrogram to a waveform.
    # NOTE(review): values here are 0-255 floats, not power/dB — confirm this
    # matches the scale the model was trained on.
    wav = librosa.feature.inverse.mel_to_audio(
        spectrogram, sr=44100, n_fft=2048, hop_length=512
    )
    audio_file_path = f"{stamp}_generated_audio.wav"
    sf.write(audio_file_path, wav, samplerate=44100)

    return uploaded_image_path, spectrogram_image_path, audio_file_path
def gradio_process_image(input_image):
    """Gradio handler: run the image-to-audio pipeline, return the three file paths.

    Returns (uploaded_image_path, spectrogram_image_path, audio_file_path),
    matching the interface's three output components.

    Bug fix: the original also returned a 4th value (a `cleanup` closure) even
    though the interface declares only 3 outputs, which makes Gradio fail on
    the output-count mismatch — and the closure was never invoked, so it never
    deleted anything anyway. The files cannot be removed before Gradio has
    served them, so cleanup is left to an external job (or Gradio's
    `delete_cache` option on the Blocks/Interface).
    """
    uploaded_image_path, spectrogram_image_path, audio_file_path = process_image(input_image)
    return uploaded_image_path, spectrogram_image_path, audio_file_path
# Gradio UI: one image in; the saved upload, the spectrogram PNG, and the
# generated audio file out.
_outputs = [
    gr.File(label="Uploaded Image"),
    gr.Image(type="filepath"),
    gr.Audio(type="filepath"),
]
interface = gr.Interface(
    fn=gradio_process_image,
    inputs=gr.Image(type="pil"),
    outputs=_outputs,
    title="Image to Audio Generator with Spectrogram Display",
    description="Upload an image, and get an audio file generated using Pix2Pix.",
)

# Start the web app.
interface.launch()