# NOTE: this file was scraped from a Hugging Face Spaces page (Space status: "Sleeping");
# the lines below are the app's source code.
import gradio as gr
from keras.models import load_model
from tensorflow.keras.utils import img_to_array
from numpy import expand_dims
from PIL import Image
import librosa
import numpy as np
import soundfile as sf
import os
import random
import tempfile
import matplotlib.pyplot as plt
import time  # To generate unique filenames
# Load your Pix2Pix model (make sure the path is correct).
# compile=False: we only need the weights for inference, not the training config.
model = load_model('./model_022600.h5', compile=False)
def shift_frequencies(spectrogram, shift):
    """Circularly shift the frequency axis (rows) of a spectrogram by `shift` bins."""
    # np.roll wraps around: content pushed past one edge re-enters on the other.
    shifted = np.roll(spectrogram, shift, axis=0)
    return shifted
def apply_filter(spectrogram, low_cut, high_cut):
    """Band-pass a spectrogram: zero rows below `low_cut` and from `high_cut` up.

    Operates on a copy; the input array is left untouched.
    """
    band_passed = np.copy(spectrogram)
    band_passed[:low_cut] = 0   # attenuate low frequencies
    band_passed[high_cut:] = 0  # attenuate high frequencies
    return band_passed
def add_harmonics(spectrogram, harmonic_shift):
    """Overlay a half-strength, frequency-shifted copy of the spectrogram.

    The result is clipped back into [0, 1].
    """
    # 0.5 weakens the harmonic layer relative to the fundamental content.
    overlay = 0.5 * np.roll(spectrogram, harmonic_shift, axis=0)
    return np.clip(spectrogram + overlay, 0, 1)
def modulate_amplitude(spectrogram, factor):
    """Scale spectrogram intensities by `factor`, clamping back into [0, 1]."""
    # factor > 1 amplifies the bright (white) areas, factor < 1 attenuates them.
    scaled = spectrogram * factor
    return np.clip(scaled, 0, 1)
def modify_spectrogram(spectrogram):
    """Apply a random subset of four augmentations to a spectrogram.

    Each transform — frequency shift, band filter, added harmonics, amplitude
    modulation — is independently enabled with probability 1/2, and its
    parameter is drawn at random only when it is enabled.
    """
    # Draw all four on/off flags first (keeps the sequence of RNG calls
    # identical regardless of which transforms end up enabled).
    do_shift, do_filter, do_harmonics, do_modulate = (
        random.choice([True, False]) for _ in range(4)
    )
    if do_shift:
        spectrogram = shift_frequencies(spectrogram, shift=random.randint(-15, 15))
    if do_filter:
        spectrogram = apply_filter(
            spectrogram,
            low_cut=random.randint(10, 50),
            high_cut=random.randint(300, 600),
        )
    if do_harmonics:
        spectrogram = add_harmonics(spectrogram, harmonic_shift=random.randint(2, 10))
    if do_modulate:
        spectrogram = modulate_amplitude(spectrogram, factor=random.uniform(0.8, 2.0))
    return spectrogram
def save_spectrogram_image(spectrogram, name):
    """Render a spectrogram as a grayscale PNG named `<name>_spectrogram.png`.

    Returns the path of the written image file.
    """
    out_path = f"{name}_spectrogram.png"
    plt.figure(figsize=(10, 4))
    plt.imshow(spectrogram, aspect='auto', origin='lower', cmap='gray')
    plt.axis('off')
    # tight bbox + zero padding: save just the spectrogram, no axes/margins.
    plt.savefig(out_path, bbox_inches='tight', pad_inches=0)
    plt.close()
    return out_path
def save_uploaded_image(input_image, name):
    """Persist the user's uploaded PIL image as `<name>_uploaded_image.png`.

    Returns the path the image was saved to.
    """
    destination = f"{name}_uploaded_image.png"
    input_image.save(destination)
    return destination
def process_image(input_image):
    """Run the Pix2Pix model on an uploaded image and synthesize audio from it.

    Pipeline: save the upload, predict a spectrogram-like image, upscale it,
    randomly augment it, save it as a PNG, then invert it to a waveform.

    Returns a tuple (uploaded_image_path, spectrogram_image_path, audio_file_path).
    """
    # Unique basename shared by all three output files.
    stamp = f"image_{int(time.time())}"

    def _to_model_input(image, size=(256, 256)):
        # Resize, map pixels from [0, 255] to [-1, 1], and add a batch axis.
        arr = img_to_array(image.resize(size))
        arr = (arr - 127.5) / 127.5
        return expand_dims(arr, 0)

    # Keep a copy of the upload under the same timestamped name.
    uploaded_image_path = save_uploaded_image(input_image, stamp)

    # Generate with Pix2Pix, then rescale the tanh output back to [0, 1].
    prediction = model.predict(_to_model_input(input_image))
    prediction = (prediction + 1) / 2.0

    # Upscale to the original spectrogram resolution as 32-bit float ('F').
    # PIL size is (width, height), so the resulting array is 512 x 1293.
    orig_size = (1293, 512)
    resized = (
        Image.fromarray((prediction[0] * 255).astype('uint8'))
        .resize(orig_size)
        .convert('F')
    )

    spectrogram = np.array(resized)
    spectrogram = modify_spectrogram(spectrogram)
    spectrogram_image_path = save_spectrogram_image(spectrogram, stamp)

    # Invert the mel spectrogram to a waveform.
    # NOTE(review): values here are 0-255 floats, not power/dB — confirm this
    # matches the scale the model was trained on.
    wav = librosa.feature.inverse.mel_to_audio(
        spectrogram, sr=44100, n_fft=2048, hop_length=512
    )
    audio_file_path = f"{stamp}_generated_audio.wav"
    sf.write(audio_file_path, wav, samplerate=44100)

    return uploaded_image_path, spectrogram_image_path, audio_file_path
def gradio_process_image(input_image):
    """Gradio handler: run the image-to-audio pipeline, return the three file paths.

    Returns (uploaded_image_path, spectrogram_image_path, audio_file_path),
    matching the interface's three output components.

    Bug fix: the original also returned a 4th value (a `cleanup` closure) even
    though the interface declares only 3 outputs, which makes Gradio fail on
    the output-count mismatch — and the closure was never invoked, so it never
    deleted anything anyway. The files cannot be removed before Gradio has
    served them, so cleanup is left to an external job (or Gradio's
    `delete_cache` option on the Blocks/Interface).
    """
    uploaded_image_path, spectrogram_image_path, audio_file_path = process_image(input_image)
    return uploaded_image_path, spectrogram_image_path, audio_file_path
# Gradio UI: one image in; the saved upload, the spectrogram PNG, and the
# generated audio file out.
_outputs = [
    gr.File(label="Uploaded Image"),
    gr.Image(type="filepath"),
    gr.Audio(type="filepath"),
]
interface = gr.Interface(
    fn=gradio_process_image,
    inputs=gr.Image(type="pil"),
    outputs=_outputs,
    title="Image to Audio Generator with Spectrogram Display",
    description="Upload an image, and get an audio file generated using Pix2Pix.",
)

# Start the web app.
interface.launch()