Spaces:

musdfakoc
/

local_intelligence

Sleeping

App Files Files Community

musdfakoc committed on Sep 30, 2024

Commit

9faab60

verified ·

1 Parent(s): 50795ae

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -5

app.py CHANGED Viewed

@@ -9,7 +9,8 @@ import numpy as np
 import soundfile as sf
 import os
 import random
-import tempfile  # For temporary file handling
 # Load your Pix2Pix model (make sure the path is correct)
 model = load_model('./model_022600.h5', compile=False)
@@ -66,6 +67,20 @@ def modify_spectrogram(spectrogram):
     return spectrogram
 # Function to process the input image and convert to audio
 def process_image(input_image):
     # Load and preprocess the input image
@@ -93,6 +108,9 @@ def process_image(input_image):
     # Modify the spectrogram randomly
     img = modify_spectrogram(img)
     # Convert the spectrogram back to audio using librosa
     wav = librosa.feature.inverse.mel_to_audio(img, sr=44100, n_fft=2048, hop_length=512)
@@ -101,15 +119,15 @@ def process_image(input_image):
         sf.write(temp_audio_file.name, wav, samplerate=44100)
         audio_file_path = temp_audio_file.name
-    return audio_file_path  # Return the file path
 # Create a Gradio interface
 interface = gr.Interface(
     fn=process_image,
     inputs=gr.Image(type="pil"),  # Input is an image
-    outputs=gr.Audio(type="filepath"),  # Output is an audio file
-    title="Image to Audio Generator",  # App title
-    description="Upload an image (preferably a spectrogram), and get an audio file generated using Pix2Pix.",
 )
 # Launch the interface

 import soundfile as sf
 import os
 import random
+import tempfile
+import matplotlib.pyplot as plt
 # Load your Pix2Pix model (make sure the path is correct)
 model = load_model('./model_022600.h5', compile=False)
     return spectrogram
+# Function to save the modified spectrogram image for display
+def save_spectrogram_image(spectrogram):
+    """Render a spectrogram to a temporary PNG file and return its path.
+
+    Args:
+        spectrogram: Image data accepted by matplotlib's ``imshow``; it is
+            drawn in grayscale with the origin at the lower edge and the
+            axes hidden.
+
+    Returns:
+        Path of the written PNG file. The file is created with
+        ``delete=False``, so it outlives this call and is not removed here.
+    """
+    # Reserve a unique path and close the handle before matplotlib writes to
+    # it: reopening a still-open NamedTemporaryFile by name fails on Windows.
+    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_image_file:
+        temp_image_path = temp_image_file.name
+    # Keep an explicit figure handle instead of relying on pyplot's "current
+    # figure", so the figure closed below is always the one created here.
+    fig = plt.figure(figsize=(10, 4))
+    try:
+        ax = fig.add_subplot(111)
+        ax.imshow(spectrogram, aspect='auto', origin='lower', cmap='gray')
+        ax.axis('off')
+        fig.savefig(temp_image_path, bbox_inches='tight', pad_inches=0)
+    finally:
+        # Always release the figure, even if rendering or saving raises.
+        plt.close(fig)
+    return temp_image_path
 # Function to process the input image and convert to audio
 def process_image(input_image):
     # Load and preprocess the input image
     # Modify the spectrogram randomly
     img = modify_spectrogram(img)
+    # Save the modified spectrogram as an image
+    spectrogram_image_path = save_spectrogram_image(img)
     # Convert the spectrogram back to audio using librosa
     wav = librosa.feature.inverse.mel_to_audio(img, sr=44100, n_fft=2048, hop_length=512)
         sf.write(temp_audio_file.name, wav, samplerate=44100)
         audio_file_path = temp_audio_file.name
+    return spectrogram_image_path, audio_file_path  # Return the paths for both spectrogram image and audio
 # Create a Gradio interface
 interface = gr.Interface(
     fn=process_image,
     inputs=gr.Image(type="pil"),  # Input is an image
+    # process_image returns file paths for both outputs, so both components use
+    # type="filepath"; "file" is not an accepted gr.Image type.
+    outputs=[gr.Image(type="filepath"), gr.Audio(type="filepath")],  # Output both spectrogram image and audio file
+    title="Image to Audio Generator with Spectrogram Display",  # App title
+    description="Upload an image (preferably a spectrogram), and get an audio file generated using Pix2Pix. You can also see the modified spectrogram.",
 )
 # Launch the interface