# Audio-to-image Gradio app (Hugging Face Space).
# NOTE: web-page scrape residue (Spaces header, commit hashes, gutter line
# numbers) was removed from the top of this file; it was not source code.
# Standard library imports
import os
import sys

# Third-party imports (duplicate `import gradio as gr` removed)
import gradio as gr
import librosa
import numpy as np
import torch
from diffusers import StableDiffusionPipeline

# Log environment details to aid debugging on hosted deployments,
# where the installed Gradio version often differs from local dev.
print(f"Gradio version: {gr.__version__}")
print(f"Gradio location: {gr.__file__}")
print(f"Python executable: {sys.executable}")
# Select the best available device: CUDA GPU when present, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load the Stable Diffusion text-to-image pipeline.
# float16 halves GPU memory use; CPU inference requires float32.
model_id = "runwayml/stable-diffusion-v1-5"
try:
    stable_diffusion = StableDiffusionPipeline.from_pretrained(
        model_id,
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    ).to(device)
except Exception as e:
    # Without the model the app cannot function; report and abort startup.
    print(f"Error loading the model: {e}")
    print("Ensure you have the correct model ID and access rights.")
    # `exit` is a site-module convenience not guaranteed in all contexts;
    # sys.exit is the reliable way to terminate a script.
    sys.exit(1)
def describe_audio(audio_path):
    """
    Build a short scene description from the acoustic profile of a file.

    Parameters:
        audio_path (str): Path to the audio file.

    Returns:
        str: A prompt combining a loudness-derived scene and a
        brightness-derived mood; a generic fallback prompt if the
        file cannot be analyzed.
    """
    try:
        samples, rate = librosa.load(audio_path, sr=None)

        # Mean dB of the mel spectrogram serves as a loudness proxy.
        mel = librosa.feature.melspectrogram(y=samples, sr=rate, n_mels=128)
        loudness = np.mean(librosa.power_to_db(mel, ref=np.max))

        # Mean spectral centroid serves as a brightness proxy.
        brightness = np.mean(librosa.feature.spectral_centroid(y=samples, sr=rate))

        # Quieter audio maps to calmer imagery.
        if loudness < -40:
            scene = "a calm and serene landscape with gentle waves"
        elif loudness < -20:
            scene = "a vibrant forest with rustling leaves"
        else:
            scene = "a thunderstorm with dark clouds and lightning"

        # Brighter (higher-centroid) audio maps to more intense lighting.
        if brightness < 2000:
            mood = "under soft, ambient light"
        elif brightness < 4000:
            mood = "with vivid and lively colors"
        else:
            mood = "in a surreal and dynamic setting"

        return f"{scene} {mood}"
    except Exception as e:
        # Analysis failure is non-fatal: fall back to a neutral prompt.
        print(f"Error processing audio: {e}")
        return "an abstract artistic scene"
def generate_image(description):
    """
    Run the Stable Diffusion pipeline on a text prompt.

    Parameters:
        description (str): Prompt to condition image generation on.

    Returns:
        PIL.Image or None: The first generated image, or None on failure.
    """
    try:
        # CPU path: no autocast context is needed.
        if device != "cuda":
            return stable_diffusion(description).images[0]
        # GPU path: mixed precision for speed and memory savings.
        with torch.autocast("cuda"):
            return stable_diffusion(description).images[0]
    except Exception as e:
        print(f"Error generating image: {e}")
        return None
def audio_to_image(audio_file):
    """
    Convert an uploaded audio file into an artistic image.

    Parameters:
        audio_file (str or None): Path to the uploaded audio file.

    Returns:
        PIL.Image or None: The generated image, or None when no file was
        provided or generation failed.

    Note:
        Failures now return None instead of an error string: the output
        component is gr.Image(type="pil"), and Gradio interprets a plain
        string as an image filepath, which crashed the UI on error.
    """
    if audio_file is None:
        print("No audio file provided.")
        return None
    description = describe_audio(audio_file)
    print(f"Generated Description: {description}")
    image = generate_image(description)
    if image is None:
        print("Failed to generate image.")
    return image
# Gradio interface metadata
title = "🎵 Audio to Artistic Image Converter 🎨"
description_text = """
Upload an audio file, and this app will generate an artistic image based on the sound's characteristics.
"""

# Candidate example clips shipped with the app.
example_paths = [
    "example_audio/calm_ocean.wav",
    "example_audio/rustling_leaves.wav",
    "example_audio/thunderstorm.wav",
]

# Keep only examples that exist on disk; each entry is wrapped in a list
# because gr.Interface expects one row of inputs per example.
valid_examples = [[p] for p in example_paths if os.path.isfile(p)]
for missing in (p for p in example_paths if not os.path.isfile(p)):
    print(f"Example file not found: {missing}")

# Create the examples directory on first run so users know where to add clips.
if not os.path.exists("example_audio"):
    os.makedirs("example_audio")
    print("Please add some example audio files in the 'example_audio' directory.")
# Build the web UI.  Gradio 4.x removed gr.Audio's `source` keyword in favor
# of `sources` (a list); passing the old keyword raises a TypeError at
# startup, which matches the Space's "Runtime error" status.
interface = gr.Interface(
    fn=audio_to_image,
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs=gr.Image(type="pil"),
    title=title,
    description=description_text,
    examples=valid_examples if valid_examples else None,
    allow_flagging="never",
    theme="default",
)

# Launch only when run as a script, not when imported as a module.
if __name__ == "__main__":
    interface.launch()