# Hugging Face Space app (author: alpercagann)
# Commit dc72d06: "Add torch to requirements"
import os
import sys

# Apply compatibility patches first — must run before any patched third-party
# modules are imported, so this stays at the very top of the file.
try:
    import compatibility_patches
except ImportError:
    # Best-effort: the Space should still start without the patches module.
    print("Warning: compatibility_patches not found")

# Print environment information so the Space's startup logs show exactly
# what runtime we are on (useful when debugging dependency issues).
print("==== Environment Information ====")
print(f"Python version: {sys.version}")
print(f"Working directory: {os.getcwd()}")
print(f"Directory contents: {os.listdir('.')}")
# Probe the key packages and record their versions (or absence) both to
# stdout and in `packages`, which the UI's dependency check can reuse.
packages = {
    "torch": None,
    "transformers": None,
    "diffusers": None,
    "accelerate": None,
    "einops": None,
    "omegaconf": None,
    "librosa": None,
}

for package in packages:  # iterate the dict directly; .keys() is redundant
    try:
        module = __import__(package)
        # Not every package exposes __version__; fall back to a marker string
        # instead of letting AttributeError escape.
        version = getattr(module, "__version__", "Installed (version unknown)")
    except ImportError:
        version = "Not installed"
    packages[package] = version
    print(f"{package}: {version}")
# Ensure the working directories used by the app exist (idempotent).
for directory in ("assets", "ckpts", "outputs"):
    os.makedirs(directory, exist_ok=True)
# Fetch the bundled example audio clips from Google Drive if they are not
# already present on disk. Any failure (including a missing download helper)
# is reported but never fatal — the app can still run without the examples.
EXAMPLE_AUDIO = {
    "assets/fire_crackling.wav": "1vOAZcbkpo_hre2g26n--lUXdwbTQp22k",
    "assets/plastic_bag.wav": "15igeDor7a47a-oluSCfO6GeUvFVl2ttb",
}
_missing_audio = [p for p in EXAMPLE_AUDIO if not os.path.exists(p)]
if _missing_audio:
    try:
        from download_assets import download_gdrive_file

        for audio_path in _missing_audio:
            download_gdrive_file(EXAMPLE_AUDIO[audio_path], audio_path)
    except Exception as e:
        print(f"Error downloading example audio files: {e}")
# Import required packages
import gradio as gr

# Import our controller — wraps model loading and generation for the UI
from controller import SonicDiffusionController

# Initialize controller at module import time so all UI callbacks below can
# close over it. NOTE(review): presumably this does not load model weights
# yet (loading is triggered from the "Load Selected Model" button) — confirm
# against the SonicDiffusionController implementation.
controller = SonicDiffusionController()
# Create the Gradio interface.
# Layout: a shared status textbox at the top, then two tabs —
#   "1. Setup": dependency/asset checks and asset download
#   "2. Generate": model selection, prompt + audio inputs, and generation
with gr.Blocks(title="SonicDiffusion") as demo:
    gr.Markdown("# SonicDiffusion - Audio-to-Image Generation")
    gr.Markdown("Generate images conditioned on audio inputs using Stable Diffusion")
    # Shared status area that most buttons write their results into.
    status_output = gr.Textbox(label="Status", value="System initialized. Start by checking dependencies and downloading assets.", lines=8)
    with gr.Tab("1. Setup"):
        with gr.Row():
            with gr.Column():
                check_deps_btn = gr.Button("Check Dependencies")
                # Format the controller's dependency report as "pkg: version" lines.
                def format_deps():
                    deps = controller.check_dependencies()
                    return "\n".join([f"{pkg}: {vers}" for pkg, vers in deps.items()])
                check_deps_btn.click(
                    fn=format_deps,
                    outputs=status_output
                )
            with gr.Column():
                check_assets_btn = gr.Button("Check Assets")
                # Format the controller's asset report as "path: ✓/✗" lines.
                def format_assets():
                    assets = controller.check_assets()
                    return "\n".join([f"{path}: {'✓' if exists else '✗'}" for path, exists in assets.items()])
                check_assets_btn.click(
                    fn=format_assets,
                    outputs=status_output
                )
        download_assets_btn = gr.Button("Download Required Assets", variant="primary")
        download_assets_btn.click(fn=controller.download_assets, outputs=status_output)
    with gr.Tab("2. Generate"):
        with gr.Row():
            with gr.Column():
                model_dropdown = gr.Dropdown(
                    label="Select Model",
                    choices=["Landscape Model", "Greatest Hits Model"],
                    value="Landscape Model"
                )
                load_model_btn = gr.Button("Load Selected Model", variant="primary")
                load_model_btn.click(
                    fn=controller.load_model,
                    inputs=[model_dropdown],
                    outputs=status_output
                )
                prompt_input = gr.Textbox(
                    label="Prompt",
                    placeholder="Enter a descriptive prompt...",
                    value="a high quality photograph of a fantasy landscape"
                )
                # Use the preloaded example audio files; the last choice
                # switches to a free-form upload widget instead.
                audio_dropdown = gr.Dropdown(
                    label="Select Example Audio",
                    choices=["assets/fire_crackling.wav", "assets/plastic_bag.wav", "Upload my own audio"],
                    value="assets/fire_crackling.wav"
                )
                # Hidden until the user picks "Upload my own audio" above.
                audio_input = gr.Audio(
                    label="Or Upload Your Own Audio",
                    type="filepath",
                    sources=["upload", "microphone"],
                    visible=False
                )
                # Show/hide the upload widget based on the dropdown choice.
                def toggle_audio_upload(choice):
                    return gr.update(visible=(choice == "Upload my own audio"))
                audio_dropdown.change(
                    fn=toggle_audio_upload,
                    inputs=[audio_dropdown],
                    outputs=[audio_input]
                )
                with gr.Row():
                    cfg_scale = gr.Slider(
                        label="CFG Scale",
                        minimum=1.0,
                        maximum=20.0,
                        value=7.5,
                        step=0.5
                    )
                    steps = gr.Slider(
                        label="Steps",
                        minimum=20,
                        maximum=100,
                        value=50,
                        step=5
                    )
                # Resolve the effective audio path: the uploaded file when
                # "Upload my own audio" is selected, else the example path.
                def prepare_audio_path(dropdown_choice, uploaded_audio):
                    if dropdown_choice == "Upload my own audio":
                        return uploaded_audio
                    else:
                        return dropdown_choice
                generate_btn = gr.Button("Generate Image", variant="primary")
            with gr.Column():
                output_image = gr.Image(label="Generated Image", type="pil")
                output_status = gr.Textbox(label="Generation Status", value="")
        # The lambda returns (image, status) matching the two outputs below.
        generate_btn.click(
            fn=lambda p, d, u, c, s: (
                controller.generate(p, prepare_audio_path(d, u), c, s),
                "Generation completed"
            ),
            inputs=[prompt_input, audio_dropdown, audio_input, cfg_scale, steps],
            outputs=[output_image, output_status]
        )
if __name__ == "__main__":
    # Bind to all interfaces (required inside a container) and request a
    # public Gradio share link.
    launch_kwargs = {"server_name": "0.0.0.0", "share": True}
    demo.launch(**launch_kwargs)