"""Gradio front end for SonicDiffusion audio-to-image generation.

Running this module prints environment diagnostics, creates the working
directories, fetches the example audio clips if they are missing, builds the
Gradio ``demo`` interface around ``SonicDiffusionController`` and, when
executed as a script, launches the web server.
"""

import importlib
import os
import sys

# Apply compatibility patches first, before any of the heavier packages
# below get imported.
try:
    import compatibility_patches
except ImportError:
    print("Warning: compatibility_patches not found")

# Print environment information
print("==== Environment Information ====")
print(f"Python version: {sys.version}")
print(f"Working directory: {os.getcwd()}")
print(f"Directory contents: {os.listdir('.')}")

# Import key packages and record/print their versions.
packages = {
    "torch": None,
    "transformers": None,
    "diffusers": None,
    "accelerate": None,
    "einops": None,
    "omegaconf": None,
    "librosa": None,
}

for package in packages:
    try:
        module = importlib.import_module(package)
    except ImportError:
        packages[package] = "Not installed"
    else:
        # Not every package exposes __version__; fall back to a marker string.
        packages[package] = getattr(
            module, "__version__", "Installed (version unknown)"
        )
    print(f"{package}: {packages[package]}")

# Create necessary directories
for directory in ("assets", "ckpts", "outputs"):
    os.makedirs(directory, exist_ok=True)

# Example audio clips: local path -> Google Drive file id.
EXAMPLE_AUDIO_FILES = {
    "assets/fire_crackling.wav": "1vOAZcbkpo_hre2g26n--lUXdwbTQp22k",
    "assets/plastic_bag.wav": "15igeDor7a47a-oluSCfO6GeUvFVl2ttb",
}

# Dropdown entry that switches the UI from example clips to a user upload.
UPLOAD_CHOICE = "Upload my own audio"

# Download example audio files if they don't exist
missing_assets = {
    path: file_id
    for path, file_id in EXAMPLE_AUDIO_FILES.items()
    if not os.path.exists(path)
}
if missing_assets:
    # Best effort: the app must still start when the download helper is
    # unavailable or the download itself fails, so any error is only reported.
    try:
        from download_assets import download_gdrive_file

        for path, file_id in missing_assets.items():
            download_gdrive_file(file_id, path)
    except Exception as e:
        print(f"Error downloading example audio files: {e}")

# Import required packages (kept after the patches/bootstrap steps above,
# matching the original execution order).
import gradio as gr

# Import our controller
from controller import SonicDiffusionController

# Initialize controller
controller = SonicDiffusionController()


def format_deps():
    """Return the controller's dependency report as ``name: version`` lines."""
    deps = controller.check_dependencies()
    return "\n".join(f"{pkg}: {vers}" for pkg, vers in deps.items())


def format_assets():
    """Return the controller's asset report, one ``path: ✓/✗`` line per asset."""
    assets = controller.check_assets()
    return "\n".join(
        f"{path}: {'✓' if exists else '✗'}" for path, exists in assets.items()
    )


def toggle_audio_upload(choice):
    """Show the upload widget only when the upload entry is selected."""
    return gr.update(visible=(choice == UPLOAD_CHOICE))


def prepare_audio_path(dropdown_choice, uploaded_audio):
    """Pick the audio file to use.

    Returns the uploaded file path when the upload entry is selected
    (``None`` if nothing was uploaded), otherwise the selected example path.
    """
    if dropdown_choice == UPLOAD_CHOICE:
        return uploaded_audio
    return dropdown_choice


def run_generation(prompt, dropdown_choice, uploaded_audio, cfg, num_steps):
    """Generate an image and return ``(image, status_message)``.

    Unlike an unconditional "Generation completed", the status reflects what
    actually happened: a missing upload is rejected before the controller is
    called, and controller failures are reported instead of being masked.
    """
    audio_path = prepare_audio_path(dropdown_choice, uploaded_audio)
    if not audio_path:
        return None, "Please upload or record an audio clip before generating."

    try:
        image = controller.generate(prompt, audio_path, cfg, num_steps)
    except Exception as exc:
        # UI boundary: surface the failure in the status box and the console.
        print(f"Error during generation: {exc}")
        return None, f"Generation failed: {exc}"

    if image is None:
        return None, "Generation failed: no image was produced"
    return image, "Generation completed"


# Create the Gradio interface
with gr.Blocks(title="SonicDiffusion") as demo:
    gr.Markdown("# SonicDiffusion - Audio-to-Image Generation")
    gr.Markdown("Generate images conditioned on audio inputs using Stable Diffusion")

    status_output = gr.Textbox(
        label="Status",
        value="System initialized. Start by checking dependencies and downloading assets.",
        lines=8,
    )

    with gr.Tab("1. Setup"):
        with gr.Row():
            with gr.Column():
                check_deps_btn = gr.Button("Check Dependencies")
                check_deps_btn.click(fn=format_deps, outputs=status_output)

            with gr.Column():
                check_assets_btn = gr.Button("Check Assets")
                check_assets_btn.click(fn=format_assets, outputs=status_output)

        download_assets_btn = gr.Button("Download Required Assets", variant="primary")
        download_assets_btn.click(fn=controller.download_assets, outputs=status_output)

    with gr.Tab("2. Generate"):
        with gr.Row():
            with gr.Column():
                model_dropdown = gr.Dropdown(
                    label="Select Model",
                    choices=["Landscape Model", "Greatest Hits Model"],
                    value="Landscape Model",
                )
                load_model_btn = gr.Button("Load Selected Model", variant="primary")
                load_model_btn.click(
                    fn=controller.load_model,
                    inputs=[model_dropdown],
                    outputs=status_output,
                )

                prompt_input = gr.Textbox(
                    label="Prompt",
                    placeholder="Enter a descriptive prompt...",
                    value="a high quality photograph of a fantasy landscape",
                )

                # Use the preloaded example audio files
                audio_dropdown = gr.Dropdown(
                    label="Select Example Audio",
                    choices=[*EXAMPLE_AUDIO_FILES, UPLOAD_CHOICE],
                    value="assets/fire_crackling.wav",
                )
                audio_input = gr.Audio(
                    label="Or Upload Your Own Audio",
                    type="filepath",
                    sources=["upload", "microphone"],
                    visible=False,
                )
                audio_dropdown.change(
                    fn=toggle_audio_upload,
                    inputs=[audio_dropdown],
                    outputs=[audio_input],
                )

                with gr.Row():
                    cfg_scale = gr.Slider(
                        label="CFG Scale",
                        minimum=1.0,
                        maximum=20.0,
                        value=7.5,
                        step=0.5,
                    )
                    steps = gr.Slider(
                        label="Steps",
                        minimum=20,
                        maximum=100,
                        value=50,
                        step=5,
                    )

                generate_btn = gr.Button("Generate Image", variant="primary")

            with gr.Column():
                output_image = gr.Image(label="Generated Image", type="pil")
                output_status = gr.Textbox(label="Generation Status", value="")

        generate_btn.click(
            fn=run_generation,
            inputs=[prompt_input, audio_dropdown, audio_input, cfg_scale, steps],
            outputs=[output_image, output_status],
        )

if __name__ == "__main__":
    # Change the server parameters
    demo.launch(server_name="0.0.0.0", share=True)