Spaces:

alpercagann
/

SonicDiffusionClean

Runtime error

App Files Files Community

alpercagann committed on Apr 8, 2025

Commit

c9ef435

1 Parent(s): 517b2f4

Add simplified controller and update app

Browse files

Files changed (2) hide show

app.py +42 -56
controller.py +45 -129

app.py CHANGED Viewed

@@ -7,14 +7,15 @@ print(f"Python version: {sys.version}")
 print(f"Working directory: {os.getcwd()}")
 print(f"Directory contents: {os.listdir('.')}")
-# Import gradio first - this is our most essential dependency
-import gradio as gr
-# Try importing other packages (but don't fail if they're missing)
-torch_available = False
-transformers_available = False
-diffusers_available = False
 try:
     import torch
     print(f"PyTorch version: {torch.__version__}")
@@ -24,63 +25,48 @@ try:
     torch_available = True
 except ImportError as e:
     print(f"PyTorch import error: {e}")
-try:
-    import transformers
-    print(f"Transformers version: {transformers.__version__}")
-    transformers_available = True
-except ImportError as e:
-    print(f"Transformers import error: {e}")
-try:
-    import diffusers
-    print(f"Diffusers version: {diffusers.__version__}")
-    diffusers_available = True
-except ImportError as e:
-    print(f"Diffusers import error: {e}")
-# Simple demo interface
-def hello(name):
-    if not name:
-        name = "World"
-    status = []
-    if torch_available:
-        status.append("PyTorch ✓")
-    else:
-        status.append("PyTorch ✗")
-    if transformers_available:
-        status.append("Transformers ✓")
-    else:
-        status.append("Transformers ✗")
-    if diffusers_available:
-        status.append("Diffusers ✓")
-    else:
-        status.append("Diffusers ✗")
-    return f"Hello, {name}!\n\nPackage Status:\n" + "\n".join(status)
-# Create the Gradio interface
-demo = gr.Interface(
-    fn=hello,
-    inputs="text",
-    outputs="text",
-    title="SonicDiffusion - Setup Status",
-    description="This app shows which packages are successfully installed."
-)
 if __name__ == "__main__":
-    # Try installing packages at runtime if they're not available
-    if not torch_available:
-        print("Attempting to install PyTorch...")
-        try:
-            import subprocess
-            subprocess.check_call([sys.executable, "-m", "pip", "install", "torch==2.0.1"])
-            print("PyTorch installed successfully!")
-        except Exception as e:
-            print(f"Error installing PyTorch: {e}")
-    # Launch the demo
     demo.launch()

 print(f"Working directory: {os.getcwd()}")
 print(f"Directory contents: {os.listdir('.')}")
+# Create necessary directories
+os.makedirs("assets", exist_ok=True)
+os.makedirs("ckpts", exist_ok=True)
+os.makedirs("outputs", exist_ok=True)
+# Import required packages
+import gradio as gr
+# Try importing torch
 try:
     import torch
     print(f"PyTorch version: {torch.__version__}")
     torch_available = True
 except ImportError as e:
     print(f"PyTorch import error: {e}")
+    torch_available = False
+# Import our controller
+from controller import SimpleSonicDiffusionController
+# Initialize controller
+controller = SimpleSonicDiffusionController()
+# Create the Gradio interface
+with gr.Blocks(title="SonicDiffusion - Progressive Setup") as demo:
+    gr.Markdown("# SonicDiffusion - Simplified Version")
+    status_output = gr.Textbox(label="Status", value="System initialized. Click 'Check System' to verify setup.")
+    with gr.Tab("System Check"):
+        check_btn = gr.Button("Check System")
+        def check_system():
+            status = []
+            # Check PyTorch
+            status.append(f"PyTorch: {'Available' if torch_available else 'Not Available'}")
+            # Check directories
+            asset_status = controller.get_asset_status()
+            for dir_name, dir_status in asset_status.items():
+                status.append(f"Directory '{dir_name}': {dir_status}")
+            return "\n".join(status)
+        check_btn.click(fn=check_system, outputs=status_output)
+    with gr.Tab("Model"):
+        load_model_btn = gr.Button("Load Model")
+        load_model_btn.click(fn=controller.load_model, outputs=status_output)
+    with gr.Tab("Generate"):
+        text_input = gr.Textbox(label="Prompt")
+        gen_btn = gr.Button("Generate")
+        gen_output = gr.Textbox(label="Output")
+        gen_btn.click(fn=controller.generate, inputs=[text_input], outputs=gen_output)
 if __name__ == "__main__":
     demo.launch()

controller.py CHANGED Viewed

@@ -1,140 +1,56 @@
 import os
-import torch
-from unet2d_custom import UNet2DConditionModel
-from pipeline_stable_diffusion_custom import StableDiffusionPipeline
-from ldm.modules.encoders.audio_projector_res import Adapter
-class SonicDiffusionController:
-    def __init__(self, device="cuda" if torch.cuda.is_available() else "cpu"):
-        self.device = device
-        print(f"Using device: {self.device}")
-        self.sr = 44100
         self.model_loaded = False
-    def load_model(self,
-                  gate_dict_path="ckpts/landscape.pt",
-                  clap_path="CLAP/msclap",
-                  clap_weights="ckpts/CLAP_weights_2022.pth",
-                  adapter_ckpt_path="ckpts/audio_projector_landscape.pth"):
-        """Load the model conditionally based on environment and availability"""
         try:
-            # First, check if the required files exist
-            for path in [gate_dict_path, adapter_ckpt_path]:
-                if not os.path.exists(path):
-                    print(f"Warning: {path} not found, trying to download...")
-                    # You could add auto-download here
-            print("Loading models - this may take a moment...")
-            # Try to load the model with appropriate settings for your hardware
-            model_id = "CompVis/stable-diffusion-v1-4"
-            self.unet = UNet2DConditionModel.from_pretrained(
-                model_id,
-                subfolder="unet",
-                use_adapter_list=[False, True, True],
-                low_cpu_mem_usage=True,
-                device_map="auto"  # Let PyTorch decide the mapping
-            )
-            self.pipeline = StableDiffusionPipeline.from_pretrained(
-                model_id,
-                use_safetensors=True,
-                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
-            )
-            # Move models to the appropriate device
-            self.unet = self.unet.to(self.device)
-            self.pipeline = self.pipeline.to(self.device)
-            # Load gate dictionary
-            gate_dict = torch.load(gate_dict_path, map_location=self.device)
-            for name, param in self.unet.named_parameters():
-                if "adapter" in name:
-                    param.data = gate_dict[name].to(self.device)
-            # Set pipeline's UNet
-            self.pipeline.unet = self.unet
-            # Import and load audio encoder
-            import sys
-            sys.path.append(clap_path)
-            from CLAPWrapper import CLAPWrapper
-            self.audio_encoder = CLAPWrapper(clap_weights, use_cuda=(self.device=="cuda"))
-            self.audio_projector = Adapter(audio_token_count=77, transformer_layer_count=4).to(self.device)
-            self.audio_projector.load_state_dict(torch.load(adapter_ckpt_path, map_location=self.device))
-            self.audio_projector.eval()
-            self.model_loaded = True
-            print("Model loaded successfully!")
-            return True
-        except Exception as e:
-            print(f"Failed to load model: {e}")
-            import traceback
-            traceback.print_exc()
-            return False
-    def generate(self, file=None, audio=None, prompt=None, cfg_scale=5, num_inference_steps=50):
-        """Generate an image from audio input"""
-        if not self.model_loaded:
-            raise ValueError("Model not loaded. Call load_model() first.")
         try:
-            with torch.no_grad():
-                # Process audio input
-                audio_emb, _ = self.audio_encoder.get_audio_embeddings([audio], resample=self.sr)
-                audio_proj = self.audio_projector(audio_emb.unsqueeze(1))
-                # Create unconditional embedding
-                audio_emb = torch.zeros(1, 1024).to(self.device)
-                audio_uc = self.audio_projector(audio_emb.unsqueeze(1))
-                # Combine for context
-                audio_context = torch.cat([audio_uc, audio_proj]).to(self.device)
-                # Generate image
-                image = self.pipeline(
-                    prompt=prompt,
-                    audio_context=audio_context,
-                    guidance_scale=cfg_scale,
-                    num_inference_steps=num_inference_steps
-                )
-                return image.images[0]
         except Exception as e:
-            print(f"Error in generation: {e}")
-            import traceback
-            traceback.print_exc()
-            # Return a blank error image
-            from PIL import Image, ImageDraw
-            img = Image.new('RGB', (512, 512), color=(255, 255, 255))
-            d = ImageDraw.Draw(img)
-            d.text((10, 250), f"Error: {str(e)}", fill=(0, 0, 0))
-            return img
-    def update_audio_model(self, audio_model_update):
-        """Update audio model based on selection"""
         try:
-            if audio_model_update == "Landscape Model":
-                audio_projector_path = "ckpts/audio_projector_landscape.pth"
-                gate_dict_path = "ckpts/landscape.pt"
-            else:
-                audio_projector_path = "ckpts/audio_projector_gh.pth"
-                gate_dict_path = "ckpts/greatest_hits.pt"
-            # Load gate dictionary and update parameters
-            gate_dict = torch.load(gate_dict_path, map_location=self.device)
-            for name, param in self.pipeline.unet.named_parameters():
-                if "adapter" in name:
-                    param.data = gate_dict[name].to(self.device)
-            # Load audio projector state
-            self.audio_projector.load_state_dict(torch.load(audio_projector_path, map_location=self.device))
-            return "Model updated successfully"
         except Exception as e:
-            print(f"Error updating audio model: {e}")
-            return f"Error: {str(e)}"

 import os
+import sys
+class SimpleSonicDiffusionController:
+    """A simplified version of the controller with minimal dependencies"""
+    def __init__(self):
         self.model_loaded = False
+        self.device = self._get_device()
+    def _get_device(self):
+        """Determine the available device (CPU or CUDA)"""
         try:
+            import torch
+            if torch.cuda.is_available():
+                print(f"CUDA available: {torch.cuda.get_device_name(0)}")
+                return "cuda"
+            else:
+                print("CUDA not available, using CPU")
+                return "cpu"
+        except ImportError:
+            print("PyTorch not available, using CPU")
+            return "cpu"
+    def load_model(self):
+        """Simulated model loading"""
         try:
+            import torch
+            # Just create a simple tensor to verify PyTorch is working
+            self.test_tensor = torch.rand(3, 3)
+            self.model_loaded = True
+            return "Model loading simulation successful!"
         except Exception as e:
+            return f"Error loading model: {str(e)}"
+    def generate(self, text_prompt, audio_path=None):
+        """Simulated generation process"""
+        if not self.model_loaded:
+            return "Error: Model not loaded. Please click 'Load Model' first."
         try:
+            import torch
+            # Just a placeholder - we'll implement real generation later
+            return f"Generated output for prompt: '{text_prompt}'"
         except Exception as e:
+            return f"Error during generation: {str(e)}"
+    def get_asset_status(self):
+        """Check if required directories and files exist"""
+        asset_status = {}
+        # Check directories
+        for dir_name in ["assets", "ckpts", "outputs"]:
+            asset_status[dir_name] = "✓" if os.path.exists(dir_name) else "✗"
+        return asset_status