Spaces:

alpercagann
/

SonicDiffusionClean

Runtime error

App Files Files Community

alpercagann committed on Apr 8, 2025

Commit

8c0dbae

1 Parent(s): 32002e9

Create more complete SonicDiffusion controller

Browse files

Files changed (1) hide show

controller.py +140 -70

controller.py CHANGED Viewed

@@ -1,14 +1,22 @@
 import os
 import sys
-class SimpleSonicDiffusionController:
-    """A simplified version of the controller with minimal dependencies"""
     def __init__(self):
         self.model_loaded = False
-        self.tokenizer_loaded = False
-        self.pipe_loaded = False
         self.device = self._get_device()
     def _get_device(self):
         """Determine the available device (CPU or CUDA)"""
@@ -24,86 +32,148 @@ class SimpleSonicDiffusionController:
             print("PyTorch not available, using CPU")
             return "cpu"
-    def load_model(self):
-        """Load a simple model to verify libraries are working"""
-        status_messages = []
         try:
-            # Test PyTorch
-            import torch
-            self.test_tensor = torch.rand(3, 3)
-            status_messages.append("✓ PyTorch loaded successfully")
-            # Try loading a simple tokenizer from transformers
-            try:
-                from transformers import AutoTokenizer
-                self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
-                self.tokenizer_loaded = True
-                status_messages.append("✓ Transformers tokenizer loaded")
-            except Exception as e:
-                status_messages.append(f"✗ Transformers error: {str(e)}")
-            # Try loading a simple pipeline from diffusers
-            try:
-                from diffusers import DiffusionPipeline
-                # Just check if the class exists, don't actually load a model
-                self.pipe_class = DiffusionPipeline
-                self.pipe_loaded = True
-                status_messages.append("✓ Diffusers available")
-            except Exception as e:
-                status_messages.append(f"✗ Diffusers error: {str(e)}")
-            self.model_loaded = True
-            return "\n".join(status_messages)
         except Exception as e:
-            return f"Error loading model: {str(e)}"
-    def generate(self, text_prompt, audio_path=None):
-        """Generate text using available libraries"""
-        if not self.model_loaded:
-            return "Error: Model not loaded. Please click 'Load Model' first."
-        results = []
         try:
-            # Use tokenizer if available
-            if self.tokenizer_loaded:
-                tokens = self.tokenizer(text_prompt, return_tensors="pt")
-                token_count = len(tokens['input_ids'][0])
-                results.append(f"Transformers: Tokenized into {token_count} tokens")
-            # Check diffusers status
-            if self.pipe_loaded:
-                results.append("Diffusers is available for pipeline creation")
-            else:
-                results.append("Diffusers is not available")
-            return "\n".join(results)
         except Exception as e:
-            return f"Error during generation: {str(e)}"
-    def get_asset_status(self):
-        """Check if required directories and files exist"""
-        asset_status = {}
-        # Check directories
-        for dir_name in ["assets", "ckpts", "outputs"]:
-            asset_status[dir_name] = "✓" if os.path.exists(dir_name) else "✗"
-        # Check library availability
-        asset_status["PyTorch"] = "✓" if self._check_import("torch") else "✗"
-        asset_status["Transformers"] = "✓" if self._check_import("transformers") else "✗"
-        asset_status["Diffusers"] = "✓" if self._check_import("diffusers") else "✗"
-        asset_status["Accelerate"] = "✓" if self._check_import("accelerate") else "✗"
-        return asset_status
-    def _check_import(self, module_name):
-        """Check if a module can be imported"""
-        try:
-            __import__(module_name)
-            return True
-        except ImportError:
-            return False

 import os
 import sys
+import traceback
+class SonicDiffusionController:
+    """Controller for SonicDiffusion with asset downloading support"""
     def __init__(self):  # Initialise controller state
         self.model_loaded = False  # set to True by load_model() once its file checks pass
         self.device = self._get_device()
+        self.required_assets = {  # relative asset path -> ID handed to download_gdrive_file (presumably a Google Drive file ID - confirm)
+            "ckpts/landscape.pt": "1-oTNIjCZq3_mGI1XRfzDyCnmjXCvd0Vh",
+            "ckpts/greatest_hits.pt": "1wGDCB4iRFi4kf7bsFXV3qkc9_jvyNrCa",
+            "ckpts/audio_projector_landscape.pth": "1BdjzRJOC8bvyPgrAkJJcCaN3EEJg3STm",
+            "ckpts/audio_projector_gh.pth": "19Uk68PXVOjE3TJl86H-IlMaM1URhU33a",
+            "ckpts/CLAP_weights_2022.pth": "1VK22jxHkFwpxknxQBLd6kIgO5WxQdLFP",
+            "assets/fire_crackling.wav": "1vOAZcbkpo_hre2g26n--lUXdwbTQp22k",
+            "assets/plastic_bag.wav": "15igeDor7a47a-oluSCfO6GeUvFVl2ttb"
+        }
     def _get_device(self):
         """Determine the available device (CPU or CUDA)"""
             print("PyTorch not available, using CPU")
             return "cpu"
+    def check_dependencies(self):  # Returns a dict: package name -> version string or a status message
+        """Check if all required dependencies are installed"""
+        dependencies = {  # every value starts as None and is overwritten in the loop below
+            "torch": None,
+            "transformers": None,
+            "diffusers": None,
+            "accelerate": None,
+            "einops": None,
+            "omegaconf": None,
+            "librosa": None
+        }
+        for package in dependencies.keys():  # only values are reassigned, so the key set is not changed while iterating
+            try:
+                module = __import__(package)  # import by name; ImportError is handled by the outer except
+                try:
+                    dependencies[package] = module.__version__
+                except AttributeError:  # module imported but exposes no __version__ attribute
+                    dependencies[package] = "Installed (version unknown)"
+            except ImportError:  # other exception types raised during import are not caught here
+                dependencies[package] = "Not installed"
+        return dependencies
+    def check_assets(self):  # Returns a dict: asset path -> True if the path exists, else False
+        """Check which assets exist and which need to be downloaded"""
+        asset_status = {}
+        for asset_path in self.required_assets.keys():  # keys are relative paths, so the check is against the current working directory
+            asset_status[asset_path] = os.path.exists(asset_path)  # existence only; file contents are not inspected
+        return asset_status
+    def download_assets(self, specific_asset=None):  # Always returns a status string; exceptions are caught below
+        """Download required assets"""
         try:
+            # Imported inside the try so a missing download_assets module is reported as an error string.
+            from download_assets import get_gdrive_file_id, download_gdrive_file  # NOTE(review): get_gdrive_file_id is not used in this method
+            # Make sure both target directories exist before any download is attempted
+            os.makedirs("assets", exist_ok=True)
+            os.makedirs("ckpts", exist_ok=True)
+            assets_to_download = self.required_assets  # default: consider every known asset
+            if specific_asset:  # a falsy value (None, "") means "all assets"
+                if specific_asset in self.required_assets:
+                    assets_to_download = {specific_asset: self.required_assets[specific_asset]}
+                else:
+                    return f"Asset {specific_asset} not found in required assets list"
+            # Keep only the assets whose path does not exist yet
+            missing_assets = {}
+            for asset_path, file_id in assets_to_download.items():
+                if not os.path.exists(asset_path):
+                    missing_assets[asset_path] = file_id
+            if not missing_assets:
+                return "All required assets already exist"
+            # Download each missing asset and record one progress line plus one result line per asset
+            results = []
+            for asset_path, file_id in missing_assets.items():
+                results.append(f"Downloading {asset_path}...")
+                success = download_gdrive_file(file_id, asset_path)  # return value is used as a truthy success flag
+                results.append(f"  {'Success' if success else 'Failed'}")
+            return "\n".join(results)
         except Exception as e:
+            traceback.print_exc()  # full traceback goes to stderr; the caller only receives the short message
+            return f"Error downloading assets: {str(e)}"
+    def load_model(self, model_type="Landscape Model"):  # Always returns a status string; sets self.model_loaded/self.model_type on success
+        """Load the selected SonicDiffusion model"""
+        if model_type not in ["Landscape Model", "Greatest Hits Model"]:
+            return f"Unknown model type: {model_type}"
+        # Pick the checkpoint pair that belongs to the selected model type
+        if model_type == "Landscape Model":
+            gate_dict_path = "ckpts/landscape.pt"
+            audio_projector_path = "ckpts/audio_projector_landscape.pth"
+        else:
+            gate_dict_path = "ckpts/greatest_hits.pt"
+            audio_projector_path = "ckpts/audio_projector_gh.pth"
+        clap_path = "CLAP/msclap"  # NOTE(review): assigned but never read in this method
+        clap_weights = "ckpts/CLAP_weights_2022.pth"
+        # Check which of the three required files are absent
+        required_files = [gate_dict_path, audio_projector_path, clap_weights]
+        missing_files = [f for f in required_files if not os.path.exists(f)]
+        if missing_files:
+            # Try to fetch each missing file; the first failure aborts with an error string
+            for file_path in missing_files:
+                if file_path in self.required_assets:
+                    try:
+                        from download_assets import download_gdrive_file
+                        download_gdrive_file(self.required_assets[file_path], file_path)  # NOTE(review): return value is ignored here; a failed download only surfaces at open() below
+                    except Exception as e:
+                        return f"Failed to download {file_path}: {str(e)}"
+                else:
+                    return f"Missing required file {file_path} and no download source available"
         try:
+            # Smoke test only: nothing below deserialises a checkpoint
+            import torch
+            # Create a small random tensor on the selected device to verify PyTorch works
+            self.test_tensor = torch.rand(3, 3).to(self.device)
+            # Open each file and read its first 10 bytes; open() raises if a file is missing or unreadable
+            with open(gate_dict_path, 'rb') as f:
+                # The bytes read are discarded; only the ability to read matters
+                f.read(10)
+            with open(audio_projector_path, 'rb') as f:
+                f.read(10)
+            with open(clap_weights, 'rb') as f:
+                f.read(10)
+            # For now, just mark as loaded - real model loading is not implemented here
+            self.model_loaded = True
+            self.model_type = model_type  # read later by generate(); only assigned on this success path
+            return f"{model_type} files verified and accessible"
         except Exception as e:
+            traceback.print_exc()  # full traceback goes to stderr; the caller only receives the short message
+            return f"Error loading model: {str(e)}"
+    def generate(self, text_prompt, audio_path=None, cfg_scale=7.5, steps=50):  # Always returns a string: an error message or a summary of the request
+        """Generate an image using SonicDiffusion with the specified inputs"""
+        if not self.model_loaded:  # load_model() must have succeeded first
+            return "Error: Model not loaded. Please click 'Load Model' first."
+        if not audio_path:  # rejects None and the empty string
+            return "Error: Audio file is required"
+        if not os.path.exists(audio_path):
+            return f"Error: Audio file {audio_path} does not exist"
+        # Placeholder: no image is produced yet; return a text summary of the requested settings instead
+        return f"Would generate image with:\nModel: {self.model_type}\nPrompt: {text_prompt}\nAudio: {audio_path}\nCFG Scale: {cfg_scale}\nSteps: {steps}\n\nFull implementation coming soon!"