Update app.py
app.py
CHANGED
@@ -7,148 +7,153 @@ import numpy as np
import os
import gc
from scipy.sparse.linalg import svds
from huggingface_hub import snapshot_download
from safetensors.torch import load_file, save_file
import json

# --- CORE SAMM ALGORITHM ---

def farms_spectral_analysis(tensor, num_patches=10, patch_size=64):
    """
    Implements the FARMS method (Fixed-Aspect-Ratio Matrix Subsampling).
    Instead of analyzing the full rectangular matrix, we sample square submatrices
    to get a robust estimate of the spectral density and dominant directions.
    """
    # Ensure tensor is 2D
    if len(tensor.shape) != 2:
        return None, None

    rows, cols = tensor.shape
    u_list = []

    # FARMS: Randomly sample square patches to avoid aspect ratio bias
    for _ in range(num_patches):
        r_start = np.random.randint(0, max(1, rows - patch_size))
        c_start = np.random.randint(0, max(1, cols - patch_size))

        # Extract patch
        patch = tensor[r_start:r_start+patch_size, c_start:c_start+patch_size]

        # Compute SVD on patch
        try:
            # We only need top components to find the "Universal Subspace"
            u, s, vh = np.linalg.svd(patch.float().numpy(), full_matrices=False)
            u_list.append(u[:, :1])  # Keep top principal direction
        except Exception:
            continue

    # In a full implementation, we would aggregate these patch spectra.
    # For this simplified Space, we return the full SVD guided by the hypothesis
    # that the top directions are stable.

    # Fallback to full SVD for the merging step, but using the "Universal" concept.
    # We posit the top k singular vectors form the shared subspace.
    try:
        # Efficient randomized SVD; clamp q so the requested rank never
        # exceeds the matrix dimensions (LoRA factors can be very thin).
        u, s, v = torch.svd_lowrank(tensor.float(), q=min(32, min(tensor.shape)))
        return u, v  # Returns Left (U) and Right (V) singular vectors
    except Exception:
        return None, None

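As a quick smoke test (not part of app.py), the returned basis can be exercised on a synthetic low-rank-plus-noise matrix; a minimal sketch, assuming the same torch/numpy imports as the file above:

# Illustrative check (not part of app.py): project a noisy low-rank
# matrix onto the basis returned by farms_spectral_analysis.
torch.manual_seed(0)
signal = torch.randn(256, 16) @ torch.randn(16, 512)   # rank-16 signal
noisy = signal + 0.01 * torch.randn(256, 512)          # add small noise

u, v = farms_spectral_analysis(noisy)
if u is not None:
    w_clean = u @ (u.t() @ noisy)                      # W_clean = U U^T W
    kept = (w_clean.norm() / noisy.norm()).item()
    print(f"Fraction of energy kept by the subspace: {kept:.3f}")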
def spectral_aware_merge(adapters_dict, merge_ratio=0.5):
    """
    Merges adapters by aligning them in the Universal Weight Subspace.
    """
    merged_state_dict = {}

    # Get the keys (layer names) from the first adapter
    if not adapters_dict:
        return {}

    all_keys = list(next(iter(adapters_dict.values())).keys())

    print(f"Starting SAMM merge on {len(all_keys)} layers...")

    for key in all_keys:
        # 1. Collect weights from all adapters for this layer
        layer_tensors = []
        for name, state in adapters_dict.items():
            if key in state:
                layer_tensors.append(state[key])

        if not layer_tensors:
            continue

        # Stack for analysis
        # Shape: (N_adapters, rows, cols)
        stack = torch.stack(layer_tensors)
        avg_weight = torch.mean(stack, dim=0)

        # 2. If it's a LoRA weight (usually 'lora_A' or 'lora_B'), we try SAMM.
        # For simplicity in this demo, we apply it to the computed Delta W or the raw weights.
        # Here we apply the Universal Subspace Hypothesis:
        # the mean is a good approximation only if we project out the noise
        # orthogonal to the principal subspace.

        # Compute the "Universal" basis from the average (center of the cluster).
        # Using the FARMS concept: the shared structure is in the dominant spectrum.
        u_univ, v_univ = farms_spectral_analysis(avg_weight.cpu())

        if u_univ is not None:
            # Project all adapters into this subspace and reconstruct:
            # W_clean = U U^T W (filtering out non-universal spectral noise)
            cleaned_tensors = []
            for w in layer_tensors:
                w = w.float().cpu()
                # Project onto the top universal directions: W_proj = U @ (U.T @ W)
                w_proj = torch.mm(u_univ, torch.mm(u_univ.t(), w))
                cleaned_tensors.append(w_proj)

            # Average the "cleaned" (spectrally aligned) weights
            merged_weight = torch.mean(torch.stack(cleaned_tensors), dim=0)
        else:
            # Fall back to a simple average if SVD fails or the tensor is 1D
            merged_weight = avg_weight

        merged_state_dict[key] = merged_weight

    return merged_state_dict

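For reference, a minimal self-contained usage sketch of spectral_aware_merge with fake adapter state dicts (the repo names, layer key, and shapes are made up for illustration):

# Illustrative usage (not part of app.py): merge two fake adapters
# that share a single layer key.
fake_adapters = {
    "user/lora1": {"base.lora_B.weight": torch.randn(128, 64)},
    "user/lora2": {"base.lora_B.weight": torch.randn(128, 64)},
}
merged = spectral_aware_merge(fake_adapters)
print(merged["base.lora_B.weight"].shape)  # -> torch.Size([128, 64])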
# --- GRADIO HANDLERS ---

def run_samm_merge(base_model_id, lora_ids_text, hf_token):
    if not hf_token:
        # This handler is a generator (it yields logs below), so errors must be
        # yielded rather than returned for Gradio to display them.
        yield "Error: Please enter a Hugging Face Write Token."
        return

    lora_ids = [x.strip() for x in lora_ids_text.split(",") if x.strip()]

    if len(lora_ids) < 2:
        yield "Error: Please provide at least 2 LoRA adapters to merge."
        return

    log = f"Loading {len(lora_ids)} adapters...\n"
    yield log

    try:
        # 1. Download/Load adapters (weights only, to save RAM)
        adapters_weights = {}

        for lora_id in lora_ids:
            log += f"Fetching {lora_id}...\n"
            yield log

            # We download the adapter files directly and load the state_dict
            # manually, so the base model is never loaded once per adapter.
            # Note: In a real large-scale deployment, we would stream this.
            # Here we assume LoRA weights are small enough to fit in RAM.
            try:
                path = snapshot_download(repo_id=lora_id, token=hf_token)

                # Load safetensors or bin
                if os.path.exists(os.path.join(path, "adapter_model.safetensors")):
                    state = load_file(os.path.join(path, "adapter_model.safetensors"))
                else:
                    state = torch.load(os.path.join(path, "adapter_model.bin"), map_location="cpu")

                adapters_weights[lora_id] = state
            except Exception as e:
                log += f"Failed to load {lora_id}: {str(e)}\n"
                yield log
                continue  # Skip this adapter if it fails

        # 2. Perform SAMM Merge
        log += "\nInitializing Spectral-Aware Model Merging (SAMM)...\n"

@@ -162,48 +167,47 @@ except Exception as e:
        os.makedirs(output_dir, exist_ok=True)

        # Save weights
        save_file(merged_weights, os.path.join(output_dir, "adapter_model.safetensors"))

        # Save config (copy from the first adapter)
        config_path = snapshot_download(repo_id=lora_ids[0], token=hf_token)
        with open(os.path.join(config_path, "adapter_config.json"), 'r') as f:
            config = json.load(f)
        with open(os.path.join(output_dir, "adapter_config.json"), 'w') as f:
            json.dump(config, f)

        log += f"\nSuccess! Merged LoRA saved locally to ./{output_dir}\n"
        log += "Ready for download or push to hub."
        yield log

    except Exception as e:
        yield f"Critical Error: {str(e)}"

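Once the handler finishes, the saved folder should behave like any other PEFT adapter directory. A hedged loading sketch (not part of app.py; assumes peft and transformers are installed, and "samm_merged_adapter" stands in for the actual output_dir, which is set in code outside this diff):

# Illustrative loading (not part of app.py); the directory name is assumed.
from transformers import AutoModelForCausalLM
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")
model = PeftModel.from_pretrained(base, "samm_merged_adapter")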
# --- UI SETUP ---

with gr.Blocks(title="SAMM: Spectral-Aware Model Merging") as demo:
    gr.Markdown("""
    # 💡 SAMM: Spectral-Aware Model Merging
    Algorithm: Universal Weight Subspace via FARMS (Fixed-Aspect-Ratio Matrix Subsampling)

    This tool merges multiple LoRA adapters by identifying their shared spectral directions (the "Universal Subspace")
    and projecting the weights into this noise-filtered subspace before averaging.
    """)

    with gr.Row():
        base_model_input = gr.Textbox(label="Base Model ID", value="mistralai/Mistral-7B-v0.1")
        hf_token_input = gr.Textbox(label="HF Write Token", type="password")

    loras_input = gr.Textbox(label="LoRA Adapter IDs (comma separated)",
                             placeholder="user/lora1, user/lora2, user/lora3...", lines=3)

    merge_btn = gr.Button("Perform Spectral Merge", variant="primary")
    output_log = gr.Textbox(label="Merge Logs", lines=10)

    merge_btn.click(fn=run_samm_merge,
                    inputs=[base_model_input, loras_input, hf_token_input],
                    outputs=output_log)

if __name__ == "__main__":
    demo.queue().launch()
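For completeness, a Space running this file would need roughly the following dependencies (an assumed requirements.txt; the file itself is not part of this commit):

gradio
torch
numpy
scipy
safetensors
huggingface_hub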