Update app.py
app.py CHANGED
@@ -72,17 +72,33 @@ def cleanup_temp():
     os.makedirs(TempDir, exist_ok=True)
     gc.collect()
 
+def verify_safetensors(path):
+    """Checks if a file is a valid safetensors file."""
+    try:
+        with open(path, "rb") as f:
+            header_size_bytes = f.read(8)
+            if len(header_size_bytes) != 8: return False
+            header_size = struct.unpack("<Q", header_size_bytes)[0]
+            if header_size > os.path.getsize(path) or header_size <= 0:
+                return False
+        return True
+    except:
+        return False
+
 def download_file(input_path, token, filename=None):
     """Downloads a file from URL or HF Repo."""
     local_path = TempDir / (filename if filename else "model.safetensors")
 
     if input_path.startswith("http"):
         print(f"Downloading from URL: {input_path}")
-
-
-
-
-
+        try:
+            response = requests.get(input_path, stream=True, timeout=30)
+            response.raise_for_status()
+            with open(local_path, 'wb') as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    f.write(chunk)
+        except Exception as e:
+            raise ValueError(f"Failed to download URL. Check your link. Error: {e}")
     else:
         print(f"Downloading from Repo: {input_path}")
         if not filename:
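Note: verify_safetensors leans on the safetensors container layout, where a file begins with a little-endian uint64 giving the length of a JSON header; a length of zero or one larger than the file itself flags a bogus download (e.g. an HTML error page saved under a .safetensors name). A minimal sketch of reading that header, for illustration only (the committed version's bare `except:` also swallows KeyboardInterrupt; `except Exception` is the safer idiom):

    import json, struct

    def read_safetensors_header(path):
        # Layout: 8-byte little-endian uint64 header length, then that many
        # bytes of JSON mapping tensor names to dtype/shape/data offsets.
        with open(path, "rb") as f:
            (header_size,) = struct.unpack("<Q", f.read(8))
            return json.loads(f.read(header_size))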
@@ -95,22 +111,24 @@ def download_file(input_path, token, filename=None):
                     if "adapter" in f: filename = f
                 else:
                     filename = "adapter_model.bin"
-            except:
+            except Exception as e:
                 filename = "adapter_model.safetensors"
 
-
-
-
-        if local_path
-
-
+    try:
+        hf_hub_download(repo_id=input_path, filename=filename, token=token, local_dir=TempDir, local_dir_use_symlinks=False)
+        downloaded_path = TempDir / filename
+        if downloaded_path != local_path:
+            if local_path.exists(): os.remove(local_path)
+            shutil.move(downloaded_path, local_path)
+    except Exception as e:
+        raise ValueError(f"Failed to download from HF Repo. Check ID/Token. Error: {e}")
+
+    if not verify_safetensors(local_path):
+        raise ValueError(f"Downloaded file is NOT a valid safetensors file. Check your URL/Repo. (File size: {os.path.getsize(local_path)} bytes)")
+
     return local_path
 
 def get_key_stem(key):
-    """
-    Normalizes a key to its structural stem by removing known prefixes and suffixes.
-    matches 'layers.0.attention' with 'model.diffusion_model.layers.0.attention'.
-    """
     key = key.replace(".weight", "").replace(".bias", "")
     key = key.replace(".lora_down", "").replace(".lora_up", "")
     key = key.replace(".lora_A", "").replace(".lora_B", "")
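Note: the deleted docstring still describes the intent: get_key_stem reduces both LoRA and base keys to a shared structural stem so that, e.g., 'layers.0.attention' can match 'model.diffusion_model.layers.0.attention'. Only the suffix stripping is visible in this hunk (prefix handling lives in lines not shown); illustrative calls:

    # Both reduce toward the same comparable stem (illustrative only):
    get_key_stem("layers.0.attention.lora_down.weight")  # -> "layers.0.attention"
    get_key_stem("layers.0.attention.weight")            # -> "layers.0.attention"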
@@ -135,8 +153,8 @@ def get_key_stem(key):
 # TAB 1: UNIVERSAL MERGE (In-Place Memory Optimization)
 # =================================================================================
 
-def load_lora_to_memory(lora_path):
-    print(f"Loading LoRA from {lora_path}...")
+def load_lora_to_memory(lora_path, precision_dtype=torch.bfloat16):
+    print(f"Loading LoRA from {lora_path} in {precision_dtype}...")
     state_dict = load_file(lora_path, device="cpu")
 
     pairs = {}
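Note: the new precision_dtype parameter lets the LoRA weights be held at half the footprint of fp32 from the moment they are loaded. The per-element saving, as a quick standalone check:

    import torch

    t32 = torch.randn(1024, 1024)        # fp32: 4 bytes/element (~4 MiB here)
    t16 = t32.to(dtype=torch.bfloat16)   # bf16: 2 bytes/element (~2 MiB)
    print(t32.element_size(), t16.element_size())  # 4 2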
@@ -149,11 +167,15 @@ def load_lora_to_memory(lora_path):
         else:
             if stem not in pairs:
                 pairs[stem] = {}
+
+            # Cast immediately to save RAM
+            tensor_low = v.to(dtype=precision_dtype)
+
             if "lora_down" in k or "lora_A" in k:
-                pairs[stem]["down"] = v
+                pairs[stem]["down"] = tensor_low
                 pairs[stem]["rank"] = v.shape[0]
             elif "lora_up" in k or "lora_B" in k:
-                pairs[stem]["up"] = v
+                pairs[stem]["up"] = tensor_low
 
     for stem in pairs:
         if stem in alphas:
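Note: after this loop each stem holds both halves of a LoRA pair plus its rank, taken from the first dimension of the down projection. A hypothetical entry (field names from the diff; shapes and sizes assumed from the usual LoRA convention of down: [rank, in_features], up: [out_features, rank]):

    import torch

    pairs = {
        "layers.0.attention": {
            "down": torch.zeros(16, 4096, dtype=torch.bfloat16),
            "up":   torch.zeros(4096, 16, dtype=torch.bfloat16),
            "rank": 16,  # v.shape[0] of the down tensor
        },
    }
    # "alpha" is attached by the `for stem in pairs` loop at the end of this hunk.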
@@ -166,15 +188,15 @@ def load_lora_to_memory(lora_path):
 
     return pairs
 
-def merge_shard_logic(base_path, lora_pairs, scale, output_path):
+def merge_shard_logic(base_path, lora_pairs, scale, output_path, precision_dtype=torch.bfloat16):
     print(f"Loading base shard: {base_path}")
-    # Load base state into RAM. This is the peak memory usage point.
     base_state = load_file(base_path, device="cpu")
 
     lora_keys = set(lora_pairs.keys())
     keys_to_process = list(base_state.keys())
 
     for k in keys_to_process:
+        # Don't detach v yet, we modify in place
         v = base_state[k]
         base_stem = get_key_stem(k)
         match = None
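Note: list(base_state.keys()) snapshots the keys so the loop can overwrite (or drop) entries of base_state while iterating, and the removed comment made the memory story explicit: the load_file call above is the peak-RAM point, with every subsequent update done in place. The update itself sits outside the shown hunks; a hypothetical shape of it:

    # Hypothetical in-place assignment (not shown in this diff):
    base_state[k] = (v.float() + delta.float()).to(v.dtype)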
@@ -195,6 +217,7 @@ def merge_shard_logic(base_path, lora_pairs, scale, output_path):
             if qkv_stem in lora_keys: match = lora_pairs[qkv_stem]
 
         if match and "down" in match and "up" in match:
+            # Weights are already in precision_dtype from load_lora_to_memory
             down = match["down"]
             up = match["up"]
             alpha = match["alpha"]
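Note: what follows is the standard LoRA merge, W' = W + scale * (alpha / rank) * (up @ down). For a plain 2-D linear weight the whole update reduces to the sketch below (scaling is assumed to be scale * alpha / rank, consistent with the stored rank and alpha fields; base_weight stands in for the shard tensor v):

    delta = (up @ down) * (scale * alpha / rank)   # [out, r] @ [r, in] -> [out, in]
    merged = base_weight + delta.to(base_weight.dtype)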
@@ -207,6 +230,7 @@ def merge_shard_logic(base_path, lora_pairs, scale, output_path):
                 down = down.unsqueeze(-1).unsqueeze(-1)
                 up = up.unsqueeze(-1).unsqueeze(-1)
 
+            # Compute Delta in Low Precision
             try:
                 if len(up.shape) == 4:
                     delta = (up.squeeze() @ down.squeeze()).reshape(up.shape[0], down.shape[1], 1, 1)
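Note: the 4-D branch handles 1x1 convolution weights: both factors are squeezed to 2-D, matrix-multiplied, and reshaped back to conv form. A shape walk-through with hypothetical sizes:

    import torch

    up_   = torch.zeros(320, 16, 1, 1)    # [out_channels, rank, 1, 1]
    down_ = torch.zeros(16, 128, 1, 1)    # [rank, in_channels, 1, 1]
    delta = (up_.squeeze() @ down_.squeeze()).reshape(up_.shape[0], down_.shape[1], 1, 1)
    print(delta.shape)  # torch.Size([320, 128, 1, 1])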
@@ -217,9 +241,9 @@ def merge_shard_logic(base_path, lora_pairs, scale, output_path):
 
             delta = delta * scaling
 
-            # --- Dynamic Reshaping / Slicing ---
             valid_delta = True
 
+            # --- Dynamic Reshaping / Slicing ---
             if delta.shape == v.shape:
                 pass
             elif delta.shape[0] == v.shape[0] * 3:
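Note: the delta.shape[0] == v.shape[0] * 3 case covers a LoRA trained against a fused QKV projection being applied to a shard that stores q, k, and v separately, so only the matching third of the delta should be added. The branch body is outside this hunk; a hypothetical version of the slice:

    # Hypothetical slice for a split-projection shard; `proj` would be
    # parsed from the key name k (not shown in this diff):
    rows = v.shape[0]
    idx = ("q", "k", "v").index(proj)
    delta = delta[idx * rows:(idx + 1) * rows]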
@@ -260,11 +284,11 @@ def merge_shard_logic(base_path, lora_pairs, scale, output_path):
     save_file(base_state, output_path)
     return True
 
-def task_merge(hf_token, base_repo, base_subfolder, lora_input, scale, output_repo, structure_repo, private, progress=gr.Progress()):
+def task_merge(hf_token, base_repo, base_subfolder, lora_input, scale, output_repo, structure_repo, private, precision, progress=gr.Progress()):
     cleanup_temp()
     login(hf_token)
-
-
+
+    # Determine Dtype
     if precision == "bf16":
         dtype = torch.bfloat16
     elif precision == "fp16":
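Note: this is the functional fix of the commit: the body already branched on precision, but the old signature never accepted it, which would raise a NameError at call time unless a global of that name happened to exist. The new parameter threads the UI choice through. The same mapping could be written as a lookup (a sketch, not what the commit uses):

    dtype = {"bf16": torch.bfloat16, "fp16": torch.float16}.get(precision, torch.float32)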
@@ -273,7 +297,7 @@ def task_merge(hf_token, base_repo, base_subfolder, lora_input, scale, output_re
         dtype = torch.float32
 
     print(f"Selected Precision: {dtype}")
-
+
     try:
         api.create_repo(repo_id=output_repo, private=private, exist_ok=True, token=hf_token)
     except Exception as e:
@@ -311,14 +335,12 @@ def task_merge(hf_token, base_repo, base_subfolder, lora_input, scale, output_re
         progress(0.2 + (0.8 * i/len(shards)), desc=f"Merging {shard}")
         local_shard = hf_hub_download(repo_id=base_repo, filename=shard, token=hf_token, local_dir=TempDir)
         merged_path = TempDir / "merged.safetensors"
-
-
+
+        # Pass precision preference
         merge_shard_logic(local_shard, lora_pairs, scale, merged_path, precision_dtype=dtype)
 
-        # Upload
         api.upload_file(path_or_fileobj=merged_path, path_in_repo=shard, repo_id=output_repo, token=hf_token)
 
-        # Cleanup immediately
         os.remove(local_shard)
         if merged_path.exists(): os.remove(merged_path)
         gc.collect()
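Note: the shard loop's discipline is what keeps the Space inside its RAM and disk budget: one base shard is downloaded, merged at the chosen dtype, uploaded, then deleted before the next begins, so peak usage tracks the largest single shard rather than the whole model. In outline (helper names hypothetical):

    # Outline of the loop's streaming pattern (fetch/merge/push/cleanup
    # are hypothetical stand-ins for the calls above):
    for i, shard in enumerate(shards):
        local = fetch(shard)       # at most one base shard on disk
        merge(local, merged)       # peak RAM ~ one shard + the LoRA pairs
        push(merged)               # upload before starting the next shard
        cleanup(local, merged)     # delete both files, then gc.collect()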