Upload save_jirack_as_safe_tensors.py
Browse files
save_jirack_as_safe_tensors.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2025 CMS Manhattan
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
# Author: Konstantin Vladimirovich Grabko
|
| 4 |
+
# Email: grabko@cmsmanhattan.com
|
| 5 |
+
# Phone: +1(516)777-0945
|
| 6 |
+
#
|
| 7 |
+
# This program is free software: you can redistribute it and/or modify
|
| 8 |
+
# it under the terms of the GNU General Public License as published by
|
| 9 |
+
# the Free Software Foundation, version 3 of the License.
|
| 10 |
+
#
|
| 11 |
+
# This program is distributed in the hope that it will be useful,
|
| 12 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
# GNU General Public License for more details.
|
| 15 |
+
#
|
| 16 |
+
# You should have received a copy of the GNU General Public License
|
| 17 |
+
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
| 18 |
+
#
|
| 19 |
+
# Additional terms:
|
| 20 |
+
# Any commercial use or distribution of this software or derivative works
|
| 21 |
+
# requires explicit written permission from the copyright holder.
|
| 22 |
+
|
| 23 |
+
import json
import os

import torch
from safetensors.torch import save_file
|
| 26 |
+
|
| 27 |
+
def save_jirack_sharded(model, directory="jirack_weights"):
    """
    Convert the model's current weights into a two-shard safetensors layout.

    Writes into *directory*:
      - model-00001-of-00002.safetensors  (first half of the sorted keys)
      - model-00002-of-00002.safetensors  (second half)
      - model.safetensors.index.json      (weight_map + total_size, so standard
        Hugging Face loaders can resolve which shard holds each tensor)

    Args:
        model: a torch.nn.Module; its state_dict() is serialized.
        directory: output directory, created if it does not exist.
    """
    # exist_ok=True is race-free, unlike a separate exists() check + makedirs().
    os.makedirs(directory, exist_ok=True)

    # save_file rejects tensors that share storage or are non-contiguous,
    # so take detached contiguous copies of every entry up front.
    state_dict = {k: v.detach().contiguous() for k, v in model.state_dict().items()}
    keys = sorted(state_dict)

    # Split keys into two halves for sharding (keeps the original 2-shard layout).
    mid = len(keys) // 2
    shards = {
        "model-00001-of-00002.safetensors": {k: state_dict[k] for k in keys[:mid]},
        "model-00002-of-00002.safetensors": {k: state_dict[k] for k in keys[mid:]},
    }

    weight_map = {}
    total_size = 0
    for shard_no, (fname, shard) in enumerate(shards.items(), start=1):
        path = os.path.join(directory, fname)
        print(f"Saving Shard {shard_no} ({len(shard)} keys) -> {path}")
        save_file(shard, path)
        for key, tensor in shard.items():
            weight_map[key] = fname
            total_size += tensor.numel() * tensor.element_size()

    # Without this index file, sharded safetensors checkpoints cannot be
    # located/loaded by the standard transformers/safetensors tooling.
    index_path = os.path.join(directory, "model.safetensors.index.json")
    with open(index_path, "w", encoding="utf-8") as f:
        json.dump(
            {"metadata": {"total_size": total_size}, "weight_map": weight_map},
            f,
            indent=2,
        )

    # Authorship method comes from the project's model class; guard it so a
    # model without it doesn't crash after the shards were already written.
    author = getattr(model, "get_author_info", lambda: "unknown")()
    print(f"Done. Model by {author} is now sharded.")
+
# Run as a standalone script to convert an existing .pt checkpoint:
if __name__ == "__main__":
    # Import your specific model class
    from JiRackPyTorch_GPT5_class_1b import JiRackPyTorch
    # from JiRackPyTorch_GPT5_class_3b import JiRackPyTorch

    # 1. Initialize the new architecture (with RoPE, SWA, etc.)
    model = JiRackPyTorch()

    # 2. Load existing .pt weights if they exist (optional).
    # strict=False lets old checkpoints load even if parameters added in the
    # new architecture (e.g. RoPE buffers) are absent from the old file.
    old_weights = "old_model_1b.pt"
    if os.path.exists(old_weights):
        print(f"Merging old weights from {old_weights}...")
        # weights_only=True: a plain state dict needs no arbitrary pickled
        # code, so refuse it — avoids code execution from a tampered file.
        state = torch.load(old_weights, map_location="cpu", weights_only=True)
        result = model.load_state_dict(state, strict=False)
        # Surface what strict=False silently tolerated, so a bad merge is visible.
        if result.missing_keys:
            print(f"Missing keys (left at init values): {result.missing_keys}")
        if result.unexpected_keys:
            print(f"Unexpected keys (ignored): {result.unexpected_keys}")

    # 3. Save to the new sharded safetensors format
    save_jirack_sharded(model)