Maxlegrec
/

ChessLC0

@@ -340,6 +340,9 @@ class BT4(nn.Module):
     def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
         """Build the model from its config and load local weights, best effort.

         The config is resolved with ``AutoConfig.from_pretrained`` and a model is
         constructed from it. Weights are then looked up inside the directory
         ``pretrained_model_name_or_path``: ``model.safetensors`` is preferred and
         ``model.pt`` is the fallback.

         Weight loading never raises. If no weight file is found, or reading or
         applying it fails, a warning is logged and the model is returned with
         the parameters it was constructed with.

         Args:
             pretrained_model_name_or_path: Identifier handed to ``AutoConfig``
                 and used as the directory in which weight files are searched.
             *model_args: Accepted for signature compatibility; not used.
             **kwargs: Accepted for signature compatibility; not used.

         Returns:
             The constructed model, with weights loaded when that succeeded.
         """
         import logging
         import os

         from transformers import AutoConfig

         log = logging.getLogger(__name__)

         # Load config
         config = AutoConfig.from_pretrained(pretrained_model_name_or_path, trust_remote_code=True)
         # Create model with config
         model = cls(config=config)

         try:
             safetensors_path = os.path.join(pretrained_model_name_or_path, "model.safetensors")
             pt_path = os.path.join(pretrained_model_name_or_path, "model.pt")

             state_dict = None
             if os.path.exists(safetensors_path):
                 # Imported lazily so a missing safetensors package only
                 # matters when a safetensors file is actually present.
                 from safetensors.torch import load_file

                 state_dict = load_file(safetensors_path)
             elif os.path.exists(pt_path):
                 # NOTE(security): torch.load unpickles the file; only use it on
                 # checkpoints from a trusted source.
                 checkpoint = torch.load(pt_path, map_location="cpu")
                 state_dict = checkpoint
                 if isinstance(checkpoint, dict):
                     # Some checkpoints wrap the weights under a well-known key.
                     if "state_dict" in checkpoint:
                         state_dict = checkpoint["state_dict"]
                     elif "model" in checkpoint:
                         state_dict = checkpoint["model"]

             if state_dict is None:
                 log.warning(
                     "No weights found at %s or %s; returning model without loaded weights",
                     safetensors_path,
                     pt_path,
                 )
             else:
                 model.load_state_dict(state_dict)
         except Exception:
             # Deliberate best effort: the caller still gets a usable model
             # object, but the failure is no longer invisible.
             log.warning(
                 "Failed to load weights from %r; returning model without loaded weights",
                 pretrained_model_name_or_path,
                 exc_info=True,
             )
         return model

     def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
         """Load model from pretrained checkpoint (required by transformers).

         The config is resolved with ``AutoConfig.from_pretrained`` and a model is
         constructed from it. The weights are then located in one of two ways:

         * If ``pretrained_model_name_or_path`` is an existing directory,
           ``model.safetensors`` inside it is used, falling back to ``model.pt``.
           A ``model.pt`` checkpoint may be a bare state dict or a dict that
           wraps it under ``"state_dict"`` or ``"model"``.
         * Otherwise the value is treated as a Hugging Face Hub repo id and
           ``model.safetensors`` is downloaded from it (there is no ``model.pt``
           fallback for the Hub).

         Weights are applied with ``strict=False``; missing and unexpected keys
         are reported through the ``logging`` module instead of raising.

         Args:
             pretrained_model_name_or_path: Local directory or Hub repo id.
             *model_args: Accepted for signature compatibility; not used.
             **kwargs: Only ``cache_dir`` and ``token`` are read, and only for
                 the Hub download.

         Returns:
             The constructed model with the checkpoint weights applied.

         Raises:
             FileNotFoundError: If a local directory contains neither
                 ``model.safetensors`` nor ``model.pt``.
         """
         import logging
         import os

         from huggingface_hub import hf_hub_download
         from safetensors.torch import load_file
         from transformers import AutoConfig

         log = logging.getLogger(__name__)

         # Load config
         config = AutoConfig.from_pretrained(pretrained_model_name_or_path, trust_remote_code=True)
         # Create model with config
         model = cls(config=config)

         if os.path.isdir(pretrained_model_name_or_path):
             # Local path - try safetensors first
             safetensors_path = os.path.join(pretrained_model_name_or_path, "model.safetensors")
             pt_path = os.path.join(pretrained_model_name_or_path, "model.pt")
             if os.path.exists(safetensors_path):
                 state_dict = load_file(safetensors_path)
             elif os.path.exists(pt_path):
                 # Fall back to pytorch format
                 # NOTE(security): torch.load unpickles the file; only use it on
                 # checkpoints from a trusted source (consider weights_only=True
                 # if the supported torch versions allow it).
                 checkpoint = torch.load(pt_path, map_location="cpu")
                 state_dict = checkpoint
                 if isinstance(checkpoint, dict):
                     # Some checkpoints wrap the weights under a well-known key.
                     if "state_dict" in checkpoint:
                         state_dict = checkpoint["state_dict"]
                     elif "model" in checkpoint:
                         state_dict = checkpoint["model"]
             else:
                 raise FileNotFoundError(
                     f"No weights found: neither {safetensors_path} nor {pt_path} exists"
                 )
         else:
             # Anything that is not an existing directory is treated as a Hub
             # repo id, so ids without a namespace are downloaded as well.
             safetensors_path = hf_hub_download(
                 repo_id=pretrained_model_name_or_path,
                 filename="model.safetensors",
                 cache_dir=kwargs.get("cache_dir", None),
                 token=kwargs.get("token", None),
             )
             state_dict = load_file(safetensors_path)

         # Single load for every source; strict=False tolerates key mismatches,
         # which are reported below rather than raised.
         missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
         if missing_keys:
             log.warning(
                 "Missing keys when loading weights: %d keys: %s",
                 len(missing_keys),
                 missing_keys,
             )
         if unexpected_keys:
             log.warning(
                 "Unexpected keys when loading weights: %d keys: %s",
                 len(unexpected_keys),
                 unexpected_keys,
             )
         return model