Delete snp_universal_embedding.py
Browse files
snp_universal_embedding.py  +0 -148  (DELETED)

@@ -1,148 +0,0 @@
# -*- coding: utf-8 -*-
"""SNP-Universal-Embedding.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1z8p0PYKMZjd6IZ2FEgxtRddl7t_52iFA
"""

!pip uninstall -y tokenizers transformers sentence-transformers
!pip cache purge

!pip install -q torch==2.8.0+cu126 torchvision==0.23.0+cu126 torchaudio==2.8.0+cu126 --index-url https://download.pytorch.org/whl/cu126
!pip install -q tokenizers==0.19.1 transformers==4.40.1 sentence-transformers==2.6.1

import torch
from sentence_transformers import SentenceTransformer
from sentence_transformers.models import Pooling
from transformers import AutoTokenizer, AutoModel

print("✅ Environment ready")
print("Torch:", torch.__version__)

import torch.nn as nn

class CustomSNPModel(nn.Module):
    def __init__(self, base_model="roberta-base"):
        super().__init__()
        self.shared_encoder = AutoModel.from_pretrained(base_model)
        hidden_size = self.shared_encoder.config.hidden_size
        self.mirror_head = nn.Sequential(nn.Linear(hidden_size, hidden_size), nn.Tanh())
        self.prism_head = nn.Sequential(nn.Linear(hidden_size, hidden_size), nn.Tanh())
        self.projection = nn.Linear(hidden_size, 6)  # output dimension is 6

    def forward(self, input_ids, attention_mask=None, token_type_ids=None):
        outputs = self.shared_encoder(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        cls = outputs.last_hidden_state[:, 0, :]  # [CLS] embedding
        mirror = self.mirror_head(cls)  # computed but not returned
        prism = self.prism_head(cls)    # computed but not returned

        # 🧩 Instead of combining the 768-D and 6-D tensors, output only the 6-D Prism embedding
        return self.projection(cls)

print("✅ SNP architecture defined.")

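# A minimal smoke test of the architecture defined above, with random weights:
# the dummy batch and sequence length are illustrative, chosen only to confirm
# that forward() returns a [batch, 6] tensor before any checkpoint is loaded.
dummy_ids = torch.randint(0, 1000, (2, 8))  # fake token ids, shape [batch=2, seq_len=8]
smoke_model = CustomSNPModel()              # defaults to roberta-base
with torch.no_grad():
    out = smoke_model(input_ids=dummy_ids, attention_mask=torch.ones_like(dummy_ids))
print(out.shape)                            # expected: torch.Size([2, 6])
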
import os

ckpt_path = "/content/custom_snp_model_greene.pt"
assert os.path.exists(ckpt_path), "❌ Greene checkpoint not found."

state_dict = torch.load(ckpt_path, map_location="cpu")

if "projection.weight" in state_dict:
    w = state_dict["projection.weight"]
    if w.shape == torch.Size([768, 6]):  # Greene version
        print("🔁 Transposing projection.weight to match current model shape...")
        state_dict["projection.weight"] = w.T

if "projection.bias" in state_dict:
    b = state_dict["projection.bias"]
    if b.shape == torch.Size([768]):  # Greene version
        print("🔧 Adjusting projection.bias shape to match current model...")
        state_dict["projection.bias"] = b[:6]  # keep the first 6 entries

# Remove distributed-training "module." prefixes, if any
clean_state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()}

model = CustomSNPModel(base_model="bert-base-uncased")
# Tokenizer for the same base model (used by the cells below and saved at export time)
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

missing, unexpected = model.load_state_dict(clean_state_dict, strict=False)
print(f"✅ Checkpoint loaded.\nMissing keys: {len(missing)} | Unexpected: {len(unexpected)}")

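# Why the transpose above is needed: nn.Linear(in_features, out_features) stores its
# weight as [out_features, in_features], so this model's projection expects [6, 768]
# while the Greene checkpoint saved [768, 6]. A quick illustrative check:
layer = nn.Linear(768, 6)
print(layer.weight.shape)         # torch.Size([6, 768])
ckpt_w = torch.randn(768, 6)      # stand-in for the checkpoint tensor
assert ckpt_w.T.shape == layer.weight.shape
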
# ============================================================
# 🔹 Quick Embedding Test for CustomSNPModel
# (Safe version that drops token_type_ids)
# ============================================================

# Example text input
text = "A student must decide between a scholarship and their family."

# Tokenize
inputs = tokenizer(text, return_tensors="pt")

# Remove token_type_ids if your model doesn't expect it
inputs.pop("token_type_ids", None)

# Run inference (eval mode disables dropout for deterministic output)
model.eval()
with torch.no_grad():
    output = model(**inputs)

# Handle different output formats
if isinstance(output, tuple):
    emb = output[0]
elif isinstance(output, dict):
    emb = output.get("pooler_output", output.get("last_hidden_state"))
else:
    emb = output

print("✅ Embedding generated successfully.")
print("Embedding shape:", emb.shape if hasattr(emb, "shape") else type(emb))

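# A follow-up sketch: compare two sentences directly in the 6-D embedding space.
# The second sentence is an illustrative stand-in; any pair of texts works.
import torch.nn.functional as F

texts = [text, "She weighed the grant against staying with her family."]
embs = []
for t in texts:
    batch = tokenizer(t, return_tensors="pt")
    batch.pop("token_type_ids", None)
    with torch.no_grad():
        embs.append(model(**batch))

print("Cosine similarity:", F.cosine_similarity(embs[0], embs[1]).item())
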
import json

EXPORT_DIR = "/content/SNP_Universal_Embedding"
os.makedirs(EXPORT_DIR, exist_ok=True)

# Save model weights
torch.save(model.state_dict(), os.path.join(EXPORT_DIR, "pytorch_model.bin"))

# Save config manually (add your own details)
config = {
    "model_type": "custom_snp",
    "base_model": "bert-base-uncased",
    "embedding_dimension": 6,
    "description": "SNP-Universal-Embedding — distilled from emotional geometry via Substrate-Prism Neuron framework."
}
with open(os.path.join(EXPORT_DIR, "config.json"), "w") as f:
    json.dump(config, f, indent=4)

# Save tokenizer
tokenizer.save_pretrained(EXPORT_DIR)

print("✅ Model and tokenizer saved to:", EXPORT_DIR)
!ls -lh $EXPORT_DIR

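# Reloading sketch: the config.json written above is a hand-rolled description, not a
# Hugging Face AutoConfig, so the folder cannot be reloaded via AutoModel.from_pretrained.
# Reloading has to mirror the manual save (assumes the CustomSNPModel class is in scope):
with open(os.path.join(EXPORT_DIR, "config.json")) as f:
    cfg = json.load(f)

reloaded = CustomSNPModel(base_model=cfg["base_model"])
reloaded.load_state_dict(torch.load(os.path.join(EXPORT_DIR, "pytorch_model.bin"),
                                    map_location="cpu"))
reloaded.eval()
reloaded_tok = AutoTokenizer.from_pretrained(EXPORT_DIR)  # resolved via tokenizer_config.json
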
import shutil
from google.colab import files

ZIP_PATH = "/content/SNP-Universal-Embedding.zip"
shutil.make_archive("/content/SNP-Universal-Embedding", "zip", EXPORT_DIR)
files.download(ZIP_PATH)