366degrees
/

snp-universal-embedding

@@ -1,24 +1,20 @@
-# Dockerfile
-# Use the base image you selected
-FROM huggingface/transformers-pytorch-cpu:latest
-# Set working directory inside the container
 WORKDIR /app
-# Copy the requirements file first
-COPY requirements.txt .
-# Install dependencies from your *fixed* requirements.txt
-RUN pip install --no-cache-dir -r requirements.txt
-# Copy all your model files and scripts into the container
 COPY . .
-# Expose the port your Flask app runs on
-EXPOSE 7860
-# Set environment variable to trust your custom code
-ENV HF_TRUST_REMOTE_CODE=true
-# Run your api_inference.py script
-CMD ["python", "api_inference.py"]

+# Use lightweight Python base image
+FROM python:3.10-slim
+# Set working directory
 WORKDIR /app
+# Copy all local files into container
 COPY . .
+# Ensure custom architecture file is available
+COPY ./snp_universal_embedding.py /app/snp_universal_embedding.py
+# Install dependencies
+RUN pip install --no-cache-dir torch transformers flask sentence-transformers
+# Set the port the server listens on (Cloud Run's default); ENV only sets the variable, it does not publish the port
+ENV PORT=8080
+# Run the API
+CMD ["python", "api_inference.py"]

api_inference.py CHANGED Viewed

@@ -2,7 +2,7 @@
 import torch
 from transformers import AutoTokenizer, AutoModel, AutoConfig
 from flask import Flask, request, jsonify
-import os
 app = Flask(__name__)
@@ -12,15 +12,28 @@ MODEL_DIR = os.path.dirname(os.path.abspath(__file__))
 print(f"🔍 Loading model from {MODEL_DIR} ...")
 try:
-    config = AutoConfig.from_pretrained(MODEL_DIR)
     tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
     model = AutoModel.from_pretrained(MODEL_DIR, config=config, trust_remote_code=True)
     model.eval()
-    print("✅ SNP Universal Embedding model loaded successfully.")
 except Exception as e:
-    print("❌ Error loading model:", e)
     raise e
 # === Define Endpoints ===
 @app.route("/")
 def index():
@@ -29,6 +42,7 @@ def index():
         "endpoints": ["/embed", "/reason"]
     })
 @app.route("/embed", methods=["POST"])
 def embed():
     try:
@@ -40,12 +54,16 @@ def embed():
         inputs = tokenizer(text, return_tensors="pt")
         with torch.no_grad():
             outputs = model(**inputs)
-            embedding = outputs.last_hidden_state.mean(dim=1).squeeze().tolist()
         return jsonify({"embedding": embedding})
     except Exception as e:
         return jsonify({"error": str(e)}), 500
 @app.route("/health")
 def health():
     return "ok", 200
@@ -60,6 +78,7 @@ def reason():
         "reasoning_status": "Feature in development for SNP reasoning structure"
     })
 if __name__ == "__main__":
-    port = int(os.environ.get("PORT", 7860))
     app.run(host="0.0.0.0", port=port)

 import torch
 from transformers import AutoTokenizer, AutoModel, AutoConfig
 from flask import Flask, request, jsonify
+import os, json
 app = Flask(__name__)
 print(f"🔍 Loading model from {MODEL_DIR} ...")
 try:
+    # --- Register your custom model class ---
+    from transformers.models.auto.modeling_auto import MODEL_MAPPING
+    from snp_universal_embedding import CustomSNPModel
+    # Register custom class to handle 'custom_snp' type
+    class DummyConfig(AutoConfig):
+        model_type = "custom_snp"
+    MODEL_MAPPING.register(DummyConfig, CustomSNPModel)
+    # Load model and tokenizer
+    config = AutoConfig.from_pretrained(MODEL_DIR, trust_remote_code=True)
     tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
     model = AutoModel.from_pretrained(MODEL_DIR, config=config, trust_remote_code=True)
     model.eval()
+    print("✅ Custom SNP model loaded successfully.")
 except Exception as e:
+    print("❌ Error loading custom model:", e)
     raise e
 # === Define Endpoints ===
 @app.route("/")
 def index():
         "endpoints": ["/embed", "/reason"]
     })
 @app.route("/embed", methods=["POST"])
 def embed():
     try:
         inputs = tokenizer(text, return_tensors="pt")
         with torch.no_grad():
             outputs = model(**inputs)
+            if isinstance(outputs, dict) and "last_hidden_state" in outputs:
+                embedding = outputs["last_hidden_state"].mean(dim=1).squeeze().tolist()
+            else:
+                embedding = outputs.mean(dim=1).squeeze().tolist()
         return jsonify({"embedding": embedding})
     except Exception as e:
         return jsonify({"error": str(e)}), 500
 @app.route("/health")
 def health():
     return "ok", 200
         "reasoning_status": "Feature in development for SNP reasoning structure"
     })
 if __name__ == "__main__":
+    port = int(os.environ.get("PORT", 8080))
     app.run(host="0.0.0.0", port=port)

snp_universal_embedding.py ADDED Viewed

	@@ -0,0 +1,148 @@

+# -*- coding: utf-8 -*-
+"""SNP-Universal-Embedding.ipynb
+Automatically generated by Colab.
+Original file is located at
+    https://colab.research.google.com/drive/1z8p0PYKMZjd6IZ2FEgxtRddl7t_52iFA
+"""
+!pip uninstall -y tokenizers transformers sentence-transformers
+!pip cache purge
+!pip install -q torch==2.8.0+cu126 torchvision==0.23.0+cu126 torchaudio==2.8.0+cu126 --index-url https://download.pytorch.org/whl/cu126
+!pip install -q tokenizers==0.19.1 transformers==4.40.1 sentence-transformers==2.6.1
+!pip install -q torch==2.8.0+cu126 torchvision==0.23.0+cu126 torchaudio==2.8.0+cu126 --index-url https://download.pytorch.org/whl/cu126
+!pip install -q tokenizers==0.19.1 transformers==4.40.1 sentence-transformers==2.6.1
+import torch
+from sentence_transformers import SentenceTransformer
+from sentence_transformers.models import Pooling
+from transformers import AutoTokenizer, AutoModel
+print("✅ Environment ready")
+print("Torch:", torch.__version__)
+import torch.nn as nn
+from transformers import AutoModel
class CustomSNPModel(nn.Module):
    """Transformer encoder followed by a linear projection to a small embedding.

    The module wraps a pretrained encoder loaded with
    ``AutoModel.from_pretrained`` and projects the hidden state of the first
    token down to ``embedding_dim`` values.

    Args:
        base_model: Name or path handed to ``AutoModel.from_pretrained``.
        embedding_dim: Width of the returned embedding. Defaults to 6, the
            value that was previously hard-coded.
    """

    def __init__(self, base_model="roberta-base", embedding_dim=6):
        super().__init__()
        self.shared_encoder = AutoModel.from_pretrained(base_model)
        hidden_size = self.shared_encoder.config.hidden_size
        # forward() does not use the mirror/prism heads. They stay registered
        # so the module's state_dict keys remain the same as before.
        self.mirror_head = nn.Sequential(nn.Linear(hidden_size, hidden_size), nn.Tanh())
        self.prism_head = nn.Sequential(nn.Linear(hidden_size, hidden_size), nn.Tanh())
        self.projection = nn.Linear(hidden_size, embedding_dim)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None):
        """Return the projected first-token embedding for each sequence.

        Args:
            input_ids: Token ids passed straight to the encoder.
            attention_mask: Optional mask passed straight to the encoder.
            token_type_ids: Optional segment ids. Only forwarded when given,
                so encoders whose ``forward`` lacks this keyword still work.

        Returns:
            The output of ``self.projection`` applied to
            ``last_hidden_state[:, 0, :]``.
        """
        encoder_kwargs = {"input_ids": input_ids, "attention_mask": attention_mask}
        if token_type_ids is not None:
            encoder_kwargs["token_type_ids"] = token_type_ids
        outputs = self.shared_encoder(**encoder_kwargs)

        # Hidden state at position 0 (the [CLS]-style token).
        cls = outputs.last_hidden_state[:, 0, :]

        # Only the projection contributes to the result; the mirror/prism
        # heads were previously evaluated here and their outputs discarded.
        return self.projection(cls)
+print("✅ SNP architecture defined.")
+import os
+import torch
+from sentence_transformers import SentenceTransformer
+from sentence_transformers.models import Pooling
+from transformers import AutoTokenizer, AutoModel
# Load the Greene checkpoint and adapt it to the current CustomSNPModel layout.
ckpt_path = "/content/custom_snp_model_greene.pt"
# Raise explicitly instead of asserting: assertions are stripped under `python -O`.
if not os.path.exists(ckpt_path):
    raise FileNotFoundError("❌ Greene checkpoint not found.")

# NOTE(review): torch.load unpickles arbitrary objects. Only load checkpoints
# from a trusted source, and consider weights_only=True where available.
state_dict = torch.load(ckpt_path, map_location="cpu")

# Remove distributed prefixes if any. This runs BEFORE the shape fix-ups so
# that keys such as "module.projection.weight" are also matched below.
clean_state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()}

if "projection.weight" in clean_state_dict:
    w = clean_state_dict["projection.weight"]
    if w.shape == torch.Size([768, 6]):  # Greene version
        print("🔁 Transposing projection.weight to match current model shape...")
        clean_state_dict["projection.weight"] = w.T

if "projection.bias" in clean_state_dict:
    b = clean_state_dict["projection.bias"]
    if b.shape == torch.Size([768]):  # Greene version
        print("🔧 Adjusting projection.bias shape to match current model...")
        # NOTE(review): this keeps only the first 6 of 768 bias values, which
        # discards the rest. Confirm this is the intended mapping.
        clean_state_dict["projection.bias"] = b[:6]

model = CustomSNPModel(base_model="bert-base-uncased")
# strict=False tolerates key mismatches; the counts are printed for inspection.
missing, unexpected = model.load_state_dict(clean_state_dict, strict=False)
print(f"✅ Checkpoint loaded.\nMissing keys: {len(missing)} | Unexpected: {len(unexpected)}")
# ============================================================
# 🔹 Quick Embedding Test for CustomSNPModel
# (Safe version that drops token_type_ids)
# ============================================================
import torch

# Example text input
text = "A student must decide between a scholarship and their family."

# The tokenizer was never created before being used here; build the one that
# matches the base model the CustomSNPModel was instantiated with.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Tokenize
inputs = tokenizer(text, return_tensors="pt")

# Remove token_type_ids if your model doesn't expect it
if "token_type_ids" in inputs:
    del inputs["token_type_ids"]

# Switch to eval mode so dropout is disabled; no_grad() alone does not do that.
model.eval()

# Run inference
with torch.no_grad():
    output = model(**inputs)

# Handle different output formats (tuple, dict-like, or a plain tensor)
if isinstance(output, tuple):
    emb = output[0]
elif isinstance(output, dict):
    emb = output.get("pooler_output", output.get("last_hidden_state"))
else:
    emb = output

print("✅ Embedding generated successfully.")
print("Embedding shape:", emb.shape if hasattr(emb, "shape") else type(emb))
+import os, torch, json
+from transformers import AutoTokenizer
# Export the weights, a hand-written config and the tokenizer into one folder.
EXPORT_DIR = "/content/SNP_Universal_Embedding"
os.makedirs(EXPORT_DIR, exist_ok=True)

# Model weights
weights_file = os.path.join(EXPORT_DIR, "pytorch_model.bin")
torch.save(model.state_dict(), weights_file)

# Config is written by hand because the model is a plain nn.Module.
config = dict(
    model_type="custom_snp",
    base_model="bert-base-uncased",
    embedding_dimension=6,
    description="SNP-Universal-Embedding — distilled from emotional geometry via Substrate-Prism Neuron framework.",
)
config_file = os.path.join(EXPORT_DIR, "config.json")
with open(config_file, "w") as f:
    f.write(json.dumps(config, indent=4))

# Tokenizer files
tokenizer.save_pretrained(EXPORT_DIR)

print("✅ Model and tokenizer saved to:", EXPORT_DIR)
+!ls -lh $EXPORT_DIR
+import shutil
+from google.colab import files
# Zip the export folder and hand the archive to the browser via Colab.
archive_base = "/content/SNP-Universal-Embedding"
ZIP_PATH = "/content/SNP-Universal-Embedding.zip"
shutil.make_archive(archive_base, 'zip', EXPORT_DIR)
files.download(ZIP_PATH)