Spaces:

366degrees
/

snp-universal-embedding

Sleeping

App Files Files Community

PunchNFIT committed on Oct 31, 2025

Commit

bcede05

1 Parent(s): 96c00df

Initial commit - SNP Universal Embedding model

Browse files

Files changed (7) hide show

Dockerfile +25 -0
api_inference.py +84 -0
config.json +6 -0
pytorch_model.bin +3 -0
requirements.txt +7 -0
snp_universal_embedding.py +148 -0
tokenizer.json +0 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,25 @@

+FROM python:3.10-slim
+# api_inference.py reads its listen port from the PORT environment variable.
+# Pin it to the port exposed below so the server and the container agree.
+ENV PORT=7860
+# Make absolutely sure the working directory exists
+RUN mkdir -p /app
+WORKDIR /app
+# Print working directory (for debugging); this lists the image's /app at this
+# point in the build, not the build context.
+RUN echo "✅ Building from context:" && pwd && ls -R
+# Install dependencies first so this layer stays cached when only code changes
+COPY requirements.txt /app/requirements.txt
+RUN pip install --no-cache-dir -r /app/requirements.txt
+# Copy specific files explicitly by name
+COPY api_inference.py /app/api_inference.py
+COPY snp_universal_embedding.py /app/snp_universal_embedding.py
+COPY config.json /app/config.json
+COPY tokenizer.json /app/tokenizer.json
+COPY pytorch_model.bin /app/pytorch_model.bin
+# Expose Hugging Face port
+EXPOSE 7860
+# Run the app
+CMD ["python", "/app/api_inference.py"]

api_inference.py ADDED Viewed

	@@ -0,0 +1,84 @@

+# api_inference.py
+import torch
+from transformers import AutoTokenizer, AutoModel, AutoConfig
+from flask import Flask, request, jsonify
+import os, json
+app = Flask(__name__)
+# === Load Model ===
+MODEL_DIR = os.path.dirname(os.path.abspath(__file__))
+print(f"🔍 Loading model from {MODEL_DIR} ...")
+try:
+    # --- Register your custom model class ---
+    from transformers.models.auto.modeling_auto import MODEL_MAPPING
+    from snp_universal_embedding import CustomSNPModel
+    # Register custom class to handle 'custom_snp' type
+    class DummyConfig(AutoConfig):
+        model_type = "custom_snp"
+    MODEL_MAPPING.register(DummyConfig, CustomSNPModel)
+    # Load model and tokenizer
+    config = AutoConfig.from_pretrained(MODEL_DIR, trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
+    model = AutoModel.from_pretrained(MODEL_DIR, config=config, trust_remote_code=True)
+    model.eval()
+    print("✅ Custom SNP model loaded successfully.")
+except Exception as e:
+    print("❌ Error loading custom model:", e)
+    raise e
+# === Define Endpoints ===
+@app.route("/")
+def index():
+    return jsonify({
+        "status": "SNP Universal Embedding API running",
+        "endpoints": ["/embed", "/reason"]
+    })
+@app.route("/embed", methods=["POST"])
+def embed():
+    try:
+        data = request.get_json()
+        text = data.get("text", "")
+        if not text:
+            return jsonify({"error": "No text provided"}), 400
+        inputs = tokenizer(text, return_tensors="pt")
+        with torch.no_grad():
+            outputs = model(**inputs)
+            if isinstance(outputs, dict) and "last_hidden_state" in outputs:
+                embedding = outputs["last_hidden_state"].mean(dim=1).squeeze().tolist()
+            else:
+                embedding = outputs.mean(dim=1).squeeze().tolist()
+        return jsonify({"embedding": embedding})
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+@app.route("/health")
+def health():
+    return "ok", 200
+@app.route("/reason", methods=["POST"])
+def reason():
+    data = request.get_json()
+    text = data.get("text", "")
+    return jsonify({
+        "text": text,
+        "reasoning_status": "Feature in development for SNP reasoning structure"
+    })
+if __name__ == "__main__":
+    port = int(os.environ.get("PORT", 8080))
+    app.run(host="0.0.0.0", port=port)

config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+    "model_type": "custom_snp",
+    "base_model": "bert-base-uncased",
+    "embedding_dimension": 6,
+    "description": "SNP-Universal-Embedding \u2014 distilled from emotional geometry via Substrate-Prism Neuron framework."
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:20835793a996aa7a73fa44deb791be89a646e29769de91fae4e438e2234ea56e
+size 442758231

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+# requirements.txt
+torch
+transformers
+sentence-transformers
+flask
+numpy
+scikit-learn

snp_universal_embedding.py ADDED Viewed

	@@ -0,0 +1,148 @@

+# -*- coding: utf-8 -*-
+"""SNP-Universal-Embedding.ipynb
+Automatically generated by Colab.
+Original file is located at
+    https://colab.research.google.com/drive/1z8p0PYKMZjd6IZ2FEgxtRddl7t_52iFA
+"""
+!pip uninstall -y tokenizers transformers sentence-transformers
+!pip cache purge
+!pip install -q torch==2.8.0+cu126 torchvision==0.23.0+cu126 torchaudio==2.8.0+cu126 --index-url https://download.pytorch.org/whl/cu126
+!pip install -q tokenizers==0.19.1 transformers==4.40.1 sentence-transformers==2.6.1
+!pip install -q torch==2.8.0+cu126 torchvision==0.23.0+cu126 torchaudio==2.8.0+cu126 --index-url https://download.pytorch.org/whl/cu126
+!pip install -q tokenizers==0.19.1 transformers==4.40.1 sentence-transformers==2.6.1
+import torch
+from sentence_transformers import SentenceTransformer
+from sentence_transformers.models import Pooling
+from transformers import AutoTokenizer, AutoModel
+print("✅ Environment ready")
+print("Torch:", torch.__version__)
+import torch.nn as nn
+from transformers import AutoModel
+class CustomSNPModel(nn.Module):
+    def __init__(self, base_model="roberta-base"):
+        super().__init__()
+        self.shared_encoder = AutoModel.from_pretrained(base_model)
+        hidden_size = self.shared_encoder.config.hidden_size
+        self.mirror_head = nn.Sequential(nn.Linear(hidden_size, hidden_size), nn.Tanh())
+        self.prism_head  = nn.Sequential(nn.Linear(hidden_size, hidden_size), nn.Tanh())
+        self.projection  = nn.Linear(hidden_size, 6) # Changed output dimension to 6
+    def forward(self, input_ids, attention_mask=None, token_type_ids=None):
+        outputs = self.shared_encoder(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            token_type_ids=token_type_ids
+        )
+        cls = outputs.last_hidden_state[:, 0, :]  # [CLS] embedding
+        mirror = self.mirror_head(cls)
+        prism  = self.prism_head(cls)
+        proj   = self.projection(cls)
+        # 🧩 Instead of combining 768 and 6-D tensors, just output your 6-D Prism embedding
+        return proj
+print("✅ SNP architecture defined.")
+import os
+import torch
+from sentence_transformers import SentenceTransformer
+from sentence_transformers.models import Pooling
+from transformers import AutoTokenizer, AutoModel
+ckpt_path = "/content/custom_snp_model_greene.pt"
+assert os.path.exists(ckpt_path), "❌ Greene checkpoint not found."
+state_dict = torch.load(ckpt_path, map_location="cpu")
+if "projection.weight" in state_dict:
+    w = state_dict["projection.weight"]
+    if w.shape == torch.Size([768, 6]):  # Greene version
+        print("🔁 Transposing projection.weight to match current model shape...")
+        state_dict["projection.weight"] = w.T
+if "projection.bias" in state_dict:
+    b = state_dict["projection.bias"]
+    if b.shape == torch.Size([768]):  # Greene version
+        print("🔧 Adjusting projection.bias shape to match current model...")
+        state_dict["projection.bias"] = b[:6]  # keep first 6 or reshape accordingly
+# Remove distributed prefixes if any
+clean_state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()}
+model = CustomSNPModel(base_model="bert-base-uncased")
+missing, unexpected = model.load_state_dict(clean_state_dict, strict=False)
+print(f"✅ Checkpoint loaded.\nMissing keys: {len(missing)} | Unexpected: {len(unexpected)}")
+# ============================================================
+# 🔹 Quick Embedding Test for CustomSNPModel
+# (Safe version that drops token_type_ids)
+# ============================================================
+import torch
+# Example text input
+text = "A student must decide between a scholarship and their family."
+# Tokenize
+inputs = tokenizer(text, return_tensors="pt")
+# Remove token_type_ids if your model doesn't expect it
+if "token_type_ids" in inputs:
+    del inputs["token_type_ids"]
+# Run inference
+with torch.no_grad():
+    output = model(**inputs)
+# Handle different output formats
+if isinstance(output, tuple):
+    emb = output[0]
+elif isinstance(output, dict):
+    emb = output.get("pooler_output", output.get("last_hidden_state"))
+else:
+    emb = output
+print("✅ Embedding generated successfully.")
+print("Embedding shape:", emb.shape if hasattr(emb, "shape") else type(emb))
+# --- Export cell: write weights, config and tokenizer into EXPORT_DIR ---
+import os, torch, json
+from transformers import AutoTokenizer
+EXPORT_DIR = "/content/SNP_Universal_Embedding"
+os.makedirs(EXPORT_DIR, exist_ok=True)
+# Save model weights
+torch.save(model.state_dict(), os.path.join(EXPORT_DIR, "pytorch_model.bin"))
+# Save config manually (add your own details)
+# NOTE(review): "custom_snp" is a project-specific model_type; whatever loads this
+# config must handle it explicitly.
+config = {
+    "model_type": "custom_snp",
+    "base_model": "bert-base-uncased",
+    "embedding_dimension": 6,
+    "description": "SNP-Universal-Embedding — distilled from emotional geometry via Substrate-Prism Neuron framework."
+}
+with open(os.path.join(EXPORT_DIR, "config.json"), "w") as f:
+    json.dump(config, f, indent=4)
+# Save tokenizer
+# NOTE(review): `tokenizer` must already exist at this point; confirm an earlier
+# cell defines it.
+tokenizer.save_pretrained(EXPORT_DIR)
+print("✅ Model and tokenizer saved to:", EXPORT_DIR)
+# NOTE(review): the next line is a notebook shell magic, not valid Python outside
+# Colab/IPython.
+!ls -lh $EXPORT_DIR
+# --- Download cell: zip EXPORT_DIR and trigger a download (Colab only) ---
+import shutil
+from google.colab import files
+ZIP_PATH = "/content/SNP-Universal-Embedding.zip"
+# make_archive appends ".zip" to the base name, producing ZIP_PATH.
+shutil.make_archive("/content/SNP-Universal-Embedding", 'zip', EXPORT_DIR)
+files.download(ZIP_PATH)

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff