"""
coding = utf-8
Copyright 2026 Rikka Botan. All rights reserved
Licensed under "MIT License"
Stable Static Embedding official PyTorch implementation
"""

from __future__ import annotations
import os
from pathlib import Path
from dataclasses import dataclass
from typing import Dict

import numpy as np
import torch
import torch.nn as nn
from safetensors.torch import load_file, save_file as save_safetensors_file
from tokenizers import Tokenizer
from transformers import PreTrainedTokenizerFast
from sentence_transformers.models.InputModule import InputModule


def quantize_q4_k_m(weight: torch.Tensor):
    """
    Quantize an embedding matrix to 4 bits with per-row absmax scaling.

    Note: despite the name, this is a simple symmetric 4-bit scheme
    (per-row scale, no zero-point, no super-blocks), not llama.cpp's
    Q4_K_M format.

    weight: (vocab, dim), dim must be even
    returns: dict with "packed" (vocab, dim // 2) uint8
             and "scales" (vocab, 1) float32
    """
    w = weight.detach().cpu().numpy().astype(np.float32)

    # Per-row absolute maximum; the epsilon guards against all-zero rows.
    scales = np.max(np.abs(w), axis=1, keepdims=True) + 1e-8
    w_norm = w / scales  # now in [-1, 1]

    # Map [-1, 1] onto the 16 levels 0..15.
    q = np.clip(np.round((w_norm + 1) * 7.5), 0, 15).astype(np.uint8)

    # Pack two 4-bit values per byte: even columns in the high nibble.
    packed = (q[:, 0::2] << 4) | q[:, 1::2]

    return {
        "packed": packed,
        "scales": scales.astype(np.float32),
    }


def dequantize_q4_k_m(packed: np.ndarray, scales: np.ndarray):
    # Unpack the two 4-bit values from each byte.
    hi = (packed >> 4) & 0xF
    lo = packed & 0xF

    q = np.empty((packed.shape[0], packed.shape[1] * 2), dtype=np.uint8)
    q[:, 0::2] = hi
    q[:, 1::2] = lo

    # Invert the [-1, 1] -> 0..15 mapping, then restore the per-row scale.
    w = (q.astype(np.float32) / 7.5) - 1.0
    w = w * scales
    return torch.from_numpy(w)
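

def _quantization_roundtrip_demo() -> None:
    """
    Hedged sketch (not part of the original API): round-trip a toy weight
    matrix through quantize_q4_k_m / dequantize_q4_k_m. With per-row absmax
    scaling onto 16 levels spanning [-scale, scale], the elementwise
    reconstruction error is at most half a quantization step, i.e. roughly
    scale / 15.
    """
    w = torch.randn(8, 16)  # toy (vocab, dim); dim must be even for packing
    q = quantize_q4_k_m(w)
    w_hat = dequantize_q4_k_m(q["packed"], q["scales"])
    max_err = (w - w_hat).abs().max().item()
    bound = float(q["scales"].max()) / 15.0
    assert max_err <= bound + 1e-6, (max_err, bound)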


class SeparableDyT(nn.Module):
    """
    Per-channel dynamic-tanh transform: y = beta * tanh(alpha * x + bias),
    with learnable alpha, beta, and bias for each hidden dimension.
    """
    def __init__(
        self,
        hidden_dim: int,
        alpha_init: float = 0.5
    ):
        super().__init__()
        self.alpha = nn.Parameter(alpha_init * torch.ones(hidden_dim))
        self.beta = nn.Parameter(torch.ones(hidden_dim))
        self.bias = nn.Parameter(torch.zeros(hidden_dim))

    def forward(
        self,
        x: torch.Tensor
    ) -> torch.Tensor:
        return self.beta * torch.tanh(self.alpha * x + self.bias)


class SSEQ(InputModule):
    """
    Stable Static Embedding (SSE)
    StaticEmbedding-compatible Sentence-Transformers module
    """

    def __init__(
        self,
        tokenizer: Tokenizer | PreTrainedTokenizerFast,
        vocab_size: int,
        hidden_dim: int = 1024,
        **kwargs,
    ):
        super().__init__()

        if isinstance(tokenizer, PreTrainedTokenizerFast):
            tokenizer = tokenizer._tokenizer
        elif not isinstance(tokenizer, Tokenizer):
            raise ValueError("Tokenizer must be a fast (Rust) tokenizer")

        self.tokenizer: Tokenizer = tokenizer
        # Padding is unnecessary: tokenize() emits a flat id stream + offsets.
        self.tokenizer.no_padding()

        self.embedding = nn.EmbeddingBag(vocab_size, hidden_dim)
        self.dyt = SeparableDyT(hidden_dim)

        self.embedding_dim = hidden_dim

        # For model card compatibility
        self.base_model = kwargs.get("base_model", None)

    # Tokenization (StaticEmbedding-compatible)
    def tokenize(
        self,
        texts: list[str],
        **kwargs
    ) -> dict[str, torch.Tensor]:
        encodings = self.tokenizer.encode_batch(texts, add_special_tokens=False)
        encodings_ids = [encoding.ids for encoding in encodings]

        # EmbeddingBag consumes one flat id stream plus per-text start offsets,
        # so offsets[i] is the cumulative token count of texts[0 .. i-1].
        offsets = torch.from_numpy(
            np.cumsum(
                [0] + [len(token_ids) for token_ids in encodings_ids[:-1]],
                dtype=np.int64,  # must match input_ids' integer dtype
            )
        )
        input_ids = torch.tensor(
            [token_id for token_ids in encodings_ids for token_id in token_ids],
            dtype=torch.long
        )
        return {
            "input_ids": input_ids,
            "offsets": offsets
        }

    # Forward
    def forward(
        self,
        features: Dict[str, torch.Tensor],
        **kwargs,
    ) -> Dict[str, torch.Tensor]:
        x = self.embedding(features["input_ids"], features["offsets"])
        x = self.dyt(x)
        features["sentence_embedding"] = x
        return features

    # Required APIs
    def get_sentence_embedding_dimension(self) -> int:
        return self.embedding_dim

    @property
    def max_seq_length(self) -> float:
        # Unbounded: EmbeddingBag pools a flat id stream of any length.
        return torch.inf
    
    def save(self, output_path: str):
        os.makedirs(output_path, exist_ok=True)

        state = self.state_dict()

        # The embedding matrix is stored separately in 4-bit form; everything
        # else (the DyT parameters) goes into a small safetensors file.
        emb = state["embedding.weight"]
        q = quantize_q4_k_m(emb)

        del state["embedding.weight"]

        save_safetensors_file(
            state,
            os.path.join(output_path, "model_rest.safetensors"),
        )

        # Binary layout: all packed nibbles first, then all float32 scales,
        # i.e. hidden // 2 + 4 bytes per vocabulary row.
        with open(os.path.join(output_path, "embedding.q4_k_m.bin"), "wb") as f:
            f.write(q["packed"].tobytes())
            f.write(q["scales"].tobytes())

        self.tokenizer.save(
            str(Path(output_path) / "tokenizer.json")
        )
    
    @classmethod
    def load(cls, model_path: str):

        tokenizer = Tokenizer.from_file(
            os.path.join(model_path, "tokenizer.json")
        )

        state = load_file(
            os.path.join(model_path, "model_rest.safetensors"),
            device="cpu"
        )

        # read q4 binary
        bin_path = os.path.join(model_path, "embedding.q4_k_m.bin")
        with open(bin_path, "rb") as f:
            raw = f.read()

        hidden = state["dyt.alpha"].shape[0]
        total_uint8 = len(raw)

        # Invert the save() layout: each row costs hidden // 2 packed bytes
        # plus 4 bytes for its float32 scale, so the vocabulary size can be
        # recovered from the file size alone.
        bytes_per_row = hidden // 2 + 4
        vocab = total_uint8 // bytes_per_row

        packed_size = vocab * hidden // 2

        packed = np.frombuffer(raw[:packed_size], dtype=np.uint8)
        scales = np.frombuffer(raw[packed_size:], dtype=np.float32)

        packed = packed.reshape(vocab, hidden // 2)
        scales = scales.reshape(vocab, 1)

        emb = dequantize_q4_k_m(packed, scales)

        # rebuild model
        model = cls(
            tokenizer=tokenizer,
            vocab_size=emb.shape[0],
            hidden_dim=emb.shape[1]
        )

        state["embedding.weight"] = emb
        model.load_state_dict(state)

        return model
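

# Usage sketch (hedged; the tokenizer path and sizes below are illustrative,
# not shipped defaults):
#
#   tokenizer = Tokenizer.from_file("tokenizer.json")
#   sse = SSEQ(tokenizer, vocab_size=30522, hidden_dim=1024)
#   features = sse.tokenize(["hello world", "static embeddings are fast"])
#   embeddings = sse(features)["sentence_embedding"]  # shape (2, 1024)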


@dataclass
class SSESforzandoConfig:
    hidden_dim: int = 512
    vocab_size: int = 30522


@dataclass
class SSEForzandoConfig:
    hidden_dim: int = 384
    vocab_size: int = 30522
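

# Minimal smoke test (a hedged sketch, not shipped behavior): builds a tiny
# in-memory WordLevel tokenizer, runs one forward pass, and round-trips
# save() / load() through a temporary directory. All names and sizes here
# are illustrative.
if __name__ == "__main__":
    import tempfile
    from tokenizers.models import WordLevel
    from tokenizers.pre_tokenizers import Whitespace

    vocab = {"hello": 0, "world": 1, "[UNK]": 2}
    tok = Tokenizer(WordLevel(vocab, unk_token="[UNK]"))
    tok.pre_tokenizer = Whitespace()

    model = SSEQ(tok, vocab_size=len(vocab), hidden_dim=8)
    feats = model.tokenize(["hello world", "world hello hello"])
    emb = model(feats)["sentence_embedding"]
    assert emb.shape == (2, 8)

    with tempfile.TemporaryDirectory() as tmp:
        model.save(tmp)
        reloaded = SSEQ.load(tmp)
        emb2 = reloaded(reloaded.tokenize(["hello world"]))["sentence_embedding"]
        assert emb2.shape == (1, 8)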