stanfordnlp/snli
Viewer • Updated • 570k • 22.5k • 94
Dhvani v6 builds on v5's three-head architecture with a key improvement: the vyanjana (expressive) head now trains on real formal/casual style pairs from the start, enabling stronger expression modeling.
Input → Qwen3-1.7B (LoRA) → Mean Pool (2048)
  → Shared Trunk (1024)
      → Surface Head (512) — graded semantic similarity
      → Abhida Head (512) — compression-invariant meaning
      → Vyanjana Head (512) — expression style (trained on real data)
  → Full Embedding (1536) = concat(all three)
| Head | Training Signal | What It Captures | v6 Enhancement |
|---|---|---|---|
| Surface | Soft in-batch InfoNCE (NLI) | Graded meaning closeness | Same as v5 |
| Abhida | Hard InfoNCE + variance (NLI) | Invariant propositional content | Same as v5 |
| Vyanjana | Real formal/casual pairs | Expression style, register, tone | New: Real style data |
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer
from peft import get_peft_model, LoraConfig, TaskType
from huggingface_hub import hf_hub_download
class DhvaniV6(nn.Module):
    """LoRA-adapted base encoder with a shared trunk and three projection heads.

    The base model's last hidden state is mean-pooled over the attention
    mask, passed through a shared trunk, and projected by three parallel
    heads (``abhida``, ``vyanjana``, ``surface``).

    Args:
        cfg: Mapping that must provide the keys ``base_model``, ``lora_r``,
            ``lora_alpha``, ``lora_dropout``, ``lora_targets``,
            ``hidden_dim``, ``trunk_dim`` and ``subspace_dim``.
    """

    def __init__(self, cfg):
        super().__init__()
        base = AutoModel.from_pretrained(
            cfg['base_model'], torch_dtype=torch.bfloat16,
            attn_implementation='eager', trust_remote_code=True
        )
        lora_config = LoraConfig(
            r=cfg['lora_r'], lora_alpha=cfg['lora_alpha'],
            lora_dropout=cfg['lora_dropout'],
            target_modules=cfg['lora_targets'],
            bias='none', task_type=TaskType.FEATURE_EXTRACTION
        )
        self.base = get_peft_model(base, lora_config)
        self.trunk = nn.Sequential(
            nn.Linear(cfg['hidden_dim'], cfg['trunk_dim']),
            nn.LayerNorm(cfg['trunk_dim']),
            nn.GELU(),
        )
        # All three heads share one layout; registration order is kept as
        # abhida, vyanjana, surface.
        self.abhida_head = self._make_head(cfg['trunk_dim'], cfg['subspace_dim'])
        self.vyanjana_head = self._make_head(cfg['trunk_dim'], cfg['subspace_dim'])
        self.surface_head = self._make_head(cfg['trunk_dim'], cfg['subspace_dim'])

    @staticmethod
    def _make_head(in_dim, out_dim):
        """Build one projection head: a Linear layer followed by LayerNorm."""
        return nn.Sequential(
            nn.Linear(in_dim, out_dim),
            nn.LayerNorm(out_dim),
        )

    @staticmethod
    def mean_pool(hidden, mask):
        """Average ``hidden`` along dim 1, weighting each position by ``mask``.

        Args:
            hidden: Hidden states; dim 1 is the one that gets pooled
                (presumably ``(batch, seq, dim)`` -- confirm against caller).
            mask: Attention mask with one entry per pooled position; a
                trailing axis is added so it broadcasts against ``hidden``.

        Returns:
            The mask-weighted mean of ``hidden`` with dim 1 removed. Rows
            whose mask is all zero yield zeros rather than NaN, because the
            denominator is clamped to at least ``1e-9``.
        """
        m = mask.unsqueeze(-1).float()
        return (hidden * m).sum(1) / m.sum(1).clamp(min=1e-9)

    def encode_tokens(self, input_ids, attention_mask):
        """Encode tokenized inputs into per-head and concatenated embeddings.

        Args:
            input_ids: Token ids forwarded to the base model.
            attention_mask: Mask forwarded to the base model and reused as
                the pooling weights.

        Returns:
            A dict with keys ``'abhida'``, ``'vyanjana'``, ``'surface'``
            (each head's output, L2-normalized along the last dim) and
            ``'full'`` (the three un-normalized head outputs concatenated in
            that order, then L2-normalized as a whole).
        """
        out = self.base(input_ids=input_ids, attention_mask=attention_mask)
        # Pool in float32: the base model is loaded in bfloat16.
        pooled = self.mean_pool(out.last_hidden_state.float(), attention_mask)
        trunk = self.trunk(pooled)

        # Run each head exactly once and reuse the result for both the
        # per-head embedding and the concatenated one.
        abhida = self.abhida_head(trunk)
        vyanjana = self.vyanjana_head(trunk)
        surface = self.surface_head(trunk)

        return {
            'abhida': F.normalize(abhida, p=2, dim=-1),
            'vyanjana': F.normalize(vyanjana, p=2, dim=-1),
            'surface': F.normalize(surface, p=2, dim=-1),
            'full': F.normalize(
                torch.cat([abhida, vyanjana, surface], dim=-1), p=2, dim=-1
            ),
        }
# Load model: fetch the checkpoint from the Hub and read it on CPU.
ckpt_path = hf_hub_download(repo_id="rb512/dhvani-v6", filename="v6_best.pt")
# NOTE(security): weights_only=False lets torch.load unpickle arbitrary
# Python objects, which can execute code. It is kept here because the
# checkpoint also stores a config dict; only load checkpoints you trust.
ckpt = torch.load(ckpt_path, map_location="cpu", weights_only=False)
cfg = ckpt["config"]

tokenizer = AutoTokenizer.from_pretrained(cfg['base_model'], trust_remote_code=True)
# Padding needs a pad token; fall back to EOS when the tokenizer has none.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Rebuild the architecture from the stored config, then restore each
# component from its own entry in the checkpoint.
model = DhvaniV6(cfg)
model.base.load_state_dict(ckpt["lora"])
model.trunk.load_state_dict(ckpt["trunk"])
model.abhida_head.load_state_dict(ckpt["abhida_head"])
model.vyanjana_head.load_state_dict(ckpt["vyanjana_head"])
model.surface_head.load_state_dict(ckpt["surface_head"])
model.eval()

# Encode text
texts = ["Hello world", "Greetings, world"]
enc = tokenizer(texts, max_length=128, truncation=True, padding='max_length', return_tensors='pt')
# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    embeddings = model.encode_tokens(enc['input_ids'], enc['attention_mask'])

print("Surface embeddings:", embeddings['surface'].shape)  # semantic similarity
print("Abhida embeddings:", embeddings['abhida'].shape)  # meaning-invariant
print("Vyanjana embeddings:", embeddings['vyanjana'].shape)  # expression style
print("Full embeddings:", embeddings['full'].shape)  # concatenated
Named after Anandavardhana's 9th-century theory of dhvani (resonance) in Sanskrit poetics.
v6 enhances the vyañjanā component with real stylistic supervision.
@article{dhvani2026,
title={Dhvani: Structured Multi-Head Embeddings that Separate What Was Said from How It Was Said},
author={Anonymous},
year={2026}
}
Apache 2.0