Buckets:

bbkdevops
/

unicosys-hypergraph-bucket

Files

xet

bbkdevops/unicosys-hypergraph-bucket / tinymind-native-colab-handoff /bundle /model /tinymind-apex /neural_core.py

bbkdevops

about 1 month ago

download

raw

3.26 kB

	from __future__ import annotations

	import argparse
	import json
	from pathlib import Path

	import torch
	import torch.nn as nn


	# Default model root; used as the default `root` argument of TinyMindNeuralCore.
	# NOTE(review): hard-coded absolute Windows path — presumably the author's
	# local checkout; confirm or override `root` before running on another machine.
	ROOT = Path(r"D:\ad\tinymind\model\tinymind-apex")


	class NeuralCore(nn.Module):
	    """Convolutional text encoder with three linear prediction heads.

	    Token ids are embedded, run through two parallel 1-D convolutions
	    (kernel sizes 3 and 5), max-pooled over the sequence axis, concatenated
	    and layer-normalised.  The resulting vector feeds the ``domain``,
	    ``tool`` and ``risk`` heads.
	    """

	    def __init__(self, vocab_size: int, domain_count: int, tool_count: int, emb: int = 96, hidden: int = 192):
	        """Create the layers.

	        Args:
	            vocab_size: Number of rows in the embedding table (id 0 is the padding index).
	            domain_count: Output width of the domain head.
	            tool_count: Output width of the tool head.
	            emb: Embedding dimension.
	            hidden: Output channels of each convolution branch.
	        """
	        super().__init__()
	        # Both convolution branches are concatenated, hence twice `hidden`.
	        pooled_width = hidden * 2

	        # Attribute names are the state_dict key prefixes, so they stay as-is.
	        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=emb, padding_idx=0)
	        self.conv3 = nn.Conv1d(in_channels=emb, out_channels=hidden, kernel_size=3, padding=1)
	        self.conv5 = nn.Conv1d(in_channels=emb, out_channels=hidden, kernel_size=5, padding=2)
	        self.norm = nn.LayerNorm(pooled_width)
	        self.dropout = nn.Dropout(p=0.0)
	        self.domain_head = nn.Linear(in_features=pooled_width, out_features=domain_count)
	        self.tool_head = nn.Linear(in_features=pooled_width, out_features=tool_count)
	        self.risk_head = nn.Linear(in_features=pooled_width, out_features=1)

	    def encode(self, x):
	        """Return the normalised pooled feature vector for a batch of id sequences.

	        Args:
	            x: Integer id tensor of shape (batch, seq_len).

	        Returns:
	            Tensor of shape (batch, 2 * hidden).
	        """
	        # Conv1d wants channels before the sequence axis.
	        channels_first = self.embedding(x).transpose(1, 2)
	        pooled_branches = []
	        for conv in (self.conv3, self.conv5):
	            activated = torch.relu(conv(channels_first))
	            # Global max over the sequence axis.
	            pooled_branches.append(torch.amax(activated, dim=2))
	        features = torch.cat(pooled_branches, dim=1)
	        return self.dropout(self.norm(features))

	    def forward(self, x):
	        """Compute raw head outputs (logits) for a batch of id sequences.

	        Returns:
	            Dict with ``"domain"`` (batch, domain_count), ``"tool"``
	            (batch, tool_count) and ``"risk"`` (batch,) tensors.
	        """
	        features = self.encode(x)
	        domain_logits = self.domain_head(features)
	        tool_logits = self.tool_head(features)
	        # Drop the trailing size-1 axis of the single-output risk head.
	        risk_logits = self.risk_head(features).squeeze(-1)
	        return {
	            "domain": domain_logits,
	            "tool": tool_logits,
	            "risk": risk_logits,
	        }


	class TinyMindNeuralCore:
	    """Inference wrapper: loads a trained ``NeuralCore`` checkpoint and classifies text.

	    The checkpoint is read from
	    ``<root>/artifacts/neural_core/tinymind_neural_core.pt`` and must provide
	    the keys ``vocab``, ``domains``, ``tools``, ``max_len``, ``config``
	    (with ``emb`` and ``hidden``) and ``state_dict``.
	    """

	    def __init__(self, root: Path | str = ROOT):
	        """Load the checkpoint and put the model in eval mode.

	        Args:
	            root: Directory containing ``artifacts/neural_core``.  A ``str``
	                or path-like value is accepted and converted to ``Path``.
	        """
	        ckpt_path = Path(root) / "artifacts" / "neural_core" / "tinymind_neural_core.pt"
	        # SECURITY: weights_only=False lets torch.load unpickle arbitrary
	        # objects, which can execute code.  Only load checkpoints from a
	        # trusted source.  NOTE(review): the checkpoint appears to hold plain
	        # dicts plus a state_dict, so weights_only=True may work — confirm.
	        ckpt = torch.load(ckpt_path, map_location="cpu", weights_only=False)

	        self.vocab = ckpt["vocab"]      # token -> id mapping (queried with .get)
	        self.domains = ckpt["domains"]  # label -> index
	        self.tools = ckpt["tools"]      # label -> index
	        # Inverted tables to turn an argmax index back into its label.
	        self.id_to_domain = {idx: label for label, idx in self.domains.items()}
	        self.id_to_tool = {idx: label for label, idx in self.tools.items()}
	        self.max_len = ckpt["max_len"]

	        cfg = ckpt["config"]
	        self.model = NeuralCore(
	            len(self.vocab),
	            len(self.domains),
	            len(self.tools),
	            cfg["emb"],
	            cfg["hidden"],
	        )
	        self.model.load_state_dict(ckpt["state_dict"])
	        self.model.eval()

	    def encode(self, text: str):
	        """Convert ``text`` to a ``(1, max_len)`` tensor of character ids.

	        The text is truncated to ``max_len`` characters; characters missing
	        from the vocabulary map to id 1 and the tail is padded with id 0.
	        """
	        ids = [self.vocab.get(ch, 1) for ch in text[: self.max_len]]
	        ids.extend([0] * (self.max_len - len(ids)))
	        return torch.tensor([ids], dtype=torch.long)

	    def predict(self, text: str):
	        """Classify ``text``.

	        Returns:
	            Dict with ``"domain"`` and ``"tool_policy"`` (each a
	            ``{"label", "confidence"}`` dict), ``"high_risk_probability"``
	            and the constant ``"model"`` tag.  Probabilities are rounded to
	            six decimal places.
	        """
	        with torch.no_grad():
	            out = self.model(self.encode(text))
	            # Batch size is 1, so take row 0 of each head.
	            domain_prob = torch.softmax(out["domain"], dim=1)[0]
	            tool_prob = torch.softmax(out["tool"], dim=1)[0]
	            risk_prob = torch.sigmoid(out["risk"])[0].item()

	        domain_i = domain_prob.argmax().item()
	        tool_i = tool_prob.argmax().item()
	        return {
	            "domain": {
	                "label": self.id_to_domain[domain_i],
	                "confidence": round(domain_prob[domain_i].item(), 6),
	            },
	            "tool_policy": {
	                "label": self.id_to_tool[tool_i],
	                "confidence": round(tool_prob[tool_i].item(), 6),
	            },
	            "high_risk_probability": round(risk_prob, 6),
	            "model": "tinymind-neural-core",
	        }


	def main() -> int:
	    """Command-line entry point: classify one positional ``text`` argument.

	    Loads the default ``TinyMindNeuralCore``, prints the prediction as
	    indented JSON on stdout and returns exit status 0.
	    """
	    cli = argparse.ArgumentParser()
	    cli.add_argument("text")
	    text = cli.parse_args().text

	    prediction = TinyMindNeuralCore().predict(text)
	    # ensure_ascii=False keeps non-ASCII labels readable in the output.
	    rendered = json.dumps(prediction, ensure_ascii=False, indent=2)
	    print(rendered)
	    return 0


	if __name__ == "__main__":
	    # Run the CLI and surface main()'s return value as the process exit status.
	    exit_status = main()
	    raise SystemExit(exit_status)

Xet Storage Details

Size: 3.26 kB
Xet hash: 51894963d8c78e49f205e174e18f2e703feb4a3542a45d5a3f913ea8baa1a6c6

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.