Buckets:

bbkdevops
/

unicosys-hypergraph-bucket

Files

xet

bbkdevops/unicosys-hypergraph-bucket / tinymind-native-colab-handoff /bundle /model /tinymind-apex /tinymind_apex.py

bbkdevops

about 1 month ago

download

raw

4.96 kB

	from __future__ import annotations

	import argparse
	import json
	from pathlib import Path
	from typing import Any

	import joblib

	# Optional dependency: the neural core is imported on a best-effort basis.
	# Any failure while importing it (not only ImportError) leaves the name bound
	# to None, which TinyMindApex.__init__ checks before trying to construct it.
	try:
	    from neural_core import TinyMindNeuralCore
	except Exception:
	    TinyMindNeuralCore = None


	# Default model directory; used as the default root for TinyMindApex and as
	# the default value of the --root command-line option.
	# NOTE(review): this is an absolute Windows path, so on any other machine the
	# root presumably has to be passed explicitly -- confirm with the bundle owner.
	ROOT = Path(r"D:\ad\tinymind\model\tinymind-apex")


	class TinyMindApex:
	    """Retrieval-based responder built on a vectorizer and a neighbour index.

	    The instance loads four artifacts from ``<root>/artifacts``:

	    * ``tinymind_apex_model.joblib`` -- a mapping with the keys
	      ``"vectorizer"`` and ``"nn"``;
	    * ``records.jsonl`` -- one JSON object per line;
	    * ``manifest.json`` -- returned verbatim in every response.

	    If ``TinyMindNeuralCore`` could be imported, it is constructed as an
	    optional extra predictor; any failure there is tolerated.
	    """

	    # Lower-case substrings that force the refusal path in ``generate``.
	    # They are matched against the lower-cased query.
	    HIGH_RISK_TERMS = (
	        "delete system32",
	        "password",
	        "ข้อมูลหลุด",
	        "leaked",
	        "disable antivirus",
	        "ลบรหัส",
	        "รหัสผ่าน",
	    )
	    # Policy text attached to the "safety" section of every response.
	    DEFAULT_POLICY = "read-only first, reversible changes only, confirmation for privileged/destructive/privacy/license-risk actions"
	    # At most this many record answers are joined into the final answer.
	    MAX_ANSWER_PARTS = 3
	    # At most this many de-duplicated tool calls are suggested.
	    MAX_TOOL_CALLS = 8

	    def __init__(self, root: Path | str | None = None):
	        """Load the model bundle, records and manifest from ``root``.

	        Args:
	            root: Model directory containing an ``artifacts`` sub-directory.
	                Accepts a ``Path`` or a string; ``None`` (the default) means
	                the module-level ``ROOT``.

	        Raises:
	            OSError: If an artifact file cannot be read.
	            json.JSONDecodeError: If a record line or the manifest is not
	                valid JSON.
	            KeyError: If the bundle lacks ``"vectorizer"`` or ``"nn"``.
	        """
	        root = ROOT if root is None else Path(root)
	        artifact_dir = root / "artifacts"

	        # SECURITY: joblib.load unpickles the file and can therefore execute
	        # arbitrary code. Only point ``root`` at artifacts you trust.
	        bundle = joblib.load(artifact_dir / "tinymind_apex_model.joblib")
	        self.vectorizer = bundle["vectorizer"]
	        self.nn = bundle["nn"]

	        self.records = self._load_records(artifact_dir / "records.jsonl")
	        self.manifest = json.loads((artifact_dir / "manifest.json").read_text(encoding="utf-8"))

	        # The neural core is strictly optional: a failed construction simply
	        # disables it instead of preventing retrieval from working.
	        self.neural = None
	        if TinyMindNeuralCore is not None:
	            try:
	                self.neural = TinyMindNeuralCore(root)
	            except Exception:
	                self.neural = None

	    @staticmethod
	    def _load_records(path: Path) -> list[dict[str, Any]]:
	        """Parse a JSON Lines file, skipping blank lines.

	        Lines are split on ``"\\n"`` only. ``str.splitlines()`` would also
	        split on U+2028, U+2029 and U+0085, which may appear unescaped
	        inside JSON strings and would cut such a record in two.
	        ``read_text`` already normalises ``\\r\\n`` and ``\\r`` to ``\\n``.
	        """
	        text = path.read_text(encoding="utf-8")
	        return [json.loads(line) for line in text.split("\n") if line.strip()]

	    def _refusal_response(self, neural_prediction: Any) -> dict[str, Any]:
	        """Build the fixed response returned for high-risk queries."""
	        return {
	            "answer": "คำขอนี้มีความเสี่ยงด้าน privacy/destructive/license หรือข้อมูลรั่วไหล จึงต้องหยุดและเปลี่ยนเป็นทางเลือกที่ปลอดภัย เช่น audit แบบ read-only หรือรวบรวมข้อมูล lawful/open sources เท่านั้น.",
	            "suggested_tool_calls": [
	                {
	                    "name": "user.confirm",
	                    "arguments": {
	                        "question": "This request is high-risk. Confirm a safe alternative such as lawful source collection or read-only audit instead?",
	                        "risk": "privacy",
	                    },
	                }
	            ],
	            "matches": [],
	            "safety": {
	                "notes": ["High-risk wording detected. Refusal/confirmation override applied before retrieval."],
	                "default_policy": self.DEFAULT_POLICY,
	            },
	            "model_manifest": self.manifest,
	            "neural_prediction": neural_prediction,
	        }

	    def generate(self, query: str, top_k: int = 5) -> dict[str, Any]:
	        """Answer ``query`` from the nearest stored records.

	        Args:
	            query: Free-text request.
	            top_k: Number of neighbours to retrieve; clamped to the range
	                ``1 .. len(self.records)`` (and to at least 1).

	        Returns:
	            A dict with the keys ``answer``, ``suggested_tool_calls``,
	            ``matches``, ``safety``, ``model_manifest`` and
	            ``neural_prediction``. If the query contains a high-risk term,
	            a fixed refusal response is returned and no retrieval happens.
	        """
	        # The optional neural prediction is reported in both response shapes.
	        neural_prediction = self.neural.predict(query) if self.neural is not None else None

	        query_lower = query.lower()
	        if any(term in query_lower for term in self.HIGH_RISK_TERMS):
	            return self._refusal_response(neural_prediction)

	        query_vec = self.vectorizer.transform([query])
	        n_neighbors = max(1, min(top_k, len(self.records)))
	        distances, indices = self.nn.kneighbors(query_vec, n_neighbors=n_neighbors)

	        matches = []
	        tool_calls = []
	        answer_parts = []
	        for dist, idx in zip(distances[0], indices[0]):
	            record = self.records[int(idx)]
	            matches.append(
	                {
	                    "id": record["id"],
	                    "kind": record["kind"],
	                    "domain": record["domain"],
	                    # Similarity is reported as 1 - distance.
	                    # NOTE(review): presumably the index uses a distance
	                    # bounded by 1 (e.g. cosine) -- confirm.
	                    "score": round(1.0 - float(dist), 6),
	                    "task": record["task"],
	                }
	            )
	            if record.get("answer") and len(answer_parts) < self.MAX_ANSWER_PARTS:
	                answer_parts.append(record["answer"])
	            for call in record.get("tool_calls", []):
	                # Normalise the legacy {"tool": ...} shape to {"name": ...}
	                # so that equivalent calls de-duplicate against each other.
	                if "tool" in call and "name" not in call:
	                    call = {"name": call["tool"], "arguments": call.get("arguments", {})}
	                if call not in tool_calls:
	                    tool_calls.append(call)

	        return {
	            "answer": "\n\n".join(answer_parts) if answer_parts else "No strong match found. Ask for more context or run a read-only audit first.",
	            "suggested_tool_calls": tool_calls[: self.MAX_TOOL_CALLS],
	            "matches": matches,
	            "safety": {
	                # The retrieval path does not produce any notes.
	                "notes": [],
	                "default_policy": self.DEFAULT_POLICY,
	            },
	            "model_manifest": self.manifest,
	            "neural_prediction": neural_prediction,
	        }


	def main() -> int:
	    """Command-line entry point: answer one query and print it as JSON.

	    Parses a positional ``query`` plus the ``--top-k`` and ``--root``
	    options, loads the model from the chosen root, and prints the
	    generated response as indented, non-ASCII-escaped JSON.

	    Returns:
	        Always ``0``.
	    """
	    cli = argparse.ArgumentParser()
	    cli.add_argument("query")
	    cli.add_argument("--top-k", type=int, default=5)
	    cli.add_argument("--root", default=str(ROOT))
	    options = cli.parse_args()

	    apex = TinyMindApex(Path(options.root))
	    response = apex.generate(options.query, options.top_k)
	    rendered = json.dumps(response, ensure_ascii=False, indent=2)
	    print(rendered)
	    return 0


	# Run the CLI only when executed as a script; main()'s return value
	# becomes the process exit status.
	if __name__ == "__main__":
	    raise SystemExit(main())

Xet Storage Details

Size: 4.96 kB
Xet hash: 580495d55c561db2a3dd38e0bb294f3cadbdfdd1ab4b56d0c9879a51ae993c13

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.