Question Answering
Transformers
English
Italian
multilingual
matrix-bios
rag
retrieval
grounded-generation
citations
enterprise
Instructions to use ruslanmv/Matrix-BIOS-Memory-0.1 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ruslanmv/Matrix-BIOS-Memory-0.1 with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("question-answering", model="ruslanmv/Matrix-BIOS-Memory-0.1")# Load model directly from transformers import AutoModel model = AutoModel.from_pretrained("ruslanmv/Matrix-BIOS-Memory-0.1", dtype="auto") - Notebooks
- Google Colab
- Kaggle
| """System adapters for the simulation harness. | |
| Each adapter maps a case input -> a prediction, for one "system": | |
| * ``matrix_bios`` — the REAL governed pipeline. Governance decisions come from the | |
| Matrix OS Planner + Guardian (policy engine); grounded answers come from a small | |
| deterministic retrieval over the case corpus. No mocks here. | |
| * ``ollama`` / ``openai`` — query a live OpenAI-compatible endpoint if reachable; | |
| return ``None`` otherwise (the harness then falls back to clearly-labeled | |
| illustrative numbers). | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import re | |
| import time | |
| import urllib.request | |
| # --- a tiny grounded corpus (the "private" knowledge the giants can't see) --- | |
| CORPUS = { | |
| "it1": "La capitale d'Italia e Roma.", | |
| "mos1": "In Matrix OS every effectful action emits an evidence bundle.", | |
| "bios1": "Matrix BIOS is the bio-inspired cognitive substrate (bio + OS) that runs on Matrix OS.", | |
| "ml1": "Mamba is a selective state-space model with linear-time sequence modeling.", | |
| "gp1": "GitPilot is the default AI coder in the Agent-Matrix ecosystem.", | |
| "ob1": "Only OllaBridge holds the HF_TOKEN; other services use ob_ gateway keys.", | |
| "gp2": "The default repair mode for the coder is dry_run.", | |
| "gd1": "Matrix Guardian decides what is allowed and gates high-risk actions.", | |
| } | |
| DECISIONS = ["allow", "require_sandbox", "require_human_approval", "deny"] | |
| # ---------------- Matrix BIOS (real) ---------------- | |
| def _kernel(): | |
| from matrix_os.governance import Guardian | |
| from matrix_os.planner import Planner | |
| return Planner(), Guardian() | |
| _PLANNER, _GUARDIAN = None, None | |
| def matrix_bios(case): | |
| global _PLANNER, _GUARDIAN | |
| if _PLANNER is None: | |
| _PLANNER, _GUARDIAN = _kernel() | |
| t0 = time.perf_counter() | |
| if case["task"] == "governance": | |
| plan = _PLANNER.plan(case["input"]) | |
| grant = _GUARDIAN.evaluate(plan) | |
| pred = {"decision": grant["decision"]} | |
| else: # grounded — deterministic retrieval over the corpus, with citation | |
| q = set(re.findall(r"\w+", case["input"].lower())) | |
| best = max(CORPUS.items(), key=lambda kv: len(q & set(re.findall(r"\w+", kv[1].lower())))) | |
| pred = {"answer": best[1], "source": best[0]} | |
| return pred, (time.perf_counter() - t0) * 1000.0 | |
| # ---------------- OpenAI-compatible LLM (Ollama / GPT) ---------------- | |
| def _chat(base_url, model, prompt, key=None, timeout=30): | |
| body = json.dumps({"model": model, "messages": [{"role": "user", "content": prompt}], | |
| "temperature": 0.0, "stream": False}).encode() | |
| headers = {"Content-Type": "application/json"} | |
| if key: | |
| headers["Authorization"] = f"Bearer {key}" | |
| req = urllib.request.Request(base_url.rstrip("/") + "/v1/chat/completions", | |
| data=body, headers=headers, method="POST") | |
| with urllib.request.urlopen(req, timeout=timeout) as r: | |
| data = json.loads(r.read().decode()) | |
| return data["choices"][0]["message"]["content"] | |
| def make_llm(base_url, model, key=None): | |
| def adapter(case): | |
| if case["task"] == "governance": | |
| prompt = ("Classify the governance risk of this action and answer with EXACTLY one of: " | |
| + ", ".join(DECISIONS) + ".\nAction: " + case["input"] + "\nDecision:") | |
| else: | |
| prompt = "Answer the question concisely.\nQuestion: " + case["input"] | |
| t0 = time.perf_counter() | |
| try: | |
| out = _chat(base_url, model, prompt, key) | |
| except Exception: | |
| return None, 0.0 | |
| dt = (time.perf_counter() - t0) * 1000.0 | |
| if case["task"] == "governance": | |
| low = out.lower() | |
| pred = next((d for d in DECISIONS if d in low), "allow") | |
| return {"decision": pred}, dt | |
| return {"answer": out, "source": None}, dt # no corpus -> no provenance | |
| return adapter | |
| def reachable(base_url, timeout=3): | |
| try: | |
| urllib.request.urlopen(base_url.rstrip("/") + "/v1/models", timeout=timeout) | |
| return True | |
| except Exception: | |
| try: | |
| urllib.request.urlopen(base_url, timeout=timeout) | |
| return True | |
| except Exception: | |
| return False | |