"""Matrix-BIOS-Memory-0.1 — grounded, citation-faithful recall (RAG).

Ships a FAISS index + a small corpus; every answer cites the source ids it used.
pip install torch transformers sentence-transformers faiss-cpu huggingface_hub
"""
import json
from pathlib import Path

import faiss
import torch
from huggingface_hub import snapshot_download
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
|
# Hub repository that holds the retrieval assets read below.
REPO = "ruslanmv/Matrix-BIOS-Memory-0.1"

# ``path`` is the local directory returned by snapshot_download; the three
# asset files below are read relative to it.
path = snapshot_download(REPO)
_root = Path(path)

# Read the JSON assets through read_text so each file handle is closed
# immediately and the bytes are decoded as UTF-8 regardless of the platform's
# default encoding.
# cfg supplies the "embedder", "generator" and "top_k" settings used below and
# in answer().
cfg = json.loads((_root / "memory_config.json").read_text(encoding="utf-8"))
# docs is indexed by FAISS result position; each entry is read via its "id"
# and "text" keys in answer().
docs = json.loads((_root / "docs.json").read_text(encoding="utf-8"))
# faiss.read_index is given a plain string path, hence the str() conversion.
index = faiss.read_index(str(_root / "index.faiss"))


# Query encoder plus the seq2seq generator (tokenizer + model); .eval() puts
# the generator in inference mode.
embedder = SentenceTransformer(cfg["embedder"])
gen_tok = AutoTokenizer.from_pretrained(cfg["generator"])
gen_model = AutoModelForSeq2SeqLM.from_pretrained(cfg["generator"]).eval()
|
|
def _build_prompt(question, hits):
    """Render the generator prompt for *question* over the retrieved *hits*."""
    context = "\n".join(f"[{d['id']}] {d['text']}" for d in hits)
    return ("Answer the question using ONLY the context, and cite the [id] you used.\n"
            f"Context:\n{context}\n\nQuestion: {question}\nAnswer:")


def answer(question: str):
    """Answer *question* from the retrieved corpus entries.

    The question is embedded, the ``cfg["top_k"]`` nearest entries are looked
    up in the FAISS index, and the generator is prompted with those entries
    as context.

    Args:
        question: The natural-language question to answer.

    Returns:
        A ``(text, source_ids)`` pair: the decoded generator output and the
        ``id`` of every document that was placed in the prompt.
    """
    # NOTE(review): normalize_embeddings=True presumably mirrors how the index
    # vectors were built — confirm against the index-building script.
    qv = embedder.encode([question], normalize_embeddings=True).astype("float32")
    _, idx = index.search(qv, cfg["top_k"])
    # Keep only positions that map to a real document; FAISS reports missing
    # neighbours as -1, which would otherwise index docs from the end.
    hits = [docs[i] for i in idx[0] if 0 <= i < len(docs)]

    # The context precedes the question in the prompt and the tokenizer
    # truncates on the right by default, so an over-long context would cut
    # off "Question: ... Answer:". Drop hits from the tail (search results are
    # expected to be ordered best-first) until the prompt fits the tokenizer's
    # limit, always keeping at least one hit.
    prompt = _build_prompt(question, hits)
    limit = gen_tok.model_max_length
    while len(hits) > 1 and len(gen_tok(prompt).input_ids) > limit:
        hits = hits[:-1]
        prompt = _build_prompt(question, hits)

    # truncation=True stays as a last-resort guard (e.g. one very long hit).
    ids = gen_tok(prompt, return_tensors="pt", truncation=True).input_ids
    with torch.no_grad():
        out = gen_model.generate(ids, max_new_tokens=64)
    return gen_tok.decode(out[0], skip_special_tokens=True), [d["id"] for d in hits]
|
|
if __name__ == "__main__":
    # Demo run: two sample questions (the second is Italian for
    # "What is the capital of Italy?"), each printed with its answer and the
    # source ids returned by answer().
    demo_questions = (
        "What does every effectful action in Matrix OS emit?",
        "Qual e la capitale d'Italia?",
    )
    for demo_q in demo_questions:
        reply, cited = answer(demo_q)
        print(f"Q: {demo_q}\nA: {reply}\n sources: {cited}\n")
|
|