How-to-Matrix-BIOS / examples /run_memory.py
ruslanmv's picture
Upload examples/run_memory.py with huggingface_hub
8f3f3ca verified
Raw
History Blame Contribute Delete
1.83 kB
"""Matrix-BIOS-Memory-0.1 — grounded, citation-faithful recall (RAG).
Ships a FAISS index + a small corpus; every answer cites the source ids it used.
pip install torch transformers sentence-transformers faiss-cpu huggingface_hub
"""
import json
import faiss
import torch
from huggingface_hub import snapshot_download
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
REPO = "ruslanmv/Matrix-BIOS-Memory-0.1"
path = snapshot_download(REPO)
cfg = json.load(open(f"{path}/memory_config.json")) # embedder / generator / top_k
docs = json.load(open(f"{path}/docs.json")) # [{"id": ..., "text": ...}]
index = faiss.read_index(f"{path}/index.faiss")
embedder = SentenceTransformer(cfg["embedder"])
gen_tok = AutoTokenizer.from_pretrained(cfg["generator"])
gen_model = AutoModelForSeq2SeqLM.from_pretrained(cfg["generator"]).eval()
def answer(question: str):
qv = embedder.encode([question], normalize_embeddings=True).astype("float32")
_, idx = index.search(qv, cfg["top_k"])
hits = [docs[i] for i in idx[0] if 0 <= i < len(docs)]
context = "\n".join(f"[{d['id']}] {d['text']}" for d in hits)
prompt = ("Answer the question using ONLY the context, and cite the [id] you used.\n"
f"Context:\n{context}\n\nQuestion: {question}\nAnswer:")
ids = gen_tok(prompt, return_tensors="pt", truncation=True).input_ids
with torch.no_grad():
out = gen_model.generate(ids, max_new_tokens=64)
return gen_tok.decode(out[0], skip_special_tokens=True), [d["id"] for d in hits]
if __name__ == "__main__":
for q in ["What does every effectful action in Matrix OS emit?",
"Qual e la capitale d'Italia?"]:
ans, sources = answer(q)
print(f"Q: {q}\nA: {ans}\n sources: {sources}\n")