GodsDevProject's picture
Upload 20 files
5830944 verified
raw
history blame contribute delete
538 Bytes
import os, json, numpy as np
from core.vector import embed
def load_docs(base="data/demo"):
    """Load every document listed in ``metadata.json`` under *base*.

    ``<base>/metadata.json`` is parsed as a JSON object whose keys are file
    names and whose values are per-document metadata objects. For each
    entry, ``<base>/documents/<file name>`` is read as UTF-8 text and the
    text is passed to ``embed``.

    Args:
        base: Directory holding ``metadata.json`` and a ``documents``
            sub-directory.

    Returns:
        A list with one dict per metadata entry, in the iteration order of
        the metadata mapping. Each dict has the keys ``id`` (the file name),
        ``text``, ``vec`` (whatever ``embed(text)`` returns -- presumably an
        embedding vector; confirm against ``core.vector``), ``agency``,
        ``year`` and ``program`` (``"Unknown"`` when the entry has no
        ``program`` key).

    Raises:
        OSError: If the metadata file or a listed document cannot be opened.
        KeyError: If a metadata entry lacks ``agency`` or ``year``.
    """
    # Context managers close each handle deterministically instead of
    # leaving it to the garbage collector. The metadata is read as UTF-8
    # (the JSON interchange encoding) so it does not depend on the locale.
    meta_path = os.path.join(base, "metadata.json")
    with open(meta_path, encoding="utf-8") as meta_file:
        meta = json.load(meta_file)

    docs_dir = os.path.join(base, "documents")
    docs = []
    for fname, m in meta.items():
        with open(os.path.join(docs_dir, fname), encoding="utf-8") as doc_file:
            text = doc_file.read()
        docs.append({
            "id": fname,
            "text": text,
            "vec": embed(text),
            "agency": m["agency"],
            "year": m["year"],
            "program": m.get("program", "Unknown"),
        })
    return docs