File size: 538 Bytes
5830944
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
import os, json, numpy as np
from core.vector import embed

def load_docs(base="data/demo"):
    """Load every document listed in ``<base>/metadata.json`` and embed it.

    ``metadata.json`` is read as a mapping of file name -> metadata mapping.
    For each entry the file ``<base>/documents/<file name>`` is read as
    UTF-8 text and passed to ``embed``.

    Args:
        base: Directory containing ``metadata.json`` and a ``documents``
            sub-directory.

    Returns:
        A list with one dict per metadata entry, in metadata order, with the
        keys ``id`` (the file name), ``text``, ``vec`` (the result of
        ``embed(text)``), ``agency``, ``year`` and ``program``. ``program``
        falls back to ``"Unknown"`` when the metadata entry has none.

    Raises:
        FileNotFoundError: If ``metadata.json`` or a listed document is
            missing.
        json.JSONDecodeError: If ``metadata.json`` is not valid JSON.
        KeyError: If a metadata entry lacks ``agency`` or ``year``.
    """
    # Use context managers so file handles are closed deterministically
    # instead of being left for the garbage collector.
    # The metadata is read as UTF-8 to match how the documents are read,
    # rather than depending on the platform's locale encoding.
    with open(os.path.join(base, "metadata.json"), encoding="utf-8") as fh:
        meta = json.load(fh)

    docs_dir = os.path.join(base, "documents")
    docs = []
    for fname, m in meta.items():
        with open(os.path.join(docs_dir, fname), encoding="utf-8") as fh:
            text = fh.read()
        docs.append({
            "id": fname,
            "text": text,
            "vec": embed(text),
            "agency": m["agency"],
            "year": m["year"],
            "program": m.get("program", "Unknown"),
        })
    return docs