Create core/index.py
Browse files- core/index.py +17 -0
core/index.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, json, numpy as np
|
| 2 |
+
from core.vector import embed
|
| 3 |
+
|
| 4 |
+
def load_docs(base="data/demo"):
    """Load every document listed in ``metadata.json`` and embed its text.

    Reads ``<base>/metadata.json``, which is iterated as a mapping of
    document file name -> metadata mapping, then reads each file from
    ``<base>/documents/<file name>`` and builds one record per document.

    Args:
        base: Directory that contains ``metadata.json`` and the
            ``documents`` sub-directory.

    Returns:
        A list of dicts, one per metadata entry, in the metadata's
        iteration order. Each dict has the keys ``"id"`` (the file name),
        ``"text"`` (the file contents), ``"vec"`` (whatever
        ``embed(text)`` returns), ``"agency"``, ``"year"`` and
        ``"program"`` (``"Unknown"`` when the metadata entry has no
        ``"program"`` key).

    Raises:
        OSError: If ``metadata.json`` or a listed document cannot be opened.
        json.JSONDecodeError: If ``metadata.json`` is not valid JSON.
        KeyError: If a metadata entry lacks ``"agency"`` or ``"year"``.
    """
    meta_path = os.path.join(base, "metadata.json")
    # Context managers close the handles deterministically; the explicit
    # UTF-8 encoding matches how the documents themselves are read instead
    # of depending on the platform's default locale encoding.
    with open(meta_path, encoding="utf-8") as meta_file:
        meta = json.load(meta_file)

    docs = []
    for fname, m in meta.items():
        doc_path = os.path.join(base, "documents", fname)
        with open(doc_path, encoding="utf-8") as doc_file:
            text = doc_file.read()
        docs.append({
            "id": fname,
            "text": text,
            "vec": embed(text),
            "agency": m["agency"],
            "year": m["year"],
            # "program" is optional in the metadata; fall back to a label.
            "program": m.get("program", "Unknown"),
        })
    return docs
|