import json
import os
from typing import List, Optional, TypedDict
|
|
# Location of the JSON index that search_docs() loads. The path is relative,
# so it is resolved against the process's current working directory.
DATA_PATH = "data/index/demo_index.json"
|
|
class SearchResult(TypedDict):
    """Shape of a single hit returned by ``search_docs``.

    The first five fields are copied verbatim from the matching index entry;
    ``score`` is computed at query time.
    """

    document: str  # entry's "document" value
    agency: str  # entry's "agency" value
    date: str  # entry's "date" value, kept as the string stored in the index
    excerpt: str  # entry's "excerpt" value; this is the text that was searched
    citation: str  # entry's "citation" value
    score: float  # query occurrences / excerpt length, rounded to 4 places
|
|
|
|
def search_docs(
    query: str, *, data_path: Optional[str] = None
) -> "List[SearchResult]":
    """Return index entries whose excerpt contains *query*, best match first.

    Matching is a case-insensitive substring test against each entry's
    ``"excerpt"``. The score is the number of occurrences of the query in
    the lowercased excerpt divided by the excerpt length, rounded to four
    decimal places.

    Args:
        query: Text to look for. An empty or whitespace-only query yields
            no results.
        data_path: Path of the JSON index to search. Defaults to the
            module-level ``DATA_PATH`` when omitted.

    Returns:
        Matching entries sorted by descending score; entries with equal
        scores keep their index order. An empty list is returned when the
        query is blank or the index file does not exist.

    Raises:
        json.JSONDecodeError: If the index file is not valid JSON.
        KeyError: If a matching entry lacks one of the expected fields
            (``document``, ``agency``, ``date``, ``excerpt``, ``citation``).
    """
    query_l = query.lower()
    results: List[SearchResult] = []

    # "" is a substring of every string and "abc".count("") == 4, so a blank
    # query would match every document with a score above 1.0.
    if not query_l.strip():
        return results

    path = DATA_PATH if data_path is None else data_path
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(path, "r", encoding="utf-8") as f:
            docs = json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        # A missing index means "nothing to search", not an error.
        return results

    for d in docs:
        text = d["excerpt"].lower()
        if query_l not in text:
            continue
        # A non-empty query matched, so ``text`` cannot be empty here.
        score = text.count(query_l) / len(text)
        results.append({
            "document": d["document"],
            "agency": d["agency"],
            "date": d["date"],
            "excerpt": d["excerpt"],
            "citation": d["citation"],
            "score": round(score, 4),
        })

    # sorted() is stable, so ties stay in index order even with reverse=True.
    return sorted(results, key=lambda x: x["score"], reverse=True)