Spaces:
Sleeping
Sleeping
| from typing import List, TypedDict | |
| import json | |
| import os | |
# Location of the JSON index file that search_docs() opens on every call.
# The path is relative, so it resolves against the process's current
# working directory.
DATA_PATH: str = "data/index/demo_index.json"
class SearchResult(TypedDict):
    """One search hit, in the shape returned by ``search_docs``.

    Every key except ``score`` is copied unchanged from the same-named key
    of the matching index entry; ``score`` is computed at search time.
    """

    # Value of the index entry's "document" key.
    document: str
    # Value of the index entry's "agency" key.
    agency: str
    # Value of the index entry's "date" key; kept as a string, the format
    # is whatever the index file stores.
    date: str
    # The text that was searched, in its original (not lower-cased) form.
    excerpt: str
    # Value of the index entry's "citation" key.
    citation: str
    # Occurrences of the query in the excerpt divided by the excerpt
    # length, rounded to 4 decimal places.
    score: float
def search_docs(query: str) -> List[SearchResult]:
    """Return index entries whose excerpt contains *query*, best match first.

    The index is the JSON file at ``DATA_PATH``, re-read on every call.
    Matching is a case-insensitive substring test against each entry's
    ``"excerpt"``. An entry's score is the number of non-overlapping
    occurrences of the query in the excerpt divided by the excerpt length,
    reported rounded to 4 decimal places.

    Args:
        query: Text to look for. A query that is empty or only whitespace
            matches nothing.

    Returns:
        Matching entries ordered by descending score (ties keep index
        order). An empty list when the query is blank, the index file is
        missing, or nothing matches.

    Raises:
        json.JSONDecodeError: If the index file exists but is not valid JSON.
    """
    query_l = query.lower()
    # "" is a substring of every string and str.count("") returns
    # len(text) + 1, so a blank query would "match" every entry with a
    # score above 1. Treat it as no query at all.
    if not query_l.strip():
        return []

    # Open directly instead of checking os.path.exists() first: the file can
    # disappear between the check and the open. UTF-8 is given explicitly so
    # decoding does not depend on the platform's locale encoding.
    try:
        with open(DATA_PATH, "r", encoding="utf-8") as f:
            docs = json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        return []

    required = ("document", "agency", "date", "excerpt", "citation")
    ranked = []
    for d in docs:
        # One malformed entry must not abort the whole search; skip anything
        # that cannot be turned into a complete SearchResult.
        if not isinstance(d, dict) or any(key not in d for key in required):
            continue
        excerpt = d["excerpt"]
        if not isinstance(excerpt, str):
            continue

        text = excerpt.lower()
        hits = text.count(query_l)
        if not hits:
            continue

        # text contains the non-empty query, so len(text) is at least 1.
        raw_score = hits / len(text)
        result: SearchResult = {
            "document": d["document"],
            "agency": d["agency"],
            "date": d["date"],
            "excerpt": excerpt,
            "citation": d["citation"],
            "score": round(raw_score, 4),
        }
        ranked.append((raw_score, result))

    # Rank on the unrounded score: rounding to 4 places collapses long
    # excerpts into ties (often 0.0) and would lose their relative order.
    # Rounding is monotonic, so the reported scores are still non-increasing.
    ranked.sort(key=lambda pair: pair[0], reverse=True)
    return [result for _, result in ranked]