File size: 973 Bytes
5830944
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from typing import List, TypedDict
import json
import os

# Location of the JSON index file that search_docs() loads on every call.
# The path is relative, so it is resolved against the process's current
# working directory rather than this module's location.
DATA_PATH = "data/index/demo_index.json"

class SearchResult(TypedDict):
    """Shape of one hit returned by ``search_docs``.

    Every field except ``score`` is copied unchanged from the matching index
    entry's key of the same name; ``score`` is computed by the search itself.
    """
    # Value of the index entry's "document" key.
    document: str
    # Value of the index entry's "agency" key
    # (presumably the issuing agency -- confirm against the index schema).
    agency: str
    # Value of the index entry's "date" key; kept as the stored string,
    # it is neither parsed nor validated by the search.
    date: str
    # The text that was searched; returned in its original casing.
    excerpt: str
    # Value of the index entry's "citation" key.
    citation: str
    # Occurrences of the lower-cased query in the lower-cased excerpt divided
    # by the excerpt's length in characters, rounded to 4 decimal places.
    score: float


def search_docs(query: str) -> List[SearchResult]:
    """Return index entries whose excerpt contains *query*, best match first.

    Matching is a case-insensitive substring test against each entry's
    ``excerpt``. An entry's score is the number of non-overlapping
    occurrences of the query in the excerpt divided by the excerpt's length
    in characters, rounded to four decimal places.

    Args:
        query: Text to look for.

    Returns:
        The matching entries sorted by descending score; entries with equal
        scores keep the order they have in the index. The list is empty when
        ``query`` is empty or when the index file at ``DATA_PATH`` does not
        exist.

    Raises:
        KeyError: If an entry has no ``excerpt`` key, or a matching entry
            lacks one of the other copied keys.
        json.JSONDecodeError: If the index file is not valid JSON.
    """
    query_l = query.lower()

    # An empty string is a substring of every excerpt and str.count("")
    # returns len + 1, so an empty query used to "match" the whole index with
    # scores above 1.0. Treat it as "nothing to search for" instead.
    if not query_l:
        return []

    # Open first and handle absence, rather than checking os.path.exists()
    # beforehand: the file could disappear between the check and the open.
    # JSON is UTF-8 by specification, so do not rely on the locale default.
    try:
        with open(DATA_PATH, "r", encoding="utf-8") as f:
            docs = json.load(f)
    except FileNotFoundError:
        return []

    results: List[SearchResult] = []
    for d in docs:
        text = d["excerpt"].lower()
        # One scan serves as both the membership test and the hit count.
        hits = text.count(query_l)
        if not hits:
            continue
        # text cannot be empty here: it contains the non-empty query.
        score = hits / len(text)
        results.append({
            "document": d["document"],
            "agency": d["agency"],
            "date": d["date"],
            "excerpt": d["excerpt"],
            "citation": d["citation"],
            "score": round(score, 4),
        })

    # sorted() is stable, so ties keep their index order.
    return sorted(results, key=lambda x: x["score"], reverse=True)