File size: 771 Bytes
e12a049
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from __future__ import annotations

from hashlib import sha256
from pathlib import Path

from hackathon_advisor.data import ProjectIndex, normalize_vector, tokenize


def load_test_index() -> ProjectIndex:
    """Build the ``ProjectIndex`` used by the tests.

    Reads ``data/projects.json`` and ``data/project_index.json`` (both
    relative to the current working directory) via
    ``ProjectIndex.from_files`` and wires in ``test_query_embedder`` as the
    query embedder.
    """
    build_index = ProjectIndex.from_files
    projects_file = Path("data/projects.json")
    index_file = Path("data/project_index.json")
    return build_index(
        projects_file,
        index_file,
        query_embedder=test_query_embedder,
    )


def test_query_embedder(text: str) -> tuple[float, ...]:
    """Embed *text* with a deterministic, hash-based scheme.

    Each token produced by ``tokenize`` is hashed with SHA-256. The first two
    digest bytes (big-endian) select one of 768 buckets, and the parity of the
    third byte decides whether that bucket is incremented or decremented by
    one. The accumulated buckets are passed through ``normalize_vector`` and
    its result is returned.

    If every bucket ends up zero (no tokens, or contributions that cancel
    out), bucket 0 is set to 1.0 before normalisation so the vector handed to
    ``normalize_vector`` is never all zeros.
    """
    dims = 768
    buckets = [0.0] * dims

    for word in tokenize(text):
        hashed = sha256(word.encode("utf-8")).digest()
        # Big-endian 16-bit integer from the first two digest bytes.
        slot = ((hashed[0] << 8) | hashed[1]) % dims
        if hashed[2] & 1:
            # Odd third byte: negative contribution.
            buckets[slot] -= 1.0
        else:
            # Even third byte: positive contribution.
            buckets[slot] += 1.0

    if all(component == 0.0 for component in buckets):
        buckets[0] = 1.0

    return normalize_vector(buckets)