FOIA_Doc_Search / vector.py
GodsDevProject's picture
Upload 98 files
bb0b469 verified
raw
history blame
263 Bytes
import re
import zlib

import numpy as np
DIM = 512  # number of hash buckets, i.e. the length of every embedding vector

# Compiled once at import time; matches runs of two or more ASCII letters.
_WORD_RE = re.compile(r"[a-zA-Z]{2,}")


def embed(text: str) -> np.ndarray:
    """Return a hashed bag-of-words embedding of *text*.

    The text is lower-cased and split into runs of two or more ASCII
    letters. Each token increments one of ``DIM`` buckets chosen by a
    CRC-32 of the token, and the resulting count vector is scaled to unit
    Euclidean length.

    Args:
        text: The string to embed.

    Returns:
        A ``float32`` array of shape ``(DIM,)``. It has unit L2 norm when
        at least one token was found, and is all zeros otherwise.
    """
    v = np.zeros(DIM, dtype=np.float32)
    for w in _WORD_RE.findall(text.lower()):
        # Built-in hash() of a str is salted per interpreter process, which
        # would put the same word in a different bucket on every run.
        # CRC-32 is deterministic, so vectors are comparable across runs.
        v[zlib.crc32(w.encode("utf-8")) % DIM] += 1.0
    n = np.linalg.norm(v)
    # Leave the zero vector as-is instead of dividing by zero.
    return v / n if n > 0 else v