# Source-viewer header (not code): original file size 263 Bytes; id 4a10357.
import re
import zlib

import numpy as np
# Number of hash buckets, i.e. the length of every vector returned by embed().
DIM = 512

# A "word" is a run of two or more ASCII letters; compiled once at import time.
_WORD_RE = re.compile(r"[a-zA-Z]{2,}")


def embed(text: str) -> np.ndarray:
    """Return a hashed bag-of-words vector for *text*, scaled to unit length.

    The text is lower-cased and split into runs of two or more ASCII
    letters. Each word increments one of ``DIM`` buckets chosen by a
    CRC-32 of the word, and the resulting count vector is divided by its
    Euclidean norm.

    Args:
        text: Input string. Digits, punctuation, single letters and
            non-ASCII characters do not contribute any words.

    Returns:
        A ``float32`` array of shape ``(DIM,)``. It has unit L2 norm when
        at least one word was found, and is all zeros otherwise.
    """
    v = np.zeros(DIM, dtype=np.float32)
    for w in _WORD_RE.findall(text.lower()):
        # Built-in hash() of a str is salted per process (PYTHONHASHSEED),
        # which would make embeddings differ between runs. CRC-32 gives the
        # same bucket for the same word every time.
        v[zlib.crc32(w.encode("utf-8")) % DIM] += 1.0
    n = np.linalg.norm(v)
    # Guard against division by zero when the text contained no words.
    return v / n if n > 0 else v