File size: 263 Bytes
4a10357
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
import re
import zlib

import numpy as np

DIM = 512  # number of hash buckets, i.e. the length of every embedding vector


def embed(text: str) -> np.ndarray:
    """Return an L2-normalised hashed bag-of-words vector for *text*.

    The text is lower-cased and split into tokens made of two or more ASCII
    letters. Each token is hashed into one of ``DIM`` buckets and the bucket
    is incremented by one per occurrence; different tokens may collide in
    the same bucket. The count vector is then scaled to unit Euclidean
    length.

    Args:
        text: Input string to embed.

    Returns:
        A ``float32`` array of shape ``(DIM,)``. It has unit L2 norm when at
        least one token was found, and is all zeros otherwise.
    """
    v = np.zeros(DIM, dtype=np.float32)
    for w in re.findall(r"[a-zA-Z]{2,}", text.lower()):
        # The built-in hash() is salted per interpreter process for str
        # (PYTHONHASHSEED), so it would place the same word in different
        # buckets on different runs. CRC32 is stable across processes,
        # which keeps embeddings comparable after being stored or shared.
        v[zlib.crc32(w.encode("utf-8")) % DIM] += 1.0
    n = np.linalg.norm(v)
    # Skip the division for token-free input to avoid dividing by zero.
    return v / n if n > 0 else v