| import numpy as np | |
| import re | |
# Number of hash buckets, i.e. the length of every embedding vector.
DIM = 512


def embed(text: str) -> np.ndarray:
    """Return a hashed bag-of-words embedding of *text*.

    The text is lower-cased and split into tokens made of two or more
    consecutive ASCII letters. Each token increments one of ``DIM`` buckets
    and the resulting count vector is scaled to unit Euclidean length.

    Buckets are selected with CRC-32 rather than the built-in ``hash()``:
    ``hash()`` of a ``str`` is salted per interpreter process, which would
    make the embedding of the same text differ from one run to the next.

    Args:
        text: Input string to embed.

    Returns:
        A ``float32`` array of shape ``(DIM,)``. It has unit L2 norm when at
        least one token was found, and is all zeros otherwise.
    """
    import zlib  # local import: keeps this block self-contained

    v = np.zeros(DIM, dtype=np.float32)
    for w in re.findall(r"[a-zA-Z]{2,}", text.lower()):
        # Stable across processes and machines, unlike hash(w).
        v[zlib.crc32(w.encode("utf-8")) % DIM] += 1.0
    n = np.linalg.norm(v)
    # Leave the zero vector untouched to avoid dividing by zero.
    return v / n if n > 0 else v
| import numpy as np | |
| import re | |
# Number of hash buckets, i.e. the length of every embedding vector.
DIM = 512


def embed(text: str) -> np.ndarray:
    """Return a hashed bag-of-words embedding of *text*.

    The text is lower-cased and split into tokens made of two or more
    consecutive ASCII letters. Each token increments one of ``DIM`` buckets
    and the resulting count vector is scaled to unit Euclidean length.

    Buckets are selected with CRC-32 rather than the built-in ``hash()``:
    ``hash()`` of a ``str`` is salted per interpreter process, which would
    make the embedding of the same text differ from one run to the next.

    Args:
        text: Input string to embed.

    Returns:
        A ``float32`` array of shape ``(DIM,)``. It has unit L2 norm when at
        least one token was found, and is all zeros otherwise.
    """
    import zlib  # local import: keeps this block self-contained

    v = np.zeros(DIM, dtype=np.float32)
    for w in re.findall(r"[a-zA-Z]{2,}", text.lower()):
        # Stable across processes and machines, unlike hash(w).
        v[zlib.crc32(w.encode("utf-8")) % DIM] += 1.0
    n = np.linalg.norm(v)
    # Leave the zero vector untouched to avoid dividing by zero.
    return v / n if n > 0 else v