FerrellSyntheticIntelligence
Fix vectorization speed (800x improvement) and fix deep cognition corruption
f8ddcab | import numpy as np | |
| from pathlib import Path | |
| import ast | |
| import hdc_engine | |
# Length of every hypervector; VitalisKernel copies this into ``self.dim``
# and uses it as the size of each token, position and bundle vector.
DIM = 10_000
class VitalisKernel:
    """Encode token streams and Python source as bipolar (+1/-1) hypervectors.

    Each distinct token is assigned a random int8 vector of length
    ``self.dim``. The assignment is kept in ``self.codebook`` and persisted
    under ``~/.vitalis_workspace`` so the same token maps to the same vector
    across runs. Binding of token and position vectors is delegated to
    ``hdc_engine.bind``, whose semantics are defined outside this file.
    """

    def __init__(self):
        """Set up workspace paths, load the bias and load the codebook."""
        self.dim = DIM
        workspace = Path.home() / ".vitalis_workspace"
        self.weights_path = workspace / "kernel.weights.npy"
        self.codebook_path = workspace / "codebook.npy"
        if self.weights_path.exists():
            self.bias = np.load(self.weights_path)
        else:
            # No saved weights yet: fall back to a zero bias.
            self.bias = np.array([0.0])
        # True while the in-memory codebook holds vectors not yet on disk.
        self._dirty = False
        self._load_codebook()

    def _load_codebook(self):
        """Populate ``self.codebook`` from disk, or start with an empty dict."""
        if self.codebook_path.exists():
            # NOTE(security): allow_pickle=True unpickles the file, which can
            # execute arbitrary code. Only safe while the workspace file is
            # written exclusively by this class and is not attacker-writable.
            self.codebook = np.load(self.codebook_path, allow_pickle=True).item()
        else:
            self.codebook = {}

    def _save_codebook(self):
        """Persist ``self.codebook`` to ``self.codebook_path`` and clear the dirty flag.

        The data is written to a sibling temporary file first and then moved
        over the real path, so an interrupted write cannot leave a truncated
        codebook behind (which would make the next load fail and lose the
        random token vectors).
        """
        self.codebook_path.parent.mkdir(parents=True, exist_ok=True)
        tmp_path = self.codebook_path.with_name(self.codebook_path.name + ".tmp")
        with open(tmp_path, "wb") as handle:
            np.save(handle, self.codebook)
        tmp_path.replace(self.codebook_path)
        self._dirty = False

    def _get_token_vector(self, token: str) -> np.ndarray:
        """Return the codebook vector for *token*, creating it on first use.

        A new token gets a random +1/-1 int8 vector of length ``self.dim``
        drawn from NumPy's global random state, and the codebook is marked
        dirty so it is saved later.
        """
        if token not in self.codebook:
            self.codebook[token] = np.random.choice(
                [-1, 1], size=self.dim
            ).astype(np.int8)
            self._dirty = True
        return self.codebook[token]

    def _get_position_vector(self, position: int) -> np.ndarray:
        """Return the +1/-1 int8 vector for *position*.

        The generator is seeded with the position itself, so the same
        position always yields the same vector without storing it.
        """
        rng = np.random.default_rng(seed=position)
        return rng.choice([-1, 1], size=self.dim).astype(np.int8)

    def vectorize_tokens(self, tokens: list, positional: bool = True) -> np.ndarray:
        """Bundle *tokens* into a single +1/-1 int8 vector of length ``self.dim``.

        Args:
            tokens: Items to encode; each is converted with ``str()``.
            positional: When true, each token vector is combined with the
                vector of its index via ``hdc_engine.bind`` before summing.

        Returns:
            The element-wise sign of the summed vectors, with ties (sum of
            zero) resolved to +1.

        Side effects:
            Saves the codebook to disk if any new token vector was created.
        """
        bundle = np.zeros(self.dim, dtype=np.int32)
        for index, token in enumerate(tokens):
            token_vec = self._get_token_vector(str(token))
            if positional:
                bound = hdc_engine.bind(token_vec, self._get_position_vector(index))
            else:
                bound = token_vec
            # Accumulate in int32 so many int8 vectors cannot overflow.
            bundle += bound.astype(np.int32)
        result = np.sign(bundle).astype(np.int8)
        result[result == 0] = 1
        if self._dirty:
            self._save_codebook()
        return result

    def vectorize_source(self, source_code: str) -> np.ndarray:
        """Tokenize *source_code* and return its positional bundle vector."""
        return self.vectorize_tokens(self._extract_tokens(source_code))

    def vectorize_file(self, file_path: str) -> np.ndarray:
        """Read *file_path* as UTF-8 text and vectorize its contents.

        Raises:
            FileNotFoundError: If *file_path* does not exist.
        """
        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"Source file not found: {file_path}")
        return self.vectorize_source(path.read_text(encoding="utf-8"))

    def _extract_tokens(self, source_code: str) -> list:
        """Turn *source_code* into a list of string tokens.

        Parsable Python yields prefixed tokens from the AST (``DEF:``,
        ``NAME:``, ``STR:`` truncated to 32 characters, ``IMPORT:``,
        ``FROM:``). Text that cannot be parsed falls back to whitespace
        splitting. An empty result is replaced by ``["EMPTY"]``.
        """
        try:
            tree = ast.parse(source_code)
        except (SyntaxError, ValueError):
            # ValueError covers sources with null bytes on Python < 3.12,
            # where ast.parse does not report them as SyntaxError.
            fallback = source_code.split()
            return fallback if fallback else ["EMPTY"]

        tokens = []
        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
                tokens.append(f"DEF:{node.name}")
            elif isinstance(node, ast.Name):
                tokens.append(f"NAME:{node.id}")
            elif isinstance(node, ast.Constant) and isinstance(node.value, str):
                tokens.append(f"STR:{node.value[:32]}")
            elif isinstance(node, ast.Import):
                tokens.extend(f"IMPORT:{alias.name}" for alias in node.names)
            elif isinstance(node, ast.ImportFrom):
                # ``from . import x`` has module None; use the relative dots
                # instead of emitting the misleading literal "FROM:None".
                module = node.module if node.module is not None else "." * node.level
                tokens.append(f"FROM:{module}")
        return tokens if tokens else ["EMPTY"]

    def similarity(self, vec_a: np.ndarray, vec_b: np.ndarray) -> float:
        """Return the cosine similarity of two vectors, or 0.0 if either has zero norm."""
        a = vec_a.astype(np.float32)
        b = vec_b.astype(np.float32)
        denom = np.linalg.norm(a) * np.linalg.norm(b)
        if denom == 0:
            return 0.0
        return float(np.dot(a, b) / denom)

    def matmul(self, a, b):
        """Return ``np.dot(a, b)`` with ``self.bias`` added."""
        return np.dot(a, b) + self.bias

    def activation(self, x):
        """Return the element-wise sign of *x* (zero stays zero)."""
        return np.sign(x)