File size: 3,763 Bytes
29cdc9d
 
8663f54
 
 
 
29cdc9d
 
 
8663f54
29cdc9d
8663f54
29cdc9d
f8ddcab
8663f54
 
 
 
 
 
 
 
 
 
 
f8ddcab
8663f54
 
 
c3e2cd8
 
 
f8ddcab
8663f54
 
 
 
 
 
c3e2cd8
8663f54
 
c3e2cd8
fa6e2ea
 
 
 
c3e2cd8
 
8663f54
 
f8ddcab
 
8663f54
 
 
 
c3e2cd8
8663f54
 
 
 
 
fa6e2ea
8663f54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29cdc9d
 
 
8663f54
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import numpy as np
from pathlib import Path
import ast
import hdc_engine

# Length of every vector produced by VitalisKernel (copied into ``self.dim``).
DIM = 10_000

class VitalisKernel:
    """Encode token sequences and Python source as bipolar (+1/-1) int8 vectors.

    Each distinct token is assigned a random vector of length ``dim`` that is
    stored in ``codebook`` and persisted to ``codebook_path`` so the same token
    maps to the same vector on later runs. Position vectors are derived
    deterministically from the position index and are not stored.

    Attributes are plain instance fields:
        dim: length of every vector.
        weights_path / codebook_path: files under ``~/.vitalis_workspace``.
        bias: array loaded from ``weights_path``, or ``[0.0]`` if absent.
        codebook: dict mapping token string -> int8 vector.
    """

    def __init__(self):
        """Resolve workspace paths and load the bias and codebook from disk."""
        self.dim = DIM
        self.weights_path = Path.home() / ".vitalis_workspace" / "kernel.weights.npy"
        self.codebook_path = Path.home() / ".vitalis_workspace" / "codebook.npy"
        # Fall back to a zero bias when no weights file has been written yet.
        self.bias = np.load(self.weights_path) if self.weights_path.exists() else np.array([0.0])
        # True when the in-memory codebook has entries not yet saved to disk.
        self._dirty = False
        self._load_codebook()

    def _load_codebook(self):
        """Load the token codebook from ``codebook_path``, or start empty."""
        if self.codebook_path.exists():
            # NOTE(security): the codebook is a dict stored as a pickled
            # 0-d object array, so allow_pickle=True is required to read it.
            # Unpickling executes arbitrary code; only load a codebook file
            # that this process (or a trusted one) wrote.
            self.codebook = np.load(self.codebook_path, allow_pickle=True).item()
        else:
            self.codebook = {}

    def _save_codebook(self):
        """Write the codebook to ``codebook_path`` and clear the dirty flag."""
        self.codebook_path.parent.mkdir(parents=True, exist_ok=True)
        np.save(self.codebook_path, self.codebook)
        self._dirty = False

    def _get_token_vector(self, token: str) -> np.ndarray:
        """Return the codebook vector for ``token``, creating it if missing.

        New vectors are drawn from NumPy's global random state, so they are
        only stable across runs because the codebook is persisted.
        """
        if token not in self.codebook:
            self.codebook[token] = np.random.choice(
                [-1, 1], size=self.dim
            ).astype(np.int8)
            self._dirty = True
        return self.codebook[token]

    def _get_position_vector(self, position: int) -> np.ndarray:
        """Return the +1/-1 int8 vector for ``position``.

        The generator is seeded with the position itself, so the same index
        always yields the same vector without any stored state.
        """
        rng = np.random.default_rng(seed=position)
        return rng.choice([-1, 1], size=self.dim).astype(np.int8)

    def vectorize_tokens(self, tokens: list, positional: bool = True) -> np.ndarray:
        """Bundle ``tokens`` into a single +1/-1 int8 vector of length ``dim``.

        Args:
            tokens: items to encode; each is converted with ``str()``.
            positional: when True, each token vector is combined with its
                position vector via ``hdc_engine.bind`` before summing.

        Returns:
            The element-wise sign of the summed vectors, with ties (zeros)
            resolved to +1. An empty ``tokens`` list yields all ones.

        Side effects:
            Saves the codebook to disk if any new token vector was created.
        """
        # Accumulate in int32 so summing many int8 vectors cannot overflow.
        bundle = np.zeros(self.dim, dtype=np.int32)
        for i, token in enumerate(tokens):
            token_vec = self._get_token_vector(str(token))
            if positional:
                pos_vec = self._get_position_vector(i)
                # NOTE(review): bind() is defined in hdc_engine; presumably an
                # element-wise binding of the two vectors -- confirm there.
                bound = hdc_engine.bind(token_vec, pos_vec)
            else:
                bound = token_vec
            bundle += bound.astype(np.int32)
        result = np.sign(bundle).astype(np.int8)
        # np.sign gives 0 on ties; force a valid bipolar value.
        result[result == 0] = 1
        if self._dirty:
            self._save_codebook()
        return result

    def vectorize_source(self, source_code: str) -> np.ndarray:
        """Tokenize ``source_code`` and return its positional bundle vector."""
        tokens = self._extract_tokens(source_code)
        return self.vectorize_tokens(tokens)

    def vectorize_file(self, file_path: str) -> np.ndarray:
        """Read ``file_path`` as UTF-8 and vectorize its contents.

        Raises:
            FileNotFoundError: if ``file_path`` does not exist.
        """
        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"Source file not found: {file_path}")
        return self.vectorize_source(path.read_text(encoding="utf-8"))

    def _extract_tokens(self, source_code: str) -> list:
        """Turn source text into a list of tagged tokens.

        Python source is parsed with ``ast`` and walked in ``ast.walk`` order,
        emitting ``DEF:``, ``NAME:``, ``STR:`` (first 32 characters),
        ``IMPORT:`` and ``FROM:`` tokens. Text that cannot be parsed falls
        back to whitespace splitting.

        Returns:
            The token list, or ``["EMPTY"]`` when nothing was extracted.
        """
        try:
            tree = ast.parse(source_code)
        except (SyntaxError, ValueError):
            # ValueError: older Python versions reject null bytes in source
            # with ValueError instead of SyntaxError; treat both as "not
            # parseable Python" and fall back to plain whitespace tokens.
            fallback = source_code.split()
            return fallback if fallback else ["EMPTY"]

        tokens = []
        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
                tokens.append(f"DEF:{node.name}")
            elif isinstance(node, ast.Name):
                tokens.append(f"NAME:{node.id}")
            elif isinstance(node, ast.Constant) and isinstance(node.value, str):
                tokens.append(f"STR:{node.value[:32]}")
            elif isinstance(node, ast.Import):
                tokens.extend(f"IMPORT:{alias.name}" for alias in node.names)
            elif isinstance(node, ast.ImportFrom):
                # ``from . import x`` has module=None; use the relative-level
                # dots instead of emitting the literal token "FROM:None".
                module = node.module
                if module is None:
                    module = "." * (node.level or 0)
                tokens.append(f"FROM:{module}")
        return tokens if tokens else ["EMPTY"]

    def similarity(self, vec_a: np.ndarray, vec_b: np.ndarray) -> float:
        """Return the cosine similarity of two vectors (0.0 if either is zero)."""
        a = vec_a.astype(np.float32)
        b = vec_b.astype(np.float32)
        denom = np.linalg.norm(a) * np.linalg.norm(b)
        if denom == 0:
            return 0.0
        return float(np.dot(a, b) / denom)

    def matmul(self, a, b):
        """Return ``np.dot(a, b)`` with the loaded bias added."""
        return np.dot(a, b) + self.bias

    def activation(self, x):
        """Return the element-wise sign of ``x`` (zeros stay zero)."""
        return np.sign(x)