grapheneaffiliates committed
Commit 9095704 · verified · 1 Parent(s): f1ac798

Upload python/utils/phi_positional.py with huggingface_hub

Files changed (1):
  1. python/utils/phi_positional.py +115 -0
python/utils/phi_positional.py ADDED
@@ -0,0 +1,115 @@
"""
Golden-angle positional encoding using the maximally irrational φ⁻¹ spacing.

Position n gets angle n × 2π × φ⁻¹ on a golden-angle spiral in d_model dimensions.
This guarantees well-separated, non-repeating position vectors for any sequence length.
Long-range positions compress via Zeckendorf decomposition (Fibonacci-based representation).
"""

import math

import torch
import torch.nn as nn

PHI = (1 + math.sqrt(5)) / 2
PHI_INV = 1.0 / PHI  # φ⁻¹ ≈ 0.618...
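
# Why the golden angle works: the fractional parts of n·φ⁻¹ never repeat and,
# by the three-distance theorem, stay as evenly spread as an irrational
# rotation allows. A quick illustration for the first few positions:
#
#     n    n·φ⁻¹ mod 1
#     0    0.000
#     1    0.618
#     2    0.236
#     3    0.854
#     4    0.472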


def _zeckendorf(n: int):
    """Represent n as a sum of non-consecutive Fibonacci numbers."""
    if n <= 0:
        return []
    fibs = [1, 2]
    while fibs[-1] <= n:
        fibs.append(fibs[-1] + fibs[-2])
    terms = []
    remaining = n
    for f in reversed(fibs):
        if f <= remaining:
            terms.append(f)
            remaining -= f
            if remaining == 0:
                break
    return terms
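
# Zeckendorf's theorem: every positive integer has exactly one representation
# with no two consecutive Fibonacci numbers, and the greedy pass above finds
# it. For example:
#
#     >>> _zeckendorf(100)
#     [89, 8, 3]
#     >>> _zeckendorf(12)
#     [8, 3, 1]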


class PhiPositionalEncoding(nn.Module):
    """
    Golden-angle spiral positional encoding.

    Each position n maps to d_model dimensions via pairs of (cos, sin) at
    golden-angle frequencies. The base angle is n × 2π × φ⁻¹, with each
    dimension pair using a different frequency scale based on φ powers.

    For positions beyond max_cached, Zeckendorf decomposition provides
    logarithmic-cost encoding by summing cached Fibonacci-indexed embeddings.
    """

    def __init__(self, d_model: int, max_cached: int = 8192):
        super().__init__()
        self.d_model = d_model
        self.max_cached = max_cached
        n_pairs = d_model // 2
        has_odd = d_model % 2 == 1

        # Precompute frequency scales: φ^(-k/n_pairs) for k in [0, n_pairs).
        # This gives geometrically spaced frequencies anchored to the golden ratio.
        freq_scales = torch.tensor(
            [PHI ** (-k / n_pairs) for k in range(n_pairs)],
            dtype=torch.float32,
        )
        self.register_buffer('freq_scales', freq_scales)

        # Precompute position embeddings for [0, max_cached).
        positions = torch.arange(max_cached, dtype=torch.float32)
        # Base angle: position × 2π × φ⁻¹.
        base_angles = positions * (2 * math.pi * PHI_INV)  # (max_cached,)
        # Scale by frequency for each pair.
        angles = base_angles.unsqueeze(1) * freq_scales.unsqueeze(0)  # (max_cached, n_pairs)

        pe = torch.zeros(max_cached, d_model)
        pe[:, 0:2 * n_pairs:2] = torch.cos(angles)
        pe[:, 1:2 * n_pairs:2] = torch.sin(angles)
        if has_odd:
            # Odd d_model leaves one unpaired trailing dimension; fill it with
            # the unscaled base angle's cosine.
            pe[:, -1] = torch.cos(base_angles)
        # Normalize to unit norm for consistency with S³ geometry.
        pe = pe / (pe.norm(dim=1, keepdim=True) + 1e-8)
        self.register_buffer('pe', pe)

        # Cache Fibonacci numbers for Zeckendorf decomposition (stored as a
        # buffer so it serializes with the module; _zeckendorf regenerates the
        # sequence independently).
        fibs = [1, 2]
        while fibs[-1] < max_cached * 10:
            fibs.append(fibs[-1] + fibs[-2])
        self.register_buffer('_fibs', torch.tensor(fibs, dtype=torch.long))
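
    # For example, with d_model = 8 the four pair frequencies are
    # φ^0, φ^(-1/4), φ^(-1/2), φ^(-3/4) ≈ 1.000, 0.887, 0.786, 0.697,
    # a geometric ladder with common ratio φ^(-1/4).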

    def forward(self, seq_len: int, offset: int = 0) -> torch.Tensor:
        """
        Returns positional encoding of shape (seq_len, d_model).
        For positions < max_cached, uses the precomputed table.
        For positions >= max_cached, uses Zeckendorf decomposition.
        """
        if offset + seq_len <= self.max_cached:
            return self.pe[offset:offset + seq_len]

        pe_out = torch.zeros(seq_len, self.d_model, device=self.pe.device)
        for i in range(seq_len):
            pos = offset + i
            if pos < self.max_cached:
                pe_out[i] = self.pe[pos]
            else:
                # Zeckendorf: sum embeddings at Fibonacci indices, then renormalize.
                terms = _zeckendorf(pos)
                emb = torch.zeros(self.d_model, device=self.pe.device)
                for fib_val in terms:
                    # Fibonacci values beyond the cache clamp to the last cached
                    # row, so very distant positions share that component.
                    idx = min(fib_val, self.max_cached - 1)
                    emb = emb + self.pe[idx]
                pe_out[i] = emb / (emb.norm() + 1e-8)
        return pe_out
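
    # For instance, position 1_000_000 decomposes as
    # 832040 + 121393 + 46368 + 144 + 55, so the out-of-cache branch sums only
    # O(log n) cached rows per position.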

    def encode_position(self, position: int) -> torch.Tensor:
        """Encode a single position. Returns a (d_model,) tensor."""
        if position < self.max_cached:
            return self.pe[position]
        terms = _zeckendorf(position)
        emb = torch.zeros(self.d_model, device=self.pe.device)
        for fib_val in terms:
            idx = min(fib_val, self.max_cached - 1)
            emb = emb + self.pe[idx]
        return emb / (emb.norm() + 1e-8)
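
A minimal usage sketch (the import path assumes this repo's python/utils/ layout; shapes and sizes are illustrative):

    import torch
    from python.utils.phi_positional import PhiPositionalEncoding

    enc = PhiPositionalEncoding(d_model=512, max_cached=8192)

    x = torch.randn(2, 128, 512)           # (batch, seq, d_model) token embeddings
    pe = enc(seq_len=128)                  # (128, 512), served from the cached table
    x = x + pe.unsqueeze(0)                # broadcast positions over the batch

    far = enc.encode_position(1_000_000)   # Zeckendorf path; unit-norm (512,) vector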