Upload folder using huggingface_hub
Browse files- README.md +167 -0
- config.json +9 -0
- create_safetensors.py +79 -0
- model.py +54 -0
- model.safetensors +3 -0
README.md
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
tags:
|
| 4 |
+
- pytorch
|
| 5 |
+
- safetensors
|
| 6 |
+
- threshold-logic
|
| 7 |
+
- neuromorphic
|
| 8 |
+
- arithmetic
|
| 9 |
+
- multiplier
|
| 10 |
+
- compressor
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
# threshold-4to2-compressor
|
| 14 |
+
|
| 15 |
+
4:2 compressor for high-speed multiplier trees. Reduces 4 input bits plus carry-in to 2 output bits plus carry-out while preserving arithmetic value.
|
| 16 |
+
|
| 17 |
+
## Circuit
|
| 18 |
+
|
| 19 |
+
```
|
| 20 |
+
x y z w cin
|
| 21 |
+
│ │ │ │ │
|
| 22 |
+
└──┬───┴──┬───┴──┬───┘ │
|
| 23 |
+
│ │ │ │
|
| 24 |
+
▼ │ │ │
|
| 25 |
+
┌─────┐ │ │ │
|
| 26 |
+
│XOR │ │ │ │
|
| 27 |
+
│(x,y)│ │ │ │
|
| 28 |
+
└──┬──┘ │ │ │
|
| 29 |
+
│ │ │ │
|
| 30 |
+
▼ ▼ │ │
|
| 31 |
+
┌─────────────┐ │ │
|
| 32 |
+
│ XOR(xy,z) │ │ │
|
| 33 |
+
└──────┬──────┘ │ │
|
| 34 |
+
│ │ │
|
| 35 |
+
▼ ▼ │
|
| 36 |
+
┌──────────────┐ │
|
| 37 |
+
│ XOR(xyz,w) │ │
|
| 38 |
+
└──────┬───────┘ │
|
| 39 |
+
│ │
|
| 40 |
+
▼ ▼
|
| 41 |
+
┌─────────────────────┐
|
| 42 |
+
│ XOR(xyzw, cin) │───► Sum
|
| 43 |
+
└─────────────────────┘
|
| 44 |
+
|
| 45 |
+
cout = MAJ(x,y,z) (independent of w, cin)
|
| 46 |
+
carry = MAJ(XOR(x,y,z), w, cin)
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
## Function
|
| 50 |
+
|
| 51 |
+
```
|
| 52 |
+
compress_4to2(x, y, z, w, cin) -> (sum, carry, cout)
|
| 53 |
+
|
| 54 |
+
Invariant: x + y + z + w + cin = sum + 2*carry + 2*cout
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
## Truth Table (partial — 6 of the 32 input combinations)
|
| 58 |
+
|
| 59 |
+
| x | y | z | w | cin | sum | carry | cout | verify |
|
| 60 |
+
|---|---|---|---|-----|-----|-------|------|--------|
|
| 61 |
+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0=0 |
|
| 62 |
+
| 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1=1 |
|
| 63 |
+
| 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 2=2 |
|
| 64 |
+
| 1 | 1 | 1 | 0 | 0 | 1 | 0 | 1 | 3=3 |
|
| 65 |
+
| 1 | 1 | 1 | 1 | 0 | 0 | 1 | 1 | 4=4 |
|
| 66 |
+
| 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 5=5 |
|
| 67 |
+
|
| 68 |
+
Input sum range: 0 to 5
|
| 69 |
+
Output encoding: sum + 2*carry + 2*cout (range 0-5)
|
| 70 |
+
|
| 71 |
+
## Mechanism
|
| 72 |
+
|
| 73 |
+
The 4:2 compressor is built from two cascaded 3:2 compressors with a twist:
|
| 74 |
+
|
| 75 |
+
**Stage 1: Compress (x, y, z)**
|
| 76 |
+
- sum1 = x XOR y XOR z
|
| 77 |
+
- cout = MAJ(x, y, z) ← This goes to next column
|
| 78 |
+
|
| 79 |
+
**Stage 2: Compress (sum1, w, cin)**
|
| 80 |
+
- sum = sum1 XOR w XOR cin
|
| 81 |
+
- carry = MAJ(sum1, w, cin) ← This goes to next column
|
| 82 |
+
|
| 83 |
+
Key insight: The cout is computed early and can propagate horizontally while sum/carry are still being computed.
|
| 84 |
+
|
| 85 |
+
## Architecture
|
| 86 |
+
|
| 87 |
+
| Component | Function | Neurons | Layers |
|
| 88 |
+
|-----------|----------|---------|--------|
|
| 89 |
+
| XOR(x,y) | First pair | 3 | 2 |
|
| 90 |
+
| XOR(xy,z) | Add third | 3 | 2 |
|
| 91 |
+
| MAJ(x,y,z) | cout | 1 | 1 |
|
| 92 |
+
| XOR(xyz,w) | Add fourth | 3 | 2 |
|
| 93 |
+
| XOR(xyzw,cin) | sum | 3 | 2 |
|
| 94 |
+
| MAJ(xyz,w,cin) | carry | 1 | 1 |
|
| 95 |
+
|
| 96 |
+
**Total: 14 neurons**
|
| 97 |
+
|
| 98 |
+
## Parameters
|
| 99 |
+
|
| 100 |
+
| | |
|
| 101 |
+
|---|---|
|
| 102 |
+
| Inputs | 5 (x, y, z, w, cin) |
|
| 103 |
+
| Outputs | 3 (sum, carry, cout) |
|
| 104 |
+
| Neurons | 14 |
|
| 105 |
+
| Layers | 8 |
|
| 106 |
+
| Parameters | 44 |
|
| 107 |
+
| Magnitude | 50 |
|
| 108 |
+
|
| 109 |
+
## Delay Analysis
|
| 110 |
+
|
| 111 |
+
Critical path for sum: 4 XOR stages = 8 layers
|
| 112 |
+
Critical path for carry: 2 XOR stages (x, y, z) + 1 MAJ = 5 layers
|
| 113 |
+
Critical path for cout: 1 MAJ = 1 layer (very fast!)
|
| 114 |
+
|
| 115 |
+
The early cout enables fast horizontal carry propagation in multiplier arrays.
|
| 116 |
+
|
| 117 |
+
## Usage
|
| 118 |
+
|
| 119 |
+
```python
|
| 120 |
+
from safetensors.torch import load_file
|
| 121 |
+
import torch
|
| 122 |
+
|
| 123 |
+
w = load_file('model.safetensors')
|
| 124 |
+
|
| 125 |
+
def compress_4to2(x, y, z, w_in, cin):
|
| 126 |
+
    from model import compress_4to2 as impl  # implementation lives in model.py
|
| 127 |
+
    return impl(x, y, z, w_in, cin, w)
|
| 128 |
+
|
| 129 |
+
# Example: sum of 5 bits
|
| 130 |
+
s, carry, cout = compress_4to2(1, 1, 1, 1, 1)
|
| 131 |
+
print(f"1+1+1+1+1 = {s} + 2*{carry} + 2*{cout} = {s + 2*carry + 2*cout}")
|
| 132 |
+
# Output: 1+1+1+1+1 = 1 + 2*1 + 2*1 = 5
|
| 133 |
+
```
|
| 134 |
+
|
| 135 |
+
## Applications
|
| 136 |
+
|
| 137 |
+
- Booth multipliers (radix-4)
|
| 138 |
+
- Wallace/Dadda tree reduction
|
| 139 |
+
- FMA (fused multiply-add) units
|
| 140 |
+
- High-performance DSP
|
| 141 |
+
|
| 142 |
+
## Comparison with 3:2 Compressor
|
| 143 |
+
|
| 144 |
+
| Property | 3:2 | 4:2 |
|
| 145 |
+
|----------|-----|-----|
|
| 146 |
+
| Inputs | 3 | 5 (4 + cin) |
|
| 147 |
+
| Outputs | 2 | 3 (2 + cout) |
|
| 148 |
+
| Reduction ratio | 3→2 | 4→2 per column |
|
| 149 |
+
| Neurons | 7 | 14 |
|
| 150 |
+
| Tree depth for n bits | O(log₁.₅ n) | O(log₂ n) |
|
| 151 |
+
|
| 152 |
+
4:2 compressors provide faster reduction in multiplier trees.
|
| 153 |
+
|
| 154 |
+
## Files
|
| 155 |
+
|
| 156 |
+
```
|
| 157 |
+
threshold-4to2-compressor/
|
| 158 |
+
├── model.safetensors
|
| 159 |
+
├── model.py
|
| 160 |
+
├── create_safetensors.py
|
| 161 |
+
├── config.json
|
| 162 |
+
└── README.md
|
| 163 |
+
```
|
| 164 |
+
|
| 165 |
+
## License
|
| 166 |
+
|
| 167 |
+
MIT
|
config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "threshold-4to2-compressor",
|
| 3 |
+
"description": "4:2 compressor for high-speed multiplier trees",
|
| 4 |
+
"inputs": 5,
|
| 5 |
+
"outputs": 3,
|
| 6 |
+
"neurons": 14,
|
| 7 |
+
"layers": 8,
|
| 8 |
+
"parameters": 44
|
| 9 |
+
}
|
create_safetensors.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from safetensors.torch import save_file
|
| 3 |
+
|
| 4 |
+
weights = {}
|
| 5 |
+
|
| 6 |
+
# 4:2 Compressor
|
| 7 |
+
# Inputs: x, y, z, w, cin
|
| 8 |
+
# Outputs: sum, carry, cout
|
| 9 |
+
# Invariant: x + y + z + w + cin = sum + 2*carry + 2*cout
|
| 10 |
+
|
| 11 |
+
def add_xor(prefix, store=None):
    """Register the six tensors of one two-layer threshold XOR gate.

    The gate is built as AND(OR(a, b), NAND(a, b)). Each neuron is stored as
    a ``(1, 2)`` float32 weight row plus a one-element float32 bias.

    Args:
        prefix: Key prefix; tensors are stored under ``{prefix}.or.*``,
            ``{prefix}.nand.*`` and ``{prefix}.and.*``.
        store: Mapping that receives the tensors. Defaults to the
            module-level ``weights`` dict, so existing ``add_xor(prefix)``
            calls behave exactly as before.
    """
    target = weights if store is None else store
    # (gate name, input weights, bias), in the order the keys are inserted.
    gate_specs = (
        ('or', [1.0, 1.0], -1.0),
        ('nand', [-1.0, -1.0], 1.0),
        ('and', [1.0, 1.0], -2.0),
    )
    for gate, row, bias in gate_specs:
        target[f'{prefix}.{gate}.weight'] = torch.tensor([row], dtype=torch.float32)
        target[f'{prefix}.{gate}.bias'] = torch.tensor([bias], dtype=torch.float32)
|
| 18 |
+
|
| 19 |
+
# Sum path: a chain of four XOR gates, sum = x XOR y XOR z XOR w XOR cin.
for xor_prefix in (
    'xor_xy',    # XOR(x, y)
    'xor_xyz',   # XOR(xy, z)
    'xor_xyzw',  # XOR(xyz, w)
    'xor_sum',   # XOR(xyzw, cin) -> sum
):
    add_xor(xor_prefix)

# Both carry outputs are single at-least-2-of-3 (majority) neurons:
#   cout  = MAJ(x, y, z)
#   carry = MAJ(xyz, w, cin)
weights.update({
    'cout.weight': torch.tensor([[1.0, 1.0, 1.0]], dtype=torch.float32),
    'cout.bias': torch.tensor([-2.0], dtype=torch.float32),
    'carry.weight': torch.tensor([[1.0, 1.0, 1.0]], dtype=torch.float32),
    'carry.bias': torch.tensor([-2.0], dtype=torch.float32),
})

# Persist the complete weight set.
save_file(weights, 'model.safetensors')
|
| 34 |
+
|
| 35 |
+
def _fires(inputs, weight, bias):
    """Return 1 if ``inputs @ weight.T + bias >= 0`` for a single neuron, else 0."""
    return int((inputs @ weight.T + bias >= 0).item())


def xor2(a, b, prefix, w=None):
    """Evaluate the two-layer threshold gate stored under ``prefix``.

    Layer 1 evaluates the ``or`` and ``nand`` neurons on ``(a, b)``; layer 2
    feeds both results into the ``and`` neuron.

    Args:
        a: First input bit (anything ``float()`` accepts).
        b: Second input bit.
        prefix: Key prefix of the gate's tensors, e.g. ``'xor_xy'``.
        w: Mapping of tensor name to tensor. Defaults to the module-level
            ``weights`` dict, so ``xor2(a, b, prefix)`` works as before; the
            optional argument mirrors ``xor2`` in model.py.

    Returns:
        0 or 1, the output of the ``and`` neuron.
    """
    table = weights if w is None else w
    inp = torch.tensor([float(a), float(b)])
    or_out = _fires(inp, table[f'{prefix}.or.weight'], table[f'{prefix}.or.bias'])
    nand_out = _fires(inp, table[f'{prefix}.nand.weight'], table[f'{prefix}.nand.bias'])
    hidden = torch.tensor([float(or_out), float(nand_out)])
    return _fires(hidden, table[f'{prefix}.and.weight'], table[f'{prefix}.and.bias'])
|
| 41 |
+
|
| 42 |
+
def compress_4to2(x, y, z, w, cin):
    """Evaluate the 4:2 compressor using the module-level ``weights``.

    Returns:
        ``(sum, carry, cout)`` with
        ``x + y + z + w + cin == sum + 2*carry + 2*cout``.
    """
    def threshold3(name, first, second, third):
        # Single 3-input neuron stored under '<name>.weight' / '<name>.bias'.
        bits = torch.tensor([float(first), float(second), float(third)])
        activation = bits @ weights[f'{name}.weight'].T + weights[f'{name}.bias']
        return int((activation >= 0).item())

    # Sum path: four chained XOR gates.
    parity_xyz = xor2(xor2(x, y, 'xor_xy'), z, 'xor_xyz')
    sum_out = xor2(xor2(parity_xyz, w, 'xor_xyzw'), cin, 'xor_sum')

    # cout = MAJ(x, y, z); carry = MAJ(x XOR y XOR z, w, cin).
    cout = threshold3('cout', x, y, z)
    carry = threshold3('carry', parity_xyz, w, cin)

    return sum_out, carry, cout
|
| 58 |
+
|
| 59 |
+
from itertools import product

print("Verifying 4:2 compressor...")
errors = 0
# Exhaustively check the arithmetic invariant on all 2**5 input combinations.
# product() varies the last element fastest, i.e. the same order as five
# nested loops with x outermost.
for x, y, z, w, cin in product((0, 1), repeat=5):
    s, carry, cout = compress_4to2(x, y, z, w, cin)
    input_sum = x + y + z + w + cin
    output_sum = s + 2 * carry + 2 * cout
    if input_sum != output_sum:
        errors += 1
        print(f"ERROR: {x}+{y}+{z}+{w}+{cin}={input_sum}, but got {output_sum}")

if errors == 0:
    print("All 32 test cases passed!")
else:
    print(f"FAILED: {errors} errors")

# Summary statistics over every stored tensor.
print(f"Magnitude: {sum(t.abs().sum().item() for t in weights.values()):.0f}")
print(f"Parameters: {sum(t.numel() for t in weights.values())}")

# Report a failed verification through the exit status so shells and CI
# notice it; the success path is unchanged.
if errors:
    raise SystemExit(1)
|
model.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from safetensors.torch import load_file
|
| 3 |
+
|
| 4 |
+
def load_model(path='model.safetensors'):
    """Load the safetensors file at ``path`` and return the result of ``load_file``."""
    tensors = load_file(path)
    return tensors
|
| 6 |
+
|
| 7 |
+
def _threshold(inputs, weight, bias):
    """Return 1 if ``inputs @ weight.T + bias >= 0`` for a single neuron, else 0."""
    return int((inputs @ weight.T + bias >= 0).item())


def xor2(a, b, prefix, w):
    """Evaluate the two-layer threshold gate stored under ``prefix``.

    Layer 1 evaluates the ``or`` and ``nand`` neurons on ``(a, b)``; layer 2
    feeds both results into the ``and`` neuron.

    Args:
        a: First input bit (anything ``float()`` accepts).
        b: Second input bit.
        prefix: Key prefix of the gate's tensors, e.g. ``'xor_xy'``.
        w: Mapping of tensor name to tensor, as returned by ``load_model``.

    Returns:
        0 or 1, the output of the ``and`` neuron.
    """
    inp = torch.tensor([float(a), float(b)])
    or_out = _threshold(inp, w[f'{prefix}.or.weight'], w[f'{prefix}.or.bias'])
    nand_out = _threshold(inp, w[f'{prefix}.nand.weight'], w[f'{prefix}.nand.bias'])
    hidden = torch.tensor([float(or_out), float(nand_out)])
    return _threshold(hidden, w[f'{prefix}.and.weight'], w[f'{prefix}.and.bias'])
|
| 13 |
+
|
| 14 |
+
def compress_4to2(x, y, z, w_in, cin, weights):
    """Evaluate the 4:2 compressor and return ``(sum, carry, cout)``.

    The outputs satisfy ``x + y + z + w_in + cin == sum + 2*carry + 2*cout``.
    ``weights`` maps tensor names (``xor_*``, ``cout.*``, ``carry.*``) to
    tensors.
    """
    def threshold3(name, first, second, third):
        # Single 3-input neuron stored under '<name>.weight' / '<name>.bias'.
        bits = torch.tensor([float(first), float(second), float(third)])
        activation = bits @ weights[f'{name}.weight'].T + weights[f'{name}.bias']
        return int((activation >= 0).item())

    # Sum path: four chained XOR gates.
    parity_xyz = xor2(xor2(x, y, 'xor_xy', weights), z, 'xor_xyz', weights)
    sum_out = xor2(xor2(parity_xyz, w_in, 'xor_xyzw', weights), cin, 'xor_sum', weights)

    # cout is computed from (x, y, z); carry from (x XOR y XOR z, w_in, cin).
    cout = threshold3('cout', x, y, z)
    carry = threshold3('carry', parity_xyz, w_in, cin)

    return sum_out, carry, cout
|
| 28 |
+
|
| 29 |
+
def _first_combination(total):
    """Return the first 5-bit tuple (x, y, z, w_in, cin) whose bits sum to ``total``.

    Tuples are tried in counting order (x most significant, cin fastest).
    Returns ``None`` when no 5-bit tuple has that sum.
    """
    from itertools import product

    return next(
        (bits for bits in product((0, 1), repeat=5) if sum(bits) == total),
        None,
    )


if __name__ == '__main__':
    w = load_model()
    print('4:2 Compressor selected tests:')
    print('x y z w cin | sum carry cout | verify')
    print('------------+----------------+-------')
    for total in range(6):
        # Exactly one representative input per possible input sum (0..5).
        # The earlier nested-loop version never broke out of the outermost
        # loop, so totals 1-4 were printed twice (once for x=0, once for x=1).
        x, y, z, w_in, cin = _first_combination(total)
        s, carry, cout = compress_4to2(x, y, z, w_in, cin, w)
        check = 'OK' if total == s + 2*carry + 2*cout else 'FAIL'
        print(f'{x} {y} {z} {w_in} {cin} | {s} {carry} {cout} | {check}')
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e7eb7afaff414cf82f81a410fda0f6a4e34d6b203fa56c9218b10ddb9e6521d
|
| 3 |
+
size 2200
|