// icarus112's picture
// Upload folder using huggingface_hub
// 1c59946 verified
// SP Hebbian learning kernel.
//
// For each active (winner) column c, for each of its synapses s:
//     if input[bit[c][s]] is active:  perm += inc, capped at 1
//     else:                           perm -= dec, floored at 0
//
// Launch layout: 1-D grid of 1-D blocks. The intended configuration is one
// block per column (e.g. 2048 blocks); inactive columns are skipped by
// predicating on active_mask[c] instead of launching a k-specific grid.
// Blocks stride over columns by gridDim.x, so a grid smaller than n_columns
// still visits every column. Threads stride over a column's synapses by
// blockDim.x. No shared memory and no barriers are used.
//
// Preconditions (not checked in the kernel):
//   - active_mask holds n_columns entries; zero means "skip this column".
//   - syn_bit and syn_perm hold n_columns * synapses_per_col entries, laid
//     out column-major by column index (column c starts at c * S).
//   - every syn_bit entry is a valid index into inp.
//
// Intended to mirror the CPU reference: src/sp.rs lines 157-169.
extern "C" __global__
void sp_learn(
    const unsigned char * __restrict__ active_mask, // (n_columns,) 0/1
    const unsigned char * __restrict__ inp,         // (input_bits,)
    const unsigned int  * __restrict__ syn_bit,     // (n_columns * S,)
    float               * __restrict__ syn_perm,    // (n_columns * S,) in-place
    float inc,
    float dec,
    unsigned int synapses_per_col,
    unsigned int n_columns
) {
    // All indexing is done in size_t: c * synapses_per_col (and the loop
    // increments) can exceed 32 bits for large layers and would otherwise wrap.
    for (size_t c = blockIdx.x; c < n_columns; c += gridDim.x) {
        // Only winner columns learn; the test is uniform across the block.
        if (active_mask[c] == 0) continue;

        const size_t base = c * static_cast<size_t>(synapses_per_col);
        for (size_t s = threadIdx.x; s < synapses_per_col; s += blockDim.x) {
            const size_t idx = base + s;
            float p = syn_perm[idx];
            if (inp[syn_bit[idx]] != 0) {
                // Presynaptic bit active: reinforce, capped at 1.
                p += inc;
                if (p > 1.0f) p = 1.0f;
            } else {
                // Presynaptic bit inactive: decay, floored at 0.
                p -= dec;
                if (p < 0.0f) p = 0.0f;
            }
            syn_perm[idx] = p;
        }
    }
}