// SP Hebbian learning kernel. // // For each active (winner) column c, for each of its synapses s: // if input[bit[c][s]] active: perm += inc // else: perm -= dec // Clamp to [0, 1]. // // Launch: one block per column (2048 blocks), but we predicate on // active_mask[c] to avoid launching k-specific blocks. // // This matches the CPU reference line-for-line: // src/sp.rs lines 157-169. extern "C" __global__ void sp_learn( const unsigned char * __restrict__ active_mask, // (n_columns,) 0/1 const unsigned char * __restrict__ inp, // (input_bits,) const unsigned int * __restrict__ syn_bit, // (n_columns * S,) float * __restrict__ syn_perm, // (n_columns * S,) in-place float inc, float dec, unsigned int synapses_per_col, unsigned int n_columns ) { const unsigned int c = blockIdx.x; if (c >= n_columns) return; if (active_mask[c] == 0) return; const unsigned int base = c * synapses_per_col; const unsigned int tid = threadIdx.x; const unsigned int bsz = blockDim.x; for (unsigned int s = tid; s < synapses_per_col; s += bsz) { unsigned int b = syn_bit[base + s]; float p = syn_perm[base + s]; if (inp[b] != 0) { p += inc; if (p > 1.0f) p = 1.0f; } else { p -= dec; if (p < 0.0f) p = 0.0f; } syn_perm[base + s] = p; } }