// Duty cycle + boost update kernel. // // For each column c (one thread each): // active_sample = active_mask[c] ? 1 : 0 // overlap_sample = raw_overlap[c] >= stim_thr ? 1 : 0 // active_duty[c] = (1-alpha) * active_duty[c] + alpha * active_sample // overlap_duty[c] = (1-alpha) * overlap_duty[c] + alpha * overlap_sample // // Then, if learn: // boost[c] = exp(-boost_strength * (active_duty[c] - mean_duty)) // mean_duty is computed on the host (one reduction) and passed in. extern "C" __global__ void sp_duty_update( const unsigned char * __restrict__ active_mask, // (n_columns,) const unsigned int * __restrict__ raw_overlap, // (n_columns,) float * __restrict__ active_duty, // (n_columns,) in-place float * __restrict__ overlap_duty, // (n_columns,) in-place float * __restrict__ boost, // (n_columns,) in-place float alpha, float stim_thr, float boost_strength, // 0 to skip boost float mean_duty, unsigned int learn_flag, // 0 or 1 unsigned int n_columns ) { unsigned int c = blockIdx.x * blockDim.x + threadIdx.x; if (c >= n_columns) return; float ad = active_duty[c]; float od = overlap_duty[c]; float a_sample = (active_mask[c] != 0) ? 1.0f : 0.0f; float o_sample = ((float)raw_overlap[c] >= stim_thr) ? 1.0f : 0.0f; ad = (1.0f - alpha) * ad + alpha * a_sample; od = (1.0f - alpha) * od + alpha * o_sample; active_duty[c] = ad; overlap_duty[c] = od; if (learn_flag && boost_strength > 0.0f) { boost[c] = expf(-boost_strength * (ad - mean_duty)); } }