| // Duty cycle + boost update kernel. | |
| // | |
| // For each column c (one thread each): | |
| // active_sample = active_mask[c] ? 1 : 0 | |
| // overlap_sample = raw_overlap[c] >= stim_thr ? 1 : 0 | |
| // active_duty[c] = (1-alpha) * active_duty[c] + alpha * active_sample | |
| // overlap_duty[c] = (1-alpha) * overlap_duty[c] + alpha * overlap_sample | |
| // | |
| // Then, if learn: | |
| // boost[c] = exp(-boost_strength * (active_duty[c] - mean_duty)) | |
| // mean_duty is computed on the host (one reduction) and passed in. | |
| extern "C" __global__ | |
| void sp_duty_update( | |
| const unsigned char * __restrict__ active_mask, // (n_columns,) | |
| const unsigned int * __restrict__ raw_overlap, // (n_columns,) | |
| float * __restrict__ active_duty, // (n_columns,) in-place | |
| float * __restrict__ overlap_duty, // (n_columns,) in-place | |
| float * __restrict__ boost, // (n_columns,) in-place | |
| float alpha, | |
| float stim_thr, | |
| float boost_strength, // 0 to skip boost | |
| float mean_duty, | |
| unsigned int learn_flag, // 0 or 1 | |
| unsigned int n_columns | |
| ) { | |
| unsigned int c = blockIdx.x * blockDim.x + threadIdx.x; | |
| if (c >= n_columns) return; | |
| float ad = active_duty[c]; | |
| float od = overlap_duty[c]; | |
| float a_sample = (active_mask[c] != 0) ? 1.0f : 0.0f; | |
| float o_sample = ((float)raw_overlap[c] >= stim_thr) ? 1.0f : 0.0f; | |
| ad = (1.0f - alpha) * ad + alpha * a_sample; | |
| od = (1.0f - alpha) * od + alpha * o_sample; | |
| active_duty[c] = ad; | |
| overlap_duty[c] = od; | |
| if (learn_flag && boost_strength > 0.0f) { | |
| boost[c] = expf(-boost_strength * (ad - mean_duty)); | |
| } | |
| } | |