// icarus112's picture
// Upload folder using huggingface_hub
// 1c59946 verified
// Duty cycle + boost update kernel.
//
// For every column c:
//   active_sample   = active_mask[c] != 0        ? 1 : 0
//   overlap_sample  = raw_overlap[c] >= stim_thr ? 1 : 0
//   active_duty[c]  = (1-alpha) * active_duty[c]  + alpha * active_sample
//   overlap_duty[c] = (1-alpha) * overlap_duty[c] + alpha * overlap_sample
//
// Then, only if learn_flag != 0 AND boost_strength > 0:
//   boost[c] = exp(-boost_strength * (active_duty[c] - mean_duty))
// using the freshly updated active_duty[c]; otherwise boost[] is not written.
// mean_duty is computed on the host (one reduction) and passed in.
//
// Launch: 1-D launch (only blockIdx.x / blockDim.x / gridDim.x / threadIdx.x
// are read). Columns are visited with a stride of gridDim.x * blockDim.x, so
// the kernel is correct for any grid size: with at least n_columns threads each
// thread handles one column (same result as a one-thread-per-column kernel),
// and with fewer threads each thread handles several columns instead of
// leaving the tail untouched. No shared memory is used.
extern "C" __global__
void sp_duty_update(
    const unsigned char * __restrict__ active_mask,   // (n_columns,) non-zero = active
    const unsigned int  * __restrict__ raw_overlap,   // (n_columns,)
    float * __restrict__ active_duty,                 // (n_columns,) in-place
    float * __restrict__ overlap_duty,                // (n_columns,) in-place
    float * __restrict__ boost,                       // (n_columns,) in-place
    float alpha,
    float stim_thr,
    float boost_strength,                             // 0 to skip boost
    float mean_duty,
    unsigned int learn_flag,                          // 0 or 1
    unsigned int n_columns
) {
    // 64-bit index math so blockIdx.x * blockDim.x and the stride increment
    // cannot wrap around in 32 bits on very large launches.
    const unsigned long long total  = n_columns;
    const unsigned long long stride =
        (unsigned long long)gridDim.x * (unsigned long long)blockDim.x;

    // Loop-invariant values: the decay factor of the moving average and the
    // (launch-uniform) decision whether boost[] is refreshed at all.
    const float decay = 1.0f - alpha;
    const bool update_boost = (learn_flag != 0u) && (boost_strength > 0.0f);

    for (unsigned long long c =
             (unsigned long long)blockIdx.x * (unsigned long long)blockDim.x + threadIdx.x;
         c < total;
         c += stride) {
        // Binary samples for this step.
        const float a_sample = (active_mask[c] != 0) ? 1.0f : 0.0f;
        const float o_sample = ((float)raw_overlap[c] >= stim_thr) ? 1.0f : 0.0f;

        // Exponential moving averages, written back in place.
        const float ad = decay * active_duty[c]  + alpha * a_sample;
        const float od = decay * overlap_duty[c] + alpha * o_sample;
        active_duty[c]  = ad;
        overlap_duty[c] = od;

        // Boost is derived from the updated active duty cycle.
        if (update_boost) {
            boost[c] = expf(-boost_strength * (ad - mean_duty));
        }
    }
}