// TM activate kernel. See tm_predict.cu for TmConfig. struct TmConfig { unsigned int activation_threshold; unsigned int learning_threshold; unsigned int cells_per_column; unsigned int synapses_per_segment; unsigned int n_segments; unsigned int n_cells; unsigned int max_segments_per_cell; unsigned int max_new_synapses; int conn_thr_i16; int perm_inc_i16; int perm_dec_i16; int predicted_seg_dec_i16; int initial_perm_i16; unsigned int iter_seed; unsigned int n_cols; unsigned int bits_words; }; extern "C" __global__ void tm_activate( const unsigned char * __restrict__ sp_active_mask, const unsigned char * __restrict__ col_predicted, const unsigned int * __restrict__ cell_predictive_bits, unsigned int * __restrict__ cell_active_bits, unsigned int * __restrict__ cell_winner_bits, unsigned int * __restrict__ unpredicted_count, unsigned int * __restrict__ burst_cols_flat, unsigned int * __restrict__ burst_cols_count, TmConfig cfg ) { unsigned int col = blockIdx.x * blockDim.x + threadIdx.x; if (col >= cfg.n_cols) return; if (sp_active_mask[col] == 0) return; unsigned int base_cell = col * cfg.cells_per_column; if (col_predicted[col]) { for (unsigned int k = 0; k < cfg.cells_per_column; k++) { unsigned int cell = base_cell + k; unsigned int word_idx = cell >> 5; unsigned int bit_mask = 1u << (cell & 31u); unsigned int pred_word = cell_predictive_bits[word_idx]; if (pred_word & bit_mask) { atomicOr(&cell_active_bits[word_idx], bit_mask); atomicOr(&cell_winner_bits[word_idx], bit_mask); } } } else { atomicAdd(unpredicted_count, 1u); for (unsigned int k = 0; k < cfg.cells_per_column; k++) { unsigned int cell = base_cell + k; unsigned int word_idx = cell >> 5; unsigned int bit_mask = 1u << (cell & 31u); atomicOr(&cell_active_bits[word_idx], bit_mask); } unsigned int winner = base_cell; unsigned int word_idx = winner >> 5; unsigned int bit_mask = 1u << (winner & 31u); atomicOr(&cell_winner_bits[word_idx], bit_mask); unsigned int slot = atomicAdd(burst_cols_count, 1u); burst_cols_flat[slot] = col; } }