| |
|
|
| struct TmConfig { |
| unsigned int activation_threshold; |
| unsigned int learning_threshold; |
| unsigned int cells_per_column; |
| unsigned int synapses_per_segment; |
| unsigned int n_segments; |
| unsigned int n_cells; |
| unsigned int max_segments_per_cell; |
| unsigned int max_new_synapses; |
| int conn_thr_i16; |
| int perm_inc_i16; |
| int perm_dec_i16; |
| int predicted_seg_dec_i16; |
| int initial_perm_i16; |
| unsigned int iter_seed; |
| unsigned int n_cols; |
| unsigned int bits_words; |
| }; |
|
|
| extern "C" __global__ |
| void tm_activate( |
| const unsigned char * __restrict__ sp_active_mask, |
| const unsigned char * __restrict__ col_predicted, |
| const unsigned int * __restrict__ cell_predictive_bits, |
| unsigned int * __restrict__ cell_active_bits, |
| unsigned int * __restrict__ cell_winner_bits, |
| unsigned int * __restrict__ unpredicted_count, |
| unsigned int * __restrict__ burst_cols_flat, |
| unsigned int * __restrict__ burst_cols_count, |
| TmConfig cfg |
| ) { |
| unsigned int col = blockIdx.x * blockDim.x + threadIdx.x; |
| if (col >= cfg.n_cols) return; |
| if (sp_active_mask[col] == 0) return; |
|
|
| unsigned int base_cell = col * cfg.cells_per_column; |
|
|
| if (col_predicted[col]) { |
| for (unsigned int k = 0; k < cfg.cells_per_column; k++) { |
| unsigned int cell = base_cell + k; |
| unsigned int word_idx = cell >> 5; |
| unsigned int bit_mask = 1u << (cell & 31u); |
| unsigned int pred_word = cell_predictive_bits[word_idx]; |
| if (pred_word & bit_mask) { |
| atomicOr(&cell_active_bits[word_idx], bit_mask); |
| atomicOr(&cell_winner_bits[word_idx], bit_mask); |
| } |
| } |
| } else { |
| atomicAdd(unpredicted_count, 1u); |
| for (unsigned int k = 0; k < cfg.cells_per_column; k++) { |
| unsigned int cell = base_cell + k; |
| unsigned int word_idx = cell >> 5; |
| unsigned int bit_mask = 1u << (cell & 31u); |
| atomicOr(&cell_active_bits[word_idx], bit_mask); |
| } |
| unsigned int winner = base_cell; |
| unsigned int word_idx = winner >> 5; |
| unsigned int bit_mask = 1u << (winner & 31u); |
| atomicOr(&cell_winner_bits[word_idx], bit_mask); |
| unsigned int slot = atomicAdd(burst_cols_count, 1u); |
| burst_cols_flat[slot] = col; |
| } |
| } |
|
|