File size: 6,747 Bytes
17a649b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
//! pyo3 bindings for HTMRegion (Numenta BAMI-spec HTM).
//!
//! Exposed class:
//!     HTMRegion(input_bits, n_columns, cells_per_column, seed=42) -> HTMRegion
//!       .step(input_sdr: np.ndarray[bool; input_bits], learn: bool = True)
//!           -> (active_columns: np.ndarray[bool; n_columns],
//!               active_cells:   np.ndarray[bool; n_columns*cells_per_column],
//!               predicted_cells:np.ndarray[bool; n_columns*cells_per_column],
//!               anomaly: float)
//!       .step_many(inputs: np.ndarray[bool; (T, input_bits)], learn: bool = True)
//!           -> (cols: np.ndarray[float32; (T, n_columns)],
//!               anom: np.ndarray[float32; T])
//!       .reset()
//!       .n_columns -> int
//!       .cells_per_column -> int
//!       .input_bits -> int
//!
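//! The GIL is released during the heavy compute via `py.allow_threads(...)`, so
//! other Python threads can keep running while a region steps. This in turn
//! requires the region state captured by that closure to satisfy pyo3's `Ungil`
//! bound (`Send` by default).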

// pyo3 0.22 `#[pymethods]` expansion inserts an implicit `.into()` on the
// returned `Result` to normalise the error type, which clippy reports as
// `useless_conversion` when our methods already return `PyErr`. The emitted
// code sits outside the user-written impl, so item-level allows don't reach
// it; the module-wide allow is the documented workaround.
#![allow(clippy::useless_conversion)]

mod region;
mod sp;
mod tm;

#[cfg(feature = "gpu")]
mod gpu;

use numpy::{
    IntoPyArray, PyArray1, PyArray2, PyArrayMethods, PyReadonlyArray1, PyReadonlyArray2,
    PyUntypedArrayMethods,
};
use pyo3::prelude::*;

use crate::region::HTMRegionCore;

/// One-dimensional boolean numpy array bound to the GIL lifetime `'py`.
type BoolArray1<'py> = Bound<'py, PyArray1<bool>>;

/// Value returned by `HTMRegion::step`, in order:
/// `(active_columns, active_cells, predicted_cells, anomaly)`.
type StepOutput<'py> = (BoolArray1<'py>, BoolArray1<'py>, BoolArray1<'py>, f32);

/// Python-visible HTM region, exported as a class of the `htm_rust` module.
///
/// This type is a thin binding layer: every method and getter delegates to the
/// wrapped `HTMRegionCore`.
#[pyclass(module = "htm_rust")]
pub struct HTMRegion {
    // Rust-side region that all `#[pymethods]` forward to.
    core: HTMRegionCore,
}

#[pymethods]
impl HTMRegion {
    /// Construct an HTM region.
    ///
    /// Args:
    ///     input_bits: length of binary input SDR
    ///     n_columns: number of mini-columns in the SP (e.g. 2048)
    ///     cells_per_column: cells per column in the TM (e.g. 32)
    ///     seed: RNG seed for reproducibility (defaults to 42)
    ///
    /// Raises:
    ///     ValueError: if `input_bits`, `n_columns` or `cells_per_column` is 0.
    #[new]
    #[pyo3(signature = (input_bits, n_columns, cells_per_column, seed=42))]
    fn new(
        input_bits: usize,
        n_columns: usize,
        cells_per_column: usize,
        seed: u64,
    ) -> PyResult<Self> {
        // Arguments are inspected in declaration order, so the first zero-sized
        // one is the one that gets reported.
        let rejection = if input_bits == 0 {
            Some("input_bits must be > 0")
        } else if n_columns == 0 {
            Some("n_columns must be > 0")
        } else if cells_per_column == 0 {
            Some("cells_per_column must be > 0")
        } else {
            None
        };
        if let Some(message) = rejection {
            return Err(pyo3::exceptions::PyValueError::new_err(message));
        }

        let core = HTMRegionCore::new(input_bits, n_columns, cells_per_column, seed);
        Ok(Self { core })
    }

    /// Value of `input_bits` stored in the core's `sp` configuration.
    #[getter]
    fn input_bits(&self) -> usize {
        let sp_cfg = &self.core.sp.cfg;
        sp_cfg.input_bits
    }

    /// Value of `n_columns` stored in the core's `sp` configuration.
    #[getter]
    fn n_columns(&self) -> usize {
        let sp_cfg = &self.core.sp.cfg;
        sp_cfg.n_columns
    }

    /// Value of `cells_per_column` stored in the core's `tm` configuration.
    #[getter]
    fn cells_per_column(&self) -> usize {
        let tm_cfg = &self.core.tm.cfg;
        tm_cfg.cells_per_column
    }

    /// Advance the region by a single timestep.
    ///
    /// Args:
    ///     input_sdr: 1-D numpy boolean array of length `input_bits`.
    ///     learn: if True, update SP permanences and TM synapses.
    ///
    /// Returns:
    ///     (active_columns, active_cells, predicted_cells, anomaly)
    ///
    /// Raises:
    ///     ValueError: if `input_sdr` does not hold exactly `input_bits` elements.
    ///     Any error reported by `as_slice` on the input array is propagated
    ///     unchanged.
    #[pyo3(signature = (input_sdr, learn=true))]
    fn step<'py>(
        &mut self,
        py: Python<'py>,
        input_sdr: PyReadonlyArray1<'py, bool>,
        learn: bool,
    ) -> PyResult<StepOutput<'py>> {
        let sdr = input_sdr.as_slice()?;
        let (got, expected) = (sdr.len(), self.core.sp.cfg.input_bits);
        if got != expected {
            let message = format!("input_sdr length {got} != expected input_bits {expected}");
            return Err(pyo3::exceptions::PyValueError::new_err(message));
        }

        // The compute closure runs with the GIL released, so it is given an
        // owned copy of the input instead of a borrow of numpy-managed memory.
        let owned_sdr: Vec<bool> = sdr.to_vec();
        let (columns, cells, predicted, anomaly) =
            py.allow_threads(|| self.core.step(&owned_sdr, learn));

        // Hand the resulting vectors over to numpy.
        Ok((
            columns.into_pyarray_bound(py),
            cells.into_pyarray_bound(py),
            predicted.into_pyarray_bound(py),
            anomaly,
        ))
    }

    /// Clear TM predictive state. Does NOT unlearn synapses.
    ///
    /// Delegates to the wrapped core's `reset`.
    fn reset(&mut self) {
        let core = &mut self.core;
        core.reset();
    }

    /// Process T timesteps from a `(T, input_bits)` bool ndarray.
    ///
    /// Returns:
    ///     cols: (T, n_columns) float32 0/1 active-column mask
    ///     anom: (T,) float32 anomaly scores
    ///
    /// Single GIL release for the whole pass, avoiding T × Python-call overhead.
    #[pyo3(signature = (inputs, learn=true))]
    fn step_many<'py>(
        &mut self,
        py: Python<'py>,
        inputs: PyReadonlyArray2<'py, bool>,
        learn: bool,
    ) -> PyResult<(Bound<'py, PyArray2<f32>>, Bound<'py, PyArray1<f32>>)> {
        let shape = inputs.shape();
        if shape.len() != 2 {
            return Err(pyo3::exceptions::PyValueError::new_err(
                "inputs must be 2-D (T, input_bits)",
            ));
        }
        let t = shape[0];
        let bits = shape[1];
        let expected = self.core.sp.cfg.input_bits;
        if bits != expected {
            return Err(pyo3::exceptions::PyValueError::new_err(format!(
                "inputs last dim {bits} != expected input_bits {expected}",
            )));
        }
        let slice = inputs.as_slice()?;
        let n_cols = self.core.sp.cfg.n_columns;

        // Own the input buffer so we can drop the GIL.
        let input_vec: Vec<bool> = slice.to_vec();

        let (cols_u8, anom) =
            py.allow_threads(|| self.core.step_many(&input_vec, bits, t, learn));

        // Convert u8 mask to f32 for direct numpy consumption.
        let cols_f32: Vec<f32> = cols_u8.iter().map(|&b| b as f32).collect();

        // Build (T, n_cols) and (T,) arrays.
        let cols_arr =
            numpy::PyArray1::from_vec_bound(py, cols_f32)
                .reshape([t, n_cols])
                .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(format!("{e}")))?;
        let anom_arr = numpy::PyArray1::from_vec_bound(py, anom);
        Ok((cols_arr, anom_arr))
    }
}

/// Python module entry point.
///
/// Registers the `HTMRegion` class, lets the `gpu` module register itself when
/// the `gpu` feature is compiled in, and exposes the crate version as
/// `__version__`.
#[pymodule]
fn htm_rust(m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add_class::<HTMRegion>()?;

    #[cfg(feature = "gpu")]
    gpu::register(m)?;

    // Last fallible call, so its result doubles as the function's result.
    m.add("__version__", env!("CARGO_PKG_VERSION"))
}