use pyo3::prelude::*; #[cfg(target_os = "windows")] #[global_allocator] static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; mod turboquant; mod sq8; mod pq; mod baselines; #[pymodule] fn tq_native_lib(m: &Bound<'_, PyModule>) -> PyResult<()> { // Threading: // - Default: use OS-reported available parallelism // - Override: set env var TQ_RAYON_THREADS (e.g. "8") // NOTE: build_global() can only be called once; ignore error if already initialized. let threads = 2* std::env::var("TQ_RAYON_THREADS") .ok() .and_then(|v| v.parse::().ok()) .or_else(|| std::thread::available_parallelism().ok().map(|n| n.get())) .unwrap_or(0); let mut b = rayon::ThreadPoolBuilder::new(); if threads > 0 { b = b.num_threads(threads); } let _ = b.build_global(); // NEW CORE: Scalar Quantization (b-1) + QJL (1) m.add_function(wrap_pyfunction!(turboquant::tq_scan, m)?)?; m.add_function(wrap_pyfunction!(turboquant::tq_batch_scan, m)?)?; m.add_function(wrap_pyfunction!(turboquant::tq_ivf_online_scan, m)?)?; m.add_function(wrap_pyfunction!(turboquant::tq_ivf_scan_with_clusters, m)?)?; m.add_function(wrap_pyfunction!(turboquant::tq_quantize_rotated, m)?)?; m.add_function(wrap_pyfunction!(turboquant::tq_kmeans_train, m)?)?; m.add_function(wrap_pyfunction!(turboquant::tq_assign_clusters, m)?)?; m.add_function(wrap_pyfunction!(turboquant::tq_unified_search, m)?)?; // Baseline comparisons (New names for stress_5m.py) m.add_function(wrap_pyfunction!(baselines::sq_scan, m)?)?; m.add_function(wrap_pyfunction!(baselines::pq_scan, m)?)?; // Original names m.add_function(wrap_pyfunction!(sq8::sq8_score_simd, m)?)?; m.add_function(wrap_pyfunction!(pq::pq_score_simd, m)?)?; Ok(()) }