use pyo3::prelude::*; use tantivy::collector::TopDocs; use tantivy::query::QueryParser; use tantivy::schema::*; use tantivy::{doc, Index, ReloadPolicy}; use walkdir::WalkDir; use std::path::PathBuf; use std::fs; #[pyfunction] fn index_directory(index_path: String, repo_path: String) -> PyResult { let mut schema_builder = Schema::builder(); let filepath = schema_builder.add_text_field("filepath", STRING | STORED); let content = schema_builder.add_text_field("content", TEXT); let schema = schema_builder.build(); let index_path_buf = PathBuf::from(&index_path); let index = if index_path_buf.exists() { Index::open_in_dir(&index_path_buf) .map_err(|e| PyErr::new::(format!("{}", e)))? } else { std::fs::create_dir_all(&index_path_buf)?; Index::create_in_dir(&index_path_buf, schema.clone()) .map_err(|e| PyErr::new::(format!("{}", e)))? }; let mut writer = index.writer(50_000_000) .map_err(|e| PyErr::new::(format!("{}", e)))?; let mut count: u64 = 0; for entry in WalkDir::new(&repo_path).into_iter().filter_map(|e| e.ok()) { let path = entry.path(); if path.is_file() { if let Some(ext) = path.extension() { let ext = ext.to_string_lossy().to_lowercase(); if matches!(ext.as_str(), "png"|"jpg"|"jpeg"|"gif"|"svg"|"ico"|"woff"|"woff2"|"ttf"|"eot"|"otf") { continue; } } if let Ok(text) = fs::read_to_string(path) { let rel_path = path.strip_prefix(&repo_path) .unwrap_or(path) .to_string_lossy() .to_string(); writer.add_document(doc!( filepath => rel_path, content => text, )).map_err(|e| PyErr::new::(format!("{}", e)))?; count += 1; } } } writer.commit() .map_err(|e| PyErr::new::(format!("{}", e)))?; Ok(count) } #[pyfunction] fn search(index_path: String, query: String) -> PyResult> { let index = Index::open_in_dir(&index_path) .map_err(|e| PyErr::new::(format!("{}", e)))?; let reader = index.reader_builder() .reload_policy(ReloadPolicy::OnCommitWithDelay) .try_into() .map_err(|e| PyErr::new::(format!("{}", e)))?; let searcher = reader.searcher(); let schema = index.schema(); let content_field = schema.get_field("content") .map_err(|e| PyErr::new::(format!("{}", e)))?; let filepath_field = schema.get_field("filepath") .map_err(|e| PyErr::new::(format!("{}", e)))?; let query_parser = QueryParser::for_index(&index, vec![content_field]); let query = query_parser.parse_query(&query) .map_err(|e| PyErr::new::(format!("{}", e)))?; let top_docs = searcher.search(&query, &TopDocs::with_limit(20)) .map_err(|e| PyErr::new::(format!("{}", e)))?; let mut results = Vec::new(); for (_score, doc_address) in top_docs { let doc = searcher.doc::(doc_address) .map_err(|e| PyErr::new::(format!("{}", e)))?; if let Some(path) = doc.get_first(filepath_field) { if let Some(text) = path.as_str() { results.push(text.to_string()); } } } Ok(results) } #[pymodule] fn uspoo_core(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(index_directory, m)?)?; m.add_function(wrap_pyfunction!(search, m)?)?; Ok(()) }