cargo / src /lib.rs
lljz66's picture
Update src/lib.rs
50b1c2c verified
use pyo3::prelude::*;
use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::*;
use tantivy::{doc, Index, ReloadPolicy};
use walkdir::WalkDir;
use std::path::PathBuf;
use std::fs;
#[pyfunction]
fn index_directory(index_path: String, repo_path: String) -> PyResult<u64> {
let mut schema_builder = Schema::builder();
let filepath = schema_builder.add_text_field("filepath", STRING | STORED);
let content = schema_builder.add_text_field("content", TEXT);
let schema = schema_builder.build();
let index_path_buf = PathBuf::from(&index_path);
let index = if index_path_buf.exists() {
Index::open_in_dir(&index_path_buf)
.map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?
} else {
std::fs::create_dir_all(&index_path_buf)?;
Index::create_in_dir(&index_path_buf, schema.clone())
.map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?
};
let mut writer = index.writer(50_000_000)
.map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?;
let mut count: u64 = 0;
for entry in WalkDir::new(&repo_path).into_iter().filter_map(|e| e.ok()) {
let path = entry.path();
if path.is_file() {
if let Some(ext) = path.extension() {
let ext = ext.to_string_lossy().to_lowercase();
if matches!(ext.as_str(), "png"|"jpg"|"jpeg"|"gif"|"svg"|"ico"|"woff"|"woff2"|"ttf"|"eot"|"otf") {
continue;
}
}
if let Ok(text) = fs::read_to_string(path) {
let rel_path = path.strip_prefix(&repo_path)
.unwrap_or(path)
.to_string_lossy()
.to_string();
writer.add_document(doc!(
filepath => rel_path,
content => text,
)).map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?;
count += 1;
}
}
}
writer.commit()
.map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?;
Ok(count)
}
#[pyfunction]
fn search(index_path: String, query: String) -> PyResult<Vec<String>> {
let index = Index::open_in_dir(&index_path)
.map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?;
let reader = index.reader_builder()
.reload_policy(ReloadPolicy::OnCommitWithDelay)
.try_into()
.map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?;
let searcher = reader.searcher();
let schema = index.schema();
let content_field = schema.get_field("content")
.map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(format!("{}", e)))?;
let filepath_field = schema.get_field("filepath")
.map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(format!("{}", e)))?;
let query_parser = QueryParser::for_index(&index, vec![content_field]);
let query = query_parser.parse_query(&query)
.map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(format!("{}", e)))?;
let top_docs = searcher.search(&query, &TopDocs::with_limit(20))
.map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?;
let mut results = Vec::new();
for (_score, doc_address) in top_docs {
let doc = searcher.doc::<TantivyDocument>(doc_address)
.map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?;
if let Some(path) = doc.get_first(filepath_field) {
if let Some(text) = path.as_str() {
results.push(text.to_string());
}
}
}
Ok(results)
}
// Python module entry point: registers the two exported functions,
// `index_directory` and `search`, on the `uspoo_core` module object.
#[pymodule]
fn uspoo_core(module: &Bound<'_, PyModule>) -> PyResult<()> {
    // Wrap and register one function at a time, in the same order as before.
    let index_directory_fn = wrap_pyfunction!(index_directory, module)?;
    module.add_function(index_directory_fn)?;

    let search_fn = wrap_pyfunction!(search, module)?;
    module.add_function(search_fn)?;

    Ok(())
}