File size: 4,003 Bytes
0db8564
 
 
 
50b1c2c
0db8564
 
 
 
 
 
 
 
 
 
50b1c2c
0db8564
50b1c2c
 
0db8564
 
50b1c2c
 
0db8564
 
 
 
 
 
 
50b1c2c
0db8564
 
 
 
50b1c2c
0db8564
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50b1c2c
 
 
 
0db8564
 
 
 
 
 
 
 
 
 
 
 
50b1c2c
0db8564
 
 
 
 
50b1c2c
0db8564
 
 
50b1c2c
0db8564
 
50b1c2c
0db8564
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
use pyo3::prelude::*;
use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::*;
use tantivy::{doc, Index, ReloadPolicy};
use walkdir::WalkDir;
use std::path::PathBuf;
use std::fs;

#[pyfunction]
fn index_directory(index_path: String, repo_path: String) -> PyResult<u64> {
    let mut schema_builder = Schema::builder();
    let filepath = schema_builder.add_text_field("filepath", STRING | STORED);
    let content = schema_builder.add_text_field("content", TEXT);
    let schema = schema_builder.build();
    let index_path_buf = PathBuf::from(&index_path);

    let index = if index_path_buf.exists() {
        Index::open_in_dir(&index_path_buf)
            .map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?
    } else {
        std::fs::create_dir_all(&index_path_buf)?;
        Index::create_in_dir(&index_path_buf, schema.clone())
            .map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?
    };

    let mut writer = index.writer(50_000_000)
        .map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?;

    let mut count: u64 = 0;
    for entry in WalkDir::new(&repo_path).into_iter().filter_map(|e| e.ok()) {
        let path = entry.path();
        if path.is_file() {
            if let Some(ext) = path.extension() {
                let ext = ext.to_string_lossy().to_lowercase();
                if matches!(ext.as_str(), "png"|"jpg"|"jpeg"|"gif"|"svg"|"ico"|"woff"|"woff2"|"ttf"|"eot"|"otf") {
                    continue;
                }
            }
            if let Ok(text) = fs::read_to_string(path) {
                let rel_path = path.strip_prefix(&repo_path)
                    .unwrap_or(path)
                    .to_string_lossy()
                    .to_string();
                writer.add_document(doc!(
                    filepath => rel_path,
                    content => text,
                )).map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?;
                count += 1;
            }
        }
    }

    writer.commit()
        .map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?;

    Ok(count)
}

#[pyfunction]
fn search(index_path: String, query: String) -> PyResult<Vec<String>> {
    let index = Index::open_in_dir(&index_path)
        .map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?;
    let reader = index.reader_builder()
        .reload_policy(ReloadPolicy::OnCommitWithDelay)
        .try_into()
        .map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?;
    let searcher = reader.searcher();

    let schema = index.schema();
    let content_field = schema.get_field("content")
        .map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(format!("{}", e)))?;
    let filepath_field = schema.get_field("filepath")
        .map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(format!("{}", e)))?;

    let query_parser = QueryParser::for_index(&index, vec![content_field]);
    let query = query_parser.parse_query(&query)
        .map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(format!("{}", e)))?;

    let top_docs = searcher.search(&query, &TopDocs::with_limit(20))
        .map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?;

    let mut results = Vec::new();
    for (_score, doc_address) in top_docs {
        let doc = searcher.doc::<TantivyDocument>(doc_address)
            .map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?;
        if let Some(path) = doc.get_first(filepath_field) {
            if let Some(text) = path.as_str() {
                results.push(text.to_string());
            }
        }
    }
    Ok(results)
}

/// Python module entry point: registers `index_directory` and `search` on the
/// `uspoo_core` module.
#[pymodule]
fn uspoo_core(m: &Bound<'_, PyModule>) -> PyResult<()> {
    let index_directory_fn = wrap_pyfunction!(index_directory, m)?;
    m.add_function(index_directory_fn)?;

    let search_fn = wrap_pyfunction!(search, m)?;
    m.add_function(search_fn)?;

    Ok(())
}