Create src/lib.rs
Browse files- src/lib.rs +107 -0
src/lib.rs
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
use pyo3::prelude::*;
|
| 2 |
+
use tantivy::collector::TopDocs;
|
| 3 |
+
use tantivy::query::QueryParser;
|
| 4 |
+
use tantivy::schema::*;
|
| 5 |
+
use tantivy::{doc, Index, IndexWriter, ReloadPolicy};
|
| 6 |
+
use walkdir::WalkDir;
|
| 7 |
+
use std::path::PathBuf;
|
| 8 |
+
use std::fs;
|
| 9 |
+
use std::sync::Mutex;
|
| 10 |
+
|
| 11 |
+
/// يقوم بإنشاء الفهرس أو فتحه، ثم فهرسة جميع الملفات النصية داخل مسار معين.
|
| 12 |
+
#[pyfunction]
|
| 13 |
+
fn index_directory(index_path: String, repo_path: String) -> PyResult<u64> {
|
| 14 |
+
// إعداد هيكل الفهرس
|
| 15 |
+
let mut schema_builder = Schema::builder();
|
| 16 |
+
let filepath = schema_builder.add_text_field("filepath", STRING | STORED);
|
| 17 |
+
let content = schema_builder.add_text_field("content", TEXT);
|
| 18 |
+
let schema = schema_builder.build();
|
| 19 |
+
let index_path = PathBuf::from(&index_path);
|
| 20 |
+
|
| 21 |
+
// إنشاء أو فتح الفهرس
|
| 22 |
+
let index = if index_path.exists() {
|
| 23 |
+
Index::open_in_dir(&index_path)
|
| 24 |
+
.map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?
|
| 25 |
+
} else {
|
| 26 |
+
fs::create_dir_all(&index_path)?;
|
| 27 |
+
Index::create_in_dir(&index_path, schema.clone())
|
| 28 |
+
.map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?
|
| 29 |
+
};
|
| 30 |
+
|
| 31 |
+
// ضبط إعدادات الكتابة
|
| 32 |
+
let mut writer = index.writer(50_000_000)
|
| 33 |
+
.map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?;
|
| 34 |
+
|
| 35 |
+
let mut count: u64 = 0;
|
| 36 |
+
|
| 37 |
+
// المرور على جميع الملفات في المسار المحدد
|
| 38 |
+
for entry in WalkDir::new(&repo_path)
|
| 39 |
+
.into_iter()
|
| 40 |
+
.filter_map(|e| e.ok())
|
| 41 |
+
{
|
| 42 |
+
let path = entry.path();
|
| 43 |
+
if path.is_file() {
|
| 44 |
+
// تجاهل الملفات غير النصية والمجلدات المخفية
|
| 45 |
+
if let Some(ext) = path.extension() {
|
| 46 |
+
let ext = ext.to_string_lossy().to_lowercase();
|
| 47 |
+
if matches!(ext.as_str(), "png" | "jpg" | "jpeg" | "gif" | "svg" | "ico" | "woff" | "woff2" | "ttf" | "eot" | "otf") {
|
| 48 |
+
continue;
|
| 49 |
+
}
|
| 50 |
+
}
|
| 51 |
+
if let Ok(text) = fs::read_to_string(path) {
|
| 52 |
+
let rel_path = path.strip_prefix(&repo_path)
|
| 53 |
+
.unwrap_or(path)
|
| 54 |
+
.to_string_lossy()
|
| 55 |
+
.to_string();
|
| 56 |
+
writer.add_document(doc!(
|
| 57 |
+
filepath => rel_path,
|
| 58 |
+
content => text,
|
| 59 |
+
)).map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?;
|
| 60 |
+
count += 1;
|
| 61 |
+
}
|
| 62 |
+
}
|
| 63 |
+
}
|
| 64 |
+
writer.commit()?;
|
| 65 |
+
Ok(count)
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
/// ينفذ استعلام بحث على الفهرس ويعيد أفضل 20 نتيجة.
|
| 69 |
+
#[pyfunction]
|
| 70 |
+
fn search(index_path: String, query: String) -> PyResult<Vec<String>> {
|
| 71 |
+
let index = Index::open_in_dir(&index_path)
|
| 72 |
+
.map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?;
|
| 73 |
+
let reader = index.reader_builder()
|
| 74 |
+
.reload_policy(ReloadPolicy::OnCommitWithDelay)
|
| 75 |
+
.try_into()
|
| 76 |
+
.map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?;
|
| 77 |
+
let searcher = reader.searcher();
|
| 78 |
+
let schema = index.schema();
|
| 79 |
+
let content_field = schema.get_field("content")
|
| 80 |
+
.map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(format!("{}", e)))?;
|
| 81 |
+
let filepath_field = schema.get_field("filepath")
|
| 82 |
+
.map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(format!("{}", e)))?;
|
| 83 |
+
let query_parser = QueryParser::for_index(&index, vec![content_field]);
|
| 84 |
+
let query = query_parser.parse_query(&query)
|
| 85 |
+
.map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(format!("{}", e)))?;
|
| 86 |
+
let top_docs = searcher.search(&query, &TopDocs::with_limit(20))
|
| 87 |
+
.map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?;
|
| 88 |
+
let mut results = Vec::new();
|
| 89 |
+
for (_score, doc_address) in top_docs {
|
| 90 |
+
let doc = searcher.doc::<TantivyDocument>(doc_address)
|
| 91 |
+
.map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("{}", e)))?;
|
| 92 |
+
if let Some(path) = doc.get_first(filepath_field) {
|
| 93 |
+
if let Some(text) = path.as_str() {
|
| 94 |
+
results.push(text.to_string());
|
| 95 |
+
}
|
| 96 |
+
}
|
| 97 |
+
}
|
| 98 |
+
Ok(results)
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
/// وحدة Python تُصدر دوال Rust.
|
| 102 |
+
#[pymodule]
|
| 103 |
+
fn uspoo_core(m: &Bound<'_, PyModule>) -> PyResult<()> {
|
| 104 |
+
m.add_function(wrap_pyfunction!(index_directory, m)?)?;
|
| 105 |
+
m.add_function(wrap_pyfunction!(search, m)?)?;
|
| 106 |
+
Ok(())
|
| 107 |
+
}
|