Spaces:
Running
Running
| import onnxruntime as ort | |
| import re | |
| import numpy as np | |
| from transformers import AutoTokenizer | |
| from functools import lru_cache | |
| import multiprocessing | |
# ==============================
# Model and tokenizer setup — optimized
# ==============================
# Quantized (int8) multilingual E5-small encoder exported to ONNX,
# loaded from project-local files (no network access at startup).
MODEL_PATH = "lib/intfloat_multilingual-e5-small_merged_int8.onnx"
TOKENIZER_PATH = "./lib/"
# Tune the ONNX Runtime session for CPU inference.
session_options = ort.SessionOptions()
session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
session_options.enable_cpu_mem_arena = True
session_options.intra_op_num_threads = multiprocessing.cpu_count()  # use all server cores
session_options.inter_op_num_threads = 1
# NOTE: the graph-optimized model is written to disk on every startup;
# subsequent runs could load "optimized_model.onnx" directly to skip
# re-optimization — TODO confirm whether that is intended here.
session_options.optimized_model_filepath = "optimized_model.onnx"
session = ort.InferenceSession(
    MODEL_PATH,
    providers=[('CPUExecutionProvider', {})],
    sess_options=session_options
)
# Load the tokenizer from local files only (fast Rust implementation).
tokenizer = AutoTokenizer.from_pretrained(
    TOKENIZER_PATH,
    local_files_only=True,
    use_fast=True
)
| # ============================== | |
| # دوال مساعدة - محسّنة | |
| # ============================== | |
# ==============================
# Helper functions — optimized
# ==============================
# Single-pass translation table replacing five per-character re.sub passes:
# strips diacritics (tashkeel) + tatweel, and unifies letter variants.
_CHAR_TABLE = str.maketrans({
    # diacritics and tatweel -> removed
    'ً': None, 'ٌ': None, 'ٍ': None, 'َ': None, 'ُ': None,
    'ِ': None, 'ّ': None, 'ْ': None, 'ـ': None,
    # unify alef variants; ya/waw hamza forms -> plain letters
    'إ': 'ا', 'أ': 'ا', 'آ': 'ا',
    'ى': 'ي', 'ؤ': 'و', 'ئ': 'ي',
})
# Precompiled patterns (hoisted out of the per-call path).
_TA_MARBUTA_RE = re.compile(r'ة\b')   # word-final ta marbuta -> ha
_PUNCT_RE = re.compile(r'[^\w\s]')    # punctuation/symbols -> space
_WS_RE = re.compile(r'\s+')           # collapse whitespace runs


@lru_cache(maxsize=4096)
def normalize_arabic(text: str) -> str:
    """Normalize Arabic text for search/embedding, with memoization.

    Steps (same order and effect as the original implementation):
    remove diacritics and tatweel; unify alef variants (إ/أ/آ -> ا),
    ى -> ي, ؤ -> و, ئ -> ي; rewrite word-final ة to ه; replace
    punctuation with spaces; collapse whitespace and strip.

    The original docstring claimed caching but applied none; results
    are now cached in a bounded LRU since short queries repeat often.

    Args:
        text: Input string (Arabic or mixed-script).

    Returns:
        The normalized, stripped string.
    """
    text = text.translate(_CHAR_TABLE)
    text = _TA_MARBUTA_RE.sub('ه', text)
    text = _PUNCT_RE.sub(' ', text)
    text = _WS_RE.sub(' ', text)
    return text.strip()
| # ============================== | |
| # دالة Embedding المحسّنة | |
| # ============================== | |
# ==============================
# Optimized embedding function
# ==============================
def query_to_embedding(query: str, normalize: bool = True) -> np.ndarray:
    """Encode a search query as a unit-length float32 embedding.

    Args:
        query: Raw query text; blank input yields None.
        normalize: When True, apply Arabic normalization first.

    Returns:
        A 1-D float32 numpy vector (L2-normalized when its norm is
        positive), or None for empty/whitespace-only input.
    """
    if not query or not query.strip():
        return None

    text = query.strip()
    if normalize:
        text = normalize_arabic(text)

    # Queries are short (<= ~15 words), so a small fixed max_length
    # suffices. E5 models expect the "query: " prefix.
    encoded = tokenizer(
        "query: " + text,
        return_tensors="np",
        truncation=True,
        padding="max_length",
        max_length=64,
        return_attention_mask=True,
        return_token_type_ids=False
    )

    # Second model output, first batch row — presumably the pooled
    # sentence embedding (TODO: confirm against the exported graph).
    outputs = session.run(None, dict(encoded))
    embedding = outputs[1][0]

    # L2-normalize unless the vector is all zeros.
    magnitude = np.linalg.norm(embedding)
    if magnitude > 0:
        embedding = embedding / magnitude
    return embedding.astype(np.float32)