"""
Libra IR - Entity Information Retrieval Module
A standalone IR module for entity retrieval with sensitive topic detection.
Optimized for Precision, Recall, and Latency.
Architecture Levels:
- Level 3 (Current): Alias Lookup + BM25 + Reranking + Sensitive Detection
- Level 4 (Planned): + Dense Retrieval (Embeddings + FAISS)
- Level 5 (Planned): + Hybrid Fusion (BM25 + Dense + RRF)
- Level 6 (Planned): + LLM Verification
Usage:
    from libra_shield.ir import EntityRetriever

    retriever = EntityRetriever()
    result = retriever.retrieve("Tell me about MBZ")

    # Check for sensitive topics
    if result.is_sensitive:
        print(f"Sensitive: {result.reframe_guidance}")

    for r in result.entities:
        print(f"{r.entity.name}: {r.score:.2f}")
"""
from .knowledge_base import KnowledgeBase
from .models import (
BenchmarkResult,
Entity,
RetrievalConfig,
RetrievalResult,
)
from .normalizer import ArabicNameNormalizer, get_normalizer
from .reranker import EntityReranker, rerank_results
from .retriever import EntityRetriever, EnhancedAliasRetriever, RetrievalOutput
from .retrievers.alias import AliasRetriever
from .retrievers.bm25 import BM25Retriever
from .sensitive_detector import SensitiveMatch, SensitiveTopicDetector
# Explicit public API: the names exported by a star-import of this package.
# Every entry corresponds to a name imported at the top of this module.
__all__ = [
    # --- Main entry point ---
    "EntityRetriever",
    "RetrievalOutput",
    # --- Knowledge base ---
    "KnowledgeBase",
    # --- Sensitive-topic detection ---
    "SensitiveTopicDetector",
    "SensitiveMatch",
    # --- Core components (from the normalizer module) ---
    "ArabicNameNormalizer",
    "get_normalizer",
    # --- Retrievers (internal; exposed for advanced usage) ---
    "EnhancedAliasRetriever",
    "AliasRetriever",
    "BM25Retriever",
    # --- Reranking ---
    "EntityReranker",
    "rerank_results",
    # --- Data models ---
    "Entity",
    "RetrievalResult",
    "RetrievalConfig",
    "BenchmarkResult",
]
|