FOIA_Doc_Search / ingest /semantic.py
GodsDevProject's picture
Create ingest/semantic.py
9743c80 verified
import numpy as np
try:
import faiss
from sentence_transformers import SentenceTransformer
except ImportError:
faiss = None
SentenceTransformer = None
# Module-level cache for the sentence-embedding model. It starts empty and is
# filled in on first use by _get_model(), so the model is constructed at most
# once per process rather than at import time.
_model = None
def semantic_available():
    """Report whether the optional semantic-search dependencies are usable.

    Returns:
        True when neither ``faiss`` nor ``SentenceTransformer`` is ``None``
        (both are set to ``None`` if the guarded import at module load
        failed), otherwise False.
    """
    return not (faiss is None or SentenceTransformer is None)
def _get_model():
    """Return the shared embedding model, constructing it on first call.

    The instance is stored in the module-level ``_model`` so later calls
    reuse it instead of building a new ``SentenceTransformer``.
    """
    global _model
    # Fast path: a model was already built by an earlier call.
    if _model is not None:
        return _model
    _model = SentenceTransformer("all-MiniLM-L6-v2")
    return _model
def semantic_refine(query, results, k=5):
    """Re-rank *results* by embedding similarity to *query*, keeping the top *k*.

    In-memory, ephemeral semantic refinement: a throwaway FAISS inner-product
    index is built over the ``"snippet"`` text of every result on each call.
    Nothing is persisted.
    NOTE(review): the original docstring also promised "no external calls";
    whether the first model load can reach the network depends on the local
    model cache -- confirm before relying on it.

    Args:
        query: Text the snippets are compared against.
        results: Sequence of dict-like records. Each record's ``"snippet"``
            value is embedded; a missing or ``None`` snippet is treated as
            the empty string.
        k: Maximum number of records to return.

    Returns:
        ``results`` itself, unchanged, when it is empty or when the optional
        semantic dependencies are unavailable. An empty list when ``k`` is
        not positive. Otherwise a new list of at most ``k`` records from
        ``results`` in the order the index search returned them.
    """
    # Cheap emptiness test first; hand the caller's object back untouched
    # when there is nothing to rank or the optional dependencies are missing.
    if not results or not semantic_available():
        return results

    # Never request more neighbours than there are candidates, and stop
    # before FAISS is asked for a non-positive number of neighbours.
    top_k = min(k, len(results))
    if top_k <= 0:
        return []

    # `.get("snippet", "")` would still pass None through when the key exists
    # with a None value, so coerce any falsy snippet to the empty string.
    texts = [r.get("snippet") or "" for r in results]

    model = _get_model()

    # Embeddings are normalized, so inner product ranks by cosine similarity.
    # FAISS works on float32 matrices; make the dtype explicit.
    embeddings = np.asarray(
        model.encode(texts, normalize_embeddings=True), dtype=np.float32
    )
    index = faiss.IndexFlatIP(embeddings.shape[1])
    index.add(embeddings)

    q_emb = np.asarray(
        model.encode([query], normalize_embeddings=True), dtype=np.float32
    )
    _, idx = index.search(q_emb, top_k)

    # FAISS uses -1 as the label for an unfilled result slot; skip those so
    # a negative index cannot silently wrap around to the last record.
    return [results[i] for i in idx[0] if i >= 0]