# Knowledge-Universe — src/scoring/coverage_confidence.py
# NOTE: stray page header from a scraped copy (author vlsiddarth,
# commit 4d5254b "fixed: crawlers and routes") — commented out so the
# file parses as Python.
"""
Knowledge Universe — Coverage Confidence Score (Blend Mode — John's Optimization)
==================================================================================
JOHN'S OPTIMIZATION:
Added compute_from_embeddings() — accepts pre-computed embeddings from the
reranker so we never call model.encode() twice in the same request.
Before: 2x model.encode() = ~800ms overhead
After: 1x model.encode() + cosine similarity = ~10ms overhead
The API is unchanged. compute() still works for standalone use.
compute_from_embeddings() is the fast path used by request_handler.
"""
import logging
import re
from typing import List, Dict, Any, Optional
# Module-level logger, named after this module per convention.
logger = logging.getLogger(__name__)

# Confidence thresholds used by CoverageConfidenceScorer._build_response:
# score >= 0.65 → "high", >= 0.45 → "medium", otherwise "low" (with warning).
_HIGH_CONFIDENCE = 0.65
_MEDIUM_CONFIDENCE = 0.45
# Lower-cased acronym → long form. _suggest_queries checks each
# whitespace-split word of the query against this table and, on a hit,
# offers an expanded variant of the query as a suggestion.
_ACRONYM_EXPANSIONS = {
    "rlhf": "reinforcement learning from human feedback",
    "llm": "large language model",
    "rag": "retrieval augmented generation",
    "nlp": "natural language processing",
    "gnn": "graph neural network",
    "cnn": "convolutional neural network",
    "rnn": "recurrent neural network",
    "lstm": "long short-term memory",
    "bert": "bidirectional encoder representations transformers",
    "gpt": "generative pre-trained transformer",
    "vae": "variational autoencoder",
    "gan": "generative adversarial network",
    "moe": "mixture of experts",
    "peft": "parameter efficient fine tuning",
    "lora": "low rank adaptation",
    "dpo": "direct preference optimization",
    "sft": "supervised fine tuning",
    "mlops": "machine learning operations",
}
# Hand-curated refinements, highest priority in _suggest_queries: if a key
# phrase occurs anywhere in the lower-cased query, its suggestions are used
# (first matching key wins; insertion order matters).
_QUERY_REFINEMENTS = {
    "transformer architecture": [
        "attention mechanism self-attention explained",
        "transformer encoder decoder tutorial",
        "attention is all you need paper explained",
    ],
    "machine learning": [
        "machine learning beginner tutorial python",
        "supervised learning algorithms explained",
        "machine learning fundamentals course",
    ],
    "deep learning": [
        "neural network backpropagation explained",
        "deep learning pytorch tutorial beginner",
        "convolutional neural network image classification",
    ],
    "rlhf": [
        "reinforcement learning from human feedback explained",
        "reward model training language model",
        "RLHF PPO implementation tutorial",
    ],
    "mixture of experts": [
        "sparse mixture of experts architecture explained",
        "MoE gating network routing mechanism",
        "mixture of experts transformer tutorial",
    ],
}
# Domain keyword tuples → query templates. _suggest_queries fills "{topic}"
# with the (lower-cased) query. Tuples are checked in insertion order and the
# first tuple containing a matching keyword wins. NOTE: "quantum" appears in
# both the physics/quantum and materials-science tuples; since the
# physics/quantum tuple comes first, it always wins for quantum queries.
DOMAIN_QUERY_PATTERNS = {
    # Physics/quantum — suggest arXiv-specific terms
    ("quantum", "entanglement", "qubit", "hamiltonian", "decoherence"): [
        "{topic} arxiv review",
        "{topic} lecture notes pdf",
        "{topic} error analysis",
    ],
    # ML frameworks — suggest code and docs
    ("langchain", "llamaindex", "langgraph", "haystack", "dspy"): [
        "{topic} github example",
        "{topic} documentation tutorial",
        "{topic} cookbook python",
    ],
    # RAG/retrieval — suggest implementation
    ("rag", "retrieval", "vector", "embedding", "chunking"): [
        "{topic} implementation guide",
        "{topic} evaluation metrics",
        "production {topic} architecture",
    ],
    # Finance/trading
    ("fintech", "trading", "portfolio", "risk", "quant"): [
        "{topic} research paper 2025",
        "{topic} python implementation",
        "{topic} backtesting guide",
    ],
    # Deep learning techniques
    ("transformer", "attention", "bert", "gpt", "llm", "fine-tuning"): [
        "{topic} paper explained",
        "{topic} implementation pytorch",
        "{topic} benchmark comparison",
    ],
    # MLOps/deployment
    ("mlops", "deployment", "inference", "serving", "vllm"): [
        "{topic} production guide",
        "{topic} kubernetes setup",
        "{topic} optimization tutorial",
    ],
    # Materials science / physics
    ("superconductor", "lk-99", "quantum", "material", "semiconductor",
     "perovskite", "graphene", "topological"): [
        "{topic} arxiv preprint 2024",
        "{topic} experimental results review",
        "{topic} physics mechanism explained",
    ],
    # Neuroscience / biology
    ("neural", "neuron", "brain", "cortex", "synapse"): [
        "{topic} research paper",
        "{topic} computational model",
        "{topic} biological mechanism",
    ],
}
class CoverageConfidenceScorer:
    """
    Scores how well a set of retrieved results matches the query intent.

    Two entry points produce the same response dict (see _build_response):

    * compute_from_embeddings() — fast path; reuses the query/document
      embeddings already produced by the reranker, so no additional
      model.encode() calls are made.
    * compute() — slow path for standalone use; encodes the query and the
      top results from scratch, then delegates to the fast path.
    """

    def __init__(self):
        # Shared sentence-transformer model, resolved lazily by _get_model();
        # only the slow path (compute) ever needs it.
        self._model = None

    def _get_model(self):
        """Return the process-wide shared embedding model, or None on failure."""
        if self._model is None:
            try:
                from src.integrations.shared_model import get_shared_model
                self._model = get_shared_model()
            except Exception as e:
                logger.error(f"Failed to get shared model: {e}")
                return None
        return self._model

    def compute_from_embeddings(
        self,
        query: str,
        sources: List[Dict[str, Any]],
        query_emb,
        doc_embs,
        top_k: int = 5,
    ) -> Dict[str, Any]:
        """
        FAST PATH — reuses pre-computed embeddings from the reranker, so the
        only extra cost is the cosine-similarity computation.

        Args:
            query: Original query string.
            sources: Result dicts, in the same order as doc_embs.
            query_emb: Tensor from the reranker's model.encode(query).
            doc_embs: Tensor from the reranker's model.encode(texts).
            top_k: How many leading results to score.

        Returns:
            Response dict from _build_response(), or the no-results /
            unavailable fallback dicts.
        """
        if not sources:
            return self._no_results_response(query)
        if query_emb is None or doc_embs is None:
            return self._unavailable_response()
        try:
            from sentence_transformers import util

            top_sources = sources[:top_k]
            # doc_embs covers every document in `sources` order — keep top_k.
            top_embs = doc_embs[:top_k]
            sims = util.cos_sim(query_emb, top_embs)[0]
            # Clamp negatives so anti-correlated results contribute zero
            # instead of dragging the weighted average below zero.
            sim_scores = [max(0.0, float(s)) for s in sims]
            # Harmonic weights (1, 1/2, 1/3, ...) — higher-ranked results
            # dominate the blended confidence.
            weights = [1.0 / (rank + 1) for rank in range(len(sim_scores))]
            blended = sum(s * w for s, w in zip(sim_scores, weights)) / sum(weights)
            confidence = round(blended, 3)
            per_result = [
                {
                    "title": src.get("title", "")[:60],
                    "similarity": round(score, 3),
                }
                for src, score in zip(top_sources, sim_scores)
            ]
            return self._build_response(query, confidence, per_result)
        except Exception as e:
            logger.error(f"Fast confidence scoring failed: {e}")
            return self._unavailable_response()

    def compute(
        self,
        query: str,
        sources: List[Dict[str, Any]],
        top_k: int = 5,
    ) -> Dict[str, Any]:
        """
        SLOW PATH — standalone use only.

        Encodes the query and the top_k result texts from scratch, then
        delegates to compute_from_embeddings(). Production callers should
        pass reranker embeddings to compute_from_embeddings() instead to
        avoid encoding twice.
        """
        if not sources:
            return self._no_results_response(query)
        model = self._get_model()
        if model is None:
            return self._unavailable_response()
        try:
            query_emb = model.encode(query, convert_to_tensor=True)
            top_sources = sources[:top_k]
            # Title plus a short summary snippet is enough text for a
            # meaningful similarity signal.
            texts = [
                f"{s.get('title', '')} {(s.get('summary') or '')[:200]}"
                for s in top_sources
            ]
            doc_embs = model.encode(texts, convert_to_tensor=True)
            return self.compute_from_embeddings(
                query, top_sources, query_emb, doc_embs, top_k
            )
        except Exception as e:
            logger.error(f"Confidence scoring failed: {e}")
            return self._unavailable_response()

    def _build_response(
        self,
        query: str,
        confidence: float,
        per_result: List[Dict],
    ) -> Dict[str, Any]:
        """Map a blended confidence score to the user-facing response dict."""
        # Suggestions are generated at every confidence level — even a
        # high-confidence answer benefits from refinement options.
        all_suggestions = self._suggest_queries(query)
        if confidence >= _HIGH_CONFIDENCE:
            label = "high"
            warning = False
            message = None
            # Still provide 2 suggestions for query refinement even at
            # high confidence.
            suggestions = all_suggestions[:2]
        elif confidence >= _MEDIUM_CONFIDENCE:
            label = "medium"
            warning = False
            message = (
                "Results partially match your query. "
                "Consider refining with more specific terms."
            )
            suggestions = all_suggestions[:3]
        else:
            label = "low"
            warning = True
            message = (
                f"Low confidence ({confidence:.2f}) — results may not fully "
                "match your query intent. Try the suggested queries below."
            )
            suggestions = all_suggestions[:3]
        return {
            "confidence": confidence,
            "confidence_label": label,
            "coverage_warning": warning,
            "warning_message": message,
            "suggested_queries": suggestions,
            "top_result_similarities": per_result,
        }

    def _suggest_queries(self, query: str) -> List[str]:
        """
        Domain-aware query refinement.

        Priority: curated refinements (_QUERY_REFINEMENTS) → domain templates
        (DOMAIN_QUERY_PATTERNS) → acronym expansion → generic fallback.
        Returns at most 3 unique suggestions, never echoing the query itself.
        """
        suggestions: List[str] = []
        query_lower = query.lower().strip()
        # 1. Curated refinements (highest priority) — phrase substring match.
        for phrase, refinements in _QUERY_REFINEMENTS.items():
            if phrase in query_lower:
                suggestions.extend(refinements)
                break
        # 2. Domain templates. Keywords are matched at word starts only:
        # r"\brag" still matches "rag pipelines" (and the prefix match keeps
        # plurals like "transformers" working) but no longer fires on the
        # "rag" inside "storage", which a bare substring test would.
        if not suggestions:
            for domain_keywords, templates in DOMAIN_QUERY_PATTERNS.items():
                if any(
                    re.search(r"\b" + re.escape(kw), query_lower)
                    for kw in domain_keywords
                ):
                    topic = _extract_topic(query_lower, domain_keywords)
                    for template in templates:
                        filled = template.format(topic=topic)
                        if filled not in suggestions:
                            suggestions.append(filled)
                    break
        # 3. Acronym expansion — whole-word replacement only. The previous
        # str.replace() substituted substrings, corrupting words that merely
        # contain an acronym (e.g. "storage" → "sto<expansion>e").
        words = query_lower.split()
        for i, word in enumerate(words):
            if word in _ACRONYM_EXPANSIONS:
                expanded_query = " ".join(
                    words[:i] + [_ACRONYM_EXPANSIONS[word]] + words[i + 1:]
                )
                if expanded_query not in suggestions:
                    suggestions.append(expanded_query)
        # 4. Generic fallback only when nothing else matched.
        if not suggestions:
            suggestions = [
                f"{query_lower} tutorial 2025",
                f"{query_lower} implementation guide",
                f"introduction to {query_lower}",
            ]
        # De-duplicate (preserving order), drop the original query, cap at 3.
        seen = {query_lower}
        unique: List[str] = []
        for suggestion in suggestions:
            if suggestion not in seen:
                seen.add(suggestion)
                unique.append(suggestion)
        return unique[:3]

    def _no_results_response(self, query: str) -> Dict[str, Any]:
        """Response when the search returned zero results."""
        return {
            "confidence": 0.0,
            "confidence_label": "none",
            "coverage_warning": True,
            "warning_message": "No results returned for this query.",
            "suggested_queries": self._suggest_queries(query),
            "top_result_similarities": [],
        }

    def _unavailable_response(self) -> Dict[str, Any]:
        """Response when scoring cannot run (model or embeddings missing)."""
        return {
            "confidence": None,
            "confidence_label": "unavailable",
            "coverage_warning": False,
            "warning_message": None,
            "suggested_queries": [],
            "top_result_similarities": [],
        }
def _extract_topic(query: str, domain_keywords: tuple) -> str:
"""Extract topic phrase by removing domain keyword from query."""
for kw in domain_keywords:
if kw in query:
# Return query with the matched keyword stripped of stop words
return query.strip()
return query.strip()