Spaces:
Running
Running
"""
Knowledge Universe — Coverage Confidence Score (Blend Mode — John's Optimization)
==================================================================================

JOHN'S OPTIMIZATION:
Added compute_from_embeddings() — accepts pre-computed embeddings from the
reranker so we never call model.encode() twice in the same request.

Before: 2x model.encode() = ~800ms overhead
After:  1x model.encode() + cosine similarity = ~10ms overhead

The API is unchanged. compute() still works for standalone use.
compute_from_embeddings() is the fast path used by request_handler.
"""
| import logging | |
| from typing import List, Dict, Any, Optional | |
| logger = logging.getLogger(__name__) | |
# Thresholds on the 0-1 weighted cosine-similarity score used by
# _build_response(): >= 0.65 labels "high", >= 0.45 "medium", else "low".
_HIGH_CONFIDENCE = 0.65
_MEDIUM_CONFIDENCE = 0.45
| _ACRONYM_EXPANSIONS = { | |
| "rlhf": "reinforcement learning from human feedback", | |
| "llm": "large language model", | |
| "rag": "retrieval augmented generation", | |
| "nlp": "natural language processing", | |
| "gnn": "graph neural network", | |
| "cnn": "convolutional neural network", | |
| "rnn": "recurrent neural network", | |
| "lstm": "long short-term memory", | |
| "bert": "bidirectional encoder representations transformers", | |
| "gpt": "generative pre-trained transformer", | |
| "vae": "variational autoencoder", | |
| "gan": "generative adversarial network", | |
| "moe": "mixture of experts", | |
| "peft": "parameter efficient fine tuning", | |
| "lora": "low rank adaptation", | |
| "dpo": "direct preference optimization", | |
| "sft": "supervised fine tuning", | |
| "mlops": "machine learning operations", | |
| } | |
# Hand-curated refinements for common queries.  Keys are matched as
# substrings of the lower-cased query by _suggest_queries(); the first
# matching key wins, and this table is consulted before
# DOMAIN_QUERY_PATTERNS.
_QUERY_REFINEMENTS = {
    "transformer architecture": [
        "attention mechanism self-attention explained",
        "transformer encoder decoder tutorial",
        "attention is all you need paper explained",
    ],
    "machine learning": [
        "machine learning beginner tutorial python",
        "supervised learning algorithms explained",
        "machine learning fundamentals course",
    ],
    "deep learning": [
        "neural network backpropagation explained",
        "deep learning pytorch tutorial beginner",
        "convolutional neural network image classification",
    ],
    "rlhf": [
        "reinforcement learning from human feedback explained",
        "reward model training language model",
        "RLHF PPO implementation tutorial",
    ],
    "mixture of experts": [
        "sparse mixture of experts architecture explained",
        "MoE gating network routing mechanism",
        "mixture of experts transformer tutorial",
    ],
}
# Domain-aware refinement templates used by _suggest_queries().
# Keys are tuples of trigger keywords: if ANY keyword occurs as a substring
# of the lower-cased query, that entry's templates are filled in via
# "{topic}" (see _extract_topic).  Dict order matters — the caller breaks
# at the first matching entry.
# NOTE(review): "quantum" appears both here and in the materials-science
# entry below; because matching stops at the first hit, the later
# occurrence can never trigger on its own.
DOMAIN_QUERY_PATTERNS = {
    # Physics/quantum — suggest arXiv-specific terms
    ("quantum", "entanglement", "qubit", "hamiltonian", "decoherence"): [
        "{topic} arxiv review",
        "{topic} lecture notes pdf",
        "{topic} error analysis",
    ],
    # ML frameworks — suggest code and docs
    ("langchain", "llamaindex", "langgraph", "haystack", "dspy"): [
        "{topic} github example",
        "{topic} documentation tutorial",
        "{topic} cookbook python",
    ],
    # RAG/retrieval — suggest implementation
    ("rag", "retrieval", "vector", "embedding", "chunking"): [
        "{topic} implementation guide",
        "{topic} evaluation metrics",
        "production {topic} architecture",
    ],
    # Finance/trading
    ("fintech", "trading", "portfolio", "risk", "quant"): [
        "{topic} research paper 2025",
        "{topic} python implementation",
        "{topic} backtesting guide",
    ],
    # Deep learning techniques
    ("transformer", "attention", "bert", "gpt", "llm", "fine-tuning"): [
        "{topic} paper explained",
        "{topic} implementation pytorch",
        "{topic} benchmark comparison",
    ],
    # MLOps/deployment
    ("mlops", "deployment", "inference", "serving", "vllm"): [
        "{topic} production guide",
        "{topic} kubernetes setup",
        "{topic} optimization tutorial",
    ],
    # Materials science / physics
    ("superconductor", "lk-99", "quantum", "material", "semiconductor",
     "perovskite", "graphene", "topological"): [
        "{topic} arxiv preprint 2024",
        "{topic} experimental results review",
        "{topic} physics mechanism explained",
    ],
    # Neuroscience / biology
    ("neural", "neuron", "brain", "cortex", "synapse"): [
        "{topic} research paper",
        "{topic} computational model",
        "{topic} biological mechanism",
    ],
}
class CoverageConfidenceScorer:
    """
    Computes how well returned results match the query intent.

    Fast path: compute_from_embeddings() — reuses embeddings already
    computed by the reranker (zero additional model.encode() calls).
    Slow path: compute() — encodes from scratch (standalone use only).
    """

    def __init__(self):
        # Embedding model, loaded lazily by _get_model() (slow path only).
        self._model = None

    def _get_model(self):
        """Return the shared embedding model, loading it on first use.

        Returns:
            The shared model instance, or None if it could not be obtained
            (the failure is logged, not raised).
        """
        if self._model is None:
            try:
                from src.integrations.shared_model import get_shared_model
                self._model = get_shared_model()
            except Exception as e:
                logger.error(f"Failed to get shared model: {e}")
                return None
        return self._model

    def compute_from_embeddings(
        self,
        query: str,
        sources: List[Dict[str, Any]],
        query_emb,
        doc_embs,
        top_k: int = 5,
    ) -> Dict[str, Any]:
        """
        FAST PATH — John's optimization.

        Uses pre-computed embeddings from the reranker; no additional
        model.encode() calls (cost is just the cosine similarity, ~5ms).

        Args:
            query: Original query string.
            sources: Result dicts (same order as doc_embs).
            query_emb: Tensor from the reranker's model.encode(query).
            doc_embs: Tensor from the reranker's model.encode(texts).
            top_k: How many of the top results to score.

        Returns:
            Response dict (see _build_response); a "none" response when
            sources is empty, "unavailable" when embeddings are missing.
        """
        if not sources:
            return self._no_results_response(query)
        if query_emb is None or doc_embs is None:
            return self._unavailable_response()
        try:
            from sentence_transformers import util
            # doc_embs may contain embeddings for all documents — score only
            # top_k rows, and never more rows than we have sources for.
            # (Fixes a potential IndexError when doc_embs is longer than
            # sources, and a ZeroDivisionError when it is empty.)
            n = min(top_k, len(sources), len(doc_embs))
            if n == 0:
                return self._unavailable_response()
            top_sources = sources[:n]
            sims = util.cos_sim(query_emb, doc_embs[:n])[0]
            # Clamp negatives to 0 so dissimilar results don't push the
            # weighted average below zero.
            sim_scores = [max(0.0, float(s)) for s in sims]
            # Harmonic weights — higher-ranked results count more.
            weights = [1.0 / (i + 1) for i in range(n)]
            total_w = sum(weights)
            confidence = round(
                sum(s * w for s, w in zip(sim_scores, weights)) / total_w, 3
            )
            per_result = [
                {
                    "title": src.get("title", "")[:60],
                    "similarity": round(score, 3),
                }
                for src, score in zip(top_sources, sim_scores)
            ]
            return self._build_response(query, confidence, per_result)
        except Exception as e:
            logger.error(f"Fast confidence scoring failed: {e}")
            return self._unavailable_response()

    def compute(
        self,
        query: str,
        sources: List[Dict[str, Any]],
        top_k: int = 5,
    ) -> Dict[str, Any]:
        """
        SLOW PATH — standalone use only.

        Encodes the query and documents from scratch, then delegates to
        compute_from_embeddings(). Use the fast path in production.
        """
        if not sources:
            return self._no_results_response(query)
        model = self._get_model()
        if model is None:
            return self._unavailable_response()
        try:
            # (Removed an unused `from sentence_transformers import util`
            # that the original imported but never referenced here.)
            query_emb = model.encode(query, convert_to_tensor=True)
            top_sources = sources[:top_k]
            # Title plus a short summary snippet gives the embedding enough
            # context without paying for full-document encoding.
            texts = [
                f"{s.get('title', '')} {(s.get('summary') or '')[:200]}"
                for s in top_sources
            ]
            doc_embs = model.encode(texts, convert_to_tensor=True)
            return self.compute_from_embeddings(
                query, top_sources, query_emb, doc_embs, top_k
            )
        except Exception as e:
            logger.error(f"Confidence scoring failed: {e}")
            return self._unavailable_response()

    def _build_response(
        self,
        query: str,
        confidence: float,
        per_result: List[Dict],
    ) -> Dict[str, Any]:
        """Assemble the response dict for a computed confidence score.

        Labels the score via the module-level thresholds and attaches
        refinement suggestions (2 at high confidence, 3 otherwise).
        """
        # Always generate suggestions — useful at all confidence levels.
        all_suggestions = self._suggest_queries(query)
        if confidence >= _HIGH_CONFIDENCE:
            label = "high"
            warning = False
            message = None
            # Still provide 2 suggestions for refinement at high confidence.
            suggestions = all_suggestions[:2]
        elif confidence >= _MEDIUM_CONFIDENCE:
            label = "medium"
            warning = False
            message = (
                "Results partially match your query. "
                "Consider refining with more specific terms."
            )
            suggestions = all_suggestions[:3]
        else:
            label = "low"
            warning = True
            message = (
                f"Low confidence ({confidence:.2f}) — results may not fully "
                "match your query intent. Try the suggested queries below."
            )
            suggestions = all_suggestions[:3]
        return {
            "confidence": confidence,
            "confidence_label": label,
            "coverage_warning": warning,
            "warning_message": message,
            "suggested_queries": suggestions,
            "top_result_similarities": per_result,
        }

    def _suggest_queries(self, query: str) -> List[str]:
        """
        Domain-aware query refinement instead of generic 'explained/tutorial'.

        Priority: explicit _QUERY_REFINEMENTS, then DOMAIN_QUERY_PATTERNS,
        then acronym expansion; generic fallback only if nothing matched.
        Returns at most 3 unique suggestions, never echoing the query itself.
        """
        suggestions: List[str] = []
        query_lower = query.lower().strip()
        # 1. Explicit query refinements (highest priority).
        for pattern, refinements in _QUERY_REFINEMENTS.items():
            if pattern in query_lower:
                suggestions.extend(refinements)
                break
        # 2. Domain-aware patterns (first matching domain wins).
        if not suggestions:
            for domain_keywords, patterns in DOMAIN_QUERY_PATTERNS.items():
                if any(kw in query_lower for kw in domain_keywords):
                    topic = _extract_topic(query_lower, domain_keywords)
                    for pattern in patterns:
                        filled = pattern.format(topic=topic)
                        if filled not in suggestions:
                            suggestions.append(filled)
                    break
        # 3. Acronym expansion — rebuild from words so only whole-word
        # matches are replaced (str.replace also hit substrings, e.g.
        # "rag" inside "storage").
        words = query_lower.split()
        for word in words:
            if word in _ACRONYM_EXPANSIONS:
                expanded = _ACRONYM_EXPANSIONS[word]
                expanded_query = " ".join(
                    expanded if w == word else w for w in words
                )
                if expanded_query not in suggestions:
                    suggestions.append(expanded_query)
        # 4. Generic fallback only if nothing matched.
        if not suggestions:
            base = query_lower
            suggestions = [
                f"{base} tutorial 2025",
                f"{base} implementation guide",
                f"introduction to {base}",
            ]
        # De-duplicate, drop the original query, cap at 3.
        seen = {query_lower}
        unique = []
        for s in suggestions:
            if s not in seen:
                seen.add(s)
                unique.append(s)
        return unique[:3]

    def _no_results_response(self, query: str) -> Dict[str, Any]:
        """Response for an empty result set: zero confidence plus a warning."""
        return {
            "confidence": 0.0,
            "confidence_label": "none",
            "coverage_warning": True,
            "warning_message": "No results returned for this query.",
            "suggested_queries": self._suggest_queries(query),
            "top_result_similarities": [],
        }

    def _unavailable_response(self) -> Dict[str, Any]:
        """Response when scoring could not run (no model / no embeddings)."""
        return {
            "confidence": None,
            "confidence_label": "unavailable",
            "coverage_warning": False,
            "warning_message": None,
            "suggested_queries": [],
            "top_result_similarities": [],
        }
| def _extract_topic(query: str, domain_keywords: tuple) -> str: | |
| """Extract topic phrase by removing domain keyword from query.""" | |
| for kw in domain_keywords: | |
| if kw in query: | |
| # Return query with the matched keyword stripped of stop words | |
| return query.strip() | |
| return query.strip() |