"""
Coverage Analyzer Module

Analyzes how well a user explanation covers a canonical concept graph.
"""

from typing import List, Dict

import numpy as np

try:
    from sklearn.metrics.pairwise import cosine_similarity
except ImportError:  # pragma: no cover - depends on the environment
    # No code in this module calls cosine_similarity (matching is purely
    # keyword based), so a missing scikit-learn must not stop the analyzer
    # from being imported.
    cosine_similarity = None


class CoverageAnalyzer:
    """Match user claims against the nodes of a canonical concept graph.

    Matching is case-insensitive substring matching of each node label
    against the explanation and the individual claim texts. Claim
    embeddings are accepted and passed along but are not used by any
    scoring step in this class.
    """

    # Node ``type`` value that marks a node as a prerequisite.
    _PREREQUISITE = 'prerequisite'
    # Prerequisite nodes count double in the coverage score.
    _PREREQUISITE_WEIGHT = 2.0
    _DEFAULT_WEIGHT = 1.0
    # Fraction of a node's weight credited when its status is 'weak'.
    _WEAK_CREDIT = 0.4

    # A claim with this many words (or more) reaches coverage strength 1.0.
    _FULL_STRENGTH_WORDS = 15.0
    # Strength strictly above this is 'covered'.
    _COVERED_ABOVE = 0.6
    # Strength strictly above this (and not 'covered') is 'weak'.
    _WEAK_ABOVE = 0.2
    # Strength assigned when the label appears in the explanation but in
    # none of the claims.
    _UNCLAIMED_STRENGTH = 0.1

    # Quotes whose strength is strictly below this count as name-dropping.
    _NAME_DROP_BELOW = 0.3
    _MAX_NAME_DROPS = 3

    def __init__(self):
        self._ready = True

    def is_ready(self) -> bool:
        """Return the readiness flag set in ``__init__``."""
        return self._ready

    async def analyze_coverage(
        self,
        user_claims: List[Dict],
        canonical_graph: Dict,
        explanation: str
    ) -> Dict:
        """
        Analyze concept coverage by matching user claims to graph nodes.

        Args:
            user_claims: Claim dicts. Each must have a ``'text'`` entry;
                ``'embedding'`` is optional (``None`` is substituted when
                it is absent).
            canonical_graph: Dict with a ``'nodes'`` list. Every node needs
                ``'id'`` and ``'label'``; ``'type'`` is optional and only
                the value ``'prerequisite'`` is treated specially.
            explanation: The full explanation text.

        Returns:
            {
                'coverage_score': float (0-100),
                'node_coverage': Dict[node_id, status],
                'missing_concepts': List[Dict],
                'weak_links': List[Dict],
                'name_dropping': List[str]
            }

        Raises:
            KeyError: If a claim lacks ``'text'``, the graph lacks
                ``'nodes'``, or a node lacks ``'id'`` or ``'label'``.
        """
        claim_texts = [claim['text'] for claim in user_claims]
        # Embeddings are not read by any scoring step, so a claim without
        # one must not abort the whole analysis with a KeyError.
        claim_embeddings = [claim.get('embedding') for claim in user_claims]

        node_coverage = {}
        missing_concepts = []
        weak_links = []

        for node in canonical_graph['nodes']:
            node_id = node['id']
            node_label = node['label']

            coverage_status = self._check_node_coverage(
                node_label=node_label,
                claim_texts=claim_texts,
                claim_embeddings=claim_embeddings,
                explanation=explanation
            )
            node_coverage[node_id] = coverage_status

            status = coverage_status['status']
            if status == 'missing':
                is_prerequisite = node.get('type') == self._PREREQUISITE
                role = 'prerequisite' if is_prerequisite else 'component'
                missing_concepts.append({
                    'concept': node_label,
                    'severity': 'high' if is_prerequisite else 'medium',
                    'description': (
                        f"This is a key {role} for understanding the concept."
                    ),
                })
            elif status == 'weak':
                weak_links.append({
                    'concept': node_label,
                    # The key is always present but may hold None, so a
                    # .get() default would never apply; normalise to ''.
                    'user_quote': coverage_status.get('user_quote') or '',
                    'suggestion': (
                        'Explain the mechanism or relationship, '
                        'not just mention the term.'
                    ),
                })

        coverage_score = self._calculate_coverage_score(
            node_coverage, canonical_graph
        )

        # Quotes that mention a concept without really explaining it.
        name_dropping = self._detect_name_dropping(claim_texts, node_coverage)

        return {
            'coverage_score': coverage_score,
            'node_coverage': node_coverage,
            'missing_concepts': missing_concepts,
            'weak_links': weak_links,
            'name_dropping': name_dropping
        }

    def _check_node_coverage(
        self,
        node_label: str,
        claim_texts: List[str],
        claim_embeddings: List[List[float]],
        explanation: str
    ) -> Dict:
        """Check if and how well a concept node is covered.

        Args:
            node_label: Concept label, matched case-insensitively as a
                substring.
            claim_texts: Texts of the individual user claims.
            claim_embeddings: Accepted for interface compatibility; not
                read by this method.
            explanation: Full explanation text.

        Returns:
            Dict with ``'status'`` (``'covered'``, ``'weak'`` or
            ``'missing'``), ``'user_quote'`` (the best matching claim text
            or ``None``) and ``'coverage_strength'`` (float in [0, 1]).
        """
        node_lower = node_label.lower()

        # An empty string is a substring of every string, so a blank label
        # would otherwise count as mentioned everywhere.
        if not node_lower.strip() or node_lower not in explanation.lower():
            return {
                'status': 'missing',
                'user_quote': None,
                'coverage_strength': 0.0
            }

        # Keyword match against each claim. Heuristic: the longer the
        # claim that contains the label, the stronger the coverage.
        best_quote = None
        best_score = 0.0
        for claim_text in claim_texts:
            if node_lower not in claim_text.lower():
                continue
            strength = min(
                1.0, len(claim_text.split()) / self._FULL_STRENGTH_WORDS
            )
            if strength > best_score:  # strict: first claim wins ties
                best_score = strength
                best_quote = claim_text

        if best_quote is None:
            # Mentioned in the explanation but in none of the claims.
            return {
                'status': 'weak',
                'user_quote': None,
                'coverage_strength': self._UNCLAIMED_STRENGTH
            }

        # NOTE(review): a label found in a very short claim (strength at or
        # below the weak threshold) is classed 'missing', whereas a label
        # found in no claim at all is classed 'weak' above. Behaviour kept
        # as is - confirm this asymmetry is intended.
        if best_score > self._COVERED_ABOVE:
            status = 'covered'
        elif best_score > self._WEAK_ABOVE:
            status = 'weak'
        else:
            status = 'missing'

        return {
            'status': status,
            'user_quote': best_quote,
            'coverage_strength': best_score
        }

    def _calculate_coverage_score(
        self, node_coverage: Dict, canonical_graph: Dict
    ) -> float:
        """Calculate the overall weighted coverage score.

        Prerequisite nodes weigh 2.0 and all others 1.0. A 'covered' node
        earns its full weight, a 'weak' node 40% of it, and anything else
        nothing.

        Returns:
            Earned weight as a percentage of total weight, or 0.0 when
            ``node_coverage`` is empty or the graph has no nodes.
        """
        if not node_coverage:
            return 0.0

        total_weight = 0.0
        covered_weight = 0.0
        for node in canonical_graph['nodes']:
            if node.get('type') == self._PREREQUISITE:
                weight = self._PREREQUISITE_WEIGHT
            else:
                weight = self._DEFAULT_WEIGHT
            total_weight += weight

            # Nodes without a coverage entry are treated as missing.
            status = node_coverage.get(node['id'], {}).get('status', 'missing')
            if status == 'covered':
                covered_weight += weight
            elif status == 'weak':
                covered_weight += weight * self._WEAK_CREDIT

        if total_weight <= 0:
            return 0.0
        return covered_weight / total_weight * 100

    def _detect_name_dropping(
        self, claim_texts: List[str], node_coverage: Dict
    ) -> List[str]:
        """Detect concepts that are mentioned but not explained.

        Args:
            claim_texts: Not read by this method; kept for interface
                compatibility.
            node_coverage: Mapping of node id to the dicts produced by
                ``_check_node_coverage``.

        Returns:
            Up to three distinct user quotes, in ``node_coverage`` order,
            whose coverage strength is below 0.3.
        """
        name_dropped = []
        for coverage in node_coverage.values():
            quote = coverage.get('user_quote')
            if not quote:
                continue
            if coverage.get('coverage_strength', 0) >= self._NAME_DROP_BELOW:
                continue
            # One short claim can name several concepts; list it only once
            # so repeats do not use up the limit.
            if quote not in name_dropped:
                name_dropped.append(quote)

        return name_dropped[:self._MAX_NAME_DROPS]