Spaces:
Sleeping
Sleeping
"""Gap identification for incomplete answers."""

from __future__ import annotations

import re
from dataclasses import dataclass
from typing import Any
@dataclass
class InformationGap:
    """An identified information gap.

    The ``@dataclass`` decorator generates the ``__init__`` that lets callers
    build instances with keyword arguments (as ``GapIdentifier`` does), along
    with ``__repr__`` and ``__eq__``.
    """

    # Human-readable explanation of the gap.
    description: str
    # One of "missing_fact", "unclear", "unverified", "outdated".
    gap_type: str
    # One of "low", "medium", "high".
    severity: str
    # Follow-up search query that might fill the gap; None when there is none.
    suggested_search: str | None = None
class GapIdentifier:
    """Identifies gaps in responses that need additional research.

    Every check is a keyword heuristic over the lower-cased query/answer text.
    Keywords are matched at word boundaries rather than as raw substrings, so
    that e.g. "show" is not read as the question word "how", "know" is not
    read as the temporal cue "now", and "almost" is not read as "most".
    """

    # Question word -> terms whose presence suggests the answer addresses it.
    _QUESTION_INDICATORS = {
        "why": ("reason", "because", "since", "due to"),
        "how": ("method", "process", "step", "by", "through"),
        "when": ("date", "time", "year", "month", "day"),
        "where": ("location", "place", "in", "at"),
        "who": ("person", "people", "company", "organization"),
        "what": ("definition", "is", "are", "means"),
    }

    # Terms that hint at a statistical / quantitative claim.
    _STATISTICAL_TERMS = (
        "percent", "%", "million", "billion", "number of",
        "majority", "most", "few", "many", "study shows",
    )

    # (hedging phrase, severity assigned when the phrase is present).
    _UNCERTAINTY_PHRASES = (
        ("i'm not sure", "high"),
        ("unclear", "medium"),
        ("might be", "low"),
        ("could be", "low"),
        ("possibly", "low"),
        ("it appears", "low"),
        ("seems to be", "low"),
        ("no clear answer", "high"),
        ("insufficient information", "high"),
    )

    # Query terms that signal a request for current information.
    _TEMPORAL_TERMS = (
        "current", "latest", "now", "today", "recent",
        "this year", "2024", "2025", "updated",
    )

    # A four-digit 19xx/20xx year or an English month name.
    _DATE_PATTERN = r'\b(20\d{2}|19\d{2}|january|february|march|april|may|june|july|august|september|october|november|december)\b'

    # Sort rank per severity; unknown severities sort last (rank 3).
    _SEVERITY_RANK = {"high": 0, "medium": 1, "low": 2}

    def __init__(self):
        """Initialize the gap identifier (it keeps no state)."""

    def identify_gaps(
        self,
        query: str,
        answer: str,
        sources: list[dict[str, str]] | None = None,
    ) -> list[InformationGap]:
        """Identify information gaps in an answer.

        Runs, in order, the question-coverage, unsourced-claim, uncertainty
        and temporal checks and concatenates their results.

        Args:
            query: Original user query
            answer: Generated answer
            sources: List of sources used

        Returns:
            List of identified gaps
        """
        gaps: list[InformationGap] = []
        gaps.extend(self._check_question_coverage(query, answer))
        gaps.extend(self._check_unsourced_claims(answer, sources))
        gaps.extend(self._check_uncertainty(answer))
        gaps.extend(self._check_temporal_issues(query, answer))
        return gaps

    def get_refinement_suggestions(
        self,
        gaps: list[InformationGap],
    ) -> list[str]:
        """Get search suggestions to fill gaps.

        Args:
            gaps: List of identified gaps

        Returns:
            List of suggested search queries, de-duplicated and kept in the
            order in which they first appear in ``gaps``
        """
        searches = (gap.suggested_search for gap in gaps if gap.suggested_search)
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # result is deterministic (a set would not be).
        return list(dict.fromkeys(searches))

    def prioritize_gaps(
        self,
        gaps: list[InformationGap],
    ) -> list[InformationGap]:
        """Prioritize gaps by severity.

        Args:
            gaps: List of gaps to prioritize

        Returns:
            New sorted list of gaps (highest severity first); gaps of equal
            severity keep their relative order because ``sorted`` is stable
        """
        rank = self._SEVERITY_RANK
        return sorted(gaps, key=lambda gap: rank.get(gap.severity, 3))

    @staticmethod
    def _mentions(text: str, term: str, *, whole_word: bool = False) -> bool:
        """Return True if ``term`` occurs in ``text`` starting at a word boundary.

        Args:
            text: Text to search (callers pass lower-cased text)
            term: Literal keyword or phrase to look for
            whole_word: When True the match must also end at a word boundary

        Returns:
            Whether a match was found
        """
        pattern = re.escape(term)
        # \b only makes sense next to a word character; a term such as "%"
        # is matched as a plain substring.
        if term[:1].isalnum():
            pattern = r"\b" + pattern
        if whole_word and term[-1:].isalnum():
            pattern += r"\b"
        return re.search(pattern, text) is not None

    def _check_question_coverage(
        self,
        query: str,
        answer: str,
    ) -> list[InformationGap]:
        """Check if question elements are addressed.

        A question word counts only when it appears in the query as a whole
        word. The answer is considered to address it when any of the
        associated indicator terms starts a word in the answer (so "steps"
        satisfies "step", but "begin" does not satisfy "in").

        Args:
            query: User query
            answer: Generated answer

        Returns:
            List of gaps for unaddressed question elements
        """
        gaps: list[InformationGap] = []
        query_lower = query.lower()
        answer_lower = answer.lower()

        for question_word, indicators in self._QUESTION_INDICATORS.items():
            if not self._mentions(query_lower, question_word, whole_word=True):
                continue
            if any(self._mentions(answer_lower, ind) for ind in indicators):
                continue
            gaps.append(InformationGap(
                description=f"Question asks '{question_word}' but answer may not fully address it",
                gap_type="missing_fact",
                severity="medium",
                suggested_search=f"{query} {question_word}",
            ))
        return gaps

    def _check_unsourced_claims(
        self,
        answer: str,
        sources: list[dict[str, str]] | None,
    ) -> list[InformationGap]:
        """Check for claims without source support.

        With no sources at all a single high-severity gap is returned.
        Otherwise at most one medium-severity gap is reported, for the first
        statistical term found, and only when the answer contains no "["
        citation marker and quotes no source snippet verbatim.

        Args:
            answer: Generated answer
            sources: List of sources

        Returns:
            List of gaps for unsourced claims
        """
        gaps: list[InformationGap] = []

        # If no sources at all
        if not sources:
            gaps.append(InformationGap(
                description="No sources provided to support claims",
                gap_type="unverified",
                severity="high",
                suggested_search=None,
            ))
            return gaps

        # Simplified citation check. Missing/empty snippets are skipped:
        # "" is a substring of every string and would otherwise always count
        # as a quoted source.
        quotes_snippet = any(
            snippet in answer
            for snippet in (source.get("snippet") for source in sources)
            if snippet
        )
        if "[" in answer or quotes_snippet:
            return gaps

        answer_lower = answer.lower()
        first_term = next(
            (term for term in self._STATISTICAL_TERMS
             if self._mentions(answer_lower, term)),
            None,
        )
        if first_term is not None:
            gaps.append(InformationGap(
                description=f"Statistical claim ({first_term}) may need verification",
                gap_type="unverified",
                severity="medium",
                suggested_search=None,
            ))
        return gaps

    def _check_uncertainty(self, answer: str) -> list[InformationGap]:
        """Check for uncertainty language.

        Args:
            answer: Generated answer

        Returns:
            List of gaps for uncertain statements, one per hedging phrase
            found in the answer
        """
        answer_lower = answer.lower()
        return [
            InformationGap(
                description=f"Answer contains uncertainty: '{phrase}'",
                gap_type="unclear",
                severity=severity,
                suggested_search=None,
            )
            for phrase, severity in self._UNCERTAINTY_PHRASES
            if phrase in answer_lower
        ]

    def _check_temporal_issues(
        self,
        query: str,
        answer: str,
    ) -> list[InformationGap]:
        """Check for time-sensitive information issues.

        A query is treated as time-sensitive when one of the temporal terms
        starts a word in it. For such queries, an answer that mentions no
        year (19xx/20xx) and no month name is flagged.

        Args:
            query: User query
            answer: Generated answer

        Returns:
            List of gaps for temporal issues
        """
        gaps: list[InformationGap] = []
        query_lower = query.lower()

        is_temporal_query = any(
            self._mentions(query_lower, term) for term in self._TEMPORAL_TERMS
        )
        if not is_temporal_query:
            return gaps

        has_date = re.search(self._DATE_PATTERN, answer.lower()) is not None
        if not has_date:
            gaps.append(InformationGap(
                description="Query asks for current information but answer may be outdated",
                gap_type="outdated",
                severity="high",
                suggested_search=f"{query} latest",
            ))
        return gaps