Spaces:

debashis2007
/

ask-the-web-agent

Sleeping

App Files Files Community

ask-the-web-agent / src /feedback /gaps.py

debashis2007

Upload folder using huggingface_hub

75bea1c verified 2 months ago

raw

history blame contribute delete

8.21 kB

"""Gap identification for incomplete answers."""

from __future__ import annotations

import re
from dataclasses import dataclass
from typing import Any


	@dataclass
	class InformationGap:
	"""An identified information gap."""

	description: str
	gap_type: str # "missing_fact", "unclear", "unverified", "outdated"
	severity: str # "low", "medium", "high"
	suggested_search: str \| None = None


	class GapIdentifier:
	"""Identifies gaps in responses that need additional research."""

	def __init__(self):
	"""Initialize the gap identifier."""
	pass

	def identify_gaps(
	self,
	query: str,
	answer: str,
	sources: list[dict[str, str]] \| None = None,
	) -> list[InformationGap]:
	"""Identify information gaps in an answer.

	Args:
	query: Original user query
	answer: Generated answer
	sources: List of sources used

	Returns:
	List of identified gaps
	"""
	gaps = []

	# Check for question words not addressed
	question_gaps = self._check_question_coverage(query, answer)
	gaps.extend(question_gaps)

	# Check for unsourced claims
	unsourced_gaps = self._check_unsourced_claims(answer, sources)
	gaps.extend(unsourced_gaps)

	# Check for hedging language (uncertainty)
	uncertainty_gaps = self._check_uncertainty(answer)
	gaps.extend(uncertainty_gaps)

	# Check for time-sensitive information
	temporal_gaps = self._check_temporal_issues(query, answer)
	gaps.extend(temporal_gaps)

	return gaps

	def get_refinement_suggestions(
	self,
	gaps: list[InformationGap],
	) -> list[str]:
	"""Get search suggestions to fill gaps.

	Args:
	gaps: List of identified gaps

	Returns:
	List of suggested search queries
	"""
	suggestions = []

	for gap in gaps:
	if gap.suggested_search:
	suggestions.append(gap.suggested_search)

	return list(set(suggestions)) # Deduplicate

	def prioritize_gaps(
	self,
	gaps: list[InformationGap],
	) -> list[InformationGap]:
	"""Prioritize gaps by severity.

	Args:
	gaps: List of gaps to prioritize

	Returns:
	Sorted list of gaps (highest severity first)
	"""
	severity_order = {"high": 0, "medium": 1, "low": 2}
	return sorted(
	gaps,
	key=lambda g: severity_order.get(g.severity, 3),
	)

	def _check_question_coverage(
	self,
	query: str,
	answer: str,
	) -> list[InformationGap]:
	"""Check if question elements are addressed.

	Args:
	query: User query
	answer: Generated answer

	Returns:
	List of gaps for unaddressed question elements
	"""
	gaps = []
	query_lower = query.lower()
	answer_lower = answer.lower()

	# Check for common question patterns
	question_patterns = {
	"why": ("reason", "because", "since", "due to"),
	"how": ("method", "process", "step", "by", "through"),
	"when": ("date", "time", "year", "month", "day"),
	"where": ("location", "place", "in", "at"),
	"who": ("person", "people", "company", "organization"),
	"what": ("definition", "is", "are", "means"),
	}

	for question_word, answer_indicators in question_patterns.items():
	if question_word in query_lower:
	# Check if any indicators are in answer
	if not any(ind in answer_lower for ind in answer_indicators):
	gaps.append(InformationGap(
	description=f"Question asks '{question_word}' but answer may not fully address it",
	gap_type="missing_fact",
	severity="medium",
	suggested_search=f"{query} {question_word}",
	))

	return gaps

	def _check_unsourced_claims(
	self,
	answer: str,
	sources: list[dict[str, str]] \| None,
	) -> list[InformationGap]:
	"""Check for claims without source support.

	Args:
	answer: Generated answer
	sources: List of sources

	Returns:
	List of gaps for unsourced claims
	"""
	gaps = []

	# If no sources at all
	if not sources:
	gaps.append(InformationGap(
	description="No sources provided to support claims",
	gap_type="unverified",
	severity="high",
	suggested_search=None,
	))
	return gaps

	# Check for statistical claims without citation
	statistical_patterns = [
	"percent", "%", "million", "billion", "number of",
	"majority", "most", "few", "many", "study shows",
	]

	for pattern in statistical_patterns:
	if pattern in answer.lower():
	# Check if claim appears near a citation marker
	# (simplified check)
	if "[" not in answer and not any(
	s.get("snippet", "") in answer for s in sources
	):
	gaps.append(InformationGap(
	description=f"Statistical claim ({pattern}) may need verification",
	gap_type="unverified",
	severity="medium",
	suggested_search=None,
	))
	break

	return gaps

	def _check_uncertainty(self, answer: str) -> list[InformationGap]:
	"""Check for uncertainty language.

	Args:
	answer: Generated answer

	Returns:
	List of gaps for uncertain statements
	"""
	gaps = []
	answer_lower = answer.lower()

	uncertainty_phrases = [
	("i'm not sure", "high"),
	("unclear", "medium"),
	("might be", "low"),
	("could be", "low"),
	("possibly", "low"),
	("it appears", "low"),
	("seems to be", "low"),
	("no clear answer", "high"),
	("insufficient information", "high"),
	]

	for phrase, severity in uncertainty_phrases:
	if phrase in answer_lower:
	gaps.append(InformationGap(
	description=f"Answer contains uncertainty: '{phrase}'",
	gap_type="unclear",
	severity=severity,
	suggested_search=None,
	))

	return gaps

	def _check_temporal_issues(
	self,
	query: str,
	answer: str,
	) -> list[InformationGap]:
	"""Check for time-sensitive information issues.

	Args:
	query: User query
	answer: Generated answer

	Returns:
	List of gaps for temporal issues
	"""
	gaps = []
	query_lower = query.lower()

	# Check if query asks about current/latest information
	temporal_indicators = [
	"current", "latest", "now", "today", "recent",
	"this year", "2024", "2025", "updated",
	]

	is_temporal_query = any(ind in query_lower for ind in temporal_indicators)

	if is_temporal_query:
	# Check if answer mentions dates
	import re
	date_pattern = r'\b(20\d{2}\|19\d{2}\|january\|february\|march\|april\|may\|june\|july\|august\|september\|october\|november\|december)\b'
	has_date = bool(re.search(date_pattern, answer.lower()))

	if not has_date:
	gaps.append(InformationGap(
	description="Query asks for current information but answer may be outdated",
	gap_type="outdated",
	severity="high",
	suggested_search=f"{query} latest",
	))

	return gaps