# Initial commit: RAG Document Assistant with Zero-Storage Privacy (commit f866820, uploaded by vn6295337)
"""
Chain-of-thought reasoning for RAG synthesis.
Performs explicit reasoning over retrieved evidence.
"""
import re

from dataclasses import dataclass
from typing import Any, Dict, List, Optional
@dataclass
class ReasoningResult:
    """Result of reasoning over evidence."""
    answer: str                  # final answer text from the LLM (may embed [ID:...] citations)
    reasoning_steps: List[str]   # extracted chain-of-thought steps, in response order
    evidence_used: List[str]     # chunk IDs cited in the answer
    confidence: float            # heuristic score in [0.0, 0.9], driven by citation count
    reasoning_type: str          # one of: "synthesis", "comparative", "analytical", "no_evidence", "error"
# Prompts for different reasoning types.
# Each template takes {query} and {evidence} and instructs the LLM to emit
# citations in the [ID:chunk_id] form that _extract_evidence_ids parses.

# Default template: step-by-step synthesis for factual queries.
SYNTHESIS_PROMPT = """Based on the evidence below, answer the query.
Show your reasoning step by step, then provide the final answer.
Query: {query}
Evidence:
{evidence}
First, analyze each piece of evidence and its relevance.
Then, synthesize the information to form a complete answer.
Finally, provide your answer with citations [ID:chunk_id].
Reasoning and Answer:"""

# Template for "comparative" queries: forces a two-subject compare/contrast layout.
COMPARATIVE_PROMPT = """Compare the following based on the evidence provided.
Query: {query}
Evidence:
{evidence}
Structure your response as:
1. Key aspects of the first subject
2. Key aspects of the second subject
3. Similarities
4. Differences
5. Conclusion
Include citations [ID:chunk_id] for each claim.
Comparison:"""

# Template for "analytical" queries: causes/relationships/conclusions structure.
ANALYTICAL_PROMPT = """Analyze and explain based on the evidence provided.
Query: {query}
Evidence:
{evidence}
Structure your response as:
1. Identify the main factors/causes
2. Explain the relationships between them
3. Draw conclusions
4. Note any limitations in the available evidence
Include citations [ID:chunk_id] for each claim.
Analysis:"""
def _format_evidence(chunks: List[Dict[str, Any]]) -> str:
"""Format chunks as numbered evidence."""
evidence_parts = []
for i, chunk in enumerate(chunks, 1):
chunk_id = chunk.get("id", f"chunk_{i}")
text = chunk.get("text", "")[:800] # Limit length
evidence_parts.append(f"[{chunk_id}]\n{text}")
return "\n\n".join(evidence_parts)
def _extract_reasoning_steps(text: str) -> List[str]:
"""Extract reasoning steps from LLM response."""
steps = []
# Look for numbered steps
import re
numbered = re.findall(r'\d+\.\s*([^\n]+)', text)
if numbered:
steps.extend(numbered)
# Look for bullet points
bullets = re.findall(r'[-•]\s*([^\n]+)', text)
if bullets:
steps.extend(bullets)
# If no structure found, split by sentences
if not steps:
sentences = re.split(r'(?<=[.!?])\s+', text)
steps = [s.strip() for s in sentences[:5] if len(s) > 20]
return steps
def _extract_evidence_ids(text: str) -> List[str]:
"""Extract cited evidence IDs from response."""
import re
# Match [ID:...] or ID:...
ids = re.findall(r'\[?ID:([A-Za-z0-9_\-:.]+)\]?', text)
return list(set(ids))
def reason_over_evidence(
    query: str,
    chunks: List[Dict[str, Any]],
    query_type: str = "factual",
    use_chain_of_thought: bool = True
) -> ReasoningResult:
    """
    Apply reasoning over retrieved evidence.

    Args:
        query: User query
        chunks: Retrieved and shaped chunks
        query_type: Type of query for prompt selection ("comparative",
            "analytical", anything else -> synthesis)
        use_chain_of_thought: Whether to request explicit reasoning
            (NOTE(review): currently not consulted by the body — confirm intent)

    Returns:
        ReasoningResult with answer and reasoning chain
    """
    # Guard: nothing retrieved, nothing to reason over.
    if not chunks:
        return ReasoningResult(
            answer="I don't have enough information to answer this question.",
            reasoning_steps=["No relevant evidence found"],
            evidence_used=[],
            confidence=0.0,
            reasoning_type="no_evidence",
        )

    # LLM backend is optional; degrade gracefully when unavailable.
    try:
        from src.llm_providers import call_llm
    except ImportError:
        return ReasoningResult(
            answer="LLM not available for reasoning.",
            reasoning_steps=[],
            evidence_used=[],
            confidence=0.0,
            reasoning_type="error",
        )

    evidence_block = _format_evidence(chunks)

    # Dispatch query type to its prompt template; unknown types fall back
    # to plain synthesis.
    template_by_type = {
        "comparative": COMPARATIVE_PROMPT,
        "analytical": ANALYTICAL_PROMPT,
    }
    if query_type in template_by_type:
        template = template_by_type[query_type]
        reasoning_type = query_type
    else:
        template = SYNTHESIS_PROMPT
        reasoning_type = "synthesis"
    prompt = template.format(query=query, evidence=evidence_block)

    try:
        raw = call_llm(prompt=prompt, temperature=0.0, max_tokens=800)
        answer_text = raw.get("text", "").strip()

        steps = _extract_reasoning_steps(answer_text)
        cited = _extract_evidence_ids(answer_text)

        # Heuristic: each citation adds 0.1 confidence on a 0.3 base, capped at 0.9.
        score = min(0.9, 0.3 + 0.1 * len(cited))

        return ReasoningResult(
            answer=answer_text,
            reasoning_steps=steps,
            evidence_used=cited,
            confidence=score,
            reasoning_type=reasoning_type,
        )
    except Exception as exc:
        # Boundary handler: surface a truncated error message instead of crashing.
        return ReasoningResult(
            answer=f"Error during reasoning: {str(exc)[:100]}",
            reasoning_steps=[],
            evidence_used=[],
            confidence=0.0,
            reasoning_type="error",
        )
def iterative_retrieve_and_reason(
    query: str,
    initial_chunks: List[Dict[str, Any]],
    retrieve_fn,
    max_iterations: int = 2
) -> ReasoningResult:
    """
    Iteratively retrieve more evidence based on reasoning.

    Asks the LLM whether the accumulated evidence suffices; if not, uses the
    LLM's suggested follow-up query to fetch additional chunks (deduplicated
    by chunk id), then reasons over everything gathered.

    Args:
        query: Original query
        initial_chunks: First retrieval results
        retrieve_fn: Function to retrieve more chunks (takes query, returns chunks)
        max_iterations: Maximum retrieval iterations

    Returns:
        ReasoningResult after iterative refinement
    """
    gathered = list(initial_chunks)
    seen_ids = {chunk.get("id") for chunk in gathered}

    # Without an LLM we cannot run the sufficiency loop; reason over what we have.
    try:
        from src.llm_providers import call_llm
    except ImportError:
        return reason_over_evidence(query, gathered)

    for _ in range(max_iterations):
        # Ask the LLM whether the current evidence is enough.
        evidence = _format_evidence(gathered)
        check_prompt = f"""Given this query and evidence, do we need more information?
If yes, suggest a follow-up search query. If no, respond with "SUFFICIENT".
Query: {query}
Current evidence:
{evidence[:2000]}
Response (either "SUFFICIENT" or a follow-up search query):"""
        reply = call_llm(prompt=check_prompt, temperature=0.0, max_tokens=100)
        verdict = reply.get("text", "").strip()

        if "SUFFICIENT" in verdict.upper():
            break

        # Anything else is treated as a follow-up query (strip a common prefix).
        follow_up = verdict.replace("Follow-up query:", "").strip()
        if follow_up and len(follow_up) > 5:
            try:
                # Merge new chunks, skipping ids we have already collected.
                for candidate in retrieve_fn(follow_up):
                    candidate_id = candidate.get("id")
                    if candidate_id not in seen_ids:
                        seen_ids.add(candidate_id)
                        gathered.append(candidate)
            except Exception:
                # Retrieval failure ends the refinement loop (best-effort).
                break

    return reason_over_evidence(query, gathered)