""" Chain-of-thought reasoning for RAG synthesis. Performs explicit reasoning over retrieved evidence. """ from dataclasses import dataclass from typing import List, Dict, Any, Optional @dataclass class ReasoningResult: """Result of reasoning over evidence.""" answer: str reasoning_steps: List[str] evidence_used: List[str] confidence: float reasoning_type: str # Prompts for different reasoning types SYNTHESIS_PROMPT = """Based on the evidence below, answer the query. Show your reasoning step by step, then provide the final answer. Query: {query} Evidence: {evidence} First, analyze each piece of evidence and its relevance. Then, synthesize the information to form a complete answer. Finally, provide your answer with citations [ID:chunk_id]. Reasoning and Answer:""" COMPARATIVE_PROMPT = """Compare the following based on the evidence provided. Query: {query} Evidence: {evidence} Structure your response as: 1. Key aspects of the first subject 2. Key aspects of the second subject 3. Similarities 4. Differences 5. Conclusion Include citations [ID:chunk_id] for each claim. Comparison:""" ANALYTICAL_PROMPT = """Analyze and explain based on the evidence provided. Query: {query} Evidence: {evidence} Structure your response as: 1. Identify the main factors/causes 2. Explain the relationships between them 3. Draw conclusions 4. Note any limitations in the available evidence Include citations [ID:chunk_id] for each claim. Analysis:""" def _format_evidence(chunks: List[Dict[str, Any]]) -> str: """Format chunks as numbered evidence.""" evidence_parts = [] for i, chunk in enumerate(chunks, 1): chunk_id = chunk.get("id", f"chunk_{i}") text = chunk.get("text", "")[:800] # Limit length evidence_parts.append(f"[{chunk_id}]\n{text}") return "\n\n".join(evidence_parts) def _extract_reasoning_steps(text: str) -> List[str]: """Extract reasoning steps from LLM response.""" steps = [] # Look for numbered steps import re numbered = re.findall(r'\d+\.\s*([^\n]+)', text) if numbered: steps.extend(numbered) # Look for bullet points bullets = re.findall(r'[-•]\s*([^\n]+)', text) if bullets: steps.extend(bullets) # If no structure found, split by sentences if not steps: sentences = re.split(r'(?<=[.!?])\s+', text) steps = [s.strip() for s in sentences[:5] if len(s) > 20] return steps def _extract_evidence_ids(text: str) -> List[str]: """Extract cited evidence IDs from response.""" import re # Match [ID:...] or ID:... ids = re.findall(r'\[?ID:([A-Za-z0-9_\-:.]+)\]?', text) return list(set(ids)) def reason_over_evidence( query: str, chunks: List[Dict[str, Any]], query_type: str = "factual", use_chain_of_thought: bool = True ) -> ReasoningResult: """ Apply reasoning over retrieved evidence. Args: query: User query chunks: Retrieved and shaped chunks query_type: Type of query for prompt selection use_chain_of_thought: Whether to request explicit reasoning Returns: ReasoningResult with answer and reasoning chain """ if not chunks: return ReasoningResult( answer="I don't have enough information to answer this question.", reasoning_steps=["No relevant evidence found"], evidence_used=[], confidence=0.0, reasoning_type="no_evidence" ) try: from src.llm_providers import call_llm except ImportError: return ReasoningResult( answer="LLM not available for reasoning.", reasoning_steps=[], evidence_used=[], confidence=0.0, reasoning_type="error" ) # Format evidence evidence = _format_evidence(chunks) # Select prompt based on query type if query_type == "comparative": prompt = COMPARATIVE_PROMPT.format(query=query, evidence=evidence) reasoning_type = "comparative" elif query_type == "analytical": prompt = ANALYTICAL_PROMPT.format(query=query, evidence=evidence) reasoning_type = "analytical" else: prompt = SYNTHESIS_PROMPT.format(query=query, evidence=evidence) reasoning_type = "synthesis" try: response = call_llm(prompt=prompt, temperature=0.0, max_tokens=800) text = response.get("text", "").strip() # Extract components reasoning_steps = _extract_reasoning_steps(text) evidence_ids = _extract_evidence_ids(text) # Estimate confidence based on evidence usage confidence = min(0.9, 0.3 + 0.1 * len(evidence_ids)) return ReasoningResult( answer=text, reasoning_steps=reasoning_steps, evidence_used=evidence_ids, confidence=confidence, reasoning_type=reasoning_type ) except Exception as e: return ReasoningResult( answer=f"Error during reasoning: {str(e)[:100]}", reasoning_steps=[], evidence_used=[], confidence=0.0, reasoning_type="error" ) def iterative_retrieve_and_reason( query: str, initial_chunks: List[Dict[str, Any]], retrieve_fn, max_iterations: int = 2 ) -> ReasoningResult: """ Iteratively retrieve more evidence based on reasoning. Args: query: Original query initial_chunks: First retrieval results retrieve_fn: Function to retrieve more chunks (takes query, returns chunks) max_iterations: Maximum retrieval iterations Returns: ReasoningResult after iterative refinement """ all_chunks = list(initial_chunks) chunk_ids = {c.get("id") for c in all_chunks} try: from src.llm_providers import call_llm except ImportError: return reason_over_evidence(query, all_chunks) for i in range(max_iterations): # Check if we need more information evidence = _format_evidence(all_chunks) check_prompt = f"""Given this query and evidence, do we need more information? If yes, suggest a follow-up search query. If no, respond with "SUFFICIENT". Query: {query} Current evidence: {evidence[:2000]} Response (either "SUFFICIENT" or a follow-up search query):""" response = call_llm(prompt=check_prompt, temperature=0.0, max_tokens=100) text = response.get("text", "").strip() if "SUFFICIENT" in text.upper(): break # Retrieve more based on suggested query follow_up = text.replace("Follow-up query:", "").strip() if follow_up and len(follow_up) > 5: try: new_chunks = retrieve_fn(follow_up) for chunk in new_chunks: if chunk.get("id") not in chunk_ids: all_chunks.append(chunk) chunk_ids.add(chunk.get("id")) except Exception: break return reason_over_evidence(query, all_chunks)