import re
from typing import Dict, List

from transformers import pipeline


class AdvancedDecisionEngine:
    """Answer a question from retrieved document chunks with an extractive QA model.

    The engine concatenates the highest-scoring chunks into one context, runs a
    Hugging Face ``question-answering`` pipeline over it, and wraps the answer
    in a dict together with supporting clauses, a rationale sentence and a
    heuristic confidence value.

    Chunks are plain dicts. The code reads the keys ``"text"`` (required),
    ``"score"`` (optional, treated as 0 when absent) and ``"page"`` (optional).
    """

    # Checkpoint used for both the model and its tokenizer.
    _MODEL_NAME = "deepset/roberta-base-squad2"

    # Fallback answers returned instead of a model answer. They are named
    # constants so that `_calculate_confidence` can recognise them exactly
    # instead of relying on substring guesses that did not match their wording.
    _NO_CONTEXT_ANSWER = "No relevant information found in the document."
    _LOW_SCORE_ANSWER = "The information is not clearly available in the document."
    _ERROR_PREFIX = "Error processing question: "

    # Pipeline scores below this are reported as "not clearly available".
    _MIN_QA_SCORE = 0.1
    # At most this many top-ranked chunks are considered for the context ...
    _MAX_CONTEXT_CHUNKS = 5
    # ... and their combined whitespace-separated word count must stay below this.
    _MAX_CONTEXT_WORDS = 2000
    # Number of top-ranked chunks reported as supporting evidence.
    _MAX_SUPPORTING_CLAUSES = 3

    def __init__(self):
        """Build the question-answering pipeline and store it on the instance."""
        self.qa_pipeline = pipeline(
            "question-answering",
            model=self._MODEL_NAME,
            tokenizer=self._MODEL_NAME,
        )

    def make_decision(self, question: str, retrieved_chunks: List[Dict]) -> Dict:
        """Produce a structured decision for *question* from *retrieved_chunks*.

        Args:
            question: The question to answer.
            retrieved_chunks: Chunk dicts; each must have ``"text"`` and may
                have ``"score"`` and ``"page"``.

        Returns:
            A dict of the form::

                {
                    "answer": str,
                    "supporting_clauses": [
                        {"text": str, "page": <chunk's "page" or None>, "score": <chunk's "score" or 0>}
                    ],
                    "rationale": str,
                    "confidence": float,
                }

            ``supporting_clauses`` holds at most three entries, highest score
            first.

        Raises:
            KeyError: If a chunk that is used has no ``"text"`` key.
        """
        # Step 1: Select top context
        best_context = self._select_best_context(question, retrieved_chunks)

        # Step 2: Get answer using QA model
        answer = self._answer_with_transformer(question, best_context)

        # Step 3: Build rationale and evidence.
        # Rank with the same missing-score default used everywhere else, so a
        # chunk without "score" sorts as 0 instead of raising KeyError.
        ranked = sorted(retrieved_chunks, key=self._chunk_score, reverse=True)
        supporting_clauses = [
            {
                "text": chunk["text"],
                "page": chunk.get("page"),
                "score": chunk.get("score", 0),
            }
            for chunk in ranked[: self._MAX_SUPPORTING_CLAUSES]
        ]

        rationale = self._build_rationale(question, answer, supporting_clauses)
        confidence = self._calculate_confidence(answer, best_context)

        return {
            "answer": answer,
            "supporting_clauses": supporting_clauses,
            "rationale": rationale,
            "confidence": confidence,
        }

    @staticmethod
    def _chunk_score(chunk: Dict) -> float:
        """Return the chunk's relevance score, or 0 when it is missing or None."""
        score = chunk.get("score")
        return 0 if score is None else score

    def _select_best_context(self, question: str, chunks: List[Dict]) -> str:
        """Select and combine top chunks by relevance score.

        Takes up to five highest-scoring chunks in rank order and joins their
        text with blank lines, as long as the running whitespace-split word
        count stays below 2000. *question* is accepted but not used.

        Returns:
            The combined text with surrounding whitespace stripped; an empty
            string when no chunk was added.
        """
        ranked = sorted(chunks, key=self._chunk_score, reverse=True)

        parts = []
        word_count = 0
        for chunk in ranked[: self._MAX_CONTEXT_CHUNKS]:
            text = chunk["text"]
            chunk_words = len(text.split())
            if word_count + chunk_words >= self._MAX_CONTEXT_WORDS:
                # NOTE(review): selection stops at the first chunk that does
                # not fit, so lower-ranked chunks that would fit are not tried
                # and an oversized top chunk yields an empty context. Kept
                # as-is; confirm this is intended.
                break
            parts.append(text)
            word_count += chunk_words

        return "\n\n".join(parts).strip()

    def _answer_with_transformer(self, question: str, context: str) -> str:
        """Answer using transformer QA model.

        Returns:
            The pipeline's ``"answer"`` string, or one of the fallback
            messages: no context was available, the pipeline's ``"score"`` was
            below 0.1, or the pipeline raised (the message then carries the
            exception text).
        """
        if not context:
            return self._NO_CONTEXT_ANSWER

        try:
            result = self.qa_pipeline(question=question, context=context)
            if result["score"] < self._MIN_QA_SCORE:
                return self._LOW_SCORE_ANSWER
            return result["answer"]
        except Exception as e:
            # Deliberate best-effort: a model failure is reported in the
            # answer text rather than propagated to the caller.
            return f"{self._ERROR_PREFIX}{str(e)}"

    def _build_rationale(self, question: str, answer: str, supporting_clauses: List[Dict]) -> str:
        """Generate a short reasoning explanation.

        Cites the page of every supporting clause whose ``"page"`` is not
        None; when none has a page, refers to the clauses generically.
        """
        clause_refs = [
            f"page {c['page']}"
            for c in supporting_clauses
            if c.get("page") is not None
        ]
        refs_str = ", ".join(clause_refs) if clause_refs else "the retrieved document clauses"
        return (
            f"The answer '{answer}' was derived from {refs_str}, "
            f"which matched the question '{question}' in meaning."
        )

    def _is_fallback_answer(self, answer: str) -> bool:
        """Return True when *answer* is a fallback/error message, not a model answer."""
        if answer in (self._NO_CONTEXT_ANSWER, self._LOW_SCORE_ANSWER):
            return True
        if answer.startswith(self._ERROR_PREFIX):
            return True
        # Original substring heuristics, kept for backward compatibility.
        lowered = answer.lower()
        return "not available" in lowered or "not found" in lowered

    def _calculate_confidence(self, answer: str, context: str) -> float:
        """Estimate confidence score.

        Returns 0.2 for fallback or error answers, 0.85 when the answer
        contains a digit, and 0.65 otherwise. *context* is accepted but not
        used.
        """
        if self._is_fallback_answer(answer):
            return 0.2
        if re.search(r"\d", answer):
            return 0.85
        return 0.65