# File: my-fast-api/backend/decision_engine.py
# Repository page header: strelizi · "updated" · commit 3806245
from transformers import pipeline
from typing import List, Dict
import re
class AdvancedDecisionEngine:
    """Turn a question plus retrieved document chunks into a structured decision.

    The engine concatenates the highest-scoring chunks into one context, runs
    an extractive question-answering pipeline over it, and returns the answer
    together with supporting clauses, a rationale sentence and a heuristic
    confidence value.

    Chunks are dicts that must contain a ``"text"`` key and may contain
    ``"page"`` and ``"score"`` keys; a missing score is treated as ``0``.
    """

    # Fallback answers produced by ``_answer_with_transformer``. They are kept
    # as constants so ``_calculate_confidence`` can recognise them exactly
    # instead of guessing from substrings.
    _NO_CONTEXT_ANSWER = "No relevant information found in the document."
    _LOW_SCORE_ANSWER = "The information is not clearly available in the document."
    _ERROR_PREFIX = "Error processing question: "

    # Context-building limits.
    _MAX_CONTEXT_CHUNKS = 5
    _MAX_CONTEXT_WORDS = 2000  # context stays strictly below this word count
    _MAX_SUPPORTING_CLAUSES = 3

    # QA results scoring below this are reported as "not clearly available".
    _MIN_QA_SCORE = 0.1

    # Heuristic confidence levels.
    _LOW_CONFIDENCE = 0.2
    _DEFAULT_CONFIDENCE = 0.65
    _NUMERIC_CONFIDENCE = 0.85

    def __init__(self):
        """Load the question-answering pipeline used to extract answers."""
        self.qa_pipeline = pipeline(
            "question-answering",
            model="deepset/roberta-base-squad2",
            tokenizer="deepset/roberta-base-squad2",
        )

    def make_decision(self, question: str, retrieved_chunks: List[Dict]) -> Dict:
        """
        Given a question and retrieved chunks, produce structured decision JSON:
        {
            "answer": str,
            "supporting_clauses": [ { "text": str, "page": int, "score": float } ],
            "rationale": str,
            "confidence": float
        }

        ``page`` is ``None`` for chunks without a page, and ``score`` is ``0``
        for chunks without a score.

        Raises:
            KeyError: if a chunk has no ``"text"`` key.
        """
        # Step 1: Select top context
        best_context = self._select_best_context(question, retrieved_chunks)

        # Step 2: Get answer using QA model
        answer = self._answer_with_transformer(question, best_context)

        # Step 3: Build rationale and evidence.
        # Rank with .get() so a chunk lacking "score" sorts as 0 rather than
        # raising KeyError (matches _select_best_context).
        ranked = sorted(
            retrieved_chunks, key=lambda c: c.get("score", 0), reverse=True
        )
        supporting_clauses = [
            {
                "text": chunk["text"],
                "page": chunk.get("page"),
                "score": chunk.get("score", 0),
            }
            for chunk in ranked[: self._MAX_SUPPORTING_CLAUSES]
        ]
        rationale = self._build_rationale(question, answer, supporting_clauses)
        confidence = self._calculate_confidence(answer, best_context)

        return {
            "answer": answer,
            "supporting_clauses": supporting_clauses,
            "rationale": rationale,
            "confidence": confidence,
        }

    def _select_best_context(self, question: str, chunks: List[Dict]) -> str:
        """Select and combine top chunks by relevance score.

        Up to ``_MAX_CONTEXT_CHUNKS`` of the highest-scoring chunks are joined
        with blank lines, stopping before the whitespace-delimited word count
        would reach ``_MAX_CONTEXT_WORDS``. ``question`` is accepted for
        interface compatibility but is not used for the selection.

        Returns:
            The combined context, or ``""`` when ``chunks`` is empty.
        """
        ranked = sorted(chunks, key=lambda c: c.get("score", 0), reverse=True)
        selected = []
        word_count = 0
        for chunk in ranked[: self._MAX_CONTEXT_CHUNKS]:
            text = chunk["text"]
            words = text.split()
            if word_count + len(words) >= self._MAX_CONTEXT_WORDS:
                if not selected:
                    # The best chunk alone is over budget. Keep a truncated
                    # prefix instead of returning an empty context, which
                    # would be misreported as "no relevant information".
                    selected.append(
                        " ".join(words[: self._MAX_CONTEXT_WORDS - 1])
                    )
                break
            selected.append(text)
            word_count += len(words)
        return "\n\n".join(selected).strip()

    def _answer_with_transformer(self, question: str, context: str) -> str:
        """Answer using transformer QA model.

        Returns the extracted answer, or one of the fallback messages when the
        context is empty, the model score is below ``_MIN_QA_SCORE``, or the
        pipeline raises.
        """
        if not context:
            return self._NO_CONTEXT_ANSWER
        try:
            result = self.qa_pipeline(question=question, context=context)
            if result["score"] < self._MIN_QA_SCORE:
                return self._LOW_SCORE_ANSWER
            return result["answer"]
        except Exception as e:
            # Deliberate best-effort: report the failure as the answer text
            # rather than propagating it. _calculate_confidence rates such
            # answers as low confidence.
            return f"{self._ERROR_PREFIX}{e}"

    def _build_rationale(self, question: str, answer: str, supporting_clauses: List[Dict]) -> str:
        """Generate a short reasoning explanation.

        Page references are listed once each, in clause order; clauses with no
        page are skipped. When no clause has a page, a generic phrase is used.
        """
        # dict.fromkeys de-duplicates while preserving first-seen order.
        page_refs = list(
            dict.fromkeys(
                f"page {c['page']}"
                for c in supporting_clauses
                if c.get("page") is not None
            )
        )
        refs_str = ", ".join(page_refs) if page_refs else "the retrieved document clauses"
        return f"The answer '{answer}' was derived from {refs_str}, which matched the question '{question}' in meaning."

    def _calculate_confidence(self, answer: str, context: str) -> float:
        """Estimate confidence score.

        Returns ``0.2`` for fallback/error/empty answers, ``0.85`` for answers
        containing a digit, and ``0.65`` otherwise. ``context`` is accepted for
        interface compatibility but is not used.
        """
        is_fallback = (
            not answer.strip()
            or answer in (self._NO_CONTEXT_ANSWER, self._LOW_SCORE_ANSWER)
            or answer.startswith(self._ERROR_PREFIX)
        )
        if is_fallback:
            return self._LOW_CONFIDENCE

        # Retained from the original heuristic: extracted answers that
        # themselves state the information is missing.
        lowered = answer.lower()
        if "not available" in lowered or "not found" in lowered:
            return self._LOW_CONFIDENCE

        if re.search(r'\d+', answer):
            return self._NUMERIC_CONFIDENCE
        return self._DEFAULT_CONFIDENCE