Spaces:
Runtime error
Runtime error
| from transformers import pipeline | |
| from typing import List, Dict | |
| import re | |
class AdvancedDecisionEngine:
    """Answer a question from retrieved document chunks with an extractive QA model.

    The engine ranks the retrieved chunks by their ``"score"``, concatenates
    the best ones into a single context, runs a question-answering pipeline
    over that context, and wraps the result in a structured decision dict.
    """

    # Upper bound, in whitespace-separated words, on the combined context.
    MAX_CONTEXT_WORDS = 2000
    # Number of top-ranked chunks considered when building the context.
    MAX_CONTEXT_CHUNKS = 5
    # Number of top-ranked chunks reported as supporting evidence.
    MAX_SUPPORTING_CLAUSES = 3
    # QA results scoring below this are reported as "not clearly available".
    MIN_QA_SCORE = 0.1

    # Fallback answers. They are kept as constants so that
    # ``_calculate_confidence`` can recognise them reliably.
    NO_CONTEXT_ANSWER = "No relevant information found in the document."
    UNCLEAR_ANSWER = "The information is not clearly available in the document."
    ERROR_ANSWER_PREFIX = "Error processing question: "

    def __init__(self):
        """Load the ``deepset/roberta-base-squad2`` question-answering pipeline."""
        self.qa_pipeline = pipeline(
            "question-answering",
            model="deepset/roberta-base-squad2",
            tokenizer="deepset/roberta-base-squad2",
        )

    def make_decision(self, question: str, retrieved_chunks: List[Dict]) -> Dict:
        """
        Given a question and retrieved chunks, produce structured decision JSON:
        {
            "answer": str,
            "supporting_clauses": [ { "text": str, "page": int, "score": float } ],
            "rationale": str,
            "confidence": float
        }

        Each chunk is a dict with a ``"text"`` key and optional ``"page"`` and
        ``"score"`` keys. A missing score counts as 0 and a missing page is
        reported as ``None``. An empty or ``None`` chunk list yields the
        "no relevant information" answer with no supporting clauses.
        """
        chunks = retrieved_chunks or []

        # Step 1: Select top context
        best_context = self._select_best_context(question, chunks)

        # Step 2: Get answer using QA model
        answer = self._answer_with_transformer(question, best_context)

        # Step 3: Build rationale and evidence.
        # Rank with .get() so that a chunk without a score is treated the same
        # way here as it is in _select_best_context, instead of raising KeyError.
        ranked = sorted(chunks, key=lambda x: x.get("score", 0), reverse=True)
        supporting_clauses = [
            {
                "text": chunk["text"],
                "page": chunk.get("page"),
                "score": chunk.get("score", 0),
            }
            for chunk in ranked[: self.MAX_SUPPORTING_CLAUSES]
        ]
        rationale = self._build_rationale(question, answer, supporting_clauses)
        confidence = self._calculate_confidence(answer, best_context)

        return {
            "answer": answer,
            "supporting_clauses": supporting_clauses,
            "rationale": rationale,
            "confidence": confidence,
        }

    def _select_best_context(self, question: str, chunks: List[Dict]) -> str:
        """Select and combine top chunks by relevance score.

        Chunks are taken best-first from the top ``MAX_CONTEXT_CHUNKS`` until
        the next one would bring the total to ``MAX_CONTEXT_WORDS`` words or
        more. If even the best chunk does not fit, its leading words are used
        so the context is not left empty. ``question`` is currently unused.

        Returns the selected texts joined by blank lines, stripped of
        surrounding whitespace ("" when there are no chunks).
        """
        ranked = sorted(chunks, key=lambda x: x.get("score", 0), reverse=True)
        parts = []
        used_words = 0
        for chunk in ranked[: self.MAX_CONTEXT_CHUNKS]:
            words = chunk["text"].split()
            if used_words + len(words) < self.MAX_CONTEXT_WORDS:
                parts.append(chunk["text"])
                used_words += len(words)
                continue
            if used_words == 0:
                # Nothing selected yet and this chunk alone exceeds the budget:
                # keep a truncated prefix rather than returning an empty context.
                parts.append(" ".join(words[: self.MAX_CONTEXT_WORDS - 1]))
            break
        return "\n\n".join(parts).strip()

    def _answer_with_transformer(self, question: str, context: str) -> str:
        """Answer using transformer QA model.

        Returns the extracted answer text, or one of the fallback messages:
        ``NO_CONTEXT_ANSWER`` for an empty context, ``UNCLEAR_ANSWER`` when the
        model's score is below ``MIN_QA_SCORE``, or a message starting with
        ``ERROR_ANSWER_PREFIX`` if the pipeline raised.
        """
        if not context:
            return self.NO_CONTEXT_ANSWER
        try:
            result = self.qa_pipeline(question=question, context=context)
        except Exception as e:
            # Deliberate best-effort: report the failure as the answer text
            # instead of propagating it to the caller.
            return f"{self.ERROR_ANSWER_PREFIX}{str(e)}"
        if result["score"] < self.MIN_QA_SCORE:
            return self.UNCLEAR_ANSWER
        return result["answer"]

    def _build_rationale(self, question: str, answer: str, supporting_clauses: List[Dict]) -> str:
        """Generate a short reasoning explanation.

        The explanation cites the page of every supporting clause that has
        one, or a generic phrase when none of them carries a page.
        """
        clause_refs = [f"page {c['page']}" for c in supporting_clauses if c.get("page") is not None]
        refs_str = ", ".join(clause_refs) if clause_refs else "the retrieved document clauses"
        return f"The answer '{answer}' was derived from {refs_str}, which matched the question '{question}' in meaning."

    def _calculate_confidence(self, answer: str, context: str) -> float:
        """Estimate confidence score.

        Returns 0.2 for any fallback or error answer, 0.85 for an answer that
        contains a digit, and 0.65 otherwise. ``context`` is currently unused.
        """
        lowered = answer.lower()
        # The fallback messages do not contain the substrings "not available"
        # or "not found", so they are matched explicitly; the substring checks
        # are kept for answers that happen to use those phrases.
        is_fallback = (
            answer in (self.NO_CONTEXT_ANSWER, self.UNCLEAR_ANSWER)
            or answer.startswith(self.ERROR_ANSWER_PREFIX)
            or "not available" in lowered
            or "not found" in lowered
        )
        if is_fallback:
            return 0.2
        if re.search(r'\d+', answer):
            return 0.85
        return 0.65