"""
Explanation engine — deterministic, template-based structured diff.
NO LLM dependency. No API calls. Always fast. Always consistent.

Exported functions (both required by search_pipeline.py):
    explain_similarity(query_case, retrieved_case, similarity_score) -> dict
    explain_results(query_case, results) -> list[dict]
"""
def _as_set(value) -> set:
    """Coerce a multi-valued case field into a set.

    ``None`` becomes an empty set and a bare string is treated as a single
    item (instead of being split into characters by ``set()``).
    """
    if value is None:
        return set()
    if isinstance(value, str):
        return {value} if value else set()
    return set(value)


def _scalar_field(case: dict, key: str):
    """Return ``case[key]``, falling back to "unknown" when missing or None."""
    value = case.get(key)
    return "unknown" if value is None else value


def _capitalize_first(text: str) -> str:
    """Upper-case only the first character, leaving the rest untouched.

    ``str.capitalize()`` is deliberately avoided: it lower-cases the remainder
    of the string, which would mangle "IPC", section suffixes such as "120B",
    and proper nouns such as court names.
    """
    return text[:1].upper() + text[1:]


def explain_similarity(
    query_case: dict,
    retrieved_case: dict,
    similarity_score: float
) -> dict:
    """
    Generate a structured explanation comparing the query to one retrieved case.

    Args:
        query_case: dict for the user's query; the keys read here are
            "ipc_sections", "evidence_types", "case_type", "court", "verdict".
        retrieved_case: dict for a retrieved result; same keys as above,
            plus "date".
        similarity_score: relevance score; rounded to 3 decimals in the output.

    Returns:
        dict with 10 keys: similarity_score, similarity_reason,
        key_differences, verdict_analysis, shared_ipc, shared_evidence,
        shared_case_type, retrieved_verdict, retrieved_court, retrieved_date.

    Missing (or None) list fields are treated as empty; missing (or None)
    scalar fields are treated as "unknown".
    """
    # IPC comparison (sorted so the generated text is deterministic)
    q_ipc = _as_set(query_case.get("ipc_sections"))
    r_ipc = _as_set(retrieved_case.get("ipc_sections"))
    shared_ipc = sorted(q_ipc & r_ipc)
    q_only_ipc = sorted(q_ipc - r_ipc)
    r_only_ipc = sorted(r_ipc - q_ipc)

    # Evidence comparison
    q_evidence = _as_set(query_case.get("evidence_types"))
    r_evidence = _as_set(retrieved_case.get("evidence_types"))
    shared_evidence = sorted(q_evidence & r_evidence)
    q_only_evidence = sorted(q_evidence - r_evidence)
    r_only_evidence = sorted(r_evidence - q_evidence)

    # Case type
    q_type = _scalar_field(query_case, "case_type")
    r_type = _scalar_field(retrieved_case, "case_type")
    shared_case_type = (q_type == r_type)

    # Court and verdict
    q_court = _scalar_field(query_case, "court")
    r_court = _scalar_field(retrieved_case, "court")
    q_verdict = _scalar_field(query_case, "verdict")
    r_verdict = _scalar_field(retrieved_case, "verdict")

    # ── Similarity reason ──────────────────────────────────────────────────
    reasons = []
    if shared_ipc:
        reasons.append(f"both cite IPC {', '.join(shared_ipc)}")
    if shared_case_type:
        reasons.append(f"both are {q_type} cases")
    if shared_evidence:
        reasons.append(f"both involve {', '.join(shared_evidence)} evidence")
    if not reasons:
        reasons.append(
            "high semantic similarity in legal language and factual context"
        )
    similarity_reason = (
        "Similarity: " + _capitalize_first("; ".join(reasons)) + "."
    )

    # ── Key differences ────────────────────────────────────────────────────
    diffs = []
    if q_only_ipc:
        diffs.append(
            f"your case cites IPC {', '.join(q_only_ipc)} (absent here)"
        )
    if r_only_ipc:
        diffs.append(
            f"this case additionally cites IPC {', '.join(r_only_ipc)}"
        )
    if q_only_evidence:
        diffs.append(
            f"your case has {', '.join(q_only_evidence)} evidence (absent here)"
        )
    if r_only_evidence:
        diffs.append(
            f"this case has {', '.join(r_only_evidence)} evidence (absent in yours)"
        )
    if not shared_case_type:
        diffs.append(
            f"case type differs: yours is {q_type}, this is {r_type}"
        )
    # Only mention the court when the retrieved case actually has one.
    if q_court != r_court and r_court != "unknown":
        diffs.append(f"decided by {r_court}")
    if not diffs:
        diffs.append("no major structural differences detected")
    key_differences = (
        "Differences: " + _capitalize_first("; ".join(diffs)) + "."
    )

    # ── Verdict analysis ───────────────────────────────────────────────────
    if q_verdict == "unknown" or r_verdict == "unknown":
        verdict_analysis = (
            "Verdict comparison: unable to extract verdicts reliably "
            "from one or both cases."
        )
    elif q_verdict == r_verdict:
        verdict_analysis = (
            f"Verdict alignment: both cases resulted in {r_verdict}."
        )
    else:
        # Verdicts differ: point at evidence present on one side only as a
        # possible (not asserted) explanation.
        verdict_factors = []
        if "forensic" in r_evidence and "forensic" not in q_evidence:
            verdict_factors.append("this case had forensic evidence")
        if "eyewitness" in r_evidence and "eyewitness" not in q_evidence:
            verdict_factors.append("this case had eyewitness testimony")
        if "confession" in r_evidence and "confession" not in q_evidence:
            verdict_factors.append("this case included a confession")
        if "forensic" in q_evidence and "forensic" not in r_evidence:
            verdict_factors.append(
                "your case has forensic evidence this one lacked"
            )
        if verdict_factors:
            verdict_analysis = (
                f"Verdict divergence: your case trends {q_verdict}, "
                f"this case was {r_verdict}. "
                f"Possible factor: {'; '.join(verdict_factors)}."
            )
        else:
            verdict_analysis = (
                f"Verdict divergence: your case trends {q_verdict}, "
                f"this case was {r_verdict}. "
                "Similar charges led to opposite outcomes — review carefully."
            )

    return {
        "similarity_score": round(similarity_score, 3),
        "similarity_reason": similarity_reason,
        "key_differences": key_differences,
        "verdict_analysis": verdict_analysis,
        "shared_ipc": shared_ipc,
        "shared_evidence": shared_evidence,
        "shared_case_type": shared_case_type,
        "retrieved_verdict": r_verdict,
        "retrieved_court": r_court,
        "retrieved_date": retrieved_case.get("date", ""),
    }
def explain_results(query_case: dict, results: list) -> list:
    """
    Produce one explanation per reranked result, in the order given.

    Args:
        query_case: NLP-processed dict for the query
        results: sequence of (case_dict, score) pairs; each pair is unpacked
            and forwarded to explain_similarity together with query_case
    Returns:
        list of explanation dicts, one per entry in results
    """
    explanations = []
    for retrieved_case, relevance in results:
        explanations.append(
            explain_similarity(query_case, retrieved_case, relevance)
        )
    return explanations
|