File size: 6,123 Bytes
b2fe8d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
"""
Explanation engine — deterministic, template-based structured diff.

NO LLM dependency. No API calls. Always fast. Always consistent.

Exported functions (both required by search_pipeline.py):
  explain_similarity(query_case, retrieved_case, similarity_score) -> dict
  explain_results(query_case, results) -> list[dict]
"""


def _capitalize_first(text: str) -> str:
    """Upper-case only the first character, leaving the rest untouched."""
    # str.capitalize() would also lower-case the remainder, which mangles
    # acronyms ("IPC") and proper nouns (court names) inside the sentence.
    return text[:1].upper() + text[1:]


def _join_items(items) -> str:
    """Render items as a comma-separated string, stringifying each one."""
    return ", ".join(map(str, items))


def explain_similarity(
    query_case: dict,
    retrieved_case: dict,
    similarity_score: float
) -> dict:
    """
    Generate structured explanation comparing query to one retrieved case.

    Both case dicts are read with .get(), so every field is optional:
    "ipc_sections" and "evidence_types" (iterables; missing or None is
    treated as empty), "case_type", "court" and "verdict" (default
    "unknown"), and "date" on the retrieved case (default "").

    Args:
        query_case:       NLP-processed dict for the user's query text
        retrieved_case:   NLP-processed dict for a retrieved result
        similarity_score: reranker relevance score (float)

    Returns:
        dict with 10 keys:
          similarity_score  - score rounded to 3 decimals
          similarity_reason - sentence listing shared attributes
          key_differences   - sentence listing structural differences
          verdict_analysis  - sentence comparing the two verdicts
          shared_ipc        - sorted list of IPC sections in both cases
          shared_evidence   - sorted list of evidence types in both cases
          shared_case_type  - True when both case_type values are equal
          retrieved_verdict, retrieved_court, retrieved_date
    """
    # IPC comparison ("or []" guards against an explicit None value)
    q_ipc = set(query_case.get("ipc_sections") or [])
    r_ipc = set(retrieved_case.get("ipc_sections") or [])
    shared_ipc = sorted(q_ipc & r_ipc)
    q_only_ipc = sorted(q_ipc - r_ipc)
    r_only_ipc = sorted(r_ipc - q_ipc)

    # Evidence comparison
    q_evidence = set(query_case.get("evidence_types") or [])
    r_evidence = set(retrieved_case.get("evidence_types") or [])
    shared_evidence = sorted(q_evidence & r_evidence)
    q_only_evidence = sorted(q_evidence - r_evidence)
    r_only_evidence = sorted(r_evidence - q_evidence)

    # Case type
    q_type = query_case.get("case_type", "unknown")
    r_type = retrieved_case.get("case_type", "unknown")
    shared_case_type = (q_type == r_type)

    # Court and verdict
    q_court = query_case.get("court", "unknown")
    r_court = retrieved_case.get("court", "unknown")
    q_verdict = query_case.get("verdict", "unknown")
    r_verdict = retrieved_case.get("verdict", "unknown")

    # ── Similarity reason ──────────────────────────────────────────────────
    reasons = []
    if shared_ipc:
        reasons.append(f"both cite IPC {_join_items(shared_ipc)}")
    # Two cases whose type could not be determined are not meaningfully
    # "the same type", so the "unknown" placeholder is never a reason.
    if shared_case_type and q_type != "unknown":
        reasons.append(f"both are {q_type} cases")
    if shared_evidence:
        reasons.append(f"both involve {_join_items(shared_evidence)} evidence")
    if not reasons:
        reasons.append(
            "high semantic similarity in legal language and factual context"
        )
    similarity_reason = (
        "Similarity: " + _capitalize_first("; ".join(reasons)) + "."
    )

    # ── Key differences ────────────────────────────────────────────────────
    diffs = []
    if q_only_ipc:
        diffs.append(
            f"your case cites IPC {_join_items(q_only_ipc)} (absent here)"
        )
    if r_only_ipc:
        diffs.append(
            f"this case additionally cites IPC {_join_items(r_only_ipc)}"
        )
    if q_only_evidence:
        diffs.append(
            f"your case has {_join_items(q_only_evidence)} evidence "
            f"(absent here)"
        )
    if r_only_evidence:
        diffs.append(
            f"this case has {_join_items(r_only_evidence)} evidence "
            f"(absent in yours)"
        )
    if not shared_case_type:
        diffs.append(
            f"case type differs: yours is {q_type}, this is {r_type}"
        )
    # Only mention the court when the retrieved case actually has one.
    if q_court != r_court and r_court != "unknown":
        diffs.append(f"decided by {r_court}")
    if not diffs:
        diffs.append("no major structural differences detected")
    key_differences = (
        "Differences: " + _capitalize_first("; ".join(diffs)) + "."
    )

    # ── Verdict analysis ───────────────────────────────────────────────────
    if q_verdict == "unknown" or r_verdict == "unknown":
        verdict_analysis = (
            "Verdict comparison: unable to extract verdicts reliably "
            "from one or both cases."
        )
    elif q_verdict == r_verdict:
        verdict_analysis = (
            f"Verdict alignment: both cases resulted in {r_verdict}."
        )
    else:
        # Verdicts differ: point at evidence asymmetries that could
        # plausibly account for the different outcome.
        verdict_factors = []
        if "forensic" in r_evidence and "forensic" not in q_evidence:
            verdict_factors.append("this case had forensic evidence")
        if "eyewitness" in r_evidence and "eyewitness" not in q_evidence:
            verdict_factors.append("this case had eyewitness testimony")
        if "confession" in r_evidence and "confession" not in q_evidence:
            verdict_factors.append("this case included a confession")
        if "forensic" in q_evidence and "forensic" not in r_evidence:
            verdict_factors.append(
                "your case has forensic evidence this one lacked"
            )
        if verdict_factors:
            verdict_analysis = (
                f"Verdict divergence: your case trends {q_verdict}, "
                f"this case was {r_verdict}. "
                f"Possible factor: {'; '.join(verdict_factors)}."
            )
        else:
            verdict_analysis = (
                f"Verdict divergence: your case trends {q_verdict}, "
                f"this case was {r_verdict}. "
                f"Similar charges led to opposite outcomes — review carefully."
            )

    return {
        "similarity_score":  round(similarity_score, 3),
        "similarity_reason": similarity_reason,
        "key_differences":   key_differences,
        "verdict_analysis":  verdict_analysis,
        "shared_ipc":        shared_ipc,
        "shared_evidence":   shared_evidence,
        "shared_case_type":  shared_case_type,
        "retrieved_verdict": r_verdict,
        "retrieved_court":   r_court,
        "retrieved_date":    retrieved_case.get("date", ""),
    }


def explain_results(query_case: dict, results: list) -> list:
    """
    Build an explanation for every reranked result, in input order.

    Args:
        query_case: NLP-processed dict for the query
        results:    iterable of (case_dict, score) pairs from the reranker

    Returns:
        list of explanation dicts as produced by explain_similarity,
        one per entry of results
    """
    explanations = []
    for retrieved_case, relevance in results:
        explanation = explain_similarity(query_case, retrieved_case, relevance)
        explanations.append(explanation)
    return explanations