File size: 4,184 Bytes
54c8522
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
"""
Analyze PurposeOfHearing patterns to identify ripeness indicators.

This script examines the historical hearing data to classify purposes
as RIPE (ready for hearing) vs UNRIPE (bottleneck exists).
"""

import polars as pl
from pathlib import Path

# Read the raw hearing records; every later section of this script works
# from this single frame.
hear_df = pl.read_csv("Data/ISDMHack_Hear.csv")

print(
    "=" * 80,
    "PURPOSEOFHEARING ANALYSIS FOR RIPENESS CLASSIFICATION",
    "=" * 80,
    sep="\n",
)

# 1. Unique values and frequency
print("\nPurposeOfHearing Frequency Distribution:", "-" * 80, sep="\n")

# One row per distinct purpose with the number of hearings that carry it,
# most frequent first.
# NOTE(review): GroupBy.count() appears to be deprecated in newer polars in
# favour of len(); it is kept here because later sections rely on the
# resulting column being named "count" -- confirm against the pinned version.
purpose_counts = hear_df.group_by("PurposeOfHearing").count()
purpose_counts = purpose_counts.sort("count", descending=True)
print(purpose_counts.head(30))

print(f"\nTotal unique purposes: {hear_df.get_column('PurposeOfHearing').n_unique()}")
print(f"Total hearings: {hear_df.height}")

# 2. Map to Remappedstages (consolidation)
print(
    "\n" + "=" * 80,
    "PURPOSEOFHEARING → REMAPPEDSTAGES MAPPING",
    "=" * 80,
    sep="\n",
)

# Count hearings for every (purpose, stage) pair so the relationship between
# the two columns can be read off directly; the most common pairs come first.
mapping = hear_df.group_by(["PurposeOfHearing", "Remappedstages"]).count()
mapping = mapping.sort("count", descending=True)
print(mapping.head(40))

# 3. Identify potential bottleneck indicators
print("\n" + "=" * 80)
print("RIPENESS CLASSIFICATION HEURISTICS")
print("=" * 80)

# Keywords suggesting unripe status
unripe_keywords = ["SUMMONS", "NOTICE", "ISSUE", "SERVICE", "STAY", "PENDING"]
# Keywords suggesting ripe status
ripe_keywords = ["ARGUMENTS", "HEARING", "FINAL", "JUDGMENT", "ORDERS", "DISPOSAL"]


# Classify purposes
def classify_purpose(purpose_str):
    """Classify one PurposeOfHearing value by keyword.

    Matching is a case-insensitive substring test against the module-level
    ``unripe_keywords`` and ``ripe_keywords`` lists.

    Args:
        purpose_str: The purpose text, or ``None`` when the value is missing.

    Returns:
        ``"UNKNOWN"`` when the value is ``None``, blank, or the placeholder
        ``"NA"`` (ignoring case and surrounding whitespace);
        ``"UNRIPE"`` when any unripe keyword occurs in the text;
        ``"RIPE"`` when no unripe keyword occurs but a ripe keyword does;
        ``"CONDITIONAL"`` otherwise.
    """
    if purpose_str is None:
        return "UNKNOWN"

    # Normalise once so the placeholder check is as case-insensitive as the
    # keyword matching below, and so blank / padded values are not
    # misreported as CONDITIONAL.
    normalized = purpose_str.strip().upper()
    if not normalized or normalized == "NA":
        return "UNKNOWN"

    # Unripe keywords are checked first, so a purpose that mentions both kinds
    # (e.g. "NOTICE FOR FINAL HEARING") is treated as UNRIPE.
    if any(keyword in normalized for keyword in unripe_keywords):
        return "UNRIPE"

    if any(keyword in normalized for keyword in ripe_keywords):
        return "RIPE"

    # No keyword matched: ripeness depends on context outside this text.
    return "CONDITIONAL"

# Apply classification
# Adds a Ripeness_Classification column to the per-purpose frequency table.
# map_elements skips null inputs by default, so a null purpose would never
# reach classify_purpose and would end up with a null class. classify_purpose
# always returns a string, so any null left after the map can only come from a
# null purpose; label those "UNKNOWN", matching what classify_purpose(None)
# returns.
purpose_with_classification = (
    purpose_counts
    .with_columns(
        pl.col("PurposeOfHearing")
        .map_elements(classify_purpose, return_dtype=pl.Utf8)
        .fill_null("UNKNOWN")
        .alias("Ripeness_Classification")
    )
)

print("\nPurpose Classification Summary:")
print("-" * 80)
print(purpose_with_classification.head(40))

# Summary stats
print("\n" + "=" * 80)
print("RIPENESS CLASSIFICATION SUMMARY")
print("=" * 80)
# Per class: total hearings, number of distinct purposes, and share of all
# hearings in percent (rounded to 2 decimals).
classification_summary = (
    purpose_with_classification
    .group_by("Ripeness_Classification")
    .agg([
        pl.col("count").sum().alias("total_hearings"),
        # len() counts every row of the group, including the row whose
        # purpose is null; count() would leave that purpose out.
        pl.col("PurposeOfHearing").len().alias("num_purposes")
    ])
    .with_columns(
        (pl.col("total_hearings") / pl.col("total_hearings").sum() * 100)
        .round(2)
        .alias("percentage")
    )
    # group_by does not guarantee row order; sort so the printed summary is
    # stable from run to run.
    .sort("total_hearings", descending=True)
)
print(classification_summary)

# 4. Analyze by stage
print("\n" + "=" * 80, "RIPENESS BY STAGE", "=" * 80, sep="\n")

# Drop hearings whose stage is missing or is the "NA" placeholder, then count
# hearings for each (stage, purpose) pair, largest counts first.
stage_purpose_analysis = (
    hear_df
    .filter(
        pl.col("Remappedstages").is_not_null()
        & (pl.col("Remappedstages") != "NA")
    )
    .group_by(["Remappedstages", "PurposeOfHearing"])
    .count()
    .sort("count", descending=True)
)

print("\nTop Purpose-Stage combinations:", stage_purpose_analysis.head(30), sep="\n")

# 5. Export classification mapping
# Writes the per-purpose table (purpose, count, classification) as CSV.
output_path = Path("reports/ripeness_purpose_mapping.csv")
# parents=True keeps this working if the output location is ever nested more
# than one directory deep; exist_ok=True makes re-runs a no-op.
output_path.parent.mkdir(parents=True, exist_ok=True)
purpose_with_classification.write_csv(output_path)
print(f"\n✓ Classification mapping saved to: {output_path}")

# Closing section: print static guidance for whoever builds the ripeness
# classifier. The keyword lists in the text are written out by hand and
# mirror unripe_keywords / ripe_keywords; keep them in sync when those change.
print(
    "\n" + "=" * 80,
    "RECOMMENDATIONS FOR RIPENESS CLASSIFIER",
    "=" * 80,
    sep="\n",
)
print("""
Based on the analysis:

UNRIPE (Bottleneck exists):
- Purposes containing: SUMMONS, NOTICE, ISSUE, SERVICE, STAY, PENDING
- Cases waiting for procedural steps before substantive hearing

RIPE (Ready for hearing):
- Purposes containing: ARGUMENTS, HEARING, FINAL, JUDGMENT, ORDERS, DISPOSAL
- Cases ready for substantive judicial action

CONDITIONAL:
- Other purposes that may be ripe or unripe depending on context
- Needs additional logic based on stage, case age, hearing count

Use Remappedstages as secondary indicator:
- ADMISSION stage → more likely unripe (procedural)
- ORDERS/JUDGMENT stage → more likely ripe (substantive)
""")