Spaces:
Sleeping
Sleeping
| """ | |
| Analyze PurposeOfHearing patterns to identify ripeness indicators. | |
| This script examines the historical hearing data to classify purposes | |
| as RIPE (ready for hearing) vs UNRIPE (bottleneck exists). | |
| """ | |
| import polars as pl | |
| from pathlib import Path | |
# Read the raw hearing records; every later section works off this frame.
hear_df = pl.read_csv("Data/ISDMHack_Hear.csv")

# Report title banner.
print(
    "=" * 80,
    "PURPOSEOFHEARING ANALYSIS FOR RIPENESS CLASSIFICATION",
    "=" * 80,
    sep="\n",
)

# Section 1: frequency of each distinct PurposeOfHearing value, most
# frequent first.
print("\nPurposeOfHearing Frequency Distribution:", "-" * 80, sep="\n")
per_purpose = hear_df.group_by("PurposeOfHearing").count()
purpose_counts = per_purpose.sort("count", descending=True)
print(purpose_counts.head(30))

n_purposes = hear_df["PurposeOfHearing"].n_unique()
print(f"\nTotal unique purposes: {n_purposes}")
print(f"Total hearings: {hear_df.height}")
# Section 2: count hearings per (PurposeOfHearing, Remappedstages) pair to
# show how raw purposes relate to the consolidated stages.
print(
    "\n" + "=" * 80,
    "PURPOSEOFHEARING → REMAPPEDSTAGES MAPPING",
    "=" * 80,
    sep="\n",
)

pair_counts = hear_df.group_by(["PurposeOfHearing", "Remappedstages"]).count()
mapping = pair_counts.sort("count", descending=True)
print(mapping.head(40))
# 3. Identify potential bottleneck indicators
print("\n" + "=" * 80)
print("RIPENESS CLASSIFICATION HEURISTICS")
print("=" * 80)

# Keyword heuristics, matched as case-insensitive substrings of the purpose.
# "Unripe" keywords suggest a bottleneck; "ripe" keywords suggest the matter
# is ready for hearing.
unripe_keywords = ["SUMMONS", "NOTICE", "ISSUE", "SERVICE", "STAY", "PENDING"]
ripe_keywords = ["ARGUMENTS", "HEARING", "FINAL", "JUDGMENT", "ORDERS", "DISPOSAL"]


def classify_purpose(purpose_str):
    """Classify a hearing purpose by keyword.

    Args:
        purpose_str: Raw PurposeOfHearing text, or None when missing.

    Returns:
        "UNKNOWN" when the purpose is missing (None, blank, or "NA" in any
        casing / surrounding whitespace); "UNRIPE" when it contains any
        unripe keyword; "RIPE" when it contains any ripe keyword;
        otherwise "CONDITIONAL".
    """
    if purpose_str is None:
        return "UNKNOWN"

    # Normalize once so the missing-value check is as tolerant of casing and
    # stray whitespace as the keyword matching below.
    normalized = purpose_str.strip().upper()
    if not normalized or normalized == "NA":
        return "UNKNOWN"

    # Unripe keywords are checked first: when a purpose matches both lists,
    # the bottleneck indicator takes precedence.
    if any(keyword in normalized for keyword in unripe_keywords):
        return "UNRIPE"
    if any(keyword in normalized for keyword in ripe_keywords):
        return "RIPE"

    # No keyword matched; ripeness depends on additional context.
    return "CONDITIONAL"
# Apply classification: add a Ripeness_Classification label to every row of
# the purpose frequency table.
# map_elements skips null inputs by default, which would leave a missing
# purpose with a null label; skip_nulls=False hands nulls to
# classify_purpose so they are labelled "UNKNOWN" like other missing values.
purpose_with_classification = purpose_counts.with_columns(
    pl.col("PurposeOfHearing")
    .map_elements(classify_purpose, return_dtype=pl.Utf8, skip_nulls=False)
    .alias("Ripeness_Classification")
)

print("\nPurpose Classification Summary:")
print("-" * 80)
print(purpose_with_classification.head(40))
# Summary stats: roll the per-purpose table up to one row per ripeness class.
print(
    "\n" + "=" * 80,
    "RIPENESS CLASSIFICATION SUMMARY",
    "=" * 80,
    sep="\n",
)

# Aggregations computed for each Ripeness_Classification group.
hearings_per_class = pl.col("count").sum().alias("total_hearings")
purposes_per_class = pl.col("PurposeOfHearing").count().alias("num_purposes")

# Each class's share of all hearings, expressed as a percentage.
share_of_hearings = pl.col("total_hearings") / pl.col("total_hearings").sum() * 100

class_totals = purpose_with_classification.group_by("Ripeness_Classification").agg(
    [hearings_per_class, purposes_per_class]
)
classification_summary = class_totals.with_columns(
    share_of_hearings.round(2).alias("percentage")
)
print(classification_summary)
# Section 4: purpose counts within each stage, ignoring hearings whose stage
# is null or the literal "NA".
print("\n" + "=" * 80, "RIPENESS BY STAGE", "=" * 80, sep="\n")

stage = pl.col("Remappedstages")
staged_hearings = hear_df.filter(stage.is_not_null()).filter(stage != "NA")
stage_purpose_counts = staged_hearings.group_by(
    ["Remappedstages", "PurposeOfHearing"]
).count()
stage_purpose_analysis = stage_purpose_counts.sort("count", descending=True)

print("\nTop Purpose-Stage combinations:")
print(stage_purpose_analysis.head(30))
# Section 5: write the purpose -> classification table to a CSV under
# reports/, creating that directory if it does not exist yet.
report_dir = Path("reports")
output_path = report_dir / "ripeness_purpose_mapping.csv"
report_dir.mkdir(exist_ok=True)
purpose_with_classification.write_csv(output_path)
print(f"\n✓ Classification mapping saved to: {output_path}")
# Closing section: static guidance text summarising the keyword heuristics.
recommendations = """
Based on the analysis:
UNRIPE (Bottleneck exists):
- Purposes containing: SUMMONS, NOTICE, ISSUE, SERVICE, STAY, PENDING
- Cases waiting for procedural steps before substantive hearing
RIPE (Ready for hearing):
- Purposes containing: ARGUMENTS, HEARING, FINAL, JUDGMENT, ORDERS, DISPOSAL
- Cases ready for substantive judicial action
CONDITIONAL:
- Other purposes that may be ripe or unripe depending on context
- Needs additional logic based on stage, case age, hearing count
Use Remappedstages as secondary indicator:
- ADMISSION stage → more likely unripe (procedural)
- ORDERS/JUDGMENT stage → more likely ripe (substantive)
"""

print(
    "\n" + "=" * 80,
    "RECOMMENDATIONS FOR RIPENESS CLASSIFIER",
    "=" * 80,
    sep="\n",
)
print(recommendations)