|
|
import pandas as pd
|
|
|
import matplotlib.pyplot as plt
|
|
|
import seaborn as sns
|
|
|
import os
|
|
|
import csv
|
|
|
import os.path
|
|
|
import pandas as pd
|
|
|
import re
|
|
|
import numpy as np
|
|
|
import math
|
|
|
|
|
|
def kategorikan(amar):
    """Map a free-text verdict label to one normalized sentence category.

    The text is lower-cased and stripped, then tested against keyword groups
    in a fixed priority order; the first group that matches wins.

    Args:
        amar: Verdict text. Anything that is not a ``str`` (``None``, a
            pandas ``NaN`` float, a number, ...) is treated as missing.

    Returns:
        One of ``"penjara seumur hidup"``, ``"pidana penjara"``,
        ``"pidana denda"``, ``"pidana mati"``, ``"bebas dakwaan"`` or
        ``"bebas bersyarat"``; ``None`` when the input is missing or matches
        no keyword group.
    """
    # Missing cells read through pandas arrive as float NaN, not None, and
    # have no .lower(); reject every non-string up front.
    if not isinstance(amar, str):
        return None

    a = amar.lower().strip()

    # Checked before "pidana penjara" so a life sentence is not swallowed by
    # the generic prison rule.
    if "seumur hidup" in a:
        return "penjara seumur hidup"

    if any(k in a for k in ("pidana penjara", "kurungan", "subsider penjara")):
        return "pidana penjara"

    if any(k in a for k in ("pidana denda", "subsider denda")):
        return "pidana denda"

    if "pidana mati" in a:
        return "pidana mati"

    if (
        "bebas dari dakwaan" in a
        or "lepas dari tuntutan" in a
        or ("membebaskan" in a and "dakwaan" in a)
    ):
        return "bebas dakwaan"

    kata_bersyarat = (
        "pidana bersyarat",
        "restorative justice",
        "dikembalikan kepada orang tua",
        "pidana tambahan",
        "lain-lain",
        "lain lain",
        "penghentian pemeriksaan perkara",
    )
    if any(k in a for k in kata_bersyarat):
        return "bebas bersyarat"

    # Everything else stays uncategorized. That includes texts that merely
    # mention "meninggal" without matching any rule above.
    return None
|
|
|
|
|
|
|
|
|
# "<digits> (<spelled-out number>) tahun|bulan". ``[^)]*`` keeps the
# parenthetical from running past its own closing bracket into a later one.
_POLA_TAHUN = re.compile(r'(\d+)\s*\([^)]*\)\s*tahun', re.IGNORECASE)
_POLA_BULAN = re.compile(r'(\d+)\s*\([^)]*\)\s*bulan', re.IGNORECASE)


def extract_penjara(text):
    """Extract a prison term from verdict text and return it in months.

    Looks for the first ``N (...) tahun`` and the first ``N (...) bulan``
    occurrence (case-insensitive) and combines them as ``years * 12 + months``.
    The parenthetical after the number is required, so a bare "35 Tahun 2009"
    style reference is not picked up.

    Args:
        text: Verdict text; ``None``/``NaN`` is treated as missing.

    Returns:
        ``None`` when ``text`` is missing, otherwise the total number of
        months as an ``int``. A part that is not found counts as 0, so text
        with neither pattern yields 0.
    """
    if pd.isna(text):
        return None

    cocok_tahun = _POLA_TAHUN.search(text)
    cocok_bulan = _POLA_BULAN.search(text)

    tahun = int(cocok_tahun.group(1)) if cocok_tahun else 0
    bulan = int(cocok_bulan.group(1)) if cocok_bulan else 0

    return tahun * 12 + bulan
|
|
|
|
|
|
|
|
|
# "pidana denda [sebesar|sejumlah|...] Rp[.] [Rp[.]] <amount>". The amount
# must start with a digit so a stray "." after "Rp" is never captured alone.
_POLA_DENDA = re.compile(
    r'pidana denda\s*'
    r'(?:sebesar|sejumlah|retribusi+\s+sebesar|restribusi+\s+sebesar)?'
    r'\s*(?:rp\.?\s*){1,2}(\d[\d.]*)',
    re.IGNORECASE,
)


def extract_denda(text):
    """Extract the fine amount that follows "pidana denda" in verdict text.

    The amount is the run of digits and dots after the ``Rp`` marker; dots
    are dropped before the value is converted to ``int``. Anything from a
    comma onwards (e.g. ``,00`` or ``,-``) is not part of the captured run.

    Args:
        text: Verdict text; ``None``/``NaN`` is treated as missing.

    Returns:
        The amount as an ``int``, or ``None`` when ``text`` is missing or no
        amount follows the "pidana denda ... Rp" phrase.
    """
    if pd.isna(text):
        return None

    cocok = _POLA_DENDA.search(text)
    if cocok is None:
        return None

    return int(cocok.group(1).replace('.', ''))
|
|
|
|
|
|
|
|
|
def proses_amar(row):
    """Derive the ``(prison months, fine amount)`` pair for one record.

    Reads ``row["kategori_bersih"]`` and ``row["catatan_amar"]``. Only the
    value that belongs to the row's category is extracted; the other slot
    (or both, for any other category) is ``None``.

    Args:
        row: Mapping-like record supporting ``row[key]`` for the two keys
            above.

    Returns:
        A 2-tuple ``(lama_penjara, banyak_denda)``.
    """
    kategori = row["kategori_bersih"]
    catatan = row["catatan_amar"]

    if kategori == "pidana penjara":
        return extract_penjara(catatan), None

    if kategori == "pidana denda":
        return None, extract_denda(catatan)

    # Every other category carries neither a term nor a fine.
    return None, None
|
|
|
|
|
|
# Output percentage column -> value of ``kategori_bersih`` it counts.
# Insertion order is the column order of the resulting table.
_KOLOM_PERSEN_KATEGORI = {
    'penjara': 'pidana penjara',
    'penjara seumur hidup': 'penjara seumur hidup',
    'denda': 'pidana denda',
    'bebas bersyarat': 'bebas bersyarat',
    'bebas dakwaan': 'bebas dakwaan',
    'hukuman mati': 'pidana mati',
}

# Weight applied to each outcome's percentage share when scoring a group;
# harsher outcomes push the score up, acquittal-like outcomes pull it down.
_BOBOT_HUKUMAN = {
    'hukuman mati': 10.0,
    'penjara seumur hidup': 8.0,
    'penjara': 5.0,
    'denda': 1.5,
    'bebas bersyarat': -1.0,
    'bebas dakwaan': -2.0,
}


def _to_float(x):
    """Parse ``x`` as a float, ignoring '%' and ','; None/NaN/garbage -> 0.0."""
    if x is None:
        return 0.0
    try:
        nilai = float(str(x).replace('%', '').replace(',', ''))
    except (TypeError, ValueError):
        return 0.0
    return 0.0 if math.isnan(nilai) else nilai


def _hitung_score(row):
    """Return the severity score of one summary row.

    Weighted sum of the outcome percentages, plus 4 points per month of
    average prison term, plus ``8 * log10(average fine + 10)`` when the
    average fine is positive.
    """
    hm = _to_float(row['hukuman mati'])
    sh = _to_float(row['penjara seumur hidup'])
    pj = _to_float(row['penjara'])
    dn = _to_float(row['denda'])
    bb = _to_float(row['bebas bersyarat'])
    bd = _to_float(row['bebas dakwaan'])

    base_score = (
        hm * _BOBOT_HUKUMAN['hukuman mati'] * 1.2 +
        sh * _BOBOT_HUKUMAN['penjara seumur hidup'] * 1.2 +
        pj * _BOBOT_HUKUMAN['penjara'] * 1.0 +
        dn * _BOBOT_HUKUMAN['denda'] * 1.0 +
        bb * _BOBOT_HUKUMAN['bebas bersyarat'] * 2.0 +
        bd * _BOBOT_HUKUMAN['bebas dakwaan'] * 2.0
    )

    rata_penjara = _to_float(row.get('rata-rata penjara', 0))
    penjara_boost = rata_penjara * 4

    rata_denda = _to_float(row.get('rata-rata denda', 0))
    denda_boost = np.log10(rata_denda + 10) * 8 if rata_denda > 0 else 0

    return base_score + penjara_boost + denda_boost


def _label_keparahan(score, p33, p66):
    """Bucket a score against the 33rd/66th percentile cut-offs."""
    if score <= p33:
        return "light"
    if score <= p66:
        return "moderate"
    return "serious"


def ringkasan(df):
    """Build a per-``kata_kunci`` summary table with a severity label.

    For every group of ``df.groupby('kata_kunci')`` one row is produced with:

    * ``'tindak pidana'`` - the group key;
    * ``'rata-rata penjara'`` - mean ``lama_penjara`` over the group's
      ``'pidana penjara'`` rows, rounded to 1 decimal (NaN if none);
    * ``'rata-rata denda'`` - mean ``banyak_denda`` over the group's
      ``'pidana denda'`` rows, rounded to 0 decimals (NaN if none);
    * one percentage column per outcome category (share of the group's rows,
      rounded to 3 decimals, stored as a string);
    * ``'kontribusi kasus'`` - the group's share of all rows in ``df``
      (string, 3 decimals);
    * ``'kategori_pidana'`` - ``"light"``, ``"moderate"`` or ``"serious"``,
      obtained by comparing the row's severity score with the 33rd and 66th
      percentiles of all rows' scores.

    Args:
        df: DataFrame with columns ``kata_kunci``, ``kategori_bersih``,
            ``lama_penjara`` and ``banyak_denda``.

    Returns:
        The summary DataFrame. When ``df`` yields no groups, an empty
        DataFrame carrying the same columns is returned.
    """
    total_kasus = len(df)

    records = []
    for tp, group in df.groupby('kata_kunci'):
        kategori = group['kategori_bersih']
        jumlah = len(group)

        # Coerce first: the value columns may be object dtype (ints mixed
        # with None), which is not a reliable input for .mean().
        lama = pd.to_numeric(
            group.loc[kategori == 'pidana penjara', 'lama_penjara'],
            errors='coerce',
        )
        denda = pd.to_numeric(
            group.loc[kategori == 'pidana denda', 'banyak_denda'],
            errors='coerce',
        )

        record = {
            'tindak pidana': tp,
            'rata-rata penjara': round(lama.mean(), 1),
            'rata-rata denda': round(denda.mean(), 0),
        }
        for kolom, nama_kategori in _KOLOM_PERSEN_KATEGORI.items():
            persen = round(100 * (kategori == nama_kategori).sum() / jumlah, 3)
            record[kolom] = f"{persen}"
        record['kontribusi kasus'] = f"{round(100 * jumlah / total_kasus, 3)}"

        records.append(record)

    if not records:
        # No groups: percentiles are undefined, so hand back an empty table
        # that still exposes every column downstream code expects.
        kolom = ['tindak pidana', 'rata-rata penjara', 'rata-rata denda']
        kolom += list(_KOLOM_PERSEN_KATEGORI)
        kolom += ['kontribusi kasus', 'kategori_pidana']
        return pd.DataFrame(columns=kolom)

    tabel_ringkasan = pd.DataFrame(records)

    # Score each row once; the same scores define the cut-offs and the labels.
    semua_skor = tabel_ringkasan.apply(_hitung_score, axis=1).tolist()
    p33 = np.percentile(semua_skor, 33)
    p66 = np.percentile(semua_skor, 66)

    tabel_ringkasan['kategori_pidana'] = [
        _label_keparahan(skor, p33, p66) for skor in semua_skor
    ]

    return tabel_ringkasan
|
|
|
|
|
|
def normalize_ringkasan(df):
    """Convert the percentage columns of a summary table to numbers in place.

    Each listed column is passed through ``pd.to_numeric`` with
    ``errors="coerce"``; values that cannot be parsed (and missing values)
    become 0. Other columns are left untouched.

    Args:
        df: Summary DataFrame containing all seven percentage columns.

    Returns:
        The same ``df`` object, modified in place.
    """
    kolom_persen = (
        "penjara",
        "penjara seumur hidup",
        "denda",
        "bebas bersyarat",
        "bebas dakwaan",
        "hukuman mati",
        "kontribusi kasus",
    )

    for nama in kolom_persen:
        angka = pd.to_numeric(df[nama], errors="coerce")
        df[nama] = angka.fillna(0)

    return df
|
|
|
|
|
|
def table_summary(df):
    """Run the full pipeline from raw verdict rows to the numeric summary.

    Steps: categorize ``amar_lainnya`` into ``kategori_bersih``, drop rows
    that could not be categorized, extract ``lama_penjara`` and
    ``banyak_denda`` for each remaining row via ``proses_amar``, then
    summarize with ``ringkasan`` and convert the percentage columns with
    ``normalize_ringkasan``.

    Args:
        df: DataFrame with an ``amar_lainnya`` column, plus whatever columns
            ``proses_amar`` and ``ringkasan`` read. NOTE: a
            ``kategori_bersih`` column is added to this frame in place.

    Returns:
        The normalized summary table.
    """
    df["kategori_bersih"] = df["amar_lainnya"].apply(kategorikan)

    # Work on an explicit copy: assigning new columns onto a filtered slice
    # of the caller's frame triggers SettingWithCopyWarning and may not stick.
    terkategori = df[df["kategori_bersih"].notna()].copy()

    # Collect the pairs row by row instead of zip(*df.apply(...)), which
    # cannot be unpacked into two columns when no row survives the filter.
    hasil = [proses_amar(row) for _, row in terkategori.iterrows()]
    terkategori["lama_penjara"] = [lama for lama, _ in hasil]
    terkategori["banyak_denda"] = [denda for _, denda in hasil]

    return normalize_ringkasan(ringkasan(terkategori))
|
|
|
|
|
|
|
|
|
|