cahyaBERT / app.py
almfz's picture
Update app.py
f48d676 verified
import gradio as gr
# Import the BERT-specific classes explicitly instead of the Auto* classes.
from transformers import BertTokenizerFast, BertForTokenClassification, pipeline
# The model and tokenizer files are read from the current directory.
model_path = "."

# Use the BERT-specific tokenizer/model classes (not the Auto* classes).
tokenizer = BertTokenizerFast.from_pretrained(model_path)
model = BertForTokenClassification.from_pretrained(model_path)

# NER pipeline shared by every request. With aggregation_strategy="simple"
# the pipeline reports grouped entities (an "entity_group" per result).
nlp = pipeline(
    "ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)
def ner_prediction(text):
    """Run the NER pipeline on ``text`` and shape the result for display.

    Args:
        text: Raw input text. ``None`` and blank (empty or whitespace-only)
            input are accepted and yield no entities.

    Returns:
        A dict with two keys:

        * ``"text"``: the input text (``""`` when ``text`` is ``None``).
        * ``"entities"``: a list of dicts, one per pipeline result, with the
          keys ``"entity"``, ``"score"``, ``"word"``, ``"start"`` and
          ``"end"``.
    """
    # Blank input cannot contain entities: skip the model call entirely and
    # never hand ``None`` to the pipeline.
    if text is None or not text.strip():
        return {"text": text or "", "entities": []}

    return {
        "text": text,
        "entities": [
            {
                "entity": entity["entity_group"],
                # Force a built-in float (presumably so the score serializes
                # cleanly whatever numeric type the pipeline returns).
                "score": float(entity["score"]),
                "word": entity["word"],
                "start": entity["start"],
                "end": entity["end"],
            }
            for entity in nlp(text)
        ],
    }
# Markdown blurb passed to the interface as its description.
description_md = """
### Deteksi Entitas Legal Indonesia (CahyaBERT)
Model ini mendeteksi: **VERN** (Nomor Putusan), **JUDG** (Hakim), **DEFN** (Terdakwa), dll.
"""

# Sample input offered to the user alongside the text box.
examples = [
    "PUTUSAN Nomor 25/Pid.Sus/2022/PN Pwd DEMI KEADILAN BERDASARKAN KETUHANAN YANG MAHA ESA Pengadilan Negeri Purwodadi",
]

# Single text box in, highlighted entity spans out.
demo = gr.Interface(
    title="Indonesian Legal NER Extraction",
    description=description_md,
    fn=ner_prediction,
    inputs=gr.Textbox(lines=5, label="Teks Putusan"),
    outputs=gr.HighlightedText(label="Hasil Analisis"),
    examples=examples,
    theme="soft",
)

if __name__ == "__main__":
    # Launch the app only when this file is executed as a script.
    demo.launch()