import gradio as gr
from transformers import BertTokenizerFast, BertForTokenClassification, pipeline

# Directory holding the tokenizer and model files; "." is the current
# working directory of the process.
model_path = "."

# Load the tokenizer and the token-classification model from model_path.
tokenizer = BertTokenizerFast.from_pretrained(model_path)
model = BertForTokenClassification.from_pretrained(model_path)

# Shared NER pipeline, built once at import time and called by
# ner_prediction for every request.
# NOTE(review): aggregation_strategy="simple" is presumably what makes each
# result carry the "entity_group" key that ner_prediction reads — confirm
# against the transformers documentation before changing it.
nlp = pipeline(
    "ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)

def ner_prediction(text):
    """Run the NER pipeline on *text* and package the result for display.

    Args:
        text: Raw input text. ``None`` and empty or whitespace-only strings
            are accepted; they produce an empty entity list without calling
            the model.

    Returns:
        A dict with two keys: ``"text"`` (the input text, or ``""`` when the
        input was ``None``) and ``"entities"``, a list of dicts holding the
        ``entity``, ``score``, ``word``, ``start`` and ``end`` values taken
        from each pipeline result.
    """
    # Nothing to tag: return a well-formed empty result instead of handing
    # None or blank text to the pipeline.
    if text is None or not text.strip():
        return {"text": text or "", "entities": []}

    entities = [
        {
            "entity": span["entity_group"],
            # Coerce to a built-in float.
            # NOTE(review): presumably the pipeline returns a NumPy scalar
            # here, which does not serialize cleanly — confirm.
            "score": float(span["score"]),
            "word": span["word"],
            "start": span["start"],
            "end": span["end"],
        }
        for span in nlp(text)
    ]
    return {"text": text, "entities": entities}

# Sample inputs handed to gr.Interface through its examples= argument.
# The text is runtime data and must stay verbatim.
examples = [
    "PUTUSAN Nomor 25/Pid.Sus/2022/PN Pwd DEMI KEADILAN BERDASARKAN KETUHANAN YANG MAHA ESA Pengadilan Negeri Purwodadi",
]

# Markdown blurb passed to gr.Interface as its description. The text is
# user-facing (Indonesian) and must stay verbatim.
description_md = """
### Deteksi Entitas Legal Indonesia (CahyaBERT)
Model ini mendeteksi: **VERN** (Nomor Putusan), **JUDG** (Hakim), **DEFN** (Terdakwa), dll.
"""

# Gradio app: a five-line textbox feeds ner_prediction, and its return value
# is rendered by a HighlightedText component.
demo = gr.Interface(
    fn=ner_prediction,
    inputs=gr.Textbox(label="Teks Putusan", lines=5),
    outputs=gr.HighlightedText(label="Hasil Analisis"),
    title="Indonesian Legal NER Extraction",
    description=description_md,
    examples=examples,
    theme="soft",
)

if __name__ == "__main__":
    # Launch the app only when this file is executed as a script, so that
    # importing the module does not start it.
    demo.launch()