Spaces:
Runtime error
Runtime error
| from transformers import BertTokenizerFast, BertForTokenClassification | |
| import gradio as gr | |
| from src.legalNER import * | |
# Class-index -> tag map for the token classifier: indices 0-11 are the "B_"
# tags, 12-23 the matching "I_" tags (same entity order), and 24 is 'O'.
ids_to_labels = dict(enumerate(
    [
        prefix + tag
        for prefix in ("B_", "I_")
        for tag in (
            "ADVO", "ARTV", "CRIA", "DEFN", "JUDG", "JUDP",
            "PENA", "PROS", "PUNI", "REGI", "TIMV", "VERN",
        )
    ]
    + ["O"]
))
# Pretrained checkpoint identifiers for the two supported backbones.
indolem = 'indolem/indobert-base-uncased'
indonlu = 'indobenchmark/indobert-base-p2'

# Each backbone is loaded once at import time together with its tokenizer.
# The classification head is sized to the number of entries in ids_to_labels.

# IndoLEM backbone
model_indolem = BertForTokenClassification.from_pretrained(
    indolem,
    num_labels=len(ids_to_labels),
)
tokenizer_indolem = BertTokenizerFast.from_pretrained(indolem)

# IndoNLU backbone
model_indonlu = BertForTokenClassification.from_pretrained(
    indonlu,
    num_labels=len(ids_to_labels),
)
tokenizer_indonlu = BertTokenizerFast.from_pretrained(indonlu)
def text_extraction(text, model, progress=gr.Progress()):
    """Run legal NER over a piece of text with the selected backbone.

    Args:
        text: Input handed unchanged to ``LegalNER.predict``.
        model: Display name of the backbone. ``'IndoBERT (IndoLEM)'`` selects
            the IndoLEM model/tokenizer pair; any other value selects the
            IndoNLU pair. The name is also forwarded to ``LegalNER``.
        progress: Not used in the body.
            NOTE(review): presumably declared so Gradio can inject a progress
            tracker - confirm against the UI wiring.

    Returns:
        A dict with ``"text"`` (the predictor's ``tokenizer_decode`` attribute,
        read after prediction) and ``"entities"`` (the value returned by
        ``predict``).
    """
    backbone, tokenizer = (
        (model_indolem, tokenizer_indolem)
        if model == 'IndoBERT (IndoLEM)'
        else (model_indonlu, tokenizer_indonlu)
    )
    ner = LegalNER(backbone, tokenizer, ids_to_labels, model)

    # predict() must run before tokenizer_decode is read, as in the original
    # statement order.
    entities = ner.predict(text)
    decoded_text = ner.tokenizer_decode
    return {"text": decoded_text, "entities": entities}
def pdf_extraction(doc, model, progress=gr.Progress()):
    """Run legal NER over a document with the selected backbone.

    Args:
        doc: Document input handed unchanged to ``LegalNER.predict``; the
            form it takes (path, file object, ...) is not visible here.
        model: Display name of the backbone. ``'IndoBERT (IndoLEM)'`` selects
            the IndoLEM model/tokenizer pair; any other value selects the
            IndoNLU pair. The name is also forwarded to ``LegalNER``.
        progress: Not used in the body.
            NOTE(review): presumably declared so Gradio can inject a progress
            tracker - confirm against the UI wiring.

    Returns:
        Whatever ``LegalNER.predict`` returns for ``doc``.
    """
    backbone, tokenizer = (
        (model_indolem, tokenizer_indolem)
        if model == 'IndoBERT (IndoLEM)'
        else (model_indonlu, tokenizer_indonlu)
    )
    ner = LegalNER(backbone, tokenizer, ids_to_labels, model)
    return ner.predict(doc)