File size: 1,948 Bytes
375f639
 
 
 
 
 
 
 
 
 
 
 
 
 
5a2154a
375f639
 
 
5a2154a
375f639
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b4ee34
375f639
3b4ee34
375f639
 
dda4a6d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
from transformers import AutoConfig, AutoModelForTokenClassification, AutoTokenizer
import numpy as np
import torch
import gradio as gr
import requests

# Hub identifier used for both the model weights and the tokenizer files.
_CHECKPOINT = "PRAli22/arabert_arabic_ner"

# Token-classification model and its tokenizer, loaded once at import time.
test_model = AutoModelForTokenClassification.from_pretrained(_CHECKPOINT)
TOKENIZER = AutoTokenizer.from_pretrained(_CHECKPOINT)

# Tag name -> class index.
label_map = {
    'B-LOC': 0,
    'O': 1,
    'B-PERS': 2,
    'I-PERS': 3,
    'B-ORG': 4,
    'I-LOC': 5,
    'I-ORG': 6,
    'B-MISC': 7,
    'I-MISC': 8,
}
# Class index -> tag name; the exact inverse of label_map, used by
# predict_sent to turn argmax indices back into tag strings.
inv_label_map = {index: tag for tag, index in label_map.items()}


def predict_sent(sentences):
    """Tag each word of the input text with its predicted entity label.

    The text is tokenized with the module-level ``TOKENIZER``, passed through
    ``test_model`` on CPU, and the highest-scoring class index of every token
    is translated to a tag name through ``inv_label_map``. Sub-word pieces
    (tokens starting with ``##``) are glued back onto the preceding token;
    the merged word keeps the label predicted for its first piece.

    Args:
        sentences: The raw text to analyse.

    Returns:
        A string with one line per word, each line being the label, a tab
        character, then the word. Lines are joined with newlines. ``None``
        or blank input yields an empty string.
    """
    # Nothing to tag: avoid a pointless forward pass on blank input.
    if sentences is None or (isinstance(sentences, str) and not sentences.strip()):
        return ""

    # truncation=True keeps over-long input within the tokenizer's configured
    # maximum length instead of failing inside the model. The special-tokens
    # mask flags the markers the tokenizer adds around the text so they can
    # be left out of the result.
    encoding = TOKENIZER(
        sentences,
        return_tensors='pt',
        truncation=True,
        return_special_tokens_mask=True,
    )
    input_ids = encoding['input_ids']
    special_mask = encoding['special_tokens_mask'][0].tolist()

    with torch.no_grad():
        test_model.to('cpu')
        output = test_model(input_ids)
    # output[0] holds the per-token class scores; take the best class per
    # token for the single sequence in the batch.
    label_indices = np.argmax(output[0].to('cpu').numpy(), axis=2)[0]

    tokens = TOKENIZER.convert_ids_to_tokens(input_ids[0].tolist())

    new_tokens, new_labels = [], []
    for token, label_idx, is_special in zip(tokens, label_indices, special_mask):
        if is_special:
            # Tokenizer-added marker, not part of the user's text.
            continue
        if token.startswith("##") and new_tokens:
            # Continuation piece: extend the previous word, keep its label.
            new_tokens[-1] += token[2:]
        else:
            new_labels.append(inv_label_map[int(label_idx)])
            new_tokens.append(token)

    return "\n".join(
        "{}\t{}".format(label, token)
        for token, label in zip(new_tokens, new_labels)
    )

# Page-level CSS: sets a background image on the document body.
_BACKGROUND_URL = "https://media.istockphoto.com/id/1256252051/vector/people-using-online-translation-app.jpg?s=612x612&w=0&k=20&c=aa6ykHXnSwqKu31fFR6r6Y1bYMS5FMAU9yHqwwylA94="
css_code = 'body{background-image:url("' + _BACKGROUND_URL + '");}'

# One free-text input box and one text output box for the tagged result.
sentence_box = gr.Textbox(label="sentence", placeholder=" Enter the sentence ")
entity_box = gr.Textbox(label="entity name")

# Wire predict_sent to the two text boxes and start the web UI.
demo = gr.Interface(
    fn=predict_sent,
    inputs=sentence_box,
    outputs=[entity_box],
    title="Arabic Named Entity Recognition",
    description="This is Arabic Named Entity Recognition System, it takes an arabian sentence as input and returns every entity name within it",
    css=css_code,
)
demo.launch()