Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,25 +8,44 @@ from transformers import pipeline
|
|
| 8 |
|
| 9 |
ner = pipeline('ner', model = 'FacebookAI/xlm-roberta-large-finetuned-conll03-english', grouped_entities = True)
|
| 10 |
|
| 11 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
|
|
|
|
|
|
| 13 |
def entities_to_df(text):
|
| 14 |
all_entities = []
|
| 15 |
-
#the NER model will be used on the input text
|
| 16 |
-
entities = ner(text)
|
| 17 |
|
|
|
|
| 18 |
for entity in entities:
|
|
|
|
| 19 |
all_entities.append({
|
| 20 |
"Entity": entity['word'],
|
| 21 |
-
"Type" : entity['entity_group'],
|
| 22 |
"Score": float((entity['score'])),
|
| 23 |
"Start": entity['start'],
|
| 24 |
"End": entity['end'],
|
| 25 |
-
"
|
| 26 |
})
|
| 27 |
|
| 28 |
df = pd.DataFrame(all_entities)
|
| 29 |
-
|
| 30 |
#the df in the output did not round the score above so I rounded it after creating the df
|
| 31 |
df['Score'] = df['Score'].round(4)
|
| 32 |
|
|
@@ -38,28 +57,25 @@ def highlight_entities(text):
|
|
| 38 |
df = entities_to_df(text)
|
| 39 |
highlighted_text = ""
|
| 40 |
last_idx = 0
|
| 41 |
-
|
| 42 |
-
# Iterating
|
| 43 |
for i, entity in df.iterrows(): #iterrows is a function in the df to iterate by rows
|
| 44 |
# Add the text before the entity
|
| 45 |
highlighted_text += text[last_idx:entity['Start']]
|
| 46 |
-
|
| 47 |
-
#highlighting the entities in RED using an inline HTML div with CSS, followed by their types (PER, ORG, LOC or MISC)
|
| 48 |
-
|
| 49 |
highlighted_text += f"<div style='background-color: red; display: inline;'>{entity['Entity']} ({entity['Type']})</div>"
|
| 50 |
-
|
| 51 |
#updating the index after the current entity
|
| 52 |
last_idx = entity['End']
|
| 53 |
-
|
| 54 |
# add the text after the last entity
|
| 55 |
highlighted_text += text[last_idx:]
|
| 56 |
-
|
| 57 |
-
# again we will use an HTML div to make the output look better :)
|
| 58 |
return f"<div>{highlighted_text}</div>"
|
| 59 |
|
| 60 |
# The last function which will combine the two previous functions and will be used in the interface
|
| 61 |
def NER_output(text):
    """Produce both interface outputs for *text*.

    Returns a 2-tuple: the HTML string from ``highlight_entities`` and the
    entity table from ``entities_to_df``, in that order.
    """
    highlighted_markup = highlight_entities(text)
    entity_table = entities_to_df(text)
    return highlighted_markup, entity_table
|
| 65 |
|
|
@@ -68,9 +84,10 @@ default_value ="J.K. Rowling wrote the Harry Potter series, which was published
|
|
| 68 |
|
| 69 |
# Gradio Interface
|
| 70 |
demo = gr.Interface(
|
| 71 |
-
fn=NER_output,
|
| 72 |
-
inputs=gr.Textbox(label="Enter text:", lines=6, value = default_value),
|
| 73 |
-
outputs=[gr.HTML(label="
|
|
|
|
| 74 |
#above, we used the NER_output, and since that function return the html and the df there will be two outputs
|
| 75 |
#The first is gr.HTML and the second is gr.Dataframe
|
| 76 |
)
|
|
|
|
| 8 |
|
| 9 |
ner = pipeline('ner', model = 'FacebookAI/xlm-roberta-large-finetuned-conll03-english', grouped_entities = True)
|
| 10 |
|
| 11 |
+
#a function to split each sentence containing an entity in the text by commas.
|
| 12 |
+
#start to comma, comma to comma, last comma to the remaining text
|
| 13 |
+
def split_sentences(text: str, start: int, end: int, delimiter: str = ",") -> str:
    """Return the delimiter-bounded clause of *text* that contains an entity.

    The clause runs from just after the nearest ``delimiter`` before
    ``start`` (or the beginning of *text* if there is none) up to the first
    ``delimiter`` at or after ``end`` (or the end of *text* if there is
    none). Surrounding whitespace is stripped from the result.

    Args:
        text: The full input text.
        start: Character offset where the entity begins.
        end: Character offset just past the entity.
        delimiter: Separator that bounds a clause. Defaults to a comma.

    Returns:
        The stripped clause surrounding the entity span.

    Raises:
        ValueError: If ``delimiter`` is an empty string.
    """
    if not delimiter:
        # An empty separator matches everywhere and would yield a meaningless span.
        raise ValueError("delimiter must be a non-empty string")

    # Nearest delimiter before the entity; skip past it when found.
    left = text.rfind(delimiter, 0, start)
    clause_start = 0 if left == -1 else left + len(delimiter)

    # First delimiter after the entity; fall back to the end of the text.
    right = text.find(delimiter, end)
    clause_end = len(text) if right == -1 else right

    return text[clause_start:clause_end].strip()
|
| 28 |
|
| 29 |
+
|
| 30 |
+
#Converting the NER output into a DataFrame:
|
| 31 |
def entities_to_df(text):
|
| 32 |
all_entities = []
|
| 33 |
+
entities = ner(text)#the NER model will be used on the input text
|
|
|
|
| 34 |
|
| 35 |
+
#putting the entities into a data frame with the needed keys + calling the split_sentences function in the for loop
|
| 36 |
for entity in entities:
|
| 37 |
+
sentence = split_sentences(text, entity['start'], entity['end'])
|
| 38 |
all_entities.append({
|
| 39 |
"Entity": entity['word'],
|
| 40 |
+
"Type" : entity['entity_group'], #loc, org, per, misc
|
| 41 |
"Score": float((entity['score'])),
|
| 42 |
"Start": entity['start'],
|
| 43 |
"End": entity['end'],
|
| 44 |
+
"Sentence": sentence,
|
| 45 |
})
|
| 46 |
|
| 47 |
df = pd.DataFrame(all_entities)
|
| 48 |
+
|
| 49 |
#the df in the output did not round the score above so I rounded it after creating the df
|
| 50 |
df['Score'] = df['Score'].round(4)
|
| 51 |
|
|
|
|
| 57 |
df = entities_to_df(text)
|
| 58 |
highlighted_text = ""
|
| 59 |
last_idx = 0
|
| 60 |
+
|
| 61 |
+
# Iterating the DF rows in order
|
| 62 |
for i, entity in df.iterrows(): #iterrows is a function in the df to iterate by rows
|
| 63 |
# Add the text before the entity
|
| 64 |
highlighted_text += text[last_idx:entity['Start']]
|
| 65 |
+
#highlighting the entities in RED using an inline HTML div with CSS, followed by their types (PER, ORG, LOC or MISC)
|
|
|
|
|
|
|
| 66 |
highlighted_text += f"<div style='background-color: red; display: inline;'>{entity['Entity']} ({entity['Type']})</div>"
|
|
|
|
| 67 |
#updating the index after the current entity
|
| 68 |
last_idx = entity['End']
|
| 69 |
+
|
| 70 |
# add the text after the last entity
|
| 71 |
highlighted_text += text[last_idx:]
|
| 72 |
+
|
| 73 |
+
# again we will use an HTML div block to make the output look better :)
|
| 74 |
return f"<div>{highlighted_text}</div>"
|
| 75 |
|
| 76 |
# The last function which will combine the two previous functions and will be used in the interface
|
| 77 |
def NER_output(text):
    """Produce both interface outputs for *text*.

    Returns a 2-tuple: the HTML string from ``highlight_entities`` and the
    entity table from ``entities_to_df``, in that order.
    """
    # NOTE(review): highlight_entities appears to call entities_to_df itself,
    # so the table is built twice per request - confirm before refactoring.
    highlighted_markup = highlight_entities(text)
    entity_table = entities_to_df(text)
    return highlighted_markup, entity_table
|
| 81 |
|
|
|
|
| 84 |
|
| 85 |
# Gradio Interface
|
| 86 |
demo = gr.Interface(
    fn=NER_output,
    inputs=gr.Textbox(label="Enter text:", lines=6, value=default_value),
    # NER_output returns (html, df), so two output components are declared:
    # the first is gr.HTML and the second is gr.Dataframe.
    outputs=[
        gr.HTML(label="Entities Highlighted"),
        gr.Dataframe(label="Entities in DataFrame format"),
    ],
    title="NER model with highlighted entities",
)
|