correct formatting
Browse files
app.py
CHANGED
|
@@ -1,84 +1,100 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from transformers import pipeline
|
| 3 |
|
| 4 |
-
get_completion = pipeline("summarization",model="sshleifer/distilbart-cnn-12-6")
|
| 5 |
get_ner = pipeline("ner", model="dslim/bert-base-NER")
|
| 6 |
-
get_zero = pipeline(
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
def summarize_text(input):
|
| 9 |
output = get_completion(input)
|
| 10 |
-
return output[0][
|
|
|
|
| 11 |
|
| 12 |
def merge_tokens(tokens):
|
| 13 |
merged_tokens = []
|
| 14 |
for token in tokens:
|
| 15 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
# If current token continues the entity of the last one, merge them
|
| 17 |
last_token = merged_tokens[-1]
|
| 18 |
-
last_token[
|
| 19 |
-
last_token[
|
| 20 |
-
last_token[
|
| 21 |
else:
|
| 22 |
# Otherwise, add the token to the list
|
| 23 |
merged_tokens.append(token)
|
| 24 |
return merged_tokens
|
| 25 |
|
|
|
|
| 26 |
def named_entity_recognition(input):
|
| 27 |
output = get_ner(input)
|
| 28 |
merged_output = merge_tokens(output)
|
| 29 |
return {"text": input, "entities": output}
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
|
|
|
| 33 |
return output
|
| 34 |
|
| 35 |
-
|
| 36 |
-
|
|
|
|
| 37 |
out = {}
|
| 38 |
-
for i,j in zip(zero_shot_out[
|
| 39 |
-
out.update({i:j})
|
| 40 |
print(out)
|
| 41 |
return out
|
| 42 |
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
|
| 83 |
if __name__ == "__main__":
|
| 84 |
demo.launch(enable_queue=True)
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from transformers import pipeline
|
| 3 |
|
| 4 |
+
# Hugging Face pipelines shared by the handlers below. They are created once at
# import time so every request reuses the same loaded model.

# Summarization model, used by summarize_text().
get_completion = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
)

# Token-level named-entity-recognition model, used by named_entity_recognition().
get_ner = pipeline(
    "ner",
    model="dslim/bert-base-NER",
)

# Zero-shot classification model, used by zero_shot_pred().
get_zero = pipeline(
    "zero-shot-classification",
    model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli",
)
|
| 9 |
+
|
| 10 |
|
| 11 |
def summarize_text(input):
    """Summarize ``input`` with the module-level ``get_completion`` pipeline.

    Args:
        input: Text to summarize.

    Returns:
        The ``"summary_text"`` field of the first result returned by the
        pipeline.
    """
    first_result = get_completion(input)[0]
    return first_result["summary_text"]
|
| 14 |
+
|
| 15 |
|
| 16 |
def merge_tokens(tokens):
    """Merge consecutive NER tokens that belong to the same entity.

    A token is folded into the previously collected entry when its
    ``"entity"`` tag starts with ``"I-"`` and the previous entry's tag ends
    with the same entity type (the text after the two-character prefix).
    Any other token starts a new entry.

    Args:
        tokens: Iterable of token dicts. Each needs an ``"entity"`` key;
            tokens that get merged also need ``"word"``, ``"end"`` and
            ``"score"``.

    Returns:
        A new list of token dicts. The input dicts are never modified:
        every entry in the result is a shallow copy, so the caller's raw
        pipeline output stays intact.
    """
    merged_tokens = []
    for token in tokens:
        if (
            merged_tokens
            and token["entity"].startswith("I-")
            and merged_tokens[-1]["entity"].endswith(token["entity"][2:])
        ):
            # Current token continues the entity of the last one: extend it.
            last_token = merged_tokens[-1]
            # Drop the "##" marker so sub-word pieces join into one word.
            last_token["word"] += token["word"].replace("##", "")
            last_token["end"] = token["end"]
            # Pairwise running average, not the arithmetic mean over all the
            # merged pieces.
            last_token["score"] = (last_token["score"] + token["score"]) / 2
        else:
            # Start a new entry from a copy, so later merges into it cannot
            # mutate the caller's token.
            merged_tokens.append(dict(token))
    return merged_tokens
|
| 33 |
|
| 34 |
+
|
| 35 |
def named_entity_recognition(input):
    """Run NER on ``input`` and return the text with its merged entities.

    Args:
        input: Text to analyse.

    Returns:
        A dict with two keys: ``"text"`` (the unchanged input) and
        ``"entities"`` (the output of ``get_ner`` after ``merge_tokens``
        has combined the tokens it considers part of one entity).
    """
    output = get_ner(input)
    merged_output = merge_tokens(output)
    # Return the merged entities; the previous code computed them but then
    # returned the raw per-token output instead.
    return {"text": input, "entities": merged_output}
|
| 39 |
|
| 40 |
+
|
| 41 |
+
def zero_shot_pred(text, check_labels):
    """Classify ``text`` against ``check_labels`` with the ``get_zero`` pipeline.

    Args:
        text: Text to classify.
        check_labels: Candidate labels, forwarded to the pipeline unchanged.

    Returns:
        Whatever the ``get_zero`` pipeline returns for these arguments.
    """
    return get_zero(text, check_labels)
|
| 44 |
|
| 45 |
+
|
| 46 |
+
def label_score_dict(text, check_labels):
    """Return a ``{label: score}`` dict for a zero-shot classification.

    Args:
        text: Text to classify.
        check_labels: Candidate labels, passed straight to ``zero_shot_pred``.
            NOTE(review): the UI supplies this from a single textbox,
            presumably as a comma-separated string; confirm the pipeline
            splits it as intended.

    Returns:
        A dict pairing each entry of the prediction's ``"labels"`` with the
        entry at the same position in its ``"scores"``.
    """
    prediction = zero_shot_pred(text, check_labels)
    out = dict(zip(prediction["labels"], prediction["scores"]))
    # The mapping is also written to stdout, as before.
    print(out)
    return out
|
| 53 |
|
| 54 |
+
|
| 55 |
+
# Summarization UI: one multi-line input textbox wired to summarize_text(),
# with the result shown in an output textbox.
interface_summarise = gr.Interface(
    fn=summarize_text,
    inputs=[gr.Textbox(label="Text to summarise", lines=5)],
    outputs=[gr.Textbox(label="Summary")],
    title="Text Summarizer",
    description="Summary of text via `distillBART-CNN` model!",
)
|
| 62 |
+
|
| 63 |
+
# NER UI: the text goes to named_entity_recognition() and the returned
# {"text", "entities"} dict is shown in a HighlightedText component.
# Flagging is turned off for this interface.
interface_ner = gr.Interface(
    fn=named_entity_recognition,
    inputs=[gr.Textbox(label="Text to find entities", lines=2)],
    outputs=[gr.HighlightedText(label="Text with entities")],
    title="NER with dslim/bert-base-NER",
    description="Find entities using the `dslim/bert-base-NER` model under the hood!",
    allow_flagging="never",
    # Sample sentences offered in the UI.
    examples=[
        "Tim Cook is the CEO of Apple, stays in California and makes iPhones ",
        "My name is Bose and I am a physicist living in Delhi",
    ],
)
|
| 75 |
+
|
| 76 |
+
# Zero-shot classification UI: the first textbox holds the text, the second the
# candidate labels. label_score_dict() returns {label: score}, shown in a Label
# component configured with num_top_classes=4.
interface_zero_shot = gr.Interface(
    fn=label_score_dict,
    inputs=[
        gr.Textbox(label="Text to classify", lines=2),
        gr.Textbox(label="Check for labels"),
    ],
    outputs=gr.Label(num_top_classes=4),
    title="Zero-Shot Preds using DeBERTa-v3-base-mnli",
    description="Classify sentence on self defined target vars",
    # Each example is one [text, labels] pair, matching the two inputs above.
    examples=[
        [
            "Last week I upgraded my iOS version and ever since then my phone has been overheating whenever I use your app.",
            "mobile, website, billing, account access",
        ],
    ],
)
|
| 93 |
+
|
| 94 |
+
# Combine the three interfaces into one tabbed app. The second list gives the
# tab titles, in the same order as the interfaces.
demo = gr.TabbedInterface(
    [interface_summarise, interface_ner, interface_zero_shot],
    ["Text Summary ", "Named Entity Recognition", "Zero Shot Classifications"],
)
|
| 98 |
|
| 99 |
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    # NOTE(review): newer Gradio releases are believed to deprecate the
    # `enable_queue` launch argument in favour of `demo.queue()`; confirm
    # against the Gradio version this app is pinned to.
    demo.launch(enable_queue=True)
|