Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import spacy | |
| from botocore.exceptions import ClientError | |
| from transformers import pipeline | |
| import boto3 | |
# Both local models are loaded once at import time so that every request
# handled by the app reuses the same instances.
nlp = spacy.load("en_core_web_sm")

# `aggregation_strategy="simple"` already merges word pieces into whole
# entities (the results expose an "entity_group" key). The deprecated
# `grouped_entities` flag selects the same strategy, so it is not passed.
ner_pipeline = pipeline(
    "ner",
    model="Jean-Baptiste/roberta-large-ner-english",
    aggregation_strategy="simple",
)
def greet(model_type, text):
    """Run the selected NER backend on ``text`` and format it for highlighting.

    Args:
        model_type: Name of the backend to use: ``"Spacy"``, ``"Roberta"``
            or ``"AWS Comprehend"``.
        text: The text to annotate.

    Returns:
        * ``"Spacy"``: a list of ``(token_text, label)`` tuples, one per
          token, where ``label`` is ``None`` for tokens outside any entity.
        * ``"Roberta"`` / ``"AWS Comprehend"``: a dict holding the original
          ``"text"`` and an ``"entities"`` list whose items carry ``"word"``,
          ``"entity"``, ``"start"`` and ``"end"``.
        * any other ``model_type`` (for example, nothing selected): ``None``.

    Raises:
        botocore.exceptions.ClientError: if the Comprehend entity-detection
            call fails. Only the language-detection call is guarded.
    """
    if model_type == "Spacy":
        doc = nlp(text)
        # spaCy reports an empty label for tokens that are not in an entity.
        return [(token.text, token.ent_type_ or None) for token in doc]

    if model_type == "Roberta":
        output = ner_pipeline(text)
        print(output)
        return {
            "text": text,
            "entities": [
                {
                    "word": entity["word"],
                    "entity": entity["entity_group"],
                    "start": entity["start"],
                    "end": entity["end"],
                }
                for entity in output
            ],
        }

    if model_type == "AWS Comprehend":
        # Comprehend rejects empty text, and that rejection is raised by
        # request validation rather than as a ClientError, so answer directly
        # instead of calling the service.
        if not text:
            return {"text": text, "entities": []}

        client = boto3.client("comprehend")

        # Best effort: fall back to English whenever the dominant language
        # cannot be determined.
        language = "en"
        try:
            response = client.detect_dominant_language(Text=text)
        except ClientError:
            print("Couldn't detect languages.")
        else:
            languages = response["Languages"]
            print("Detected %s languages." % len(languages))
            if languages:
                language = languages[0]["LanguageCode"]

        response = client.detect_entities(Text=text, LanguageCode=language)
        print(response)
        return {
            "text": text,
            "entities": [
                {
                    "word": entity["Text"],
                    "entity": entity["Type"],
                    "start": entity["BeginOffset"],
                    "end": entity["EndOffset"],
                }
                for entity in response["Entities"]
            ],
        }

    # Unknown or missing backend selection: nothing to highlight.
    return None
# Blurb passed to the interface as its description.
description = """Compare the NER outputs of Spacy, HuggingFace Roberta and AWS Comprehend.
These models are off the shelf models, which have not been finetuned. This is just to show a baseline,
before we start finetuning the models. All of them can be finetuned (including AWS Comprehend).
AWS Comprehend can be finetuned using Entity lists, without having to annotate full documents by hand."""

# Two inputs (backend selector + free text) feed `greet`; its result goes to
# the highlight output. One example is supplied: a documentary-credit excerpt
# paired with the AWS Comprehend backend.
demo = gr.Interface(
    fn=greet,
    inputs=[gr.Radio(["Spacy", "Roberta", "AWS Comprehend"]), "text"],
    outputs="highlight",
    title="Comparison of NER Options",
    description=description,
    examples=[
        [
            "AWS Comprehend",
            """We hereby issue in your favour this documentary credit which is available by
negotiation of your drafts at sight drawn on L/C Openers Bank at Chennai on account of
M/s.TANGEDCO Limited bearing the number, date of the documentary credit and the
name of the issuing bank of this credit for 100% invoice value accompanied by the
following documents.""",
        ],
    ],
)
demo.launch()