Spaces:
Runtime error
Runtime error
| # Gradio | |
| import gradio as gr | |
| # Hugging Face libraries | |
| from transformers import pipeline | |
| from transformers import AutoTokenizer | |
# Hugging Face Hub id of the checkpoint; as its name indicates, a cased
# BERT-large model fine-tuned on the English CoNLL-03 NER data.
model_checkpoint = "dbmdz/bert-large-cased-finetuned-conll03-english"

# Token-classification pipeline for that checkpoint. With the "simple"
# aggregation strategy each prediction describes a group of tokens
# (predict() reads its `entity_group`, `word` and `score` fields).
ner_task = pipeline(
    task="ner",
    model=model_checkpoint,
    aggregation_strategy="simple",
)

# Stand-alone tokenizer for the same checkpoint; predict() uses it to show
# the token pieces of the submitted sentence.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
# Sample sentences, offered as ready-made examples in the Gradio interface.
sentence1 = "Herbert Akroyd Stuart patented the first diesel engine, 1890"
# Adjacent string literals are joined at compile time, so the value does not
# depend on how the continuation line is indented (a backslash continuation
# inside the quotes would pull any leading whitespace into the string).
sentence2 = (
    "May 10 A delegation tells Leopold III his return would be "
    "illtimed, 1945"
)
sentence3 = (
    "Fri May 10 Fred Astaire (Frederick Austerlitz) born in Omaha, "
    "Nebraska, 1899"
)
sentence4 = "Fri May 10 Germany invades Low Countries, 1940"
sentence5 = "Fri May 10 Nazi bookburning, 1933"
sentence6 = "Fri May 10 Confederate Memorial Day in South Carolina"
sentence7 = "Fri May 10 Mothers Day in Guatemala"
sentence8 = "Fri May 10 Dave Mason is born in Worcester, England, 1945"
# Prediction callback wired into the Gradio interface
def predict(sentence: str, *, min_score: float = 0.8):
    """
    Tokenize a sentence and report the entities the model predicts for it.

    Relies on the module-level ``tokenizer`` and ``ner_task`` objects.

    Args:
        sentence: Text to analyse.
        min_score: Lowest ``score`` a prediction may have and still be
            reported; predictions below it are dropped. Defaults to 0.8.

    Returns:
        A ``(token_pieces, formatted_ner)`` tuple: the tokens the tokenizer
        produced for the sentence, and a text report with one numbered
        section per reported entity followed by the total count.
    """
    # Tokens of the sentence as produced by the tokenizer
    token_pieces = tokenizer(sentence).tokens()

    # Keep only the predictions that reach the score threshold
    confident = [
        entity for entity in ner_task(sentence)
        if entity['score'] >= min_score
    ]

    # One section per entity, numbered from 1 for intuitive reading.
    # The raw prediction is appended after the summary lines.
    sections = [
        f"Number: {number} \n"
        f"Entity: {entity['entity_group']}\n"
        f"Word group: {entity['word']}\n"
        f"Score: {entity['score']}\n"
        f"{entity}\n\n"
        for number, entity in enumerate(confident, start=1)
    ]
    sections.append(f"Number of predicted entities: {len(confident)}\n\n")

    return token_pieces, "".join(sections)
# Main Gradio interface
# Description shown with the title. The text is kept flush left so that no
# source indentation ends up inside the rendered Markdown/HTML.
demo_description = f"""
## Using model {model_checkpoint} to predict entities type
<p style="font-size: 1.2rem;">Notes: </p>
<ul style="font-size: 1.2rem; list-style-type:square">
<li> The examples are from the calendar utility in Linux.
<li> The model cannot recognize date and time.
<li> It can recognize PER (person), LOC (location), ORG (organization) and MISC (miscellaneous)
entities.
</ul>
"""

# One input box for the sentence; two read-only boxes for the tokens and
# for the formatted predictions returned by predict().
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.TextArea(label="Place your sentence here", lines=10,
                    show_copy_button=True),
    ],
    outputs=[
        gr.TextArea(label="Tokens input to the model", interactive=False,
                    lines=10, show_copy_button=True),
        gr.TextArea(label="Prediction of entities", interactive=False,
                    lines=10, show_copy_button=True),
    ],
    # Each example is a one-element list because there is a single input
    examples=[[sentence1], [sentence2], [sentence3], [sentence4],
              [sentence5], [sentence6], [sentence7], [sentence8]],
    title="NER (Named Entities Recognition)",
    description=demo_description,
)

demo.launch()