# Hugging Face Spaces page capture -- Space status: Build error
import streamlit as st

# Page configuration is issued immediately after importing Streamlit and
# before any other st.* call in this script.
st.set_page_config(
    layout="centered",  # Can be "centered" or "wide". In the future also "dashboard", etc.
    initial_sidebar_state="auto",  # Can be "auto", "expanded", "collapsed"
    # Browser-tab title. The previous value ('Extractive Summarization') was a
    # leftover from another demo; this app performs named entity recognition.
    page_title='Detect up to 8 entity types in general domain texts',  # String or None. Strings get appended with "• Streamlit".
    page_icon='./favicon.png',  # String, anything supported by st.image, or None.
)
# --- Standard library -------------------------------------------------------
import os
import sys

# --- Data libraries ---------------------------------------------------------
import numpy as np
import pandas as pd

# --- Project-local helpers --------------------------------------------------
# The current working directory is appended to sys.path first so that the
# helper modules below can be imported.
sys.path.append(os.path.abspath('./'))
import streamlit_apps_config as config
from streamlit_ner_output import show_html2, jsl_display_annotations, get_color

# --- Spark NLP / PySpark ----------------------------------------------------
# The relative order of these imports is kept as-is: the wildcard imports come
# first and the explicit `Pipeline` import after them.
import sparknlp
from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.sql import functions as F
from sparknlp_display import NerVisualizer
from pyspark.ml import Pipeline
from pyspark.sql.types import StringType

# Module-level Spark session used by the pipeline code further down.
spark = sparknlp.start()
## Inject the NER style configuration provided by the app config module
st.markdown(config.STYLE_CONFIG, unsafe_allow_html=True)
root_path = config.project_path

########## To Remove the Main Menu Hamburger ########
# The snippet is assembled line by line; the leading and trailing empty items
# reproduce the newlines at both ends of the original string.
hide_menu_style = "\n".join(
    [
        "",
        "<style>",
        "#MainMenu {visibility: hidden;}",
        "</style>",
        "",
    ]
)
st.markdown(hide_menu_style, unsafe_allow_html=True)
########## Side Bar ########
## loading logo (newer version with href)
import base64


def get_base64_of_bin_file(bin_file) -> str:
    """Return the contents of a file as a base64-encoded string.

    Args:
        bin_file: Path of the file to read; it is opened in binary mode.

    Returns:
        The base64 encoding of the file's bytes, decoded to ``str``. An empty
        file yields an empty string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(bin_file, 'rb') as f:
        data = f.read()
    # base64 output only contains ASCII characters, so this decode cannot fail.
    return base64.b64encode(data).decode('ascii')
def get_img_with_href(local_img_path, target_url):
    """Build an HTML snippet showing a local image as a clickable link.

    The image file is embedded inline as a base64 ``data:`` URI, so the
    resulting HTML does not reference the file on disk.

    Args:
        local_img_path: Path of the image file to embed.
        target_url: URL the image links to.

    Returns:
        An HTML string: an ``<a>`` element wrapping an ``<img>`` element.

    Raises:
        OSError: If the image file cannot be read.
    """
    import html
    import mimetypes

    # Derive a proper media type from the file name. Using the bare extension
    # produces invalid types for common formats (e.g. ".jpg" -> "image/jpg"
    # instead of "image/jpeg", ".svg" -> "image/svg" instead of
    # "image/svg+xml").
    mime_type, _ = mimetypes.guess_type(local_img_path)
    if mime_type is None or not mime_type.startswith("image/"):
        # Unknown extension: fall back to the previous "image/<ext>" behavior.
        img_format = os.path.splitext(local_img_path)[-1].replace('.', '')
        mime_type = f"image/{img_format}"

    bin_str = get_base64_of_bin_file(local_img_path)

    # Escape the URL so characters such as '"' or '&' cannot break out of the
    # href attribute.
    safe_url = html.escape(target_url, quote=True)

    html_code = (
        "\n"
        f'<a href="{safe_url}">\n'
        f'<img height="90%" width="90%" src="data:{mime_type};base64,{bin_str}" />\n'
        "</a>"
    )
    return html_code
# Company logo at the top of the sidebar, linking to the company website.
logo_html = get_img_with_href('./jsl-logo.png', 'https://www.johnsnowlabs.com/')
st.sidebar.markdown(logo_html, unsafe_allow_html=True)

# Sidebar: model picker (a single model is offered).
model_name = ["nerdl_fewnerd_100d"]
st.sidebar.title("Pretrained model to test")
selected_model = st.sidebar.selectbox("", model_name)

######## Main Page #########
app_title = "Detect up to 8 entity types in general domain texts"
app_description = "Named Entity Recognition model aimed to detect up to 8 entity types from general domain texts. This model was trained on the Few-NERD/inter public dataset using Spark NLP, and is available in Spark NLP Models hub (https://nlp.johnsnowlabs.com/models)"

st.title(app_title)
st.markdown(f"<h2>{app_description}</h2>", unsafe_allow_html=True)

# List the entity labels for the selected model.
if selected_model == "nerdl_fewnerd_100d":
    st.markdown("**`PERSON`** **,** **`ORGANIZATION`** **,** **`LOCATION`** **,** **`ART`** **,** **`BUILDING`** **,** **`PRODUCT`** **,** **`EVENT`** **,** **`OTHER`**", unsafe_allow_html=True)

# Empty subheader after the label list.
st.subheader("")
#### Running model and creating pipeline
@st.cache(allow_output_mutation=True)
def _load_ner_pipeline():
    """Assemble the NER pipeline, loading the pretrained models.

    Decorated with ``st.cache`` so the pretrained embeddings and NER model are
    not loaded again on every Streamlit rerun. (Previously ``st.cache(...)``
    was called as a bare statement without ``@``, which decorated nothing.)
    ``allow_output_mutation=True`` is kept from the original call.

    Returns:
        An unfitted ``Pipeline`` with the stages: document assembler, sentence
        detector, tokenizer, word embeddings, NER model, NER converter.
    """
    documentAssembler = DocumentAssembler()\
        .setInputCol("text")\
        .setOutputCol("document")

    sentenceDetector = SentenceDetector()\
        .setInputCols(["document"])\
        .setOutputCol("sentence")

    tokenizer = Tokenizer()\
        .setInputCols(["sentence"])\
        .setOutputCol("token")

    embeddings = WordEmbeddingsModel.pretrained("glove_100d")\
        .setInputCols(["sentence", "token"])\
        .setOutputCol("embeddings")

    # NOTE(review): the NER stage reads "document" while the embeddings and
    # the converter read "sentence" -- confirm this mix is intended.
    ner = NerDLModel.pretrained("nerdl_fewnerd_100d")\
        .setInputCols(["document", "token", "embeddings"])\
        .setOutputCol("ner")

    ner_converter = NerConverter()\
        .setInputCols(["sentence", "token", "ner"])\
        .setOutputCol("ner_chunk")

    return Pipeline(
        stages=[
            documentAssembler,
            sentenceDetector,
            tokenizer,
            embeddings,
            ner,
            ner_converter,
        ])


def get_pipeline(text):
    """Run the NER pipeline on a single text and return the result.

    Args:
        text: The input string to annotate.

    Returns:
        A pandas DataFrame with one row for ``text``, holding the columns
        produced by the pipeline stages (including ``ner_chunk``).
    """
    pipeline = _load_ner_pipeline()

    # The pipeline is fitted on a DataFrame holding a single empty string,
    # which yields a model that can then transform the real input.
    empty_df = spark.createDataFrame([[""]]).toDF("text")
    pipeline_model = pipeline.fit(empty_df)

    text_df = spark.createDataFrame(pd.DataFrame({"text": [text]}))
    result = pipeline_model.transform(text_df).toPandas()
    return result
text = st.text_input("Type here your text and press enter to run:")
result = get_pipeline(text)

# Displaying NER visualization
# One DataFrame row per chunk found in the (single) input text.
df = pd.DataFrame({"ner_chunk": result["ner_chunk"].iloc[0]})

# Collect the distinct entity labels present in the text. Index 4 of each
# chunk is read as a mapping with an 'entity' key (presumably the annotation's
# metadata field -- verify against the Spark NLP annotation schema).
# Sorting keeps the sidebar options in a stable order; iterating a plain set
# of strings can yield a different order from one process start to the next.
labels_set = sorted({chunk[4]['entity'] for chunk in df['ner_chunk'].values})

# All detected labels are selected by default; the user can narrow them down.
labels = st.sidebar.multiselect(
    "NER Labels", options=labels_set, default=list(labels_set)
)

show_html2(text, df, labels, "Text annotated with identified Named Entities")