|
|
|
|
|
import os |
|
|
|
|
|
import nltk |
|
|
nltk.download('punkt_tab') |
|
|
nltk.download('averaged_perceptron_tagger_eng') |
|
|
|
|
|
import streamlit as st |
|
|
from PIL import Image |
|
|
from accord_nlp.information_extraction.convertor import entity_pairing, graph_building |
|
|
from accord_nlp.information_extraction.ie_pipeline import InformationExtractor |
|
|
|
|
|
from trubrics.integrations.streamlit import FeedbackCollector |
|
|
|
|
|
ner_args = { |
|
|
"labels_list": ["O", "B-quality", "B-property", "I-property", "I-quality", "B-object", "I-object", "B-value", "I-value"], |
|
|
"use_multiprocessing": False, |
|
|
"process_count": 1 |
|
|
} |
|
|
|
|
|
re_args = { |
|
|
"labels_list": ["selection", "necessity", "none", "greater", "part-of", "equal", "greater-equal", "less-equal", "not-part-of", "less"], |
|
|
"special_tags": ["<e1>", "<e2>"], |
|
|
"use_multiprocessing": False, |
|
|
"process_count": 1 |
|
|
} |
|
|
|
|
|
@st.cache_resource |
|
|
def init(): |
|
|
return InformationExtractor( |
|
|
ner_model_info=('roberta', 'ACCORD-NLP/ner-roberta-large', ner_args), |
|
|
re_model_info=('roberta', 'ACCORD-NLP/re-roberta-large', re_args)) |
|
|
|
|
|
|
|
|
st.set_page_config( |
|
|
page_title='ACCORD NLP Demo', |
|
|
initial_sidebar_state='expanded', |
|
|
layout='wide', |
|
|
) |
|
|
|
|
|
with st.spinner(text="Initialising..."): |
|
|
ie = init() |
|
|
|
|
|
|
|
|
collector = FeedbackCollector( |
|
|
|
|
|
email=st.secrets["TRUBRICS_EMAIL"], |
|
|
password=st.secrets["TRUBRICS_PASSWORD"], |
|
|
project="accord-nlp-ie" |
|
|
) |
|
|
|
|
|
|
|
|
def main(): |
|
|
image = Image.open(os.path.join(os.path.dirname(__file__), 'accord_logo.png')) |
|
|
st.sidebar.image(image) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
st.sidebar.header("Information Extractor") |
|
|
st.sidebar.markdown("Extract entities and their relations from textual data") |
|
|
st.sidebar.markdown( |
|
|
"[codebase](https://github.com/Accord-Project/NLP-Framework)" |
|
|
) |
|
|
st.sidebar.markdown( |
|
|
"[models](https://huggingface.co/ACCORD-NLP)" |
|
|
) |
|
|
|
|
|
|
|
|
if 'text' not in st.session_state: |
|
|
st.session_state['text'] = '' |
|
|
if 'graph' not in st.session_state: |
|
|
st.session_state['graph'] = None |
|
|
|
|
|
st.header("Input a sentence") |
|
|
|
|
|
txt = st.text_area('Sentence') |
|
|
|
|
|
if txt: |
|
|
if txt == st.session_state['text']: |
|
|
st.header('Entity-Relation Representation') |
|
|
st.graphviz_chart(st.session_state['graph'], use_container_width=True) |
|
|
|
|
|
st.session_state['text'] = txt |
|
|
|
|
|
else: |
|
|
st.session_state['text'] = txt |
|
|
st.session_state['graph'] = None |
|
|
|
|
|
|
|
|
sentence = ie.preprocess(txt) |
|
|
|
|
|
|
|
|
with st.spinner(text="Recognising entities..."): |
|
|
ner_predictions, ner_raw_outputs = ie.ner_model.predict([sentence]) |
|
|
|
|
|
with st.spinner(text="Extracting relations..."): |
|
|
|
|
|
entity_pair_df = entity_pairing(sentence, ner_predictions[0]) |
|
|
|
|
|
|
|
|
re_predictions, re_raw_outputs = ie.re_model.predict(entity_pair_df['output'].tolist()) |
|
|
entity_pair_df['prediction'] = re_predictions |
|
|
|
|
|
with st.spinner(text="Building graph..."): |
|
|
|
|
|
graph = graph_building(entity_pair_df, view=False) |
|
|
|
|
|
st.header('Entity-Relation Representation') |
|
|
|
|
|
st.graphviz_chart(graph, use_container_width=True) |
|
|
|
|
|
st.session_state['graph'] = graph |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__': |
|
|
main() |