# stanza-digphil/stanza/utils/visualization/ssurgeon_visualizer.py
# Albin Thörn Cleland
# Clean initial commit with LFS
# 19b8775
"""
Visualization tooling for Ssurgeon
"""
import os
import sys
import stanza.utils.visualization.semgrex_visualizer as sv
import stanza.server.ssurgeon
from stanza.server.ssurgeon import process_doc_one_operation, convert_response_to_doc
from stanza.utils.conll import CoNLL
from stanza.utils.visualization.constants import *
import logging
def generate_edited_deprel_unadjusted(edited_doc, lang_code, visualize_xpos):
    """
    Produce the sentence HTML for a doc that ssurgeon has already edited.

    This is a thin wrapper around sv.get_sentences_html; it does not call
    sv.adjust_dep_arrows on the result.

    :param edited_doc: the doc to visualize, forwarded as ``doc``
    :param lang_code: language code, forwarded as ``language``
    :param visualize_xpos: forwarded unchanged as ``visualize_xpos``
    :return: whatever sv.get_sentences_html returns for these arguments
    """
    sentences_html = sv.get_sentences_html(
        doc=edited_doc,
        language=lang_code,
        visualize_xpos=visualize_xpos,
    )
    return sentences_html
def visualize_ssurgeon_deprel_adjusted_str_input(input_str, semgrex_query, ssurgeon_query, lang_code="en", visualize_xpos=False, render=False):
    """
    Visualizes the edited side of an ssurgeon edit, starting from a string.

    The string is parsed with CoNLL.conll2doc, the semgrex/ssurgeon pair is
    applied through process_doc_one_operation, and the edited doc is turned
    into HTML. Each HTML string is then passed through sv.adjust_dep_arrows.

    :param input_str: text handed to CoNLL.conll2doc as ``input_str``
    :param semgrex_query: semgrex pattern given to process_doc_one_operation
    :param ssurgeon_query: ssurgeon edit(s) given to process_doc_one_operation
    :param lang_code: language code forwarded to the HTML generation (default "en")
    :param visualize_xpos: forwarded to the HTML generation (default False)
    :param render: if true, the adjusted HTML is also passed to sv.render_html_strings
    :return: list with one arrow-adjusted HTML string per generated HTML string
    """
    doc = CoNLL.conll2doc(input_str=input_str)
    ssurgeon_response = process_doc_one_operation(doc, semgrex_query, ssurgeon_query)
    updated_doc = convert_response_to_doc(doc, ssurgeon_response)
    html_strings = generate_edited_deprel_unadjusted(updated_doc, lang_code, visualize_xpos=visualize_xpos)

    # Iterate the strings directly instead of indexing by position.
    edited_html_strings = [sv.adjust_dep_arrows(html) for html in html_strings]

    if render:
        sv.render_html_strings(edited_html_strings)
    return edited_html_strings
def main():
    """
    Demo entry point: apply a sample ssurgeon edit and show the result.

    Parses SAMPLE_SSURGEON_DOC, prints it with the ``C`` format spec, applies
    the example semgrex/ssurgeon pair, prints the edited doc and its
    unadjusted HTML, and finally runs the arrow-adjusted visualization on the
    same sample (without rendering, since ``render`` is left at its default).
    """
    # Nothing is loaded here: this only tells the user that CLASSPATH is unset.
    # It is emitted as a warning because an INFO record sent to the root logger
    # is discarded under the default logging configuration.
    if not os.environ.get('CLASSPATH'):
        logging.warning("Load the path to wherever CoreNLP is installed on your machine to $CLASSPATH.")

    # The default semgrex detects sentences in the UD_English-Pronouns dataset which have both nsubj and csubj on the same word.
    # The default ssurgeon transforms the unwanted csubj to advcl
    # See https://github.com/UniversalDependencies/docs/issues/923
    ssurgeon = ["relabelNamedEdge -edge bad -reln advcl"]  # example
    semgrex = "{}=source >nsubj {} >csubj=bad {}"  # example

    doc = CoNLL.conll2doc(input_str=SAMPLE_SSURGEON_DOC)
    print("{:C}".format(doc))

    ssurgeon_response = process_doc_one_operation(doc, semgrex, ssurgeon)
    updated_doc = convert_response_to_doc(doc, ssurgeon_response)
    print("{:C}".format(updated_doc))

    print(generate_edited_deprel_unadjusted(updated_doc, lang_code='en', visualize_xpos=False))
    visualize_ssurgeon_deprel_adjusted_str_input(SAMPLE_SSURGEON_DOC, semgrex, ssurgeon)
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()