Spaces:
Runtime error
Runtime error
| ### Imports | |
| import stanza | |
| import pandas as pd | |
| import traceback | |
class StanzaSegmentizer:
    """Sentence segmenter backed by a Stanza English pipeline.

    The pipeline is created in ``__init__``. Creation is best-effort: if it
    fails, the error is printed and the instance is left without a pipeline,
    in which case ``segment_into_sentences`` raises ``RuntimeError``.
    """

    ##==========================================================================================================
    # Definition of attributes
    # Pipeline used for segmentation; stays None until initialize_stanza() succeeds.
    __nlp_stanza = None

    ##==========================================================================================================
    def __init__(self):
        """Create the Stanza pipeline unless one is already set."""
        try:
            if self.__nlp_stanza is None:
                print("Initializing stanza")
                self.initialize_stanza()
        except Exception as excMsg:
            print(excMsg)

    ##==========================================================================================================
    def initialize_stanza(self):
        """Build the English Stanza pipeline and store it on the instance.

        Returns:
            The created pipeline, or ``None`` when creation failed. Failures
            are printed with their traceback rather than raised.
        """
        try:
            self.__nlp_stanza = stanza.Pipeline('en')
        except Exception:
            print(f"An error happens in initialize_stanza(...) {traceback.format_exc()}.")
            self.__nlp_stanza = None
        return self.__nlp_stanza

    ##==========================================================================================================
    def segment_into_sentences(self, src_text="", _format="str"):
        """Split text into sentences using the pipeline.

        Args:
            src_text: A single string, or a list/tuple of strings. Each item
                is segmented in order and the sentences are concatenated.
            _format: ``"str"`` (default) returns the ``text`` of every
                sentence; any other value returns the sentence objects
                produced by the pipeline.

        Returns:
            A list of sentence strings, or a list of sentence objects.

        Raises:
            RuntimeError: If no pipeline is available (initialization failed).
            TypeError: If ``src_text`` is neither a string nor a list/tuple.
        """
        if self.__nlp_stanza is None:
            # Fail with an explicit message instead of the opaque
            # "'NoneType' object is not callable" raised by calling None.
            raise RuntimeError(
                "Stanza pipeline is not initialized; call initialize_stanza() first."
            )

        # Normalize both accepted input shapes to a sequence of texts so a
        # single loop handles them.
        if isinstance(src_text, str):
            texts = [src_text]
        elif isinstance(src_text, (list, tuple)):
            texts = src_text
        else:
            raise TypeError(
                "src_text must be a str or a list of str, "
                f"got {type(src_text).__name__}"
            )

        sentences = []
        for text in texts:
            sentences.extend(self.__nlp_stanza(text).sentences)

        if _format == "str":
            return [sentence.text for sentence in sentences]
        return sentences

    ##==========================================================================================================