Spaces:
Runtime error
Runtime error
| from sentence_transformers import SentenceTransformer, util | |
| from keybert import KeyBERT | |
| from preprocess_function import bert_preprocess | |
| import spacy | |
| import spacy_transformers | |
| import spacy.cli | |
# Name of the spaCy transformer pipeline used for lemmatisation, NER and
# part-of-speech tagging in this module.
_SPACY_MODEL = "en_core_web_trf"

# Try to load the pipeline first and only download it when it is missing:
# spacy.load raises OSError if the package is not installed. Downloading
# unconditionally would re-install the model on every start-up.
try:
    nlp = spacy.load(_SPACY_MODEL)
except OSError:
    spacy.cli.download(_SPACY_MODEL)
    nlp = spacy.load(_SPACY_MODEL)

# Sentence-embedding model shared with KeyBERT for keyword extraction.
model_sent = SentenceTransformer("all-mpnet-base-v2")
kw_model = KeyBERT(model=model_sent)

# Words to exclude from extracted topics, one per line in the text file.
# The encoding is explicit so the result does not depend on the platform
# default (assumes the word list is UTF-8/ASCII -- TODO confirm).
with open("common_words_v2.txt", "r", encoding="utf-8") as file:
    common_words = [line.strip() for line in file]
def extract_topics(event_value):
    """Return the topic words extracted from *event_value*.

    The text is passed through ``bert_preprocess`` and then to KeyBERT,
    which is asked for its top 10 candidates. Candidates whose second
    element (presumably the relevance score) is below 0.3 are dropped.
    Each remaining keyphrase is split into spaCy lemmas; a lemma is
    discarded entirely when spaCy finds an entity in it whose label is in
    the removal set, verb tokens are skipped, and words present in
    ``common_words`` are excluded.

    Args:
        event_value: Raw text to extract topics from (handed to
            ``bert_preprocess`` unchanged).

    Returns:
        A list of unique words in first-seen order, i.e. following the
        order in which KeyBERT returned the candidates.
    """
    # Entity labels that mark a word as not being a topic
    # (numbers, dates, places, people, ...).
    entity_remove = {'CARDINAL', 'DATE', 'GPE', 'LANGUAGE', 'LOC',
                     'NORP', 'ORDINAL', 'PERCENT', 'PERSON', 'QUANTITY', 'TIME'}

    keywords = kw_model.extract_keywords(bert_preprocess(event_value), top_n=10)

    # Lemmatise every sufficiently relevant keyphrase. dict.fromkeys removes
    # duplicate lemmas while keeping their order, so each distinct lemma is
    # run through the (expensive) pipeline only once below.
    lemmas = dict.fromkeys(
        token.lemma_
        for key in keywords
        if key[1] >= 0.3
        for token in nlp(key[0])
    )

    excluded = set(common_words)

    # A dict is used as an ordered set: the previous list(set(...)) returned
    # the same words but in an arbitrary, run-dependent order.
    topics = {}
    for lemma in lemmas:
        doc = nlp(lemma)
        if any(ent.label_ in entity_remove for ent in doc.ents):
            continue
        for token in doc:
            if token.pos_ != 'VERB' and token.text not in excluded:
                topics.setdefault(token.text, None)
    return list(topics)