Spaces:
Runtime error
Runtime error
File size: 1,223 Bytes
fcdc8ee 772fc72 ec5d4bf fcdc8ee 5b6ee8a 9217965 5b6ee8a 9dea803 5b6ee8a fcdc8ee 5b6ee8a fcdc8ee | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 | from sentence_transformers import SentenceTransformer, util
from keybert import KeyBERT
from preprocess_function import bert_preprocess
import spacy
import spacy_transformers
import spacy.cli
# Load the spaCy transformer pipeline. The download is attempted only when the
# package is missing (spacy.load raises OSError in that case), so importing
# this module does not trigger a network fetch when the model is installed.
try:
    nlp = spacy.load("en_core_web_trf")
except OSError:
    spacy.cli.download("en_core_web_trf")
    nlp = spacy.load("en_core_web_trf")

# Sentence-embedding model that KeyBERT uses as its backend.
model_sent = SentenceTransformer("all-mpnet-base-v2")
kw_model = KeyBERT(model=model_sent)
# Words to exclude from extracted topics: one entry per line of the file, with
# surrounding whitespace stripped. The encoding is given explicitly so the
# result does not depend on the platform default.
# NOTE(review): assumes common_words_v2.txt is UTF-8 (or plain ASCII) - confirm.
with open("common_words_v2.txt", "r", encoding="utf-8") as file:
    common_words = [line.strip() for line in file]
# spaCy entity labels that disqualify a keyword from becoming a topic.
_EXCLUDED_ENTITY_LABELS = frozenset({
    'CARDINAL', 'DATE', 'GPE', 'LANGUAGE', 'LOC',
    'NORP', 'ORDINAL', 'PERCENT', 'PERSON', 'QUANTITY', 'TIME',
})


def extract_topics(event_value, *, top_n=10, min_score=0.3):
    """Extract topic words from ``event_value``.

    The input is passed through ``bert_preprocess`` and then to KeyBERT.
    Keywords whose score reaches ``min_score`` are lemmatized token by token
    with the spaCy pipeline. Each lemma is parsed again and discarded when the
    parse contains an entity with one of the excluded labels; of the remaining
    tokens, those tagged ``VERB`` and those listed in ``common_words`` are
    dropped.

    Args:
        event_value: Raw input handed to ``bert_preprocess``.
        top_n: Maximum number of keywords requested from KeyBERT.
        min_score: Minimum KeyBERT score a keyword needs to be considered.

    Returns:
        A list of unique topic strings, in the order they were first
        encountered (previously the order was arbitrary).
    """
    keywords = kw_model.extract_keywords(bert_preprocess(event_value), top_n=top_n)

    # Lemmatize every token of every sufficiently relevant keyword.
    lemmas = [
        token.lemma_
        for phrase, score in keywords
        if score >= min_score
        for token in nlp(phrase)
    ]

    excluded_words = set(common_words)
    # A dict is used as an insertion-ordered set so the result is
    # deterministic across runs, unlike iterating a set of strings.
    topics = {}
    for lemma in lemmas:
        doc = nlp(lemma)
        if any(ent.label_ in _EXCLUDED_ENTITY_LABELS for ent in doc.ents):
            continue
        for token in doc:
            if token.pos_ != 'VERB' and token.text not in excluded_words:
                topics[token.text] = None
    return list(topics)
|