harshithakr committed on
Commit
fcdc8ee
·
1 Parent(s): d9987f2

Create kbert_topics.py

Browse files
Files changed (1) hide show
  1. kbert_topics.py +28 -0
kbert_topics.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer, util
2
+ from keybert import KeyBERT
3
+ from preprocess_function import bert_preprocess
4
+ import spacy
5
# Load spaCy's "en_core_web_trf" pipeline once at import time; extract_topics calls it for lemmas, entities and POS tags.
+ nlp = spacy.load("en_core_web_trf")
6
+
7
# SentenceTransformer model ("all-mpnet-base-v2") handed to KeyBERT below.
+ model_sent = SentenceTransformer("all-mpnet-base-v2")
8
+
9
# KeyBERT keyword extractor built on model_sent; shared by every extract_topics call.
+ kw_model = KeyBERT(model = model_sent)
10
+
11
def extract_topics(event_value, top_n=10, min_score=0.3):
    """Extract topic words from a piece of event text.

    The text is passed through ``bert_preprocess``, KeyBERT proposes up to
    ``top_n`` keywords, and every keyword whose score is at least
    ``min_score`` is tokenized and lemmatized with spaCy. Each lemma is then
    re-parsed on its own: a lemma for which spaCy reports an entity of an
    excluded type is dropped entirely, and tokens tagged as verbs are
    removed from the lemmas that remain.

    Args:
        event_value: Raw input, handed to ``bert_preprocess`` unchanged.
        top_n: Maximum number of keywords requested from KeyBERT.
        min_score: Minimum KeyBERT score a keyword needs to be considered.

    Returns:
        A list of token strings, in the order KeyBERT returned the
        keywords. Duplicates are not removed.
    """
    # spaCy entity labels that disqualify a lemma from being a topic.
    entity_remove = {'CARDINAL', 'DATE', 'GPE', 'LANGUAGE', 'LOC',
                     'NORP', 'ORDINAL', 'PERCENT', 'PERSON', 'QUANTITY', 'TIME'}

    keywords = kw_model.extract_keywords(bert_preprocess(event_value), top_n=top_n)

    # Each KeyBERT result is a (keyword, score) pair; keep the lemmas of the
    # tokens of every keyword that clears the score threshold.
    lemmas = [
        token.lemma_
        for phrase, score in keywords
        if score >= min_score
        for token in nlp(phrase)
    ]

    work_list = []
    for lemma in lemmas:
        # Re-parse the bare lemma so entity and POS tags refer to the
        # lemmatized form rather than the original keyword.
        doc = nlp(lemma)
        if any(ent.label_ in entity_remove for ent in doc.ents):
            continue
        work_list.extend(token.text for token in doc if token.pos_ != 'VERB')

    return work_list
28
+