harshithakr committed on
Commit
5b6ee8a
·
1 Parent(s): 988cc65

Update kbert_topics.py

Browse files
Files changed (1) hide show
  1. kbert_topics.py +9 -1
kbert_topics.py CHANGED
@@ -11,6 +11,12 @@ model_sent = SentenceTransformer("all-mpnet-base-v2")
11
 
12
  kw_model = KeyBERT(model = model_sent)
13
 
 
 
 
 
 
 
14
  def extract_topics(event_value):
15
  keywords = kw_model.extract_keywords(bert_preprocess(event_value), top_n = 10)
16
 
@@ -26,6 +32,8 @@ def extract_topics(event_value):
26
  doc = nlp(word_check)
27
  if not any(word.label_ in entity_remove for word in doc.ents):
28
  work_list.extend(token.text for token in doc if token.pos_ != 'VERB')
29
-
 
 
30
  return work_list
31
 
 
11
 
12
  kw_model = KeyBERT(model = model_sent)
13
 
14
+
15
+ common_words = []
16
+ with open("common_words.txt", "r") as file:
17
+ for line in file:
18
+ read_list.append(line.strip())
19
+
20
  def extract_topics(event_value):
21
  keywords = kw_model.extract_keywords(bert_preprocess(event_value), top_n = 10)
22
 
 
32
  doc = nlp(word_check)
33
  if not any(word.label_ in entity_remove for word in doc.ents):
34
  work_list.extend(token.text for token in doc if token.pos_ != 'VERB')
35
+
36
+ work_list = list(set(work_list) - set(common_words))
37
+
38
  return work_list
39