hylee
committed on
Commit
·
8f23661
1
Parent(s):
67d1b71
remove stopwords for word cloud
Browse files
- handler.py +4 -1
- requirements.txt +2 -1
handler.py
CHANGED
|
@@ -2,6 +2,7 @@ from typing import Dict, List, Any
|
|
| 2 |
from scipy.special import softmax
|
| 3 |
import numpy as np
|
| 4 |
import weakref
|
|
|
|
| 5 |
|
| 6 |
from utils import clean_str, clean_str_nopunct
|
| 7 |
import torch
|
|
@@ -135,10 +136,12 @@ class Transcript:
|
|
| 135 |
teacher_dict = {}
|
| 136 |
student_dict = {}
|
| 137 |
uptake_teacher_dict = {}
|
|
|
|
|
|
|
| 138 |
for utt in self.utterances:
|
| 139 |
-
|
| 140 |
words = (utt.get_clean_text(remove_punct=True)).split(' ')
|
| 141 |
for word in words:
|
|
|
|
| 142 |
if utt.role == 'teacher':
|
| 143 |
if word not in teacher_dict:
|
| 144 |
teacher_dict[word] = 0
|
|
|
|
| 2 |
from scipy.special import softmax
|
| 3 |
import numpy as np
|
| 4 |
import weakref
|
| 5 |
+
import nltk
|
| 6 |
|
| 7 |
from utils import clean_str, clean_str_nopunct
|
| 8 |
import torch
|
|
|
|
| 136 |
teacher_dict = {}
|
| 137 |
student_dict = {}
|
| 138 |
uptake_teacher_dict = {}
|
| 139 |
+
stopwords = nltk.corpus.stopwords.word('english')
|
| 140 |
+
print("stopwords: ", stopwords)
|
| 141 |
for utt in self.utterances:
|
|
|
|
| 142 |
words = (utt.get_clean_text(remove_punct=True)).split(' ')
|
| 143 |
for word in words:
|
| 144 |
+
if word in stopwords: continue
|
| 145 |
if utt.role == 'teacher':
|
| 146 |
if word not in teacher_dict:
|
| 147 |
teacher_dict[word] = 0
|
requirements.txt
CHANGED
|
@@ -3,4 +3,5 @@ num2words==0.5.10
|
|
| 3 |
numpy==1.22.4
|
| 4 |
scipy==1.7.3
|
| 5 |
torch==1.10.2
|
| 6 |
-
transformers==4.29.1
|
|
|
|
|
|
| 3 |
numpy==1.22.4
|
| 4 |
scipy==1.7.3
|
| 5 |
torch==1.10.2
|
| 6 |
+
transformers==4.29.1
|
| 7 |
+
nltk==3.8.1
|