T Le committed on
Commit
6ce92d9
·
1 Parent(s): 84ed651

Update WordCloud

Browse files
Files changed (1) hide show
  1. pages/10 WordCloud.py +18 -5
pages/10 WordCloud.py CHANGED
@@ -5,7 +5,7 @@ from wordcloud import WordCloud
5
  from tools import sourceformat as sf
6
  import nltk
7
  from nltk.corpus import stopwords
8
- from gensim.parsing.preprocessing import remove_stopwords
9
  nltk.download('stopwords')
10
 
11
 
@@ -177,14 +177,23 @@ if uploaded_file is not None:
177
  texts = conv_txt(uploaded_file)
178
  colcho = c1.selectbox("Choose Column", list(texts))
179
  fulltext = " ".join(list(texts[colcho]))
180
- fulltext = remove_stopwords(fulltext)
 
 
 
 
181
 
182
 
183
  except:
184
  fulltext = read_txt(uploaded_file)
185
-
186
- if st.button("Submit"):
187
 
 
 
 
 
 
 
 
188
 
189
  wordcloud = WordCloud(max_font_size = max_font,
190
  max_words = max_words,
@@ -205,7 +214,11 @@ if uploaded_file is not None:
205
 
206
  fullcolumn = " ".join(list(texts[colcho]))
207
 
208
- fullcolumn = remove_stopwords(fullcolumn)
 
 
 
 
209
 
210
  if st.button("Submit"):
211
 
 
5
  from tools import sourceformat as sf
6
  import nltk
7
  from nltk.corpus import stopwords
8
+ from nltk.tokenize import word_tokenize
9
  nltk.download('stopwords')
10
 
11
 
 
177
  texts = conv_txt(uploaded_file)
178
  colcho = c1.selectbox("Choose Column", list(texts))
179
  fulltext = " ".join(list(texts[colcho]))
180
+ tokenized = word_tokenize(fulltext)
181
+
182
+ filtered = [word for word in tokenized if word.lower() not in stopwords.words('english')]
183
+
184
+ fulltext = ' '.join(filtered)
185
 
186
 
187
  except:
188
  fulltext = read_txt(uploaded_file)
 
 
189
 
190
+ tokenized = word_tokenize(fulltext)
191
+
192
+ filtered = [word for word in tokenized if word.lower() not in stopwords.words('english')]
193
+
194
+ fulltext = ' '.join(filtered)
195
+
196
+ if st.button("Submit"):
197
 
198
  wordcloud = WordCloud(max_font_size = max_font,
199
  max_words = max_words,
 
214
 
215
  fullcolumn = " ".join(list(texts[colcho]))
216
 
217
+ tokenized = word_tokenize(fullcolumn)
218
+
219
+ filtered = [word for word in tokenized if word.lower() not in stopwords.words('english')]
220
+
221
+ fullcolumn = ' '.join(filtered)
222
 
223
  if st.button("Submit"):
224