Update app.py
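The change wires a Korean morphological analyzer into the app: it imports MeCab, builds a single Tagger behind Streamlit's @st.cache_resource (the cache intended for unserializable resources, as opposed to @st.cache_data, which pickles return values), fills in the extract_nouns body with a node-walking noun extractor, and adds the closing parenthesis that was missing from the CountVectorizer call in topic_modeling.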
app.py CHANGED
@@ -6,6 +6,7 @@ import re
 import warnings
 from collections import Counter
 import traceback
+import MeCab
 
 warnings.filterwarnings("ignore")
 
@@ -21,11 +22,20 @@ default_stopwords = set([
     "또한", "그래서", "그리고", "하지만", "그러나", "따라서", "때문에"
 ])
 
+@st.cache_resource
+def load_mecab():
+    return MeCab.Tagger()
+
+mecab = load_mecab()
+
 def extract_nouns(text):
-
-
-
-
+    nouns = []
+    nodes = mecab.parseToNode(text)
+    while nodes:
+        if nodes.feature.split(',')[0] == '명사':
+            nouns.append(nodes.surface)
+        nodes = nodes.next
+    return [noun for noun in nouns if len(noun) > 1 and noun not in default_stopwords]
 
 @st.cache_data
 def preprocess_text(text, user_stopwords):
@@ -44,7 +54,7 @@ def preprocess_text(text, user_stopwords):
 def topic_modeling(texts, n_components):
     stop_words_list = list(default_stopwords)
 
-    vectorizer = CountVectorizer(stop_words=stop_words_list, max_features=1000
+    vectorizer = CountVectorizer(stop_words=stop_words_list, max_features=1000)
     data_vectorized = vectorizer.fit_transform(texts)
 
     n_tokens = data_vectorized.shape[1]
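For context, here is a standalone sketch of the node-walking pattern the new extract_nouns uses. It assumes mecab-python3 with a Korean dictionary installed (e.g. mecab-ko-dic). Note that the label in the first field of node.feature depends on the dictionary: mecab-ko-dic emits Sejong-style tags such as NNG/NNP, while the diff compares against the literal string '명사', so the NOUN_TAGS set and the extract_nouns_demo name below are assumptions, not taken from the commit.

import MeCab

# POS labels that count as nouns. Which labels actually appear in
# node.feature depends on the installed dictionary, so this set is an
# assumption (Sejong-style NNG/NNP plus the literal '명사' the app checks).
NOUN_TAGS = {"NNG", "NNP", "명사"}

def extract_nouns_demo(text, stopwords=frozenset()):
    tagger = MeCab.Tagger()
    nouns = []
    node = tagger.parseToNode(text)  # linked list starting at the BOS node
    while node:
        pos = node.feature.split(",")[0]  # first feature field is the POS tag
        if pos in NOUN_TAGS:
            nouns.append(node.surface)
        node = node.next  # BOS/EOS nodes fail the tag check and are skipped
    # Same post-filter as the app: drop single-character nouns and stopwords.
    return [n for n in nouns if len(n) > 1 and n not in stopwords]

print(extract_nouns_demo("한국어 형태소 분석기로 명사를 추출한다"))

Caching the Tagger once per process, as the commit does with @st.cache_resource, avoids reloading the dictionary on every Streamlit rerun.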
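The last hunk is a one-character fix: the old CountVectorizer call was missing its closing parenthesis, which would raise a SyntaxError as soon as app.py was imported. Below is a minimal sketch of how that vectorizer typically feeds the rest of topic_modeling; the diff only shows the call and the n_tokens line, so the topic_modeling_demo name, the LatentDirichletAllocation step, and the min() guard are assumptions.

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

def topic_modeling_demo(texts, n_components, stop_words_list):
    # The fixed call: custom stopword list plus a 1000-term vocabulary cap.
    vectorizer = CountVectorizer(stop_words=stop_words_list, max_features=1000)
    data_vectorized = vectorizer.fit_transform(texts)

    # n_tokens is the vocabulary size; presumably used to keep the topic
    # count from exceeding the number of distinct terms (assumption).
    n_tokens = data_vectorized.shape[1]
    lda = LatentDirichletAllocation(
        n_components=min(n_components, n_tokens), random_state=0
    )
    lda.fit(data_vectorized)
    return lda, vectorizer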