soojeongcrystal committed on
Commit
7235700
·
verified ·
1 Parent(s): bec772f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -7
app.py CHANGED
@@ -50,19 +50,19 @@ def preprocess_text(text, user_stopwords):
50
  return ""
51
 
52
  def topic_modeling(texts, n_components):
53
- doc_count = len(texts)
54
- min_df = max(2, int(doc_count * 0.01))
55
- max_df = min(0.95, int(doc_count * 0.95))
56
-
57
  # default_stopwords를 리스트로 변환
58
  stop_words_list = list(default_stopwords)
59
 
60
- vectorizer = CountVectorizer(max_df=max_df, min_df=min_df, stop_words=stop_words_list)
61
  data_vectorized = vectorizer.fit_transform(texts)
62
 
63
  n_tokens = data_vectorized.shape[1]
64
  n_components = min(n_components, n_tokens)
65
 
 
 
 
 
66
  lda = LatentDirichletAllocation(n_components=n_components, random_state=42, max_iter=20)
67
  lda.fit(data_vectorized)
68
 
@@ -117,8 +117,11 @@ if uploaded_file is not None:
117
  st.subheader("토픽 모델링 결과")
118
  n_topics = st.slider("토픽 수 선택", min_value=2, max_value=10, value=5)
119
  topics = topic_modeling([preprocessed_text], n_topics)
120
- for topic, words in topics.items():
121
- st.write(f"{topic}: {', '.join(words)}")
 
 
 
122
 
123
  st.subheader("상위 10개 Trigram")
124
  top_trigrams = get_top_trigrams(preprocessed_text)
 
50
  return ""
51
 
52
  def topic_modeling(texts, n_components):
 
 
 
 
53
  # default_stopwords를 리스트로 변환
54
  stop_words_list = list(default_stopwords)
55
 
56
+ vectorizer = CountVectorizer(stop_words=stop_words_list)
57
  data_vectorized = vectorizer.fit_transform(texts)
58
 
59
  n_tokens = data_vectorized.shape[1]
60
  n_components = min(n_components, n_tokens)
61
 
62
+ if n_components < 2:
63
+ st.warning("추출된 고유 단어가 너무 적습니다. 더 긴 텍스트를 사용해 주세요.")
64
+ return {}
65
+
66
  lda = LatentDirichletAllocation(n_components=n_components, random_state=42, max_iter=20)
67
  lda.fit(data_vectorized)
68
 
 
117
  st.subheader("토픽 모델링 결과")
118
  n_topics = st.slider("토픽 수 선택", min_value=2, max_value=10, value=5)
119
  topics = topic_modeling([preprocessed_text], n_topics)
120
+ if topics:
121
+ for topic, words in topics.items():
122
+ st.write(f"{topic}: {', '.join(words)}")
123
+ else:
124
+ st.warning("토픽 모델링을 수행할 수 없습니다. 더 긴 텍스트를 사용해 주세요.")
125
 
126
  st.subheader("상위 10개 Trigram")
127
  top_trigrams = get_top_trigrams(preprocessed_text)