soojeongcrystal committed on
Commit
b5a82df
·
verified ·
1 Parent(s): 4096d83

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -18
app.py CHANGED
@@ -37,15 +37,18 @@ def extract_nouns(text):
37
  nouns = []
38
  for sentence in sentences:
39
  extracted = noun_extractor.extract(sentence)
40
- nouns.extend(extracted.keys())
 
41
 
42
  # 2음절 이상의 명사만 선택
43
  return [noun for noun in nouns if len(noun) > 1]
44
 
45
  @st.cache_data
46
  def preprocess_text(text, user_stopwords):
 
 
47
  nouns = extract_nouns(text)
48
- nouns = [noun for noun in nouns if noun not in user_stopwords]
49
  return ' '.join(nouns)
50
 
51
  def topic_modeling(texts, n_components):
@@ -112,26 +115,30 @@ if uploaded_file is not None:
112
  end = start + chunk_size if i < total_chunks - 1 else len(text)
113
  chunk = text[start:end]
114
  preprocessed_chunk = preprocess_text(chunk, user_stopwords)
115
- preprocessed_chunks.append(preprocessed_chunk)
 
116
  progress_bar.progress(min(1.0, (i + 1) / total_chunks))
117
 
118
  preprocessed_text = " ".join(preprocessed_chunks)
119
 
120
- st.subheader("토픽 모델링 결과")
121
- n_topics = st.slider("토픽 수 선택", min_value=2, max_value=10, value=5)
122
- topics = topic_modeling(preprocessed_chunks, n_topics)
123
- for topic, words in topics.items():
124
- st.write(f"{topic}: {', '.join(words)}")
125
-
126
- st.subheader("상위 10개 Trigram")
127
- top_trigrams = get_top_trigrams(preprocessed_text)
128
- for trigram, count in top_trigrams:
129
- st.write(f"{' '.join(trigram)}: {count}")
130
-
131
- st.subheader("단어 빈도 차트")
132
- color = st.color_picker("막대 색상 선택", "#1f77b4")
133
- fig = generate_word_frequency_chart(preprocessed_text, color)
134
- st.pyplot(fig)
 
 
 
135
 
136
  except Exception as e:
137
  st.error(f"오류가 발생했습니다: {str(e)}")
 
37
  nouns = []
38
  for sentence in sentences:
39
  extracted = noun_extractor.extract(sentence)
40
+ if extracted: # None이 아닌 경우에만 처리
41
+ nouns.extend(extracted.keys())
42
 
43
  # 2음절 이상의 명사만 선택
44
  return [noun for noun in nouns if len(noun) > 1]
45
 
46
  @st.cache_data
47
  def preprocess_text(text, user_stopwords):
48
+ if not text: # 빈 문자열 체크
49
+ return ""
50
  nouns = extract_nouns(text)
51
+ nouns = [noun for noun in nouns if noun and noun not in user_stopwords]
52
  return ' '.join(nouns)
53
 
54
  def topic_modeling(texts, n_components):
 
115
  end = start + chunk_size if i < total_chunks - 1 else len(text)
116
  chunk = text[start:end]
117
  preprocessed_chunk = preprocess_text(chunk, user_stopwords)
118
+ if preprocessed_chunk: # 빈 문자열이 아닌 경우에만 추가
119
+ preprocessed_chunks.append(preprocessed_chunk)
120
  progress_bar.progress(min(1.0, (i + 1) / total_chunks))
121
 
122
  preprocessed_text = " ".join(preprocessed_chunks)
123
 
124
+ if not preprocessed_text:
125
+ st.warning("처리된 텍스트가 없습니다. 다른 파일을 업로드해 주세요.")
126
+ else:
127
+ st.subheader("토픽 모델링 결과")
128
+ n_topics = st.slider("토픽 수 선택", min_value=2, max_value=10, value=5)
129
+ topics = topic_modeling(preprocessed_chunks, n_topics)
130
+ for topic, words in topics.items():
131
+ st.write(f"{topic}: {', '.join(words)}")
132
+
133
+ st.subheader("상위 10개 Trigram")
134
+ top_trigrams = get_top_trigrams(preprocessed_text)
135
+ for trigram, count in top_trigrams:
136
+ st.write(f"{' '.join(trigram)}: {count}")
137
+
138
+ st.subheader("단어 빈도 차트")
139
+ color = st.color_picker("막대 색상 선택", "#1f77b4")
140
+ fig = generate_word_frequency_chart(preprocessed_text, color)
141
+ st.pyplot(fig)
142
 
143
  except Exception as e:
144
  st.error(f"오류가 발생했습니다: {str(e)}")