Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -37,15 +37,18 @@ def extract_nouns(text):
|
|
| 37 |
nouns = []
|
| 38 |
for sentence in sentences:
|
| 39 |
extracted = noun_extractor.extract(sentence)
|
| 40 |
-
|
|
|
|
| 41 |
|
| 42 |
# 2음절 이상의 명사만 선택
|
| 43 |
return [noun for noun in nouns if len(noun) > 1]
|
| 44 |
|
| 45 |
@st.cache_data
|
| 46 |
def preprocess_text(text, user_stopwords):
|
|
|
|
|
|
|
| 47 |
nouns = extract_nouns(text)
|
| 48 |
-
nouns = [noun for noun in nouns if noun not in user_stopwords]
|
| 49 |
return ' '.join(nouns)
|
| 50 |
|
| 51 |
def topic_modeling(texts, n_components):
|
|
@@ -112,26 +115,30 @@ if uploaded_file is not None:
|
|
| 112 |
end = start + chunk_size if i < total_chunks - 1 else len(text)
|
| 113 |
chunk = text[start:end]
|
| 114 |
preprocessed_chunk = preprocess_text(chunk, user_stopwords)
|
| 115 |
-
|
|
|
|
| 116 |
progress_bar.progress(min(1.0, (i + 1) / total_chunks))
|
| 117 |
|
| 118 |
preprocessed_text = " ".join(preprocessed_chunks)
|
| 119 |
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
st.
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
st.
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
except Exception as e:
|
| 137 |
st.error(f"오류가 발생했습니다: {str(e)}")
|
|
|
|
| 37 |
nouns = []
|
| 38 |
for sentence in sentences:
|
| 39 |
extracted = noun_extractor.extract(sentence)
|
| 40 |
+
if extracted: # Noneμ΄ μλ κ²½μ°μλ§ μ²λ¦¬
|
| 41 |
+
nouns.extend(extracted.keys())
|
| 42 |
|
| 43 |
# 2음절 이상의 명사만 선택
|
| 44 |
return [noun for noun in nouns if len(noun) > 1]
|
| 45 |
|
| 46 |
@st.cache_data
|
| 47 |
def preprocess_text(text, user_stopwords):
|
| 48 |
+
if not text: # 빈 문자열 체크
|
| 49 |
+
return ""
|
| 50 |
nouns = extract_nouns(text)
|
| 51 |
+
nouns = [noun for noun in nouns if noun and noun not in user_stopwords]
|
| 52 |
return ' '.join(nouns)
|
| 53 |
|
| 54 |
def topic_modeling(texts, n_components):
|
|
|
|
| 115 |
end = start + chunk_size if i < total_chunks - 1 else len(text)
|
| 116 |
chunk = text[start:end]
|
| 117 |
preprocessed_chunk = preprocess_text(chunk, user_stopwords)
|
| 118 |
+
if preprocessed_chunk: # 빈 문자열이 아닌 경우에만 추가
|
| 119 |
+
preprocessed_chunks.append(preprocessed_chunk)
|
| 120 |
progress_bar.progress(min(1.0, (i + 1) / total_chunks))
|
| 121 |
|
| 122 |
preprocessed_text = " ".join(preprocessed_chunks)
|
| 123 |
|
| 124 |
+
if not preprocessed_text:
|
| 125 |
+
st.warning("처리된 텍스트가 없습니다. 다른 파일을 업로드해 주세요.")
|
| 126 |
+
else:
|
| 127 |
+
st.subheader("토픽 모델링 결과")
|
| 128 |
+
n_topics = st.slider("토픽 수 선택", min_value=2, max_value=10, value=5)
|
| 129 |
+
topics = topic_modeling(preprocessed_chunks, n_topics)
|
| 130 |
+
for topic, words in topics.items():
|
| 131 |
+
st.write(f"{topic}: {', '.join(words)}")
|
| 132 |
+
|
| 133 |
+
st.subheader("상위 10개 Trigram")
|
| 134 |
+
top_trigrams = get_top_trigrams(preprocessed_text)
|
| 135 |
+
for trigram, count in top_trigrams:
|
| 136 |
+
st.write(f"{' '.join(trigram)}: {count}")
|
| 137 |
+
|
| 138 |
+
st.subheader("단어 빈도 차트")
|
| 139 |
+
color = st.color_picker("막대 색상 선택", "#1f77b4")
|
| 140 |
+
fig = generate_word_frequency_chart(preprocessed_text, color)
|
| 141 |
+
st.pyplot(fig)
|
| 142 |
|
| 143 |
except Exception as e:
|
| 144 |
st.error(f"오류가 발생했습니다: {str(e)}")
|