Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -32,15 +32,14 @@ tokenizer = RegexTokenizer()
|
|
| 32 |
|
| 33 |
def extract_nouns(text):
|
| 34 |
try:
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
nouns.extend([word for word, score in extracted.items() if score > 0])
|
| 44 |
return [noun for noun in nouns if len(noun) > 1]
|
| 45 |
except Exception as e:
|
| 46 |
st.error(f"명사 추출 중 오류 발생: {str(e)}")
|
|
@@ -116,28 +115,8 @@ if uploaded_file is not None:
|
|
| 116 |
text = str(uploaded_file.read(), 'utf-8')
|
| 117 |
st.text(f"파일 길이: {len(text)} 문자")
|
| 118 |
|
| 119 |
-
|
| 120 |
-
total_chunks = 100
|
| 121 |
-
chunk_size = max(1, len(text) // total_chunks)
|
| 122 |
-
preprocessed_chunks = []
|
| 123 |
|
| 124 |
-
for i in range(total_chunks):
|
| 125 |
-
start = i * chunk_size
|
| 126 |
-
end = start + chunk_size if i < total_chunks - 1 else len(text)
|
| 127 |
-
chunk = text[start:end]
|
| 128 |
-
st.text(f"청크 {i+1} 처리 중: 길이 {len(chunk)} 문자")
|
| 129 |
-
preprocessed_chunk = preprocess_text(chunk, user_stopwords)
|
| 130 |
-
if preprocessed_chunk:
|
| 131 |
-
preprocessed_chunks.append(preprocessed_chunk)
|
| 132 |
-
else:
|
| 133 |
-
st.warning(f"청크 {i+1}에서 유효한 텍스트가 추출되지 않았습니다.")
|
| 134 |
-
progress_bar.progress(min(1.0, (i + 1) / total_chunks))
|
| 135 |
-
|
| 136 |
-
if i % 10 == 0 or i == total_chunks - 1: # 매 10번째 청크와 마지막 청크에 대해 정보 출력
|
| 137 |
-
st.text(f"처리된 청크: {i+1}/{total_chunks}, 현재 청크 길이: {len(preprocessed_chunk)}")
|
| 138 |
-
|
| 139 |
-
st.text(f"처리된 청크 수: {len(preprocessed_chunks)}")
|
| 140 |
-
preprocessed_text = " ".join(preprocessed_chunks)
|
| 141 |
st.text(f"처리된 텍스트 길이: {len(preprocessed_text)} 문자")
|
| 142 |
|
| 143 |
if not preprocessed_text:
|
|
@@ -145,7 +124,7 @@ if uploaded_file is not None:
|
|
| 145 |
else:
|
| 146 |
st.subheader("토픽 모델링 결과")
|
| 147 |
n_topics = st.slider("토픽 수 선택", min_value=2, max_value=10, value=5)
|
| 148 |
-
topics = topic_modeling(
|
| 149 |
for topic, words in topics.items():
|
| 150 |
st.write(f"{topic}: {', '.join(words)}")
|
| 151 |
|
|
|
|
| 32 |
|
| 33 |
def extract_nouns(text):
|
| 34 |
try:
|
| 35 |
+
# 전체 텍스트에 대해 한 번만 extract 메서드 호출
|
| 36 |
+
extracted = noun_extractor.extract(text)
|
| 37 |
+
if extracted is None:
|
| 38 |
+
st.warning("명사 추출에 실패했습니다.")
|
| 39 |
+
return []
|
| 40 |
+
# score๊ฐ 0๋ณด๋ค ํฐ ๋จ์ด๋ง ์ ํ
|
| 41 |
+
nouns = [word for word, score in extracted.items() if score > 0]
|
| 42 |
+
# 2음절 이상의 명사만 선택
|
|
|
|
| 43 |
return [noun for noun in nouns if len(noun) > 1]
|
| 44 |
except Exception as e:
|
| 45 |
st.error(f"명사 추출 중 오류 발생: {str(e)}")
|
|
|
|
| 115 |
text = str(uploaded_file.read(), 'utf-8')
|
| 116 |
st.text(f"파일 길이: {len(text)} 문자")
|
| 117 |
|
| 118 |
+
preprocessed_text = preprocess_text(text, user_stopwords)
|
|
|
|
|
|
|
|
|
|
| 119 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
st.text(f"처리된 텍스트 길이: {len(preprocessed_text)} 문자")
|
| 121 |
|
| 122 |
if not preprocessed_text:
|
|
|
|
| 124 |
else:
|
| 125 |
st.subheader("토픽 모델링 결과")
|
| 126 |
n_topics = st.slider("토픽 수 선택", min_value=2, max_value=10, value=5)
|
| 127 |
+
topics = topic_modeling([preprocessed_text], n_topics)
|
| 128 |
for topic, words in topics.items():
|
| 129 |
st.write(f"{topic}: {', '.join(words)}")
|
| 130 |
|