Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,7 +11,7 @@ import traceback
|
|
| 11 |
|
| 12 |
warnings.filterwarnings("ignore")
|
| 13 |
|
| 14 |
-
# 한글 폰트 설정
|
| 15 |
plt.rcParams['font.family'] = 'NanumGothic'
|
| 16 |
|
| 17 |
# 초기 불용어 목록
|
|
@@ -37,23 +37,28 @@ def extract_nouns(text):
|
|
| 37 |
for sentence in sentences:
|
| 38 |
if sentence.strip(): # 빈 문장 건너뛰기
|
| 39 |
extracted = noun_extractor.extract(sentence)
|
| 40 |
-
if extracted:
|
| 41 |
-
|
|
|
|
|
|
|
| 42 |
return [noun for noun in nouns if len(noun) > 1]
|
| 43 |
except Exception as e:
|
| 44 |
st.error(f"๋ช
์ฌ ์ถ์ถ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}")
|
|
|
|
| 45 |
return []
|
| 46 |
|
| 47 |
@st.cache_data
|
| 48 |
def preprocess_text(text, user_stopwords):
|
| 49 |
try:
|
| 50 |
if not text or not isinstance(text, str):
|
|
|
|
| 51 |
return ""
|
| 52 |
nouns = extract_nouns(text)
|
| 53 |
nouns = [noun for noun in nouns if noun and noun not in user_stopwords]
|
| 54 |
return ' '.join(nouns)
|
| 55 |
except Exception as e:
|
| 56 |
st.error(f"ํ
์คํธ ์ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}")
|
|
|
|
| 57 |
return ""
|
| 58 |
|
| 59 |
def topic_modeling(texts, n_components):
|
|
@@ -120,12 +125,15 @@ if uploaded_file is not None:
|
|
| 120 |
start = i * chunk_size
|
| 121 |
end = start + chunk_size if i < total_chunks - 1 else len(text)
|
| 122 |
chunk = text[start:end]
|
|
|
|
| 123 |
preprocessed_chunk = preprocess_text(chunk, user_stopwords)
|
| 124 |
if preprocessed_chunk:
|
| 125 |
preprocessed_chunks.append(preprocessed_chunk)
|
|
|
|
|
|
|
| 126 |
progress_bar.progress(min(1.0, (i + 1) / total_chunks))
|
| 127 |
|
| 128 |
-
if i % 10 == 0: # 매 10번째 청크만
|
| 129 |
st.text(f"์ฒ๋ฆฌ๋ ์ฒญํฌ: {i+1}/{total_chunks}, ํ์ฌ ์ฒญํฌ ๊ธธ์ด: {len(preprocessed_chunk)}")
|
| 130 |
|
| 131 |
st.text(f"์ฒ๋ฆฌ๋ ์ฒญํฌ ์: {len(preprocessed_chunks)}")
|
|
|
|
| 11 |
|
| 12 |
warnings.filterwarnings("ignore")
|
| 13 |
|
| 14 |
+
# 한글 폰트 설정 (허깅페이스 환경에 맞게 수정 필요할 수 있음)
|
| 15 |
plt.rcParams['font.family'] = 'NanumGothic'
|
| 16 |
|
| 17 |
# 초기 불용어 목록
|
|
|
|
| 37 |
for sentence in sentences:
|
| 38 |
if sentence.strip(): # 빈 문장 건너뛰기
|
| 39 |
extracted = noun_extractor.extract(sentence)
|
| 40 |
+
if extracted is None:
|
| 41 |
+
st.warning(f"๋ค์ ๋ฌธ์ฅ์์ ๋ช
์ฌ ์ถ์ถ ์คํจ: {sentence[:50]}...")
|
| 42 |
+
continue
|
| 43 |
+
nouns.extend([word for word, score in extracted.items() if score > 0])
|
| 44 |
return [noun for noun in nouns if len(noun) > 1]
|
| 45 |
except Exception as e:
|
| 46 |
st.error(f"๋ช
์ฌ ์ถ์ถ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}")
|
| 47 |
+
st.error(f"๋ฌธ์ ๊ฐ ๋ฐ์ํ ํ
์คํธ: {text[:100]}...")
|
| 48 |
return []
|
| 49 |
|
| 50 |
@st.cache_data
|
| 51 |
def preprocess_text(text, user_stopwords):
|
| 52 |
try:
|
| 53 |
if not text or not isinstance(text, str):
|
| 54 |
+
st.warning(f"์ ํจํ์ง ์์ ์
๋ ฅ: {type(text)}")
|
| 55 |
return ""
|
| 56 |
nouns = extract_nouns(text)
|
| 57 |
nouns = [noun for noun in nouns if noun and noun not in user_stopwords]
|
| 58 |
return ' '.join(nouns)
|
| 59 |
except Exception as e:
|
| 60 |
st.error(f"ํ
์คํธ ์ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}")
|
| 61 |
+
st.error(f"๋ฌธ์ ๊ฐ ๋ฐ์ํ ํ
์คํธ: {text[:100]}...")
|
| 62 |
return ""
|
| 63 |
|
| 64 |
def topic_modeling(texts, n_components):
|
|
|
|
| 125 |
start = i * chunk_size
|
| 126 |
end = start + chunk_size if i < total_chunks - 1 else len(text)
|
| 127 |
chunk = text[start:end]
|
| 128 |
+
st.text(f"์ฒญํฌ {i+1} ์ฒ๋ฆฌ ์ค: ๊ธธ์ด {len(chunk)} ๋ฌธ์")
|
| 129 |
preprocessed_chunk = preprocess_text(chunk, user_stopwords)
|
| 130 |
if preprocessed_chunk:
|
| 131 |
preprocessed_chunks.append(preprocessed_chunk)
|
| 132 |
+
else:
|
| 133 |
+
st.warning(f"์ฒญํฌ {i+1}์์ ์ ํจํ ํ
์คํธ๊ฐ ์ถ์ถ๋์ง ์์์ต๋๋ค.")
|
| 134 |
progress_bar.progress(min(1.0, (i + 1) / total_chunks))
|
| 135 |
|
| 136 |
+
if i % 10 == 0 or i == total_chunks - 1: # 매 10번째 청크와 마지막 청크에 대해 정보 출력
|
| 137 |
st.text(f"์ฒ๋ฆฌ๋ ์ฒญํฌ: {i+1}/{total_chunks}, ํ์ฌ ์ฒญํฌ ๊ธธ์ด: {len(preprocessed_chunk)}")
|
| 138 |
|
| 139 |
st.text(f"์ฒ๋ฆฌ๋ ์ฒญํฌ ์: {len(preprocessed_chunks)}")
|