Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -35,9 +35,10 @@ def extract_nouns(text):
|
|
| 35 |
sentences = re.split('[.!?]+', text)
|
| 36 |
nouns = []
|
| 37 |
for sentence in sentences:
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
|
|
|
| 41 |
return [noun for noun in nouns if len(noun) > 1]
|
| 42 |
except Exception as e:
|
| 43 |
st.error(f"명사 추출 중 오류 발생: {str(e)}")
|
|
@@ -46,7 +47,7 @@ def extract_nouns(text):
|
|
| 46 |
@st.cache_data
|
| 47 |
def preprocess_text(text, user_stopwords):
|
| 48 |
try:
|
| 49 |
-
if not text:
|
| 50 |
return ""
|
| 51 |
nouns = extract_nouns(text)
|
| 52 |
nouns = [noun for noun in nouns if noun and noun not in user_stopwords]
|
|
@@ -123,6 +124,9 @@ if uploaded_file is not None:
|
|
| 123 |
if preprocessed_chunk:
|
| 124 |
preprocessed_chunks.append(preprocessed_chunk)
|
| 125 |
progress_bar.progress(min(1.0, (i + 1) / total_chunks))
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
st.text(f"처리된 청크 수: {len(preprocessed_chunks)}")
|
| 128 |
preprocessed_text = " ".join(preprocessed_chunks)
|
|
|
|
| 35 |
sentences = re.split('[.!?]+', text)
|
| 36 |
nouns = []
|
| 37 |
for sentence in sentences:
|
| 38 |
+
if sentence.strip(): # 빈 문장 건너뛰기
|
| 39 |
+
extracted = noun_extractor.extract(sentence)
|
| 40 |
+
if extracted:
|
| 41 |
+
nouns.extend([word for word, score in extracted.items() if score > 0])
|
| 42 |
return [noun for noun in nouns if len(noun) > 1]
|
| 43 |
except Exception as e:
|
| 44 |
st.error(f"명사 추출 중 오류 발생: {str(e)}")
|
|
|
|
| 47 |
@st.cache_data
|
| 48 |
def preprocess_text(text, user_stopwords):
|
| 49 |
try:
|
| 50 |
+
if not text or not isinstance(text, str):
|
| 51 |
return ""
|
| 52 |
nouns = extract_nouns(text)
|
| 53 |
nouns = [noun for noun in nouns if noun and noun not in user_stopwords]
|
|
|
|
| 124 |
if preprocessed_chunk:
|
| 125 |
preprocessed_chunks.append(preprocessed_chunk)
|
| 126 |
progress_bar.progress(min(1.0, (i + 1) / total_chunks))
|
| 127 |
+
|
| 128 |
+
if i % 10 == 0: # 매 10번째 청크마다 정보 출력
|
| 129 |
+
st.text(f"처리된 청크: {i+1}/{total_chunks}, 현재 청크 길이: {len(preprocessed_chunk)}")
|
| 130 |
|
| 131 |
st.text(f"처리된 청크 수: {len(preprocessed_chunks)}")
|
| 132 |
preprocessed_text = " ".join(preprocessed_chunks)
|