Spaces:

soojeongcrystal
/

text

Sleeping

soojeongcrystal committed on Jul 26, 2024

Commit

4ea47f1

verified ·

1 Parent(s): 6bdbb72

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -35,9 +35,10 @@ def extract_nouns(text):
         sentences = re.split('[.!?]+', text)
         nouns = []
         for sentence in sentences:
-            extracted = noun_extractor.extract(sentence)
-            if extracted:
-                nouns.extend(extracted.keys())
         return [noun for noun in nouns if len(noun) > 1]
     except Exception as e:
         st.error(f"명사 추출 중 오류 발생: {str(e)}")
@@ -46,7 +47,7 @@ def extract_nouns(text):
 @st.cache_data
 def preprocess_text(text, user_stopwords):
     try:
-        if not text:
             return ""
         nouns = extract_nouns(text)
         nouns = [noun for noun in nouns if noun and noun not in user_stopwords]
@@ -123,6 +124,9 @@ if uploaded_file is not None:
                 if preprocessed_chunk:
                     preprocessed_chunks.append(preprocessed_chunk)
                 progress_bar.progress(min(1.0, (i + 1) / total_chunks))
             st.text(f"처리된 청크 수: {len(preprocessed_chunks)}")
             preprocessed_text = " ".join(preprocessed_chunks)

         sentences = re.split('[.!?]+', text)
         nouns = []
         for sentence in sentences:
+            if sentence.strip():  # 빈 문장 건너뛰기
+                extracted = noun_extractor.extract(sentence)
+                if extracted:
+                    nouns.extend([word for word, score in extracted.items() if score > 0])
         return [noun for noun in nouns if len(noun) > 1]
     except Exception as e:
         st.error(f"명사 추출 중 오류 발생: {str(e)}")
 @st.cache_data
 def preprocess_text(text, user_stopwords):
     try:
+        if not text or not isinstance(text, str):
             return ""
         nouns = extract_nouns(text)
         nouns = [noun for noun in nouns if noun and noun not in user_stopwords]
                 if preprocessed_chunk:
                     preprocessed_chunks.append(preprocessed_chunk)
                 progress_bar.progress(min(1.0, (i + 1) / total_chunks))
+                if i % 10 == 0:  # 매 10번째 청크마다 정보 출력
+                    st.text(f"처리된 청크: {i+1}/{total_chunks}, 현재 청크 길이: {len(preprocessed_chunk)}")
             st.text(f"처리된 청크 수: {len(preprocessed_chunks)}")
             preprocessed_text = " ".join(preprocessed_chunks)