soojeongcrystal committed on
Commit
4ea47f1
·
verified ·
1 Parent(s): 6bdbb72

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -35,9 +35,10 @@ def extract_nouns(text):
35
  sentences = re.split('[.!?]+', text)
36
  nouns = []
37
  for sentence in sentences:
38
- extracted = noun_extractor.extract(sentence)
39
- if extracted:
40
- nouns.extend(extracted.keys())
 
41
  return [noun for noun in nouns if len(noun) > 1]
42
  except Exception as e:
43
  st.error(f"명사 추출 중 오류 발생: {str(e)}")
@@ -46,7 +47,7 @@ def extract_nouns(text):
46
  @st.cache_data
47
  def preprocess_text(text, user_stopwords):
48
  try:
49
- if not text:
50
  return ""
51
  nouns = extract_nouns(text)
52
  nouns = [noun for noun in nouns if noun and noun not in user_stopwords]
@@ -123,6 +124,9 @@ if uploaded_file is not None:
123
  if preprocessed_chunk:
124
  preprocessed_chunks.append(preprocessed_chunk)
125
  progress_bar.progress(min(1.0, (i + 1) / total_chunks))
 
 
 
126
 
127
  st.text(f"처리된 청크 수: {len(preprocessed_chunks)}")
128
  preprocessed_text = " ".join(preprocessed_chunks)
 
35
  sentences = re.split('[.!?]+', text)
36
  nouns = []
37
  for sentence in sentences:
38
+ if sentence.strip(): # 빈 문장 건너뛰기
39
+ extracted = noun_extractor.extract(sentence)
40
+ if extracted:
41
+ nouns.extend([word for word, score in extracted.items() if score > 0])
42
  return [noun for noun in nouns if len(noun) > 1]
43
  except Exception as e:
44
  st.error(f"명사 추출 중 오류 발생: {str(e)}")
 
47
  @st.cache_data
48
  def preprocess_text(text, user_stopwords):
49
  try:
50
+ if not text or not isinstance(text, str):
51
  return ""
52
  nouns = extract_nouns(text)
53
  nouns = [noun for noun in nouns if noun and noun not in user_stopwords]
 
124
  if preprocessed_chunk:
125
  preprocessed_chunks.append(preprocessed_chunk)
126
  progress_bar.progress(min(1.0, (i + 1) / total_chunks))
127
+
128
+ if i % 10 == 0: # 매 10번째 청크마다 정보 출력
129
+ st.text(f"처리된 청크: {i+1}/{total_chunks}, 현재 청크 길이: {len(preprocessed_chunk)}")
130
 
131
  st.text(f"처리된 청크 수: {len(preprocessed_chunks)}")
132
  preprocessed_text = " ".join(preprocessed_chunks)