soojeongcrystal committed on
Commit
76b2f6d
·
verified ·
1 Parent(s): 4ea47f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -4
app.py CHANGED
@@ -11,7 +11,7 @@ import traceback
11
 
12
  warnings.filterwarnings("ignore")
13
 
14
- # 한글 폰트 설정
15
  plt.rcParams['font.family'] = 'NanumGothic'
16
 
17
  # 초기 불용어 목록
@@ -37,23 +37,28 @@ def extract_nouns(text):
37
  for sentence in sentences:
38
  if sentence.strip(): # 빈 문장 건너뛰기
39
  extracted = noun_extractor.extract(sentence)
40
- if extracted:
41
- nouns.extend([word for word, score in extracted.items() if score > 0])
 
 
42
  return [noun for noun in nouns if len(noun) > 1]
43
  except Exception as e:
44
  st.error(f"명사 추출 중 오류 발생: {str(e)}")
 
45
  return []
46
 
47
  @st.cache_data
48
  def preprocess_text(text, user_stopwords):
49
  try:
50
  if not text or not isinstance(text, str):
 
51
  return ""
52
  nouns = extract_nouns(text)
53
  nouns = [noun for noun in nouns if noun and noun not in user_stopwords]
54
  return ' '.join(nouns)
55
  except Exception as e:
56
  st.error(f"텍스트 전처리 중 오류 발생: {str(e)}")
 
57
  return ""
58
 
59
  def topic_modeling(texts, n_components):
@@ -120,12 +125,15 @@ if uploaded_file is not None:
120
  start = i * chunk_size
121
  end = start + chunk_size if i < total_chunks - 1 else len(text)
122
  chunk = text[start:end]
 
123
  preprocessed_chunk = preprocess_text(chunk, user_stopwords)
124
  if preprocessed_chunk:
125
  preprocessed_chunks.append(preprocessed_chunk)
 
 
126
  progress_bar.progress(min(1.0, (i + 1) / total_chunks))
127
 
128
- if i % 10 == 0: # 매 10번째 청크마다 정보 출력
129
  st.text(f"처리된 청크: {i+1}/{total_chunks}, 현재 청크 길이: {len(preprocessed_chunk)}")
130
 
131
  st.text(f"처리된 청크 수: {len(preprocessed_chunks)}")
 
11
 
12
  warnings.filterwarnings("ignore")
13
 
14
+ # 한글 폰트 설정 (허깅페이스 환경에 맞게 수정 필요할 수 있음)
15
  plt.rcParams['font.family'] = 'NanumGothic'
16
 
17
  # 초기 불용어 목록
 
37
  for sentence in sentences:
38
  if sentence.strip(): # 빈 문장 건너뛰기
39
  extracted = noun_extractor.extract(sentence)
40
+ if extracted is None:
41
+ st.warning(f"다음 문장에서 명사 추출 실패: {sentence[:50]}...")
42
+ continue
43
+ nouns.extend([word for word, score in extracted.items() if score > 0])
44
  return [noun for noun in nouns if len(noun) > 1]
45
  except Exception as e:
46
  st.error(f"명사 추출 중 오류 발생: {str(e)}")
47
+ st.error(f"문제가 발생한 텍스트: {text[:100]}...")
48
  return []
49
 
50
  @st.cache_data
51
  def preprocess_text(text, user_stopwords):
52
  try:
53
  if not text or not isinstance(text, str):
54
+ st.warning(f"유효하지 않은 입력: {type(text)}")
55
  return ""
56
  nouns = extract_nouns(text)
57
  nouns = [noun for noun in nouns if noun and noun not in user_stopwords]
58
  return ' '.join(nouns)
59
  except Exception as e:
60
  st.error(f"텍스트 전처리 중 오류 발생: {str(e)}")
61
+ st.error(f"문제가 발생한 텍스트: {text[:100]}...")
62
  return ""
63
 
64
  def topic_modeling(texts, n_components):
 
125
  start = i * chunk_size
126
  end = start + chunk_size if i < total_chunks - 1 else len(text)
127
  chunk = text[start:end]
128
+ st.text(f"청크 {i+1} 처리 중: 길이 {len(chunk)} 문자")
129
  preprocessed_chunk = preprocess_text(chunk, user_stopwords)
130
  if preprocessed_chunk:
131
  preprocessed_chunks.append(preprocessed_chunk)
132
+ else:
133
+ st.warning(f"청크 {i+1}에서 유효한 텍스트가 추출되지 않았습니다.")
134
  progress_bar.progress(min(1.0, (i + 1) / total_chunks))
135
 
136
+ if i % 10 == 0 or i == total_chunks - 1: # 매 10번째 청크와 마지막 청크에 대해 정보 출력
137
  st.text(f"처리된 청크: {i+1}/{total_chunks}, 현재 청크 길이: {len(preprocessed_chunk)}")
138
 
139
  st.text(f"처리된 청크 수: {len(preprocessed_chunks)}")