Hellowish committed on
Commit
1f55837
·
verified ·
1 Parent(s): 16223b1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -11
app.py CHANGED
@@ -120,8 +120,24 @@ except Exception as e:
120
  print("❌ 詞彙載入失敗:", e)
121
  vectorized_layer = None
122
 
123
- # ---------------- Python 特徵計算 ----------------
 
 
 
 
 
 
 
 
 
124
  def compute_features(text):
 
 
 
 
 
 
 
125
  words = text.split()
126
  word_count = len(words)
127
  unique_words = len(set(words))
@@ -130,16 +146,14 @@ def compute_features(text):
130
  punctuation_count = sum(1 for c in text if c in ".,!?;:")
131
  punctuation_ratio = punctuation_count / (len(text) + 1e-6)
132
  avg_word_length = sum(len(w) for w in words) / (word_count if word_count else 1)
133
- return [[word_count, unique_word_ratio, repeat_rate, punctuation_ratio, avg_word_length]]
134
 
135
- # ---------------- Python 標準化 ----------------
 
 
136
  def transform_features(feat):
137
- # 假設最大值:
138
- max_values = [500, 1.0, 1.0, 0.5, 10] # word_count, unique_word_ratio, repeat_rate, punctuation_ratio, avg_word_length
139
- transformed = []
140
- for i, val in enumerate(feat[0]):
141
- transformed.append(val / max_values[i])
142
- return [transformed]
143
 
144
  # ---------------- 生成解釋 ----------------
145
  def explain_prediction(text):
@@ -155,7 +169,7 @@ def explain_prediction(text):
155
  seq = vectorized_layer([text])
156
  seq = tf.keras.utils.pad_sequences(seq, maxlen=50, padding='pre')
157
 
158
- # TensorFlow tensor
159
  seq = tf.convert_to_tensor(seq)
160
  feat = tf.convert_to_tensor(feat, dtype=tf.float32)
161
 
@@ -166,7 +180,6 @@ def explain_prediction(text):
166
 
167
  # ---------------- 判斷依據 ----------------
168
  reasons = []
169
- # 用原始特徵判斷
170
  if feat_raw[0][0] > 100: reasons.append("句子長度偏長")
171
  if feat_raw[0][2] > 0.3: reasons.append("重複率高")
172
  if feat_raw[0][1] < 0.2: reasons.append("詞彙多樣性低")
 
120
  print("❌ 詞彙載入失敗:", e)
121
  vectorized_layer = None
122
 
123
+ # ---------------- 載入 scaler ----------------
124
+ try:
125
+ with open("scaler.pkl", "rb") as f:
126
+ scaler = pickle.load(f)
127
+ print("✅ Scaler 載入成功")
128
+ except Exception as e:
129
+ print("❌ Scaler 載入失敗:", e)
130
+ scaler = None
131
+
132
+ # ---------------- 特徵計算 ----------------
133
  def compute_features(text):
134
+ if isinstance(text, tf.Tensor):
135
+ text = text.numpy().decode('utf-8') if text.dtype == tf.string else str(text.numpy())
136
+ elif isinstance(text, bytes):
137
+ text = text.decode('utf-8')
138
+ else:
139
+ text = str(text)
140
+
141
  words = text.split()
142
  word_count = len(words)
143
  unique_words = len(set(words))
 
146
  punctuation_count = sum(1 for c in text if c in ".,!?;:")
147
  punctuation_ratio = punctuation_count / (len(text) + 1e-6)
148
  avg_word_length = sum(len(w) for w in words) / (word_count if word_count else 1)
 
149
 
150
+ return np.array([[word_count, unique_word_ratio, repeat_rate, punctuation_ratio, avg_word_length]])
151
+
152
+ # ---------------- 使用 scaler ----------------
153
  def transform_features(feat):
154
+ if scaler is None:
155
+ return feat # 如果 scaler 沒載入,就直接回傳原始特徵
156
+ return scaler.transform(feat)
 
 
 
157
 
158
  # ---------------- 生成解釋 ----------------
159
  def explain_prediction(text):
 
169
  seq = vectorized_layer([text])
170
  seq = tf.keras.utils.pad_sequences(seq, maxlen=50, padding='pre')
171
 
172
+ # 轉成 Tensor
173
  seq = tf.convert_to_tensor(seq)
174
  feat = tf.convert_to_tensor(feat, dtype=tf.float32)
175
 
 
180
 
181
  # ---------------- 判斷依據 ----------------
182
  reasons = []
 
183
  if feat_raw[0][0] > 100: reasons.append("句子長度偏長")
184
  if feat_raw[0][2] > 0.3: reasons.append("重複率高")
185
  if feat_raw[0][1] < 0.2: reasons.append("詞彙多樣性低")