Update app.py
Browse files
app.py
CHANGED
|
@@ -120,8 +120,24 @@ except Exception as e:
|
|
| 120 |
print("❌ 詞彙載入失敗:", e)
|
| 121 |
vectorized_layer = None
|
| 122 |
|
| 123 |
-
# ----------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
def compute_features(text):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
words = text.split()
|
| 126 |
word_count = len(words)
|
| 127 |
unique_words = len(set(words))
|
|
@@ -130,16 +146,14 @@ def compute_features(text):
|
|
| 130 |
punctuation_count = sum(1 for c in text if c in ".,!?;:")
|
| 131 |
punctuation_ratio = punctuation_count / (len(text) + 1e-6)
|
| 132 |
avg_word_length = sum(len(w) for w in words) / (word_count if word_count else 1)
|
| 133 |
-
return [[word_count, unique_word_ratio, repeat_rate, punctuation_ratio, avg_word_length]]
|
| 134 |
|
| 135 |
-
|
|
|
|
|
|
|
| 136 |
def transform_features(feat):
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
for i, val in enumerate(feat[0]):
|
| 141 |
-
transformed.append(val / max_values[i])
|
| 142 |
-
return [transformed]
|
| 143 |
|
| 144 |
# ---------------- 生成解釋 ----------------
|
| 145 |
def explain_prediction(text):
|
|
@@ -155,7 +169,7 @@ def explain_prediction(text):
|
|
| 155 |
seq = vectorized_layer([text])
|
| 156 |
seq = tf.keras.utils.pad_sequences(seq, maxlen=50, padding='pre')
|
| 157 |
|
| 158 |
-
#
|
| 159 |
seq = tf.convert_to_tensor(seq)
|
| 160 |
feat = tf.convert_to_tensor(feat, dtype=tf.float32)
|
| 161 |
|
|
@@ -166,7 +180,6 @@ def explain_prediction(text):
|
|
| 166 |
|
| 167 |
# ---------------- 判斷依據 ----------------
|
| 168 |
reasons = []
|
| 169 |
-
# 用原始特徵判斷
|
| 170 |
if feat_raw[0][0] > 100: reasons.append("句子長度偏長")
|
| 171 |
if feat_raw[0][2] > 0.3: reasons.append("重複率高")
|
| 172 |
if feat_raw[0][1] < 0.2: reasons.append("詞彙多樣性低")
|
|
|
|
| 120 |
print("❌ 詞彙載入失敗:", e)
|
| 121 |
vectorized_layer = None
|
| 122 |
|
| 123 |
+
# ---------------- Load the feature scaler ----------------
# Load the pre-fitted scaler used by transform_features() to normalise
# the hand-crafted text features before they reach the model.
# NOTE(review): pickle.load() executes arbitrary code from the file it
# reads — scaler.pkl must only ever come from a trusted build artifact,
# never from user-supplied input.
try:
    with open("scaler.pkl", "rb") as f:
        scaler = pickle.load(f)
    print("✅ Scaler 載入成功")
except Exception as e:
    # Deliberate best-effort fallback: log the failure and continue with
    # scaler = None so the app still runs (features are then used raw).
    print("❌ Scaler 載入失敗:", e)
    scaler = None
|
| 131 |
+
|
| 132 |
+
# ---------------- 特徵計算 ----------------
|
| 133 |
def compute_features(text):
|
| 134 |
+
if isinstance(text, tf.Tensor):
|
| 135 |
+
text = text.numpy().decode('utf-8') if text.dtype == tf.string else str(text.numpy())
|
| 136 |
+
elif isinstance(text, bytes):
|
| 137 |
+
text = text.decode('utf-8')
|
| 138 |
+
else:
|
| 139 |
+
text = str(text)
|
| 140 |
+
|
| 141 |
words = text.split()
|
| 142 |
word_count = len(words)
|
| 143 |
unique_words = len(set(words))
|
|
|
|
| 146 |
punctuation_count = sum(1 for c in text if c in ".,!?;:")
|
| 147 |
punctuation_ratio = punctuation_count / (len(text) + 1e-6)
|
| 148 |
avg_word_length = sum(len(w) for w in words) / (word_count if word_count else 1)
|
|
|
|
| 149 |
|
| 150 |
+
return np.array([[word_count, unique_word_ratio, repeat_rate, punctuation_ratio, avg_word_length]])
|
| 151 |
+
|
| 152 |
+
# ---------------- Apply the scaler ----------------
def transform_features(feat):
    """Return *feat* scaled by the loaded scaler.

    When the scaler failed to load at startup (module-global ``scaler``
    is ``None``), the raw feature array is returned unchanged as a
    best-effort fallback.
    """
    return feat if scaler is None else scaler.transform(feat)
|
|
|
|
|
|
|
|
|
|
| 157 |
|
| 158 |
# ---------------- 生成解釋 ----------------
|
| 159 |
def explain_prediction(text):
|
|
|
|
| 169 |
seq = vectorized_layer([text])
|
| 170 |
seq = tf.keras.utils.pad_sequences(seq, maxlen=50, padding='pre')
|
| 171 |
|
| 172 |
+
# 轉成 Tensor
|
| 173 |
seq = tf.convert_to_tensor(seq)
|
| 174 |
feat = tf.convert_to_tensor(feat, dtype=tf.float32)
|
| 175 |
|
|
|
|
| 180 |
|
| 181 |
# ---------------- 判斷依據 ----------------
|
| 182 |
reasons = []
|
|
|
|
| 183 |
if feat_raw[0][0] > 100: reasons.append("句子長度偏長")
|
| 184 |
if feat_raw[0][2] > 0.3: reasons.append("重複率高")
|
| 185 |
if feat_raw[0][1] < 0.2: reasons.append("詞彙多樣性低")
|