Spaces:

Hellowish
/

AI_Detect

Sleeping

App Files Files Community

Hellowish committed on Nov 6, 2025

Commit

1f55837

verified ·

1 Parent(s): 16223b1

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -11

app.py CHANGED Viewed

@@ -120,8 +120,24 @@ except Exception as e:
     print("❌ 詞彙載入失敗:", e)
     vectorized_layer = None
-# ---------------- 純 Python 特徵計算 ----------------
 def compute_features(text):
     words = text.split()
     word_count = len(words)
     unique_words = len(set(words))
@@ -130,16 +146,14 @@ def compute_features(text):
     punctuation_count = sum(1 for c in text if c in ".,!?;:")
     punctuation_ratio = punctuation_count / (len(text) + 1e-6)
     avg_word_length = sum(len(w) for w in words) / (word_count if word_count else 1)
-    return [[word_count, unique_word_ratio, repeat_rate, punctuation_ratio, avg_word_length]]
-# ---------------- 純 Python 標準化 ----------------
 def transform_features(feat):
-    # 假設最大值：
-    max_values = [500, 1.0, 1.0, 0.5, 10]  # word_count, unique_word_ratio, repeat_rate, punctuation_ratio, avg_word_length
-    transformed = []
-    for i, val in enumerate(feat[0]):
-        transformed.append(val / max_values[i])
-    return [transformed]
 # ---------------- 生成解釋 ----------------
 def explain_prediction(text):
@@ -155,7 +169,7 @@ def explain_prediction(text):
         seq = vectorized_layer([text])
         seq = tf.keras.utils.pad_sequences(seq, maxlen=50, padding='pre')
-        # TensorFlow tensor
         seq = tf.convert_to_tensor(seq)
         feat = tf.convert_to_tensor(feat, dtype=tf.float32)
@@ -166,7 +180,6 @@ def explain_prediction(text):
         # ---------------- 判斷依據 ----------------
         reasons = []
-        # 用原始特徵判斷
         if feat_raw[0][0] > 100: reasons.append("句子長度偏長")
         if feat_raw[0][2] > 0.3: reasons.append("重複率高")
         if feat_raw[0][1] < 0.2: reasons.append("詞彙多樣性低")

     print("❌ 詞彙載入失敗:", e)
     vectorized_layer = None
+# ---------------- 載入 scaler ----------------
# Load the pickled scaler from the relative path "scaler.pkl" at import time.
# Any failure (missing file, unpickling error, ...) is printed and leaves
# ``scaler`` set to None instead of aborting start-up.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only use a scaler.pkl that comes from a trusted source.
scaler = None
try:
    with open("scaler.pkl", "rb") as f:
        scaler = pickle.load(f)
    print("✅ Scaler 載入成功")
except Exception as e:
    print("❌ Scaler 載入失敗:", e)
    scaler = None
+# ---------------- 特徵計算 ----------------
 def compute_features(text):
+    if isinstance(text, tf.Tensor):
+        text = text.numpy().decode('utf-8') if text.dtype == tf.string else str(text.numpy())
+    elif isinstance(text, bytes):
+        text = text.decode('utf-8')
+    else:
+        text = str(text)
     words = text.split()
     word_count = len(words)
     unique_words = len(set(words))
     punctuation_count = sum(1 for c in text if c in ".,!?;:")
     punctuation_ratio = punctuation_count / (len(text) + 1e-6)
     avg_word_length = sum(len(w) for w in words) / (word_count if word_count else 1)
+    return np.array([[word_count, unique_word_ratio, repeat_rate, punctuation_ratio, avg_word_length]])
+# ---------------- 使用 scaler ----------------
 def transform_features(feat):
     """Scale ``feat`` with the module-level ``scaler`` when one is loaded.

     Args:
         feat: Feature data handed straight to ``scaler.transform``.

     Returns:
         The result of ``scaler.transform(feat)``, or ``feat`` itself,
         unchanged, when ``scaler`` is None.
     """
     if scaler is not None:
         return scaler.transform(feat)
     # No scaler was loaded: hand the raw features back untouched.
     return feat
 # ---------------- 生成解釋 ----------------
 def explain_prediction(text):
         seq = vectorized_layer([text])
         seq = tf.keras.utils.pad_sequences(seq, maxlen=50, padding='pre')
+        # 轉成 Tensor
         seq = tf.convert_to_tensor(seq)
         feat = tf.convert_to_tensor(feat, dtype=tf.float32)
         # ---------------- 判斷依據 ----------------
         reasons = []
         if feat_raw[0][0] > 100: reasons.append("句子長度偏長")
         if feat_raw[0][2] > 0.3: reasons.append("重複率高")
         if feat_raw[0][1] < 0.2: reasons.append("詞彙多樣性低")