Hellowish committed on
Commit
2e5e237
·
verified ·
1 Parent(s): bd45044

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -22
app.py CHANGED
@@ -101,10 +101,15 @@ import pickle
101
 
102
  # ---------------- 載入模型 ----------------
103
  model = tf.keras.models.load_model("AIDetect.h5")
104
- with open("vectorizer.pkl", "rb") as f:
105
- vectorizer = pickle.load(f)
106
- with open("scaler.pkl", "rb") as f:
107
- scaler = pickle.load(f)
 
 
 
 
 
108
 
109
  # ---------------- 純 Python 特徵計算 ----------------
110
  def compute_features(text):
@@ -116,28 +121,26 @@ def compute_features(text):
116
  punctuation_count = sum(1 for c in text if c in ".,!?;:")
117
  punctuation_ratio = punctuation_count / (len(text) + 1e-6)
118
  avg_word_length = sum(len(w) for w in words) / (word_count if word_count else 1)
119
- return [[word_count, unique_word_ratio, repeat_rate, punctuation_ratio, avg_word_length]]
120
-
121
- # ---------------- 純 Python 標準化 ----------------
122
- def transform_features(feat):
123
- # scaler 是舊的 scikit-learn StandardScaler,裡面有 mean_ 和 scale_
124
- mean = scaler.mean_
125
- scale = scaler.scale_
126
- transformed = []
127
- for i, val in enumerate(feat[0]):
128
- transformed.append((val - mean[i]) / scale[i])
129
  return [transformed]
130
 
131
  # ---------------- 生成解釋 ----------------
132
  def explain_prediction(text):
133
  # 文字向量化
134
  seq = vectorizer([text])
135
- seq = tf.keras.utils.pad_sequences(seq, maxlen=50, padding='pre')
136
-
137
  # 統計特徵
138
  feat = compute_features(text)
139
- feat = transform_features(feat)
140
-
141
  # 預測
142
  pred_prob = model.predict([seq, feat], verbose=0)[0][0]
143
  label = "AI 生成" if pred_prob >= 0.5 else "人類撰寫"
@@ -145,11 +148,11 @@ def explain_prediction(text):
145
 
146
  # 判斷依據
147
  reasons = []
148
- if feat[0][0] > 100: reasons.append("句子長度偏長")
149
  if feat[0][2] > 0.3: reasons.append("重複率高")
150
- if feat[0][1] < 0.2: reasons.append("詞彙多樣性低")
151
- if feat[0][3] < 0.01: reasons.append("標點符號少")
152
- if feat[0][4] > 6: reasons.append("平均詞長偏長")
153
  if not reasons: reasons.append("句子長度與用詞平均")
154
  explanation = ";".join(reasons)
155
 
 
101
 
102
  # ---------------- 載入模型 ----------------
103
  model = tf.keras.models.load_model("AIDetect.h5")
104
+
105
+ # ---------------- 載入詞表 ----------------
106
+ with open("vocab.pkl", "rb") as f:
107
+ vocab = pickle.load(f)
108
+
109
+ # 使用 Keras TextVectorization 來轉換文字
110
+ from tensorflow.keras.layers import TextVectorization
111
+ vectorizer = TextVectorization(max_tokens=len(vocab), output_sequence_length=50)
112
+ vectorizer.set_vocabulary(vocab)
113
 
114
  # ---------------- 純 Python 特徵計算 ----------------
115
  def compute_features(text):
 
121
  punctuation_count = sum(1 for c in text if c in ".,!?;:")
122
  punctuation_ratio = punctuation_count / (len(text) + 1e-6)
123
  avg_word_length = sum(len(w) for w in words) / (word_count if word_count else 1)
124
+
125
+ # 簡單縮放:把值縮到大約 -1 ~ 1
126
+ transformed = [
127
+ word_count / 100.0,
128
+ unique_word_ratio * 2 - 1,
129
+ repeat_rate * 2 - 1,
130
+ punctuation_ratio * 100,
131
+ avg_word_length / 10.0
132
+ ]
133
+
134
  return [transformed]
135
 
136
  # ---------------- 生成解釋 ----------------
137
  def explain_prediction(text):
138
  # 文字向量化
139
  seq = vectorizer([text])
140
+
 
141
  # 統計特徵
142
  feat = compute_features(text)
143
+
 
144
  # 預測
145
  pred_prob = model.predict([seq, feat], verbose=0)[0][0]
146
  label = "AI 生成" if pred_prob >= 0.5 else "人類撰寫"
 
148
 
149
  # 判斷依據
150
  reasons = []
151
+ if feat[0][0] > 1.0: reasons.append("句子長度偏長")
152
  if feat[0][2] > 0.3: reasons.append("重複率高")
153
+ if feat[0][1] < -0.6: reasons.append("詞彙多樣性低")
154
+ if feat[0][3] < 1: reasons.append("標點符號少")
155
+ if feat[0][4] > 0.6: reasons.append("平均詞長偏長")
156
  if not reasons: reasons.append("句子長度與用詞平均")
157
  explanation = ";".join(reasons)
158