Spaces:

Hellowish
/

AI_Detect

Sleeping

App Files Files Community

Hellowish committed on Nov 6, 2025

Commit

16223b1

verified ·

1 Parent(s): eeec4a7

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -16

app.py CHANGED Viewed

@@ -107,6 +107,7 @@ except Exception as e:
     print("❌ 模型載入失敗:", e)
     model = None
 try:
     with open("vocab.pkl", "rb") as f:
         vocab = pickle.load(f)
@@ -133,11 +134,11 @@ def compute_features(text):
 # ---------------- 純 Python 標準化 ----------------
 def transform_features(feat):
-    # 簡單標準化：除以最大值 (避免使用 scaler.pkl)
     transformed = []
     for i, val in enumerate(feat[0]):
-        max_val = max(val, 1)  # 防止除以0
-        transformed.append(val / max_val)
     return [transformed]
 # ---------------- 生成解釋 ----------------
@@ -146,26 +147,31 @@ def explain_prediction(text):
         return "❌ 模型或詞彙尚未載入，無法預測"
     try:
-        # 文字向量化
         seq = vectorized_layer([text])
         seq = tf.keras.utils.pad_sequences(seq, maxlen=50, padding='pre')
-        # 統計特徵
-        feat = compute_features(text)
-        feat = transform_features(feat)
-        # 預測
-        pred_prob = model.predict([seq, feat], verbose=0)[0][0]
         label = "AI 生成" if pred_prob >= 0.5 else "人類撰寫"
         prob = pred_prob * 100
-        # 判斷依據
         reasons = []
-        if feat[0][0] > 100: reasons.append("句子長度偏長")
-        if feat[0][2] > 0.3: reasons.append("重複率高")
-        if feat[0][1] < 0.2: reasons.append("詞彙多樣性低")
-        if feat[0][3] < 0.01: reasons.append("標點符號少")
-        if feat[0][4] > 6: reasons.append("平均詞長偏長")
         if not reasons: reasons.append("句子長度與用詞平均")
         explanation = "；".join(reasons)
@@ -183,4 +189,4 @@ iface = gr.Interface(
     description="輸入文章，模型會判斷是 AI 或人類撰寫，並給出機率與判斷依據"
 )
-iface.launch()

     print("❌ 模型載入失敗:", e)
     model = None
+# ---------------- 載入詞彙 ----------------
 try:
     with open("vocab.pkl", "rb") as f:
         vocab = pickle.load(f)
 # ---------------- 純 Python 標準化 ----------------
 def transform_features(feat):
+    # 假設最大值：
+    max_values = [500, 1.0, 1.0, 0.5, 10]  # word_count, unique_word_ratio, repeat_rate, punctuation_ratio, avg_word_length
     transformed = []
     for i, val in enumerate(feat[0]):
+        transformed.append(val / max_values[i])
     return [transformed]
 # ---------------- 生成解釋 ----------------
         return "❌ 模型或詞彙尚未載入，無法預測"
     try:
+        # ---------------- 特徵計算 ----------------
+        feat_raw = compute_features(text)
+        feat = transform_features(feat_raw)
+        # ---------------- 文字向量化 ----------------
         seq = vectorized_layer([text])
         seq = tf.keras.utils.pad_sequences(seq, maxlen=50, padding='pre')
+        # TensorFlow tensor
+        seq = tf.convert_to_tensor(seq)
+        feat = tf.convert_to_tensor(feat, dtype=tf.float32)
+        # ---------------- 預測 ----------------
+        pred_prob = model([seq, feat], training=False).numpy()[0][0]
         label = "AI 生成" if pred_prob >= 0.5 else "人類撰寫"
         prob = pred_prob * 100
+        # ---------------- 判斷依據 ----------------
         reasons = []
+        # 用原始特徵判斷
+        if feat_raw[0][0] > 100: reasons.append("句子長度偏長")
+        if feat_raw[0][2] > 0.3: reasons.append("重複率高")
+        if feat_raw[0][1] < 0.2: reasons.append("詞彙多樣性低")
+        if feat_raw[0][3] < 0.01: reasons.append("標點符號少")
+        if feat_raw[0][4] > 6: reasons.append("平均詞長偏長")
         if not reasons: reasons.append("句子長度與用詞平均")
         explanation = "；".join(reasons)
     description="輸入文章，模型會判斷是 AI 或人類撰寫，並給出機率與判斷依據"
 )
+iface.launch()