Hellowish committed on
Commit
eeec4a7
·
verified ·
1 Parent(s): 2e5e237

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -43
app.py CHANGED
@@ -100,16 +100,24 @@ import tensorflow as tf
100
  import pickle
101
 
102
  # ---------------- 載入模型 ----------------
103
- model = tf.keras.models.load_model("AIDetect.h5")
104
-
105
- # ---------------- 載入詞表 ----------------
106
- with open("vocab.pkl", "rb") as f:
107
- vocab = pickle.load(f)
108
-
109
- # 使用 Keras TextVectorization 來轉換文字
110
- from tensorflow.keras.layers import TextVectorization
111
- vectorizer = TextVectorization(max_tokens=len(vocab), output_sequence_length=50)
112
- vectorizer.set_vocabulary(vocab)
 
 
 
 
 
 
 
 
113
 
114
  # ---------------- 純 Python 特徵計算 ----------------
115
  def compute_features(text):
@@ -121,42 +129,50 @@ def compute_features(text):
121
  punctuation_count = sum(1 for c in text if c in ".,!?;:")
122
  punctuation_ratio = punctuation_count / (len(text) + 1e-6)
123
  avg_word_length = sum(len(w) for w in words) / (word_count if word_count else 1)
124
-
125
- # 簡單縮放:把值縮到大約 -1 ~ 1
126
- transformed = [
127
- word_count / 100.0,
128
- unique_word_ratio * 2 - 1,
129
- repeat_rate * 2 - 1,
130
- punctuation_ratio * 100,
131
- avg_word_length / 10.0
132
- ]
133
-
134
  return [transformed]
135
 
136
  # ---------------- 生成解釋 ----------------
137
  def explain_prediction(text):
138
- # 文字向量化
139
- seq = vectorizer([text])
140
-
141
- # 統計特徵
142
- feat = compute_features(text)
143
-
144
- # 預測
145
- pred_prob = model.predict([seq, feat], verbose=0)[0][0]
146
- label = "AI 生成" if pred_prob >= 0.5 else "人類撰寫"
147
- prob = pred_prob * 100
148
-
149
- # 判斷依據
150
- reasons = []
151
- if feat[0][0] > 1.0: reasons.append("句子長度偏長")
152
- if feat[0][2] > 0.3: reasons.append("重複率高")
153
- if feat[0][1] < -0.6: reasons.append("詞彙多樣性低")
154
- if feat[0][3] < 1: reasons.append("標點符號少")
155
- if feat[0][4] > 0.6: reasons.append("平均詞長偏長")
156
- if not reasons: reasons.append("句子長度與用詞平均")
157
- explanation = ";".join(reasons)
158
-
159
- return f"預測結果:{label}\nAI 機率:{prob:.2f}%\n判斷依據:{explanation}"
 
 
 
 
 
 
 
 
 
160
 
161
  # ---------------- Gradio 介面 ----------------
162
  iface = gr.Interface(
@@ -167,4 +183,4 @@ iface = gr.Interface(
167
  description="輸入文章,模型會判斷是 AI 或人類撰寫,並給出機率與判斷依據"
168
  )
169
 
170
- iface.launch()
 
100
  import pickle
101
 
102
# ---------------- Model loading ----------------
# Load the trained detector from disk.  On failure, fall back to None so the
# UI handler can report "model not loaded" instead of crashing at import time.
try:
    model = tf.keras.models.load_model("AIDetect.h5")
    print("✅ 模型載入成功")
except Exception as e:
    print("❌ 模型載入失敗:", e)
    model = None

# ---------------- Vocabulary loading ----------------
# Rebuild the TextVectorization layer from the pickled vocabulary list.
# NOTE(review): pickle.load executes arbitrary code from the file — vocab.pkl
# must come from a trusted source.
try:
    with open("vocab.pkl", "rb") as f:
        vocab = pickle.load(f)
    # max_tokens is len(vocab)+1 — presumably to leave room for the
    # padding/OOV slot; confirm against how the vocabulary was exported.
    vectorized_layer = tf.keras.layers.TextVectorization(
        max_tokens=len(vocab) + 1,
        output_sequence_length=50,
    )
    vectorized_layer.set_vocabulary(vocab)
    print("✅ 詞彙載入成功")
except Exception as e:
    print("❌ 詞彙載入失敗:", e)
    vectorized_layer = None
121
 
122
  # ---------------- 純 Python 特徵計算 ----------------
123
  def compute_features(text):
 
129
  punctuation_count = sum(1 for c in text if c in ".,!?;:")
130
  punctuation_ratio = punctuation_count / (len(text) + 1e-6)
131
  avg_word_length = sum(len(w) for w in words) / (word_count if word_count else 1)
132
+ return [[word_count, unique_word_ratio, repeat_rate, punctuation_ratio, avg_word_length]]
133
+
134
# ---------------- Feature scaling (pure Python) ----------------
def transform_features(feat):
    """Scale the raw feature row to roughly [0, 1] without a scaler.pkl.

    Parameters:
        feat: the ``[[word_count, unique_word_ratio, repeat_rate,
              punctuation_ratio, avg_word_length]]`` list produced by
              ``compute_features``.

    Returns:
        A list containing one scaled feature row (same shape as ``feat``).

    FIX: the previous implementation computed ``val / max(val, 1)`` — it
    divided every value by *itself*, clamping anything >= 1 to exactly 1.0
    and leaving values < 1 untouched.  That is a clamp, not the "divide by
    the maximum" normalization the comment promised.  Here each feature is
    divided by a fixed cap instead; the ratio features are already in
    [0, 1], and word_count / avg_word_length reuse the 100 / 10 scale this
    project used in its earlier scaling code.
    NOTE(review): confirm these caps match the scaling applied when the
    model was trained.
    """
    # Caps for: word_count, unique_word_ratio, repeat_rate,
    # punctuation_ratio, avg_word_length.
    caps = (100.0, 1.0, 1.0, 1.0, 10.0)
    transformed = [val / cap for val, cap in zip(feat[0], caps)]
    return [transformed]
142
 
143
# ---------------- 生成解釋 ----------------
def explain_prediction(text):
    """Classify *text* as AI-generated or human-written and explain why.

    Parameters:
        text: the input article as a single string.

    Returns:
        A formatted result string (label, AI probability percentage and
        heuristic reasons), or an error string when the model/vocabulary
        failed to load or prediction raises.
    """
    if model is None or vectorized_layer is None:
        return "❌ 模型或詞彙尚未載入,無法預測"

    try:
        # Text vectorization.  The layer already emits sequences of length
        # 50 (output_sequence_length=50), so this pad is presumably a
        # belt-and-braces no-op — confirm before removing.
        seq = vectorized_layer([text])
        seq = tf.keras.utils.pad_sequences(seq, maxlen=50, padding='pre')

        # Statistical features: the RAW row drives the human-readable
        # explanation; the scaled copy is what the model consumes.
        raw = compute_features(text)
        feat = transform_features(raw)

        # Predict: the model emits a single AI probability in [0, 1].
        pred_prob = model.predict([seq, feat], verbose=0)[0][0]
        label = "AI 生成" if pred_prob >= 0.5 else "人類撰寫"
        prob = pred_prob * 100

        # Heuristic reasons.  FIX: these thresholds are on the RAW feature
        # scales (word count, ratios, characters per word).  The previous
        # code compared them against the *transformed* vector, where
        # transform_features clamps every value to <= 1.0, so the
        # "> 100" and "> 6" branches could never fire.
        word_count, unique_ratio, repeat_rate, punct_ratio, avg_len = raw[0]
        reasons = []
        if word_count > 100:
            reasons.append("句子長度偏長")
        if repeat_rate > 0.3:
            reasons.append("重複率高")
        if unique_ratio < 0.2:
            reasons.append("詞彙多樣性低")
        if punct_ratio < 0.01:
            reasons.append("標點符號少")
        if avg_len > 6:
            reasons.append("平均詞長偏長")
        if not reasons:
            reasons.append("句子長度與用詞平均")
        explanation = ";".join(reasons)

        return f"預測結果:{label}\nAI 機率:{prob:.2f}%\n判斷依據:{explanation}"

    except Exception as e:
        return f"❌ 預測時發生錯誤: {e}"
176
 
177
  # ---------------- Gradio 介面 ----------------
178
  iface = gr.Interface(
 
183
  description="輸入文章,模型會判斷是 AI 或人類撰寫,並給出機率與判斷依據"
184
  )
185
 
186
+ iface.launch()