Spaces:

Hellowish
/

AI_Detect

Sleeping

App Files Files Community

Hellowish committed on Nov 6, 2025

Commit

1c3315f

verified ·

1 Parent(s): 4960be3

Update app.py

Browse files

Files changed (1) hide show

app.py +66 -2

app.py CHANGED Viewed

@@ -7,7 +7,7 @@ import re
 model = joblib.load("ai_detector_model.pkl")  # 確認路徑正確
 # 自訂簡單分句函數
-def simple_sent_tokenize(text):
     # 以句點、問號、驚嘆號拆分，保留句尾符號
     sentences = re.split(r'(?<=[.!?])\s+', text.strip())
     return [s for s in sentences if s]
@@ -89,4 +89,68 @@ demo = gr.Interface(
     description="上傳的模型為 .pkl 格式，根據語言特徵分析並判斷文本來源"
 )
-demo.launch()

 model = joblib.load("ai_detector_model.pkl")  # 確認路徑正確
 # 自訂簡單分句函數
+'''def simple_sent_tokenize(text):
     # 以句點、問號、驚嘆號拆分，保留句尾符號
     sentences = re.split(r'(?<=[.!?])\s+', text.strip())
     return [s for s in sentences if s]
     description="上傳的模型為 .pkl 格式，根據語言特徵分析並判斷文本來源"
 )
+demo.launch()'''
+import gradio as gr
+import tensorflow as tf
+import numpy as np
+import pickle
+# ---------------- Load the model ----------------
+# Module-level artifacts used by explain_prediction(): the Keras model plus
+# the pickled text vectorizer and feature scaler that accompany it.
+model = tf.keras.models.load_model("model")  # your model folder
+# NOTE: pickle.load can execute arbitrary code contained in the file; only
+# load vectorizer.pkl / scaler.pkl from a source you trust.
+with open("vectorizer.pkl", "rb") as f:
+    vectorizer = pickle.load(f)
+with open("scaler.pkl", "rb") as f:
+    scaler = pickle.load(f)
+# ---------------- Feature computation ----------------
+def compute_features(text):
+    """Compute five hand-crafted statistics for ``text``.
+
+    Returns a NumPy array of shape (1, 5) holding, in order: word count,
+    unique-word ratio, repeat rate (1 - unique-word ratio), punctuation
+    ratio, and average word length.
+    """
+    # Words are whitespace-separated tokens.
+    words = text.split()
+    word_count = len(words)
+    # The 1e-6 terms keep both divisions defined when the text is empty.
+    unique_word_ratio = len(set(words)) / (word_count + 1e-6)
+    # NOTE(review): for text with no words unique_word_ratio is 0, so
+    # repeat_rate comes out as 1.0 - confirm this is the intended value.
+    repeat_rate = 1 - unique_word_ratio
+    # Fraction of characters that are one of . , ! ? ; :
+    punctuation_ratio = sum(1 for c in text if c in ".,!?;:") / (len(text) + 1e-6)
+    avg_word_length = np.mean([len(w) for w in words]) if words else 0
+    # Single-row 2-D array so it can be passed straight to scaler.transform().
+    return np.array([word_count, unique_word_ratio, repeat_rate, punctuation_ratio, avg_word_length]).reshape(1, -1)
+# ---------------- 生成解釋 ----------------
+def explain_prediction(text):
+    # 文字向量化
+    seq = vectorizer([text])
+    seq = tf.keras.utils.pad_sequences(seq.numpy(), maxlen=50, padding='pre')
+    # 統計特徵
+    feat = compute_features(text)
+    feat = scaler.transform(feat)
+    # 預測
+    pred_prob = model.predict([seq, feat])[0][0]
+    label = "AI 生成" if pred_prob >= 0.5 else "人類撰寫"
+    prob = pred_prob * 100
+    # 判斷依據
+    reasons = []
+    if feat[0,0] > 100: reasons.append("句子長度偏長")
+    if feat[0,2] > 0.3: reasons.append("重複率高")
+    if feat[0,1] < 0.2: reasons.append("詞彙多樣性低")
+    if feat[0,3] < 0.01: reasons.append("標點符號少")
+    if feat[0,4] > 6: reasons.append("平均詞長偏長")
+    if not reasons: reasons.append("句子長度與用詞平均")
+    explanation = "；".join(reasons)
+    return f"預測結果：{label}\nAI 機率：{prob:.2f}%\n判斷依據：{explanation}"
+# ---------------- Gradio interface ----------------
+# Single text box in, single text box out; explain_prediction() produces the
+# label, probability and reasons shown in the output box.
+iface = gr.Interface(
+    fn=explain_prediction,
+    inputs=gr.Textbox(label="請輸入文章內容", lines=15, max_lines=50, placeholder="在此輸入文章…"),
+    outputs=gr.Textbox(label="預測結果", lines=15, max_lines=30, placeholder="結果會顯示在這裡…"),
+    title="AI vs Human 文本判斷",
+    description="輸入文章，模型會判斷是 AI 或人類撰寫，並給出機率與判斷依據"
+)
+# Start the app when the module is executed.
+iface.launch()