Hellowish committed on
Commit
1c3315f
·
verified ·
1 Parent(s): 4960be3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -2
app.py CHANGED
@@ -7,7 +7,7 @@ import re
7
  model = joblib.load("ai_detector_model.pkl") # 確認路徑正確
8
 
9
  # 自訂簡單分句函數
10
- def simple_sent_tokenize(text):
11
  # 以句點、問號、驚嘆號拆分,保留句尾符號
12
  sentences = re.split(r'(?<=[.!?])\s+', text.strip())
13
  return [s for s in sentences if s]
@@ -89,4 +89,68 @@ demo = gr.Interface(
89
  description="上傳的模型為 .pkl 格式,根據語言特徵分析並判斷文本來源"
90
  )
91
 
92
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  model = joblib.load("ai_detector_model.pkl") # 確認路徑正確
8
 
9
  # 自訂簡單分句函數
10
+ '''def simple_sent_tokenize(text):
11
  # 以句點、問號、驚嘆號拆分,保留句尾符號
12
  sentences = re.split(r'(?<=[.!?])\s+', text.strip())
13
  return [s for s in sentences if s]
 
89
  description="上傳的模型為 .pkl 格式,根據語言特徵分析並判斷文本來源"
90
  )
91
 
92
+ demo.launch()'''
93
+
94
+
95
+
96
+
97
+
98
+ import gradio as gr
99
+ import tensorflow as tf
100
+ import numpy as np
101
+ import pickle
102
+
103
+ # ---------------- 載入模型 ----------------
104
# Load the Keras model from the local "model" directory.
model = tf.keras.models.load_model("model")

# Restore the text vectorizer and the feature scaler from their pickle files.
# NOTE(review): pickle.load can execute arbitrary code -- only ship .pkl
# files that come from a trusted source.
with open("vectorizer.pkl", "rb") as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)

with open("scaler.pkl", "rb") as scaler_file:
    scaler = pickle.load(scaler_file)
109
+
110
+ # ---------------- 特徵計算 ----------------
111
def compute_features(text):
    """Return the five hand-crafted statistics of *text* as a (1, 5) array.

    Column order:
        0. number of whitespace-separated words
        1. share of distinct words among all words
        2. repeat rate, defined as ``1 - distinct share``
        3. share of characters that are one of ``. , ! ? ; :``
        4. mean word length (0 when the text contains no words)

    A small epsilon (1e-6) is added to both denominators so that empty
    input does not raise ``ZeroDivisionError``.
    """
    tokens = text.split()
    n_tokens = len(tokens)

    distinct_share = len(set(tokens)) / (n_tokens + 1e-6)
    repeated_share = 1 - distinct_share

    # The marks are distinct characters, so summing per-mark counts equals
    # counting every character of the text that belongs to the set.
    punct_hits = sum(text.count(mark) for mark in ".,!?;:")
    punct_share = punct_hits / (len(text) + 1e-6)

    if tokens:
        mean_token_len = np.mean([len(tok) for tok in tokens])
    else:
        mean_token_len = 0

    row = [n_tokens, distinct_share, repeated_share, punct_share, mean_token_len]
    return np.array(row).reshape(1, -1)
119
+
120
+ # ---------------- 生成解釋 ----------------
121
def explain_prediction(text):
    """Classify *text* and return a human-readable report.

    The text is vectorized and padded to 50 tokens, its statistical
    features are computed and scaled, and both are passed to the model.
    The first output value is treated as the probability that the text is
    AI-generated (>= 0.5 is labelled as AI).

    Args:
        text: The article text entered by the user.

    Returns:
        A three-line string with the predicted label, the AI probability
        as a percentage, and the heuristic reasons derived from the text
        statistics.
    """
    # Text branch: vectorize, then left-pad/truncate to 50 tokens.
    seq = vectorizer([text])
    seq = tf.keras.utils.pad_sequences(seq.numpy(), maxlen=50, padding='pre')

    # Statistical branch: keep the raw features for the heuristics below and
    # give only the scaled copy to the model. The thresholds (100 words,
    # 0.3 repeat rate, ...) are expressed in raw units, so comparing them
    # against scaler output would make the explanation meaningless.
    raw_feat = compute_features(text)
    scaled_feat = scaler.transform(raw_feat)

    # Prediction
    pred_prob = model.predict([seq, scaled_feat])[0][0]
    label = "AI 生成" if pred_prob >= 0.5 else "人類撰寫"
    prob = pred_prob * 100

    # Heuristic explanation, evaluated on the unscaled statistics.
    # Column order matches the array built by compute_features.
    word_count, unique_ratio, repeat_rate, punct_ratio, avg_word_len = raw_feat[0]
    reasons = []
    if word_count > 100:
        reasons.append("句子長度偏長")
    if repeat_rate > 0.3:
        reasons.append("重複率高")
    if unique_ratio < 0.2:
        reasons.append("詞彙多樣性低")
    if punct_ratio < 0.01:
        reasons.append("標點符號少")
    if avg_word_len > 6:
        reasons.append("平均詞長偏長")
    if not reasons:
        reasons.append("句子長度與用詞平均")
    explanation = ";".join(reasons)

    return f"預測結果:{label}\nAI 機率:{prob:.2f}%\n判斷依據:{explanation}"
146
+
147
+ # ---------------- Gradio 介面 ----------------
148
# Multi-line box where the user pastes the article to classify.
article_input = gr.Textbox(
    label="請輸入文章內容",
    lines=15,
    max_lines=50,
    placeholder="在此輸入文章…",
)

# Multi-line box that shows the report returned by explain_prediction.
result_output = gr.Textbox(
    label="預測結果",
    lines=15,
    max_lines=30,
    placeholder="結果會顯示在這裡…",
)

# Wire the prediction function to the two text boxes.
iface = gr.Interface(
    fn=explain_prediction,
    inputs=article_input,
    outputs=result_output,
    title="AI vs Human 文本判斷",
    description="輸入文章,模型會判斷是 AI 或人類撰寫,並給出機率與判斷依據",
)

# Start the web app.
iface.launch()