Hellowish committed on
Commit
eeec4a7
·
verified ·
1 Parent(s): 2e5e237

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -43
app.py CHANGED
@@ -100,16 +100,24 @@ import tensorflow as tf
100
  import pickle
101
 
102
  # ---------------- 載入模型 ----------------
103
- model = tf.keras.models.load_model("AIDetect.h5")
104
-
105
- # ---------------- 載入詞表 ----------------
106
- with open("vocab.pkl", "rb") as f:
107
- vocab = pickle.load(f)
108
-
109
- # 使用 Keras TextVectorization 來轉換文字
110
- from tensorflow.keras.layers import TextVectorization
111
- vectorizer = TextVectorization(max_tokens=len(vocab), output_sequence_length=50)
112
- vectorizer.set_vocabulary(vocab)
 
 
 
 
 
 
 
 
113
 
114
  # ---------------- 純 Python 特徵計算 ----------------
115
  def compute_features(text):
@@ -121,42 +129,50 @@ def compute_features(text):
121
  punctuation_count = sum(1 for c in text if c in ".,!?;:")
122
  punctuation_ratio = punctuation_count / (len(text) + 1e-6)
123
  avg_word_length = sum(len(w) for w in words) / (word_count if word_count else 1)
124
-
125
- # 簡單縮放:把值縮到大約 -1 ~ 1
126
- transformed = [
127
- word_count / 100.0,
128
- unique_word_ratio * 2 - 1,
129
- repeat_rate * 2 - 1,
130
- punctuation_ratio * 100,
131
- avg_word_length / 10.0
132
- ]
133
-
134
  return [transformed]
135
 
136
  # ---------------- 生成解釋 ----------------
137
  def explain_prediction(text):
138
- # 文字向量化
139
- seq = vectorizer([text])
140
-
141
- # 統計特徵
142
- feat = compute_features(text)
143
-
144
- # 預測
145
- pred_prob = model.predict([seq, feat], verbose=0)[0][0]
146
- label = "AI 生成" if pred_prob >= 0.5 else "人類撰寫"
147
- prob = pred_prob * 100
148
-
149
- # 判斷依據
150
- reasons = []
151
- if feat[0][0] > 1.0: reasons.append("句子長度偏長")
152
- if feat[0][2] > 0.3: reasons.append("重複率高")
153
- if feat[0][1] < -0.6: reasons.append("詞彙多樣性低")
154
- if feat[0][3] < 1: reasons.append("標點符號少")
155
- if feat[0][4] > 0.6: reasons.append("平均詞長偏長")
156
- if not reasons: reasons.append("句子長度與用詞平均")
157
- explanation = ";".join(reasons)
158
-
159
- return f"預測結果:{label}\nAI 機率:{prob:.2f}%\n判斷依據:{explanation}"
 
 
 
 
 
 
 
 
 
160
 
161
  # ---------------- Gradio 介面 ----------------
162
  iface = gr.Interface(
@@ -167,4 +183,4 @@ iface = gr.Interface(
167
  description="輸入文章,模型會判斷是 AI 或人類撰寫,並給出機率與判斷依據"
168
  )
169
 
170
- iface.launch()
 
100
  import pickle
101
 
102
# ---------------- Model loading ----------------
# Load the trained detector from disk.  On failure, fall back to None so the
# UI handler can report "model not loaded" instead of crashing at import time.
try:
    model = tf.keras.models.load_model("AIDetect.h5")
    print("✅ 模型載入成功")
except Exception as e:
    print("❌ 模型載入失敗:", e)
    model = None

# ---------------- Vocabulary loading ----------------
# Rebuild the TextVectorization layer from the pickled vocabulary list.
# NOTE(review): pickle.load executes arbitrary code from the file — vocab.pkl
# must come from a trusted source.
try:
    with open("vocab.pkl", "rb") as f:
        vocab = pickle.load(f)
    # max_tokens is len(vocab)+1 — presumably to leave room for the
    # padding/OOV slot; confirm against how the vocabulary was exported.
    vectorized_layer = tf.keras.layers.TextVectorization(
        max_tokens=len(vocab) + 1,
        output_sequence_length=50,
    )
    vectorized_layer.set_vocabulary(vocab)
    print("✅ 詞彙載入成功")
except Exception as e:
    print("❌ 詞彙載入失敗:", e)
    vectorized_layer = None
121
 
122
  # ---------------- 純 Python 特徵計算 ----------------
123
  def compute_features(text):
 
129
  punctuation_count = sum(1 for c in text if c in ".,!?;:")
130
  punctuation_ratio = punctuation_count / (len(text) + 1e-6)
131
  avg_word_length = sum(len(w) for w in words) / (word_count if word_count else 1)
132
+ return [[word_count, unique_word_ratio, repeat_rate, punctuation_ratio, avg_word_length]]
133
+
134
# ---------------- Feature scaling (pure Python) ----------------
def transform_features(feat):
    """Scale the raw feature row to roughly [0, 1] without a scaler.pkl.

    Parameters:
        feat: the ``[[word_count, unique_word_ratio, repeat_rate,
              punctuation_ratio, avg_word_length]]`` list produced by
              ``compute_features``.

    Returns:
        A list containing one scaled feature row (same shape as ``feat``).

    FIX: the previous implementation computed ``val / max(val, 1)`` — it
    divided every value by *itself*, clamping anything >= 1 to exactly 1.0
    and leaving values < 1 untouched.  That is a clamp, not the "divide by
    the maximum" normalization the comment promised.  Here each feature is
    divided by a fixed cap instead; the ratio features are already in
    [0, 1], and word_count / avg_word_length reuse the 100 / 10 scale this
    project used in its earlier scaling code.
    NOTE(review): confirm these caps match the scaling applied when the
    model was trained.
    """
    # Caps for: word_count, unique_word_ratio, repeat_rate,
    # punctuation_ratio, avg_word_length.
    caps = (100.0, 1.0, 1.0, 1.0, 10.0)
    transformed = [val / cap for val, cap in zip(feat[0], caps)]
    return [transformed]
142
 
143
# ---------------- 生成解釋 ----------------
def explain_prediction(text):
    """Classify *text* as AI-generated or human-written and explain why.

    Parameters:
        text: the input article as a single string.

    Returns:
        A formatted result string (label, AI probability percentage and
        heuristic reasons), or an error string when the model/vocabulary
        failed to load or prediction raises.
    """
    if model is None or vectorized_layer is None:
        return "❌ 模型或詞彙尚未載入,無法預測"

    try:
        # Text vectorization.  The layer already emits sequences of length
        # 50 (output_sequence_length=50), so this pad is presumably a
        # belt-and-braces no-op — confirm before removing.
        seq = vectorized_layer([text])
        seq = tf.keras.utils.pad_sequences(seq, maxlen=50, padding='pre')

        # Statistical features: the RAW row drives the human-readable
        # explanation; the scaled copy is what the model consumes.
        raw = compute_features(text)
        feat = transform_features(raw)

        # Predict: the model emits a single AI probability in [0, 1].
        pred_prob = model.predict([seq, feat], verbose=0)[0][0]
        label = "AI 生成" if pred_prob >= 0.5 else "人類撰寫"
        prob = pred_prob * 100

        # Heuristic reasons.  FIX: these thresholds are on the RAW feature
        # scales (word count, ratios, characters per word).  The previous
        # code compared them against the *transformed* vector, where
        # transform_features clamps every value to <= 1.0, so the
        # "> 100" and "> 6" branches could never fire.
        word_count, unique_ratio, repeat_rate, punct_ratio, avg_len = raw[0]
        reasons = []
        if word_count > 100:
            reasons.append("句子長度偏長")
        if repeat_rate > 0.3:
            reasons.append("重複率高")
        if unique_ratio < 0.2:
            reasons.append("詞彙多樣性低")
        if punct_ratio < 0.01:
            reasons.append("標點符號少")
        if avg_len > 6:
            reasons.append("平均詞長偏長")
        if not reasons:
            reasons.append("句子長度與用詞平均")
        explanation = ";".join(reasons)

        return f"預測結果:{label}\nAI 機率:{prob:.2f}%\n判斷依據:{explanation}"

    except Exception as e:
        return f"❌ 預測時發生錯誤: {e}"
176
 
177
  # ---------------- Gradio 介面 ----------------
178
  iface = gr.Interface(
 
183
  description="輸入文章,模型會判斷是 AI 或人類撰寫,並給出機率與判斷依據"
184
  )
185
 
186
+ iface.launch()