Hellowish committed on
Commit
16223b1
·
verified ·
1 Parent(s): eeec4a7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -16
app.py CHANGED
@@ -107,6 +107,7 @@ except Exception as e:
107
  print("❌ 模型載入失敗:", e)
108
  model = None
109
 
 
110
  try:
111
  with open("vocab.pkl", "rb") as f:
112
  vocab = pickle.load(f)
@@ -133,11 +134,11 @@ def compute_features(text):
133
 
134
  # ---------------- 純 Python 標準化 ----------------
135
  def transform_features(feat):
136
- # 簡單標準化:除以最大值 (避免使用 scaler.pkl)
 
137
  transformed = []
138
  for i, val in enumerate(feat[0]):
139
- max_val = max(val, 1) # 防止除以0
140
- transformed.append(val / max_val)
141
  return [transformed]
142
 
143
  # ---------------- 生成解釋 ----------------
@@ -146,26 +147,31 @@ def explain_prediction(text):
146
  return "❌ 模型或詞彙尚未載入,無法預測"
147
 
148
  try:
149
- # 文字向量化
 
 
 
 
150
  seq = vectorized_layer([text])
151
  seq = tf.keras.utils.pad_sequences(seq, maxlen=50, padding='pre')
152
 
153
- # 統計特徵
154
- feat = compute_features(text)
155
- feat = transform_features(feat)
156
 
157
- # 預測
158
- pred_prob = model.predict([seq, feat], verbose=0)[0][0]
159
  label = "AI 生成" if pred_prob >= 0.5 else "人類撰寫"
160
  prob = pred_prob * 100
161
 
162
- # 判斷依據
163
  reasons = []
164
- if feat[0][0] > 100: reasons.append("句子長度偏長")
165
- if feat[0][2] > 0.3: reasons.append("重複率高")
166
- if feat[0][1] < 0.2: reasons.append("詞彙多樣性低")
167
- if feat[0][3] < 0.01: reasons.append("標點符號少")
168
- if feat[0][4] > 6: reasons.append("平均詞長偏長")
 
169
  if not reasons: reasons.append("句子長度與用詞平均")
170
  explanation = ";".join(reasons)
171
 
@@ -183,4 +189,4 @@ iface = gr.Interface(
183
  description="輸入文章,模型會判斷是 AI 或人類撰寫,並給出機率與判斷依據"
184
  )
185
 
186
- iface.launch()
 
107
  print("❌ 模型載入失敗:", e)
108
  model = None
109
 
110
+ # ---------------- 載入詞彙 ----------------
111
  try:
112
  with open("vocab.pkl", "rb") as f:
113
  vocab = pickle.load(f)
 
134
 
135
  # ---------------- 純 Python 標準化 ----------------
136
  def transform_features(feat):
137
+ # 假設最大值
138
+ max_values = [500, 1.0, 1.0, 0.5, 10] # word_count, unique_word_ratio, repeat_rate, punctuation_ratio, avg_word_length
139
  transformed = []
140
  for i, val in enumerate(feat[0]):
141
+ transformed.append(val / max_values[i])
 
142
  return [transformed]
143
 
144
  # ---------------- 生成解釋 ----------------
 
147
  return "❌ 模型或詞彙尚未載入,無法預測"
148
 
149
  try:
150
+ # ---------------- 特徵計算 ----------------
151
+ feat_raw = compute_features(text)
152
+ feat = transform_features(feat_raw)
153
+
154
+ # ---------------- 文字向量化 ----------------
155
  seq = vectorized_layer([text])
156
  seq = tf.keras.utils.pad_sequences(seq, maxlen=50, padding='pre')
157
 
158
+ # TensorFlow tensor
159
+ seq = tf.convert_to_tensor(seq)
160
+ feat = tf.convert_to_tensor(feat, dtype=tf.float32)
161
 
162
+ # ---------------- 預測 ----------------
163
+ pred_prob = model([seq, feat], training=False).numpy()[0][0]
164
  label = "AI 生成" if pred_prob >= 0.5 else "人類撰寫"
165
  prob = pred_prob * 100
166
 
167
+ # ---------------- 判斷依據 ----------------
168
  reasons = []
169
+ # 用原始特徵判斷
170
+ if feat_raw[0][0] > 100: reasons.append("句子長度偏長")
171
+ if feat_raw[0][2] > 0.3: reasons.append("重複率高")
172
+ if feat_raw[0][1] < 0.2: reasons.append("詞彙多樣性低")
173
+ if feat_raw[0][3] < 0.01: reasons.append("標點符號少")
174
+ if feat_raw[0][4] > 6: reasons.append("平均詞長偏長")
175
  if not reasons: reasons.append("句子長度與用詞平均")
176
  explanation = ";".join(reasons)
177
 
 
189
  description="輸入文章,模型會判斷是 AI 或人類撰寫,並給出機率與判斷依據"
190
  )
191
 
192
+ iface.launch()