Hellowish committed on
Commit
2e5e237
·
verified ·
1 Parent(s): bd45044

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -22
app.py CHANGED
@@ -101,10 +101,15 @@ import pickle
101
 
102
  # ---------------- 載入模型 ----------------
103
  model = tf.keras.models.load_model("AIDetect.h5")
104
- with open("vectorizer.pkl", "rb") as f:
105
- vectorizer = pickle.load(f)
106
- with open("scaler.pkl", "rb") as f:
107
- scaler = pickle.load(f)
 
 
 
 
 
108
 
109
  # ---------------- 純 Python 特徵計算 ----------------
110
  def compute_features(text):
@@ -116,28 +121,26 @@ def compute_features(text):
116
  punctuation_count = sum(1 for c in text if c in ".,!?;:")
117
  punctuation_ratio = punctuation_count / (len(text) + 1e-6)
118
  avg_word_length = sum(len(w) for w in words) / (word_count if word_count else 1)
119
- return [[word_count, unique_word_ratio, repeat_rate, punctuation_ratio, avg_word_length]]
120
-
121
- # ---------------- 純 Python 標準化 ----------------
122
- def transform_features(feat):
123
- # scaler 是舊的 scikit-learn StandardScaler,裡面有 mean_ 和 scale_
124
- mean = scaler.mean_
125
- scale = scaler.scale_
126
- transformed = []
127
- for i, val in enumerate(feat[0]):
128
- transformed.append((val - mean[i]) / scale[i])
129
  return [transformed]
130
 
131
  # ---------------- 生成解釋 ----------------
132
  def explain_prediction(text):
133
  # 文字向量化
134
  seq = vectorizer([text])
135
- seq = tf.keras.utils.pad_sequences(seq, maxlen=50, padding='pre')
136
-
137
  # 統計特徵
138
  feat = compute_features(text)
139
- feat = transform_features(feat)
140
-
141
  # 預測
142
  pred_prob = model.predict([seq, feat], verbose=0)[0][0]
143
  label = "AI 生成" if pred_prob >= 0.5 else "人類撰寫"
@@ -145,11 +148,11 @@ def explain_prediction(text):
145
 
146
  # 判斷依據
147
  reasons = []
148
- if feat[0][0] > 100: reasons.append("句子長度偏長")
149
  if feat[0][2] > 0.3: reasons.append("重複率高")
150
- if feat[0][1] < 0.2: reasons.append("詞彙多樣性低")
151
- if feat[0][3] < 0.01: reasons.append("標點符號少")
152
- if feat[0][4] > 6: reasons.append("平均詞長偏長")
153
  if not reasons: reasons.append("句子長度與用詞平均")
154
  explanation = ";".join(reasons)
155
 
 
101
 
102
  # ---------------- 載入模型 ----------------
103
  model = tf.keras.models.load_model("AIDetect.h5")
104
+
105
+ # ---------------- 載入詞表 ----------------
106
+ with open("vocab.pkl", "rb") as f:
107
+ vocab = pickle.load(f)
108
+
109
+ # 使用 Keras TextVectorization 來轉換文字
110
+ from tensorflow.keras.layers import TextVectorization
111
+ vectorizer = TextVectorization(max_tokens=len(vocab), output_sequence_length=50)
112
+ vectorizer.set_vocabulary(vocab)
113
 
114
  # ---------------- 純 Python 特徵計算 ----------------
115
  def compute_features(text):
 
121
  punctuation_count = sum(1 for c in text if c in ".,!?;:")
122
  punctuation_ratio = punctuation_count / (len(text) + 1e-6)
123
  avg_word_length = sum(len(w) for w in words) / (word_count if word_count else 1)
124
+
125
+ # 簡單縮放:把值縮到大約 -1 ~ 1
126
+ transformed = [
127
+ word_count / 100.0,
128
+ unique_word_ratio * 2 - 1,
129
+ repeat_rate * 2 - 1,
130
+ punctuation_ratio * 100,
131
+ avg_word_length / 10.0
132
+ ]
133
+
134
  return [transformed]
135
 
136
  # ---------------- 生成解釋 ----------------
137
  def explain_prediction(text):
138
  # 文字向量化
139
  seq = vectorizer([text])
140
+
 
141
  # 統計特徵
142
  feat = compute_features(text)
143
+
 
144
  # 預測
145
  pred_prob = model.predict([seq, feat], verbose=0)[0][0]
146
  label = "AI 生成" if pred_prob >= 0.5 else "人類撰寫"
 
148
 
149
  # 判斷依據
150
  reasons = []
151
+ if feat[0][0] > 1.0: reasons.append("句子長度偏長")
152
  if feat[0][2] > 0.3: reasons.append("重複率高")
153
+ if feat[0][1] < -0.6: reasons.append("詞彙多樣性低")
154
+ if feat[0][3] < 1: reasons.append("標點符號少")
155
+ if feat[0][4] > 0.6: reasons.append("平均詞長偏長")
156
  if not reasons: reasons.append("句子長度與用詞平均")
157
  explanation = ";".join(reasons)
158