import gradio as gr
import joblib
import numpy as np
import re

# Load the trained scikit-learn classifier (.pkl).  A load failure is reported
# at startup instead of crashing the whole app at import time; predict() will
# then surface a per-request error message.
try:
    model = joblib.load("ai_detector_model.pkl")  # make sure the path is correct
except Exception as e:
    print(f"Model load failed: {e}")
    model = None


def simple_sent_tokenize(text):
    """Split *text* into sentences on '.', '!' or '?' followed by whitespace."""
    sentences = re.split(r'(?<=[.!?])\s+', text.strip())
    return [s for s in sentences if s]


def simple_word_tokenize(text):
    """Lowercase *text* and return its alphanumeric word tokens."""
    return re.findall(r'\b\w+\b', text.lower())


def extract_features(text):
    """Compute stylometric features for *text*.

    Returns:
        tuple: ``(X, features)`` where ``X`` is a ``(1, n_features)`` numpy
        array whose column order is the dict insertion order below (it must
        match the order used at training time), and ``features`` is the
        underlying dict, used by predict() to explain the decision.
    """
    sentences = simple_sent_tokenize(text)
    words_clean = [w for w in simple_word_tokenize(text) if w.isalpha()]

    features = {}
    # Basic size statistics.
    features['text_length'] = len(text)
    features['word_count'] = len(words_clean)
    features['sentence_count'] = len(sentences)
    # max(..., 1) guards every ratio below against division by zero on
    # empty/degenerate input.
    features['avg_sentence_length'] = len(words_clean) / max(len(sentences), 1)
    features['avg_word_length'] = (
        np.mean([len(w) for w in words_clean]) if words_clean else 0
    )

    unique_words = set(words_clean)
    features['vocabulary_richness'] = len(unique_words) / max(len(words_clean), 1)

    sentence_lengths = [len(simple_word_tokenize(s)) for s in sentences]
    features['sentence_length_variance'] = (
        np.var(sentence_lengths) if sentence_lengths else 0
    )

    # Punctuation densities, normalized per 1000 characters.
    features['comma_density'] = text.count(',') / max(len(text), 1) * 1000
    features['period_density'] = text.count('.') / max(len(text), 1) * 1000
    features['exclamation_density'] = text.count('!') / max(len(text), 1) * 1000
    features['question_density'] = text.count('?') / max(len(text), 1) * 1000

    complex_words = [w for w in words_clean if len(w) > 6]
    features['complex_word_ratio'] = len(complex_words) / max(len(words_clean), 1)

    # Marker words: discourse connectives typical of AI text vs. subjective
    # hedges typical of human text.  These are substring counts, so e.g.
    # 'really' also matches inside longer words — kept as-is to preserve
    # training-time behavior.
    ai_markers = ['furthermore', 'moreover', 'additionally', 'consequently',
                  'therefore', 'thus', 'hence']
    human_markers = ['i think', 'i believe', 'personally', 'maybe',
                     'probably', 'actually', 'really']
    text_lower = text.lower()
    features['ai_marker_count'] = sum(text_lower.count(m) for m in ai_markers)
    features['human_marker_count'] = sum(text_lower.count(m) for m in human_markers)

    return np.array(list(features.values())).reshape(1, -1), features
def predict(text):
    """Classify *text* as AI-generated or human-written.

    Returns a human-readable result string with the predicted label, the AI
    probability, and the feature-based reasons behind the decision; any
    failure is returned as an error message rather than raised.
    """
    if not text.strip():
        return "請輸入文字內容。"
    try:
        X, feats = extract_features(text)
        if hasattr(model, "predict_proba"):
            prob = model.predict_proba(X)[0][1]  # P(class 1) == P(AI)
        else:
            # Fallback: the hard 0/1 label stands in for the probability.
            prob = model.predict(X)[0]
        label = "AI 生成" if prob > 0.5 else "人類撰寫"

        # Build a short, human-readable justification from the raw features.
        reason = []
        if feats['vocabulary_richness'] < 0.3:
            reason.append("詞彙多樣性較低")
        if feats['sentence_length_variance'] < 10:
            reason.append("句子長度平均,像 AI")
        if feats['ai_marker_count'] > feats['human_marker_count']:
            reason.append("包含常見 AI 連接詞")
        if feats['human_marker_count'] > feats['ai_marker_count']:
            reason.append("包含主觀語氣詞")
        if not reason:
            reason.append("整體語言特徵與模型預測一致")
        return f"預測結果:{label}\nAI 機率:{prob:.2%}\n判斷依據:{', '.join(reason)}"
    except Exception as e:
        return f"預測時出現錯誤: {str(e)}"


# Gradio interface
demo = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(label="請輸入文章內容", lines=15, max_lines=50,
                      placeholder="在此輸入文章…"),
    outputs=gr.Textbox(label="預測結果", lines=15, max_lines=30,
                       placeholder="結果會顯示在這裡…"),
    title="AI / Human 判斷器",
    description="上傳的模型為 .pkl 格式,根據語言特徵分析並判斷文本來源",
)

# Launch only when executed as a script, so importing this module (e.g. from
# tests or another app) does not start a server as a side effect.
if __name__ == "__main__":
    demo.launch()