Update app.py
Browse files
app.py
CHANGED
|
@@ -4,6 +4,7 @@ import numpy as np
|
|
| 4 |
import re
|
| 5 |
import nltk
|
| 6 |
import os
|
|
|
|
| 7 |
|
| 8 |
# Keep NLTK data under the user's home directory so the download location is writable.
|
| 9 |
nltk_data_dir = os.path.join(os.path.expanduser("~"), "nltk_data")
|
|
@@ -19,9 +20,7 @@ except LookupError:
|
|
| 19 |
nltk.download("punkt", download_dir=nltk_data_dir, quiet=True)
|
| 20 |
|
| 21 |
# Load the trained .pkl model (joblib unpickles the file, so only load a model file you trust).
|
| 22 |
-
model = joblib.load("ai_detector_model.pkl") # 請確認檔名正確
|
| 23 |
-
|
| 24 |
-
from nltk.tokenize import sent_tokenize
|
| 25 |
|
| 26 |
def extract_features(text):
|
| 27 |
# Split the text into sentences with sent_tokenize.
|
|
@@ -40,7 +39,6 @@ def extract_features(text):
|
|
| 40 |
unique_words = set(words_clean)
|
| 41 |
features['vocabulary_richness'] = len(unique_words) / max(len(words_clean), 1)
|
| 42 |
|
| 43 |
-
# 使用 word_tokenize 也明確指定
|
| 44 |
sentence_lengths = [len(word_tokenize(s)) for s in sentences]
|
| 45 |
features['sentence_length_variance'] = np.var(sentence_lengths) if sentence_lengths else 0
|
| 46 |
|
|
@@ -94,7 +92,8 @@ def predict(text):
|
|
| 94 |
demo = gr.Interface(
|
| 95 |
fn=predict,
|
| 96 |
inputs=gr.Textbox(label="請輸入文章內容", lines=15, max_lines=50, placeholder="在此輸入文章…"),
|
| 97 |
-
outputs=gr.Textbox(label="預測結果", lines=15, max_lines=30, placeholder="結果會顯示在這裡…"),
|
|
|
|
| 98 |
description="上傳的模型為 .pkl 格式,根據語言特徵分析並判斷文本來源"
|
| 99 |
)
|
| 100 |
|
|
|
|
| 4 |
import re
|
| 5 |
import nltk
|
| 6 |
import os
|
| 7 |
+
from nltk.tokenize import sent_tokenize, word_tokenize
|
| 8 |
|
| 9 |
# Keep NLTK data under the user's home directory so the download location is writable.
|
| 10 |
nltk_data_dir = os.path.join(os.path.expanduser("~"), "nltk_data")
|
|
|
|
| 20 |
nltk.download("punkt", download_dir=nltk_data_dir, quiet=True)
|
| 21 |
|
| 22 |
# Load the trained .pkl model (joblib unpickles the file, so only load a model file you trust).
|
| 23 |
+
model = joblib.load("ai_detector_model.pkl") # 請確認檔名正確
|
|
|
|
|
|
|
| 24 |
|
| 25 |
def extract_features(text):
|
| 26 |
# Split the text into sentences with sent_tokenize.
|
|
|
|
| 39 |
unique_words = set(words_clean)
|
| 40 |
features['vocabulary_richness'] = len(unique_words) / max(len(words_clean), 1)
|
| 41 |
|
|
|
|
| 42 |
sentence_lengths = [len(word_tokenize(s)) for s in sentences]
|
| 43 |
features['sentence_length_variance'] = np.var(sentence_lengths) if sentence_lengths else 0
|
| 44 |
|
|
|
|
| 92 |
demo = gr.Interface(
|
| 93 |
fn=predict,
|
| 94 |
inputs=gr.Textbox(label="請輸入文章內容", lines=15, max_lines=50, placeholder="在此輸入文章…"),
|
| 95 |
+
outputs=gr.Textbox(label="預測結果", lines=15, max_lines=30, placeholder="結果會顯示在這裡…"),
|
| 96 |
+
title="AI / Human 判斷器",
|
| 97 |
description="上傳的模型為 .pkl 格式,根據語言特徵分析並判斷文本來源"
|
| 98 |
)
|
| 99 |
|