Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +9 -6
src/streamlit_app.py
CHANGED
|
@@ -4,25 +4,28 @@ import numpy as np
|
|
| 4 |
import nltk
|
| 5 |
import os
|
| 6 |
from nltk.tokenize import sent_tokenize
|
|
|
|
| 7 |
|
| 8 |
# 🧠 Ensure sentence tokenizer works inside Hugging Face (use /tmp/)
|
| 9 |
nltk_data_path = "/tmp/nltk_data"
|
| 10 |
nltk.download("punkt", download_dir=nltk_data_path)
|
| 11 |
nltk.data.path.append(nltk_data_path)
|
| 12 |
|
| 13 |
-
# 📦 Load model
|
| 14 |
-
|
|
|
|
| 15 |
|
| 16 |
# 🧠 Predict probability for one sentence
|
| 17 |
def predict_sentence_ai_probability(sentence):
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
|
|
|
| 21 |
return prob_ai
|
| 22 |
|
| 23 |
# 📊 Analyze full text
|
| 24 |
def predict_ai_generated_percentage(text, threshold=0.75):
|
| 25 |
-
text = text.strip()
|
| 26 |
sentences = sent_tokenize(text)
|
| 27 |
ai_sentence_count = 0
|
| 28 |
results = []
|
|
|
|
| 4 |
import nltk
|
| 5 |
import os
|
| 6 |
from nltk.tokenize import sent_tokenize
|
| 7 |
+
from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification
|
| 8 |
|
| 9 |
# 🧠 Ensure sentence tokenizer works inside Hugging Face (use /tmp/)
|
| 10 |
nltk_data_path = "/tmp/nltk_data"
|
| 11 |
nltk.download("punkt", download_dir=nltk_data_path)
|
| 12 |
nltk.data.path.append(nltk_data_path)
|
| 13 |
|
| 14 |
+
# 📦 Load tokenizer and model
|
| 15 |
+
tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")
|
| 16 |
+
model = TFDistilBertForSequenceClassification.from_pretrained("sundaram07/distilbert-sentence-classifier")
|
| 17 |
|
| 18 |
# 🧠 Predict probability for one sentence
|
| 19 |
def predict_sentence_ai_probability(sentence):
|
| 20 |
+
inputs = tokenizer(sentence, return_tensors="tf", truncation=True, padding=True)
|
| 21 |
+
outputs = model(inputs)
|
| 22 |
+
logits = outputs.logits
|
| 23 |
+
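prob_ai = tf.sigmoid(logits)[0][0].numpy() # Assumes the classification head emits a single logit (num_labels=1), so sigmoid of logits[0][0] is the AI probability. TODO confirm against the model config: with a two-logit head this would read only the first class's logit through a sigmoid rather than a softmax probability.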
|
| 24 |
return prob_ai
|
| 25 |
|
| 26 |
# 📊 Analyze full text
|
| 27 |
def predict_ai_generated_percentage(text, threshold=0.75):
|
| 28 |
+
text = text.strip()
|
| 29 |
sentences = sent_tokenize(text)
|
| 30 |
ai_sentence_count = 0
|
| 31 |
results = []
|