Spaces:

sundaram07
/

AI_Text_Detector

Sleeping

App Files Files Community

sundaram07 committed on Jun 28, 2025

Commit

f973f9e

verified ·

1 Parent(s): 283cc15

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +17 -13

src/streamlit_app.py CHANGED Viewed

@@ -6,24 +6,27 @@ import os
 from nltk.tokenize import sent_tokenize
 from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification
-# 🧠 Ensure sentence tokenizer works inside Hugging Face (use /tmp/)
 nltk_data_path = "/tmp/nltk_data"
 nltk.download("punkt", download_dir=nltk_data_path)
 nltk.data.path.append(nltk_data_path)
-# 📦 Load tokenizer and model
-tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")
-model = TFDistilBertForSequenceClassification.from_pretrained("sundaram07/distilbert-sentence-classifier")
-# 🧠 Predict probability for one sentence
 def predict_sentence_ai_probability(sentence):
     inputs = tokenizer(sentence, return_tensors="tf", truncation=True, padding=True)
     outputs = model(inputs)
     logits = outputs.logits
-    prob_ai = tf.sigmoid(logits)[0][0].numpy()  # Assuming binary classification (single neuron)
     return prob_ai
-# 📊 Analyze full text
 def predict_ai_generated_percentage(text, threshold=0.75):
     text = text.strip()
     sentences = sent_tokenize(text)
@@ -41,19 +44,20 @@ def predict_ai_generated_percentage(text, threshold=0.75):
     ai_percentage = (ai_sentence_count / total_sentences) * 100 if total_sentences > 0 else 0.0
     return ai_percentage, results
-# 🚀 Streamlit UI
 st.title("🧠 AI Content Detector")
-st.markdown("This tool detects the percentage of **AI-generated content** based on sentence-level analysis.")
-user_input = st.text_area("📋 Paste your text here:", height=300)
 if st.button("🔍 Analyze"):
-    if user_input.strip() == "":
         st.warning("⚠️ Please enter some text to analyze.")
     else:
         ai_percentage, analysis_results = predict_ai_generated_percentage(user_input)
-        st.subheader("🔎 Sentence-level Analysis")
         for i, (sentence, prob, is_ai) in enumerate(analysis_results, start=1):
             label = "🟢 Human" if not is_ai else "🔴 AI"
             st.markdown(f"**{i}.** _{sentence}_\n\n→ **Probability AI:** `{prob:.2%}` → {label}")

 from nltk.tokenize import sent_tokenize
 from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification
+# 📁 Use safe cache directory inside Hugging Face or Docker
+os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
+# 📥 Download NLTK tokenizer
 nltk_data_path = "/tmp/nltk_data"
 nltk.download("punkt", download_dir=nltk_data_path)
 nltk.data.path.append(nltk_data_path)
+# 🔄 Load tokenizer and model from Hugging Face
+tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased", cache_dir="/tmp/huggingface")
+model = TFDistilBertForSequenceClassification.from_pretrained("sundaram07/distilbert-sentence-classifier", cache_dir="/tmp/huggingface")
+# 🔮 Predict AI probability for a sentence
 def predict_sentence_ai_probability(sentence):
     inputs = tokenizer(sentence, return_tensors="tf", truncation=True, padding=True)
     outputs = model(inputs)
     logits = outputs.logits
+    prob_ai = tf.sigmoid(logits)[0][0].numpy()  # for binary classification
     return prob_ai
+# 📊 Analyze all sentences
 def predict_ai_generated_percentage(text, threshold=0.75):
     text = text.strip()
     sentences = sent_tokenize(text)
     ai_percentage = (ai_sentence_count / total_sentences) * 100 if total_sentences > 0 else 0.0
     return ai_percentage, results
+# 🌐 Streamlit Web App
+st.set_page_config(page_title="AI Detector", layout="wide")
 st.title("🧠 AI Content Detector")
+st.markdown("This app detects the percentage of **AI-generated content** based on sentence-level analysis using DistilBERT.")
+user_input = st.text_area("📋 Paste your text below to check for AI-generated sentences:", height=300)
 if st.button("🔍 Analyze"):
+    if not user_input.strip():
         st.warning("⚠️ Please enter some text to analyze.")
     else:
         ai_percentage, analysis_results = predict_ai_generated_percentage(user_input)
+        st.subheader("🔍 Sentence-level Analysis")
         for i, (sentence, prob, is_ai) in enumerate(analysis_results, start=1):
             label = "🟢 Human" if not is_ai else "🔴 AI"
             st.markdown(f"**{i}.** _{sentence}_\n\n→ **Probability AI:** `{prob:.2%}` → {label}")