Spaces:

sundaram07
/

AI_Text_Detector

Sleeping

App Files Files Community

AI_Text_Detector / src /streamlit_app.py

sundaram07

Update src/streamlit_app.py

3118f3a verified 4 months ago

raw

history blame contribute delete

3.19 kB

	import streamlit as st
	import tensorflow as tf
	import numpy as np
	import nltk
	import os
	from nltk.tokenize import sent_tokenize
	from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification

	# 📁 Hugging Face cache dir
	# NOTE(review): `transformers` is already imported above this assignment, so
	# the variable may be read too late to change the library default — verify.
	# The from_pretrained() calls in this file pass cache_dir explicitly.
	os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"

	# 📥 Make sure the NLTK "punkt_tab" sentence-tokenizer data is available.
	# "punkt_tab" is the resource name sent_tokenize looks up in NLTK 3.9+
	# (older releases used "punkt") — confirm against the pinned NLTK version.
	nltk_data_path = "/tmp/nltk_data"

	# Register the directory before looking anything up, and only once:
	# Streamlit re-executes this script on every interaction while the nltk
	# module (and its search path list) stays loaded.
	if nltk_data_path not in nltk.data.path:
	    nltk.data.path.append(nltk_data_path)

	# Download only when the resource is missing, so reruns skip the network.
	try:
	    nltk.data.find("tokenizers/punkt_tab")
	except LookupError:
	    nltk.download("punkt_tab", download_dir=nltk_data_path)

	# ✅ Build the tokenizer and classifier once; st.cache_resource keeps the pair
	@st.cache_resource
	def load_model_and_tokenizer():
	    """Return the ``(tokenizer, model)`` pair used for sentence scoring.

	    The tokenizer comes from "distilbert-base-uncased" and the classifier
	    from "sundaram07/distilbert-sentence-classifier". Both are loaded with
	    "/tmp/huggingface" as their cache directory.
	    """
	    hf_cache = "/tmp/huggingface"
	    bert_tokenizer = DistilBertTokenizerFast.from_pretrained(
	        "distilbert-base-uncased", cache_dir=hf_cache
	    )
	    classifier = TFDistilBertForSequenceClassification.from_pretrained(
	        "sundaram07/distilbert-sentence-classifier", cache_dir=hf_cache
	    )
	    return bert_tokenizer, classifier


	# Module-level handles read by the prediction helper below.
	tokenizer, model = load_model_and_tokenizer()

	# 🔮 Score one sentence with the classifier
	def predict_sentence_ai_probability(sentence):
	    """Return the sigmoid of the model's first logit for ``sentence``.

	    The sentence is tokenized into TensorFlow tensors (truncation and
	    padding enabled), passed through the module-level ``model``, and the
	    value at position ``[0][0]`` of the sigmoid-activated logits is returned
	    as a NumPy scalar.

	    NOTE(review): which class this score stands for depends on how the
	    model was trained — confirm before relying on the function name.
	    """
	    encoded = tokenizer(sentence, truncation=True, padding=True, return_tensors="tf")
	    activated = tf.sigmoid(model(encoded).logits)
	    return activated[0][0].numpy()

	# 📊 Score every sentence of a text and summarise the share flagged as AI
	def predict_ai_generated_percentage(text, threshold=0.15):
	    """Return ``(ai_percentage, results)`` for ``text``.

	    The stripped text is split with ``sent_tokenize`` and each sentence is
	    scored by ``predict_sentence_ai_probability``. A sentence is flagged as
	    AI when its score is less than or equal to ``threshold``.

	    ``results`` is a list of ``(sentence, score, is_ai)`` tuples in sentence
	    order; ``ai_percentage`` is the flagged share of sentences times 100.
	    When no sentences are found, ``(0.0, [])`` is returned.

	    NOTE(review): a *low* score marks a sentence as AI here — confirm this
	    matches the label orientation of the model.
	    """
	    sentences = sent_tokenize(text.strip())
	    if not sentences:
	        return 0.0, []

	    results = []
	    for sentence in sentences:
	        score = predict_sentence_ai_probability(sentence)
	        results.append((sentence, score, score <= threshold))

	    flagged = sum(1 for _, _, flagged_as_ai in results if flagged_as_ai)
	    return (flagged / len(sentences)) * 100, results

	# 🖥️ Streamlit UI: page configuration, title and intro text
	st.set_page_config(page_title="AI Detector", layout="wide")
	st.title("🧠 AI Content Detector")
	st.markdown(
	    "This app detects the percentage of AI-generated content using "
	    "sentence-level analysis with a fine-tuned DistilBERT model."
	)

	# 📋 Free-form text box for the content to check
	user_input = st.text_area(
	    "📋 Paste your text below to check for AI-generated sentences:",
	    height=300,
	)

	# 📤 Placeholder that the analysis output is rendered into
	output_container = st.empty()

	# 🔍 Run the analysis when the button is pressed
	if st.button("🔍 Analyze"):
	    if user_input.strip():
	        ai_percentage, analysis_results = predict_ai_generated_percentage(user_input)
	        if analysis_results:
	            with output_container.container():
	                # One numbered line per sentence with its verdict and score.
	                st.subheader("🔍 Sentence-level Analysis")
	                for idx, (sentence, prob, is_ai) in enumerate(analysis_results, start=1):
	                    verdict = "🔴 AI" if is_ai else "🟢 Human"
	                    st.markdown(f"{idx}. _{sentence}_\n\n → {verdict} (prob={prob:.3f})")

	                # Overall share of sentences flagged as AI.
	                st.subheader("📊 Final Result")
	                st.success(f"Estimated AI-generated content: {ai_percentage:.2f}%")
	        else:
	            # The tokenizer produced no sentences for the given text.
	            st.warning("⚠️ Not enough valid sentences to analyze.")
	    else:
	        # Nothing but whitespace was entered.
	        st.warning("⚠️ Please enter some text.")