# NOTE: the following header was pasted in from the Hugging Face web UI and is
# not Python; kept as a comment so the file parses.
# zay12121 — "Update app.py" — revision 3cf1d98 (verified)
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM
import torch
import numpy as np
# --- Streamlit Page Configuration ---
# Wide layout and the browser-tab title for the app.
st.set_page_config(layout="wide", page_title="Free AI Detector")
# --- Custom CSS for Enhanced Aesthetics ---
# Injected as raw HTML (hence unsafe_allow_html=True).
# NOTE(review): the ".st-emotion-cache-*" selectors are auto-generated by
# Streamlit's build and change between Streamlit versions, so these style
# hooks may silently stop matching after an upgrade — verify after bumps.
st.markdown(
"""
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600;700&display=swap');
html, body, [class*="st-emotion"] {
font-family: 'Inter', sans-serif;
color: #333333;
}
.stApp {
background: linear-gradient(to right, #ffebee, #e1bee7);
}
.st-emotion-cache-1cypcdb {
padding: 2rem;
border-radius: 1rem;
background-color: rgba(255, 255, 255, 0.9);
box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
}
.st-emotion-cache-1dp5vir {
background: linear-gradient(to bottom, #d8bfd8, #e6e6fa);
border-radius: 1.5rem;
box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
padding: 1.5rem;
}
.st-emotion-cache-1dp5vir .st-emotion-cache-vk330y {
color: #4a004a;
}
.stTextArea textarea {
border-radius: 0.75rem;
border: 1px solid #e0b0ff;
padding: 1rem;
box-shadow: inset 0 1px 3px 0 rgba(0, 0, 0, 0.05);
color: #333333;
}
.stButton > button {
background-color: #8e24aa;
color: white;
border-radius: 0.75rem;
padding: 0.75rem 1.5rem;
font-weight: 600;
transition: all 0.2s ease-in-out;
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -2px rgba(0, 0, 0, 0.06);
}
.stButton > button:hover {
background-color: #6a1b9a;
transform: translateY(-2px);
box-shadow: 0 6px 10px -2px rgba(0, 0, 0, 0.15), 0 4px 6px -3px rgba(0, 0, 0, 0.08);
}
.stSelectbox > div > div {
border-radius: 0.75rem;
border: 1px solid #e0b0ff;
box-shadow: inset 0 1px 3px 0 rgba(0, 0, 0, 0.05);
color: #333333;
}
.stSelectbox > div > div > div > div {
color: #333333;
}
.stMetric {
background-color: #f3e5f5;
border-radius: 0.75rem;
padding: 1rem;
margin-bottom: 1rem;
box-shadow: 0 2px 4px -1px rgba(0, 0, 0, 0.05), 0 1px 2px -1px rgba(0, 0, 0, 0.03);
}
.stMetric label {
color: #4a004a;
font-weight: 600;
}
.stMetric .css-10trblm {
color: #8e24aa;
font-size: 2.25rem;
font-weight: 700;
}
.stAlert {
border-radius: 0.75rem;
padding: 1rem 1.5rem;
margin-top: 1rem;
font-weight: 500;
}
.stAlert.st-emotion-cache-1f062a7 {
background-color: #ffebee !important;
border-left: 5px solid #d32f2f !important;
color: #d32f2f !important;
}
.stAlert.st-emotion-cache-1f062a7.st-emotion-cache-1f062a7 {
background-color: #fff8e1 !important;
border-left: 5px solid #ffa000 !important;
color: #ffa000 !important;
}
.stAlert.st-emotion-cache-1f062a7.st-emotion-cache-1f062a7.st-emotion-cache-1f062a7 {
background-color: #e8f5e9 !important;
border-left: 5px solid #388e3c !important;
color: #388e3c !important;
}
.stAlert.st-emotion-cache-1f062a7.st-emotion-cache-1f062a7.st-emotion-cache-1f062a7.st-emotion-cache-1f062a7 {
background-color: #e3f2fd !important;
border-left: 5px solid #1976d2 !important;
color: #1976d2 !important;
}
h1, h2, h3, h4, h5, h6 {
color: #4a004a;
font-weight: 700;
}
h1 {
font-size: 2.5rem;
margin-bottom: 1rem;
}
h2 {
font-size: 2rem;
margin-top: 1.5rem;
margin-bottom: 1rem;
}
h3 {
font-size: 1.5rem;
margin-top: 1.25rem;
margin-bottom: 0.75rem;
}
.st-emotion-cache-10trblm p {
line-height: 1.7;
font-size: 1.05rem;
color: #333333;
}
.st-emotion-cache-10trblm li {
line-height: 1.6;
font-size: 1.05rem;
color: #333333;
}
.st-emotion-cache-1f062a7.st-emotion-cache-1f062a7.st-emotion-cache-1f062a7.st-emotion-cache-1f062a7.st-emotion-cache-1f062a7 {
color: #1976d2 !important;
}
.st-emotion-cache-1r6y40 {
border-radius: 0.5rem;
overflow: hidden;
margin-top: 0.5rem;
margin-bottom: 1rem;
height: 1.5rem;
background-color: #e0e0e0;
}
.st-emotion-cache-1r6y40 > div > div {
background-color: #8e24aa;
border-radius: 0.5rem;
}
</style>
""",
unsafe_allow_html=True
)
# --- Page Title and Intro Blurb ---
st.title("Free AI Detector (Public Models)")
st.markdown("Analyze text to determine the likelihood of it being AI-generated or human-written using publicly available models.")
@st.cache_resource
def load_model_and_tokenizer(model_name, is_causal_lm=False):
    """Fetch a public Hugging Face tokenizer/model pair and move it to GPU/CPU.

    Args:
        model_name: Hub identifier of a public checkpoint (no auth token used).
        is_causal_lm: When True, load an AutoModelForCausalLM (for perplexity
            scoring); otherwise an AutoModelForSequenceClassification.

    Returns:
        A (tokenizer, model, device) triple on success, or (None, None, None)
        on failure — errors are surfaced in the UI rather than raised.
    """
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        if not is_causal_lm:
            model = AutoModelForSequenceClassification.from_pretrained(model_name)
        else:
            model = AutoModelForCausalLM.from_pretrained(model_name)
            if tokenizer.pad_token is None:
                # Causal LMs such as GPT-2 ship without a pad token; reuse the
                # EOS token so the perplexity path can tokenize with padding.
                tokenizer.pad_token = tokenizer.eos_token
                st.info(f"Set pad_token to eos_token for {model_name} tokenizer.")
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        st.success(f"Successfully loaded model: {model_name} on {device}")
        return tokenizer, model, device
    except Exception as e:
        st.error(f"Error loading model '{model_name}'. Please check the model name, ensure it's public, and verify your internet connection. Error: {e}")
        return None, None, None
# --- Classification-Based AI Detector Models ---
# Using stable, widely-used public models.
# Each entry is a (Hugging Face Hub id, human-readable UI label) pair.
CLASSIFICATION_DETECTOR_MODELS = [
    ("roberta-base-openai-detector", "OpenAI AI Detector (RoBERTa-base)"),
    ("Hello-SimpleAI/chatgpt-detector-roberta", "ChatGPT Detector (RoBERTa-base)"),
]
# --- Perplexity Models (Causal Language Models) ---
# Small causal LMs used only to score text predictability (perplexity).
PERPLEXITY_MODELS = [
    ("gpt2", "GPT-2 (for Perplexity)"),
    ("distilgpt2", "DistilGPT-2 (for Perplexity) - Faster"),
    ("EleutherAI/gpt-neo-125M", "GPT-Neo-125M (for Perplexity)"),
]
# Load classification models with a spinner for user feedback
CLASSIFICATION_MODELS_LOADED = {}  # UI label -> (tokenizer, model, device)
with st.spinner("Loading classification AI detection models... This may take a moment."):
    for name, label in CLASSIFICATION_DETECTOR_MODELS:
        tokenizer, model, device = load_model_and_tokenizer(name)
        # Skip failed loads; load_model_and_tokenizer already showed the error.
        if tokenizer and model:
            CLASSIFICATION_MODELS_LOADED[label] = (tokenizer, model, device)
# Load perplexity models with a spinner for user feedback
PERPLEXITY_MODELS_LOADED = {}  # UI label -> (tokenizer, model, device)
with st.spinner("Loading perplexity models..."):
    for name, label in PERPLEXITY_MODELS:
        tokenizer, model, device = load_model_and_tokenizer(name, is_causal_lm=True)
        if tokenizer and model:
            PERPLEXITY_MODELS_LOADED[label] = (tokenizer, model, device)
# Check if any models loaded successfully
# (halts the whole script when neither category produced a usable model)
if not CLASSIFICATION_MODELS_LOADED and not PERPLEXITY_MODELS_LOADED:
    st.error("No AI detection or perplexity models could be loaded. Please check your internet connection or the model names.")
    st.stop()
# --- UI for Model Selection (Sidebar) ---
st.sidebar.header("Model Settings")
# The earlier sanity check only stops the app when BOTH model categories are
# empty. If exactly one category failed to load, the selectbox below would be
# built from an empty list, return None, and the dict lookup would raise a
# KeyError. Guard each category individually instead. (Also removes the dead
# `default_classification_index` / no-op `if ...: pass` scaffolding.)
if not CLASSIFICATION_MODELS_LOADED:
    st.error("No classification AI detector models could be loaded. Please check your internet connection or the model names.")
    st.stop()
if not PERPLEXITY_MODELS_LOADED:
    st.error("No perplexity models could be loaded. Please check your internet connection or the model names.")
    st.stop()
model_choice = st.sidebar.selectbox(
    "Choose a Classification-Based AI Detector:",
    list(CLASSIFICATION_MODELS_LOADED.keys()),
    index=0  # pre-select the first successfully loaded detector
)
tokenizer_detector, model_detector, device_detector = CLASSIFICATION_MODELS_LOADED[model_choice]
perplexity_model_choice = st.sidebar.selectbox(
    "Choose a Perplexity Model (Optional, for additional insight):",
    list(PERPLEXITY_MODELS_LOADED.keys()))
tokenizer_perplexity, model_perplexity, device_perplexity = PERPLEXITY_MODELS_LOADED[perplexity_model_choice]
# --- Text Input ---
st.header("1. Enter Text to Analyze")
# text is "" until the user types; downstream code checks text.strip().
text = st.text_area(
    "Paste your text here:",
    height=300,
    placeholder="Type or paste text here to detect if it's AI-generated or human-written."
)
# Debugging: Show current text input
# NOTE(review): these two st.info lines are developer-facing diagnostics shown
# to every user — consider removing them for production.
st.info(f"Current text input length: {len(text)} characters.")
if text.strip():
    st.info(f"First 50 characters of input: '{text.strip()[:50]}...'")
# --- Perplexity Calculation Function ---
def calculate_perplexity(text, tokenizer, model, device):
    """Return the perplexity of *text* under a causal language model.

    Lower perplexity means the model finds the text more predictable, which
    is often (though not always) a trait of AI-generated text. Returns None
    for blank input or when scoring fails (failure is reported in the UI).
    """
    if not text.strip():
        return None
    try:
        encoded = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=tokenizer.model_max_length,
            padding=True,
        )
        encoded = {key: tensor.to(device) for key, tensor in encoded.items()}
        with torch.no_grad():
            # Feeding the input ids back as labels makes the model report its
            # own average next-token cross-entropy loss over the sequence.
            result = model(**encoded, labels=encoded["input_ids"])
        return torch.exp(result.loss).item()
    except Exception as e:
        st.warning(f"Could not calculate perplexity with {tokenizer.name_or_path}: {e}")
        return None
# --- Detection Logic ---
if st.button("Detect AI"):
    st.write("Button clicked! Starting analysis...")
    if not text.strip():
        st.warning("Please enter some text to analyze.")
    else:
        st.subheader("Detection Results:")
        with st.spinner("Analyzing text..."):
            # --- Classification-Based Detection ---
            # Runs EVERY loaded classifier, not just the sidebar selection;
            # the sidebar choice is not consulted here.
            st.markdown("### 🔍 Classification Model Analysis")
            all_ai_probs = []  # To store probabilities for ensembling
            # Run detection for all loaded classification models
            for label, (current_tokenizer, current_model, current_device) in CLASSIFICATION_MODELS_LOADED.items():
                st.markdown(f"#### Results from: {label}")
                try:
                    # Warn the user when the text exceeds the model's window;
                    # the tokenizer call below truncates to fit.
                    if len(current_tokenizer.encode(text)) > current_tokenizer.model_max_length:
                        st.info(f"Note: Your text was truncated to {current_tokenizer.model_max_length} tokens for analysis by {label}.")
                    inputs_detector = current_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
                    inputs_detector = {k: v.to(current_device) for k, v in inputs_detector.items()}
                    with torch.no_grad():
                        outputs_detector = current_model(**inputs_detector)
                    logits_detector = outputs_detector.logits
                    # Softmax over the two classes; batch size is 1, take row 0.
                    probs_detector = torch.softmax(logits_detector, dim=1).cpu().numpy()[0]
                    # NOTE(review): assumes class index 1 = "AI" and 0 = "human"
                    # for every detector — verify against each model's id2label
                    # mapping, as label order differs between checkpoints.
                    ai_prob_classifier = float(probs_detector[1])
                    human_prob_classifier = float(probs_detector[0])
                    all_ai_probs.append(ai_prob_classifier)
                    col1, col2 = st.columns(2)
                    with col1:
                        st.metric(label="AI Probability", value=f"{ai_prob_classifier:.2%}")
                        st.progress(ai_prob_classifier)
                    with col2:
                        st.metric(label="Human Probability", value=f"{human_prob_classifier:.2%}")
                        st.progress(human_prob_classifier)
                    # Provide direct interpretation for each model
                    # (thresholds 0.8 / 0.5 are heuristic cutoffs)
                    if ai_prob_classifier > 0.8:
                        st.error(f"🚨 This text is **highly likely** to be AI-generated by {label}.", icon="🤖")
                    elif ai_prob_classifier > 0.5:
                        st.warning(f"⚠️ This text is **likely** AI-generated by {label}, but could have human elements or be a mix.", icon="🤖")
                    else:
                        st.success(f"✅ This text is **likely human-written** by {label}.", icon="👤")
                except Exception as e:
                    # Keep going: one failing model should not abort the others.
                    st.error(f"Failed to run classification detection with '{label}': {e}")
                    st.exception(e)
                st.markdown("---")  # Separator for individual model results
            # --- Ensemble Prediction ---
            # Simple unweighted mean of the per-model AI probabilities.
            if all_ai_probs:
                ensemble_ai_prob = np.mean(all_ai_probs)
                ensemble_human_prob = 1.0 - ensemble_ai_prob
                st.markdown("### 📊 Overall Ensemble Prediction")
                col1, col2 = st.columns(2)
                with col1:
                    st.metric(label="Ensemble AI Probability (Average)", value=f"{ensemble_ai_prob:.2%}")
                    st.progress(ensemble_ai_prob)
                with col2:
                    st.metric(label="Ensemble Human Probability (Average)", value=f"{ensemble_human_prob:.2%}")
                    st.progress(ensemble_human_prob)
                # More nuanced interpretation for the ensemble
                # (five bands: >0.9, >0.7, >0.5, >0.3, else)
                if ensemble_ai_prob > 0.9:
                    st.error("🚨 **Very High Confidence: Highly Likely AI-generated.**", icon="🤖")
                    st.markdown("*(This text shows very strong patterns consistent with AI generation across multiple models. It is highly improbable to be human-written.)*")
                elif ensemble_ai_prob > 0.7:
                    st.warning("🚨 **High Confidence: Likely AI-generated.**", icon="🤖")
                    st.markdown("*(This text exhibits strong characteristics of AI generation. While not definitive, the evidence points towards AI authorship.)*")
                elif ensemble_ai_prob > 0.5:
                    st.warning("⚠️ **Moderate Confidence: Possibly AI-generated or Mixed Content.**", icon="🤖")
                    st.markdown("*(This text has some AI-like patterns, but also human elements. It could be AI-generated, heavily edited human text, or a mix. Human review is recommended.)*")
                elif ensemble_ai_prob > 0.3:
                    st.info("💡 **Moderate Confidence: Likely Human-written with some AI-like traits.**", icon="👤")
                    st.markdown("*(This text is generally consistent with human writing, but contains minor patterns that might be seen in AI-generated content. This could be due to common phrases or stylistic conventions.)*")
                else:
                    st.success("✅ **High Confidence: Likely Human-written.**", icon="👤")
                    st.markdown("*(This text's patterns are highly consistent with human authorship. It is very unlikely to be AI-generated.)*")
                st.markdown(
"""
**Understanding Confidence:**
* **High Confidence:** The ensemble models largely agree on the classification.
* **Moderate Confidence:** The ensemble models show some disagreement, or the text exhibits characteristics of both human and AI writing.
"""
                )
            else:
                st.warning("No classification models could provide a prediction for ensembling.")
            st.divider()
            # --- Perplexity-Based Detection ---
            # Uses only the perplexity model chosen in the sidebar.
            st.markdown("### 📊 Perplexity Analysis (Lower is more AI-like)")
            perplexity = calculate_perplexity(text, tokenizer_perplexity, model_perplexity, device_perplexity)
            if perplexity is not None:
                st.metric(label=f"Perplexity Score ({perplexity_model_choice})", value=f"{perplexity:.2f}")
                st.markdown(
"""
**Perplexity Interpretation:**
* Perplexity measures how "surprised" a language model is by a sequence of words.
* **Lower perplexity** suggests the text is more predictable to the model, which is often a characteristic of AI-generated content (as AI models tend to produce highly probable sequences).
* **Higher perplexity** indicates more unexpected word choices, typical of diverse human writing.
* *Note:* There's no fixed universal threshold for perplexity. It's a relative indicator that depends on the model used and the type of text.
"""
                )
            else:
                st.info("Could not calculate perplexity for the given text or model. This can happen with very short texts or certain model limitations.")
st.markdown("---")
st.markdown("### How this detector works:")
st.markdown(
"""
This tool employs two main methods for AI content detection, using models available on Hugging Face:
1. **Classification-based Models (Ensembled):** We utilize multiple fine-tuned models (like `roberta-base-openai-detector` and `Hello-SimpleAI/chatgpt-detector-roberta`) trained on datasets of both human-written and AI-generated text. Their individual predictions are then averaged to provide a more robust "Ensemble AI Probability." This helps to reduce false positives and negatives by leveraging the collective intelligence of several models.
2. **Perplexity-based Analysis:** This method uses a causal language model (like GPT-2) to measure the "surprise" factor of the text. AI-generated text often has lower perplexity because large language models tend to produce highly probable, less "surprising" word sequences. Human text, with its inherent creativity and variability, often results in higher perplexity.
**Important Considerations:**
* **No detector is 100% accurate.** AI generation and detection is an ongoing "arms race."
* **Short texts** are generally harder to classify reliably.
* **Newer AI models** may produce text that is harder for existing detectors to identify.
* Results from different models might vary. It's best to use them as indicators, not definitive proof.
""")
st.markdown("---")
st.markdown("Made with ❀️ using Streamlit and Hugging Face Transformers.")