import streamlit as st
import joblib
import torch
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
import numpy as np

# -------------------------------
# Load Logistic Regression model
# -------------------------------
vectorizer = joblib.load('src/vectorizer.pkl')
model = joblib.load('src/logistic_model.pkl')


# -------------------------------
# Load GPT-2 model and tokenizer
# -------------------------------
@st.cache_resource
def load_gpt2():
    """Load the local GPT-2 tokenizer and model once; cached across reruns.

    Returns:
        tuple: (tokenizer, model) with the model on CPU and in eval mode.
    """
    tokenizer = GPT2TokenizerFast.from_pretrained("src/gpt2_local")
    gpt2_model = GPT2LMHeadModel.from_pretrained(
        "src/gpt2_local",
        trust_remote_code=True,
        local_files_only=True,  # Use only local files in HF Spaces
    )
    gpt2_model.to(torch.device("cpu"))  # Use "cuda" if on GPU space
    gpt2_model.eval()  # disable dropout for deterministic scoring
    return tokenizer, gpt2_model


tokenizer, gpt2_model = load_gpt2()


# -------------------------------
# Calculate Perplexity
# -------------------------------
def calculate_perplexity(text):
    """Return the GPT-2 perplexity of `text` (lower = more predictable).

    Input is truncated to 512 tokens; perplexity is exp of the language-model
    cross-entropy loss with the input used as its own labels.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():  # inference only — no gradient bookkeeping
        outputs = gpt2_model(**inputs, labels=inputs["input_ids"])
    return torch.exp(outputs.loss).item()


# -------------------------------
# Combine Scores
# -------------------------------
def final_score(ai_prob, perplexity):
    """Blend the classifier probability with a normalized perplexity signal.

    Perplexity is mapped linearly from [10, 300] onto [1.0, 0.0] and clamped,
    so low perplexity (highly predictable text) pushes the score toward "AI".

    Args:
        ai_prob: P(AI) from the logistic-regression model, in [0, 1].
        perplexity: GPT-2 perplexity of the text.

    Returns:
        tuple: (final_ai_score, perp_score), both in [0, 1].
    """
    if perplexity > 300:
        perp_score = 0.0
    elif perplexity < 10:
        perp_score = 1.0
    else:
        perp_score = 1.0 - ((perplexity - 10) / (300 - 10))
    perp_score = max(0.0, min(1.0, perp_score))
    # Perplexity is weighted more heavily than the classifier (0.7 vs 0.3).
    final_ai_score = (0.7 * perp_score) + (0.3 * ai_prob)
    return final_ai_score, perp_score


# -------------------------------
# Streamlit UI
# -------------------------------
st.set_page_config(page_title="AI Text Detector", page_icon="🤖", layout="wide")

# Page header (reconstructed from the mangled markdown block — confirm styling).
st.markdown(
    """
    <h1 style="text-align: center;">🤖 AI vs Human Text Detector</h1>
    <p style="text-align: center;">Enter a sentence to check if it was written by a human or generated by AI.</p>
    """,
    unsafe_allow_html=True,
)

st.markdown("<br>", unsafe_allow_html=True)  # Spacer

user_input = st.text_area("Enter your sentence here:", height=150)

if st.button("Check"):
    if user_input.strip() == "":
        st.warning("Please enter a sentence before submitting.")
    else:
        # Logistic Regression Prediction
        transformed_input = vectorizer.transform([user_input])
        prediction = model.predict_proba(transformed_input)
        ai_prob = prediction[0][1]
        human_prob = prediction[0][0]

        # Perplexity Score
        perplexity_score = calculate_perplexity(user_input)

        # Combined Score
        final_ai_score, perp_score = final_score(ai_prob, perplexity_score)

        # Result Display — previously commented out, which left ai_prob and
        # final_score computed but unused and gave the user no verdict.
        st.subheader("🔍 Result:")
        if final_ai_score > 0.5:
            st.error("❗ This text is likely **AI-generated**.")
        else:
            st.success("✅ This text is likely **Human-written**.")

        st.markdown(f"**Logistic Model Confidence:** {ai_prob:.3f} AI vs {human_prob:.3f} Human")
        st.markdown(f"**Perplexity Score:** {perplexity_score:.2f}")
        st.markdown(f"**Combined AI Score:** {final_ai_score:.3f} (Weighted)")

        # Interpretation — the original showed nothing for 30 <= perplexity <= 100.
        if perplexity_score < 30:
            st.error("🧠 Low perplexity suggests the text is highly predictable—possibly AI-generated.")
        elif perplexity_score > 100:
            st.success("🧠 High perplexity suggests human-like variation or complexity.")
        else:
            st.info("🧠 Perplexity is in a mid range — the signal is inconclusive on its own.")