# Streamlit app: AI vs Human text detector (Hugging Face Spaces)
import streamlit as st
import joblib
import torch
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
import numpy as np

# -------------------------------
# Load Logistic Regression model
# -------------------------------
# Vectorizer + trained classifier, both serialized with joblib.
# NOTE(review): paths are relative to the working directory — presumably the
# app is launched from the repo root; verify in deployment config.
vectorizer = joblib.load('src/vectorizer.pkl')
model = joblib.load('src/logistic_model.pkl')
# -------------------------------
# Load GPT-2 model and tokenizer
# -------------------------------
@st.cache_resource
def load_gpt2():
    """Load the local GPT-2 tokenizer and model, cached across Streamlit reruns.

    Streamlit re-executes this whole script on every user interaction; without
    `st.cache_resource` the (large) GPT-2 weights would be re-read from disk on
    each rerun. The model is switched to eval mode since it is only used for
    scoring, never training.

    Returns:
        (tokenizer, gpt2_model): the tokenizer and the model in eval mode.
    """
    tokenizer = GPT2TokenizerFast.from_pretrained("src/gpt2_local")
    gpt2_model = GPT2LMHeadModel.from_pretrained("src/gpt2_local")
    gpt2_model.eval()
    return tokenizer, gpt2_model

tokenizer, gpt2_model = load_gpt2()
# -------------------------------
# Calculate Perplexity
# -------------------------------
def calculate_perplexity(text):
    """Return the GPT-2 perplexity of `text` (lower = more predictable).

    Input is truncated to 512 tokens. With `labels=input_ids` the model
    computes the causal LM loss internally; perplexity is exp(loss).

    Args:
        text: the candidate sentence/paragraph to score.

    Returns:
        Perplexity as a float; `float("inf")` when the text tokenizes to
        fewer than two tokens (no prediction target exists, so the loss
        would otherwise come back as NaN and poison downstream scoring).
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    # Guard: the LM loss shifts labels by one position, so at least two
    # tokens are required to have any target at all.
    if inputs["input_ids"].size(1) < 2:
        return float("inf")
    with torch.no_grad():
        outputs = gpt2_model(**inputs, labels=inputs["input_ids"])
    return torch.exp(outputs.loss).item()
# -------------------------------
# Final prediction combining both
# -------------------------------
def final_score(ai_prob, perplexity, low=10, high=300, perp_weight=0.7, model_weight=0.3):
    """Combine the logistic-regression probability with a perplexity signal.

    The perplexity is mapped linearly onto an "AI-likeness" score in [0, 1]:
    values at or below `low` score 1.0 (highly predictable => AI-like),
    values above `high` score 0.0 (unpredictable => human-like).

    Args:
        ai_prob: probability from the logistic model that the text is AI (0-1).
        perplexity: GPT-2 perplexity of the text (lower = more predictable).
        low: perplexity below which the text counts as fully AI-like.
        high: perplexity above which the text counts as fully human-like.
        perp_weight: weight of the perplexity signal in the combination.
        model_weight: weight of the logistic-model probability.

    Returns:
        (final_ai_score, perp_score): the weighted combined AI score and the
        normalized perplexity score.
    """
    if perplexity > high:
        perp_score = 0.0
    elif perplexity < low:
        perp_score = 1.0
    else:
        # Linear rescale of [low, high] onto [1, 0].
        perp_score = 1.0 - ((perplexity - low) / (high - low))
    # Defensive clamp — a no-op for the branches above, kept for safety
    # against pathological (e.g. NaN) inputs.
    perp_score = max(0.0, min(1.0, perp_score))
    final_ai_score = (perp_weight * perp_score) + (model_weight * ai_prob)
    return final_ai_score, perp_score
# -------------------------------
# Streamlit UI
# -------------------------------
st.set_page_config(page_title="AI Text Detector", page_icon="🤖", layout="centered")

# Page header, rendered as raw HTML for centred styling.
st.markdown("""
<h2 style='text-align: center; color: #4CAF50;'>🤖 AI vs Human Text Detector</h2>
<p style='text-align: center;'>Enter a sentence to check if it was written by a human or generated by AI.</p>
""", unsafe_allow_html=True)

user_input = st.text_area("Enter your sentence here:", height=150)

if st.button("Check"):
    if not user_input.strip():
        st.warning("Please enter a sentence before submitting.")
    else:
        # Probability estimate from the vectorizer + logistic-regression model.
        features = vectorizer.transform([user_input])
        proba = model.predict_proba(features)
        human_prob, ai_prob = proba[0][0], proba[0][1]

        # GPT-2 predictability estimate.
        perplexity_score = calculate_perplexity(user_input)

        # Weighted combination of both signals.
        final_ai_score, perp_score = final_score(ai_prob, perplexity_score)

        # Verdict and supporting numbers.
        st.subheader("🔍 Result:")
        if final_ai_score > 0.5:
            st.error("❗ This text is likely **AI-generated**.")
        else:
            st.success("✅ This text is likely **Human-written**.")
        st.markdown(f"**Logistic Model Confidence:** {ai_prob:.3f} AI vs {human_prob:.3f} Human")
        st.markdown(f"**Perplexity Score:** {perplexity_score:.2f}")
        st.markdown(f"**Combined AI Score:** {final_ai_score:.3f} (Weighted)")

        # Interpretation hints; perplexity in (30, 100] deliberately gets no note.
        if perplexity_score < 30:
            st.info("🧠 Low perplexity suggests the text is highly predictable—possibly AI-generated.")
        elif perplexity_score > 100:
            st.info("🧠 High perplexity suggests human-like variation or complexity.")