# AI vs Human Text Detector — Streamlit app (Hugging Face Space)
import streamlit as st
import joblib
import torch
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
import numpy as np

# -------------------------------
# Load the classical detector: the TF-IDF vectorizer and the
# logistic-regression classifier, deserialized from the pickled
# artifacts shipped under src/.
# -------------------------------
vectorizer, model = (
    joblib.load(artifact_path)
    for artifact_path in ('src/vectorizer.pkl', 'src/logistic_model.pkl')
)
# -------------------------------
# Load GPT-2 model and tokenizer from the local snapshot
# -------------------------------
def load_gpt2():
    """Load the local GPT-2 checkpoint and its tokenizer for scoring.

    Returns:
        tuple: ``(tokenizer, model)`` where the model has been moved to
        CPU and switched to eval mode, i.e. inference-only.
    """
    local_dir = "src/gpt2_local"
    gpt2_tokenizer = GPT2TokenizerFast.from_pretrained(local_dir)
    language_model = GPT2LMHeadModel.from_pretrained(
        local_dir,
        trust_remote_code=True,
        local_files_only=True  # Use only local files in HF Spaces
    )
    language_model.to(torch.device("cpu"))  # Use "cuda" if on GPU space
    language_model.eval()
    return gpt2_tokenizer, language_model

tokenizer, gpt2_model = load_gpt2()
# -------------------------------
# Calculate Perplexity
# -------------------------------
def calculate_perplexity(text):
    """Return exp(cross-entropy loss) of *text* under the local GPT-2.

    The text is truncated to 512 tokens. Lower perplexity means the
    language model finds the text more predictable.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    # Feeding input_ids as labels makes the model report its own
    # next-token cross-entropy loss; no gradients are needed.
    with torch.no_grad():
        model_output = gpt2_model(**encoded, labels=encoded["input_ids"])
    return torch.exp(model_output.loss).item()
# -------------------------------
# Combine Scores
# -------------------------------
def final_score(ai_prob, perplexity, low=10.0, high=300.0,
                perp_weight=0.7, ai_weight=0.3):
    """Blend the classifier probability with a perplexity-based score.

    Perplexity is mapped linearly onto [0, 1]: values below ``low`` map
    to 1.0 (very predictable, AI-like), values above ``high`` map to
    0.0 (human-like), and values in between are interpolated. The final
    score is a weighted sum of the two signals. The previously
    hard-coded band (10, 300) and weights (0.7, 0.3) are now keyword
    parameters whose defaults preserve the original behavior.

    Args:
        ai_prob: probability of the "AI" class from the logistic model,
            expected in [0, 1].
        perplexity: GPT-2 perplexity of the text (positive float).
        low: perplexity at or below which perp_score saturates to 1.0.
        high: perplexity at or above which perp_score saturates to 0.0.
        perp_weight: weight of the perplexity signal in the blend.
        ai_weight: weight of the classifier signal in the blend.

    Returns:
        tuple: ``(final_ai_score, perp_score)``; both lie in [0, 1]
        when ``ai_prob`` is in [0, 1] and the weights sum to 1.
    """
    if perplexity > high:
        perp_score = 0.0
    elif perplexity < low:
        perp_score = 1.0
    else:
        # Linear interpolation across the band, clamped to guard
        # against float rounding at the edges.
        perp_score = 1.0 - ((perplexity - low) / (high - low))
        perp_score = max(0.0, min(1.0, perp_score))
    final_ai_score = (perp_weight * perp_score) + (ai_weight * ai_prob)
    return final_ai_score, perp_score
# -------------------------------
# Streamlit UI
# -------------------------------
st.set_page_config(page_title="AI Text Detector", page_icon="🤖", layout="wide")
# NOTE: a styled HTML header/intro (st.markdown with unsafe_allow_html)
# used to be rendered here; it is currently disabled.
user_input = st.text_area("Enter your sentence here:", height=150)
# Main interaction: score the input with both detectors when the user
# clicks "Check".
if st.button("Check"):
    if user_input.strip() == "":
        st.warning("Please enter a sentence before submitting.")
    else:
        # Logistic Regression Prediction.
        # predict_proba returns one row of class probabilities; index 1
        # is treated as "AI" and index 0 as "human" — presumably matches
        # the label encoding used at training time (TODO confirm against
        # the training script / model.classes_).
        transformed_input = vectorizer.transform([user_input])
        prediction = model.predict_proba(transformed_input)
        ai_prob = prediction[0][1]
        human_prob = prediction[0][0]
        # Perplexity Score under the local GPT-2 model.
        perplexity_score = calculate_perplexity(user_input)
        # NOTE(review): the combined-score display below is disabled, so
        # ai_prob/human_prob are currently computed but never shown.
        # # Combined Score
        # final_ai_score, perp_score = final_score(ai_prob, perplexity_score)
        # # Result Display
        # st.subheader("🔍 Result:")
        # if final_ai_score > 0.5:
        #     st.error("❗ This text is likely **AI-generated**.")
        # else:
        #     st.success("✅ This text is likely **Human-written**.")
        # st.markdown(f"**Logistic Model Confidence:** {ai_prob:.3f} AI vs {human_prob:.3f} Human")
        # st.markdown(f"**Perplexity Score:** {perplexity_score:.2f}")
        # st.markdown(f"**Combined AI Score:** {final_ai_score:.3f} (Weighted)")
        # Interpretation based on perplexity alone.
        # NOTE(review): perplexity values in [30, 100] produce no message
        # at all — consider an st.info branch for the ambiguous middle
        # range (unless handled by code below this view).
        if perplexity_score < 30:
            st.error("🧠 Low perplexity suggests the text is highly predictable—possibly AI-generated.")
        elif perplexity_score > 100:
            st.success("🧠 High perplexity suggests human-like variation or complexity.")