# NOTE: the four lines below were Hugging Face Space page chrome captured by
# the scrape (file path, avatar caption, commit message). Kept as a comment so
# the module stays importable:
#   ayes / src/streamlit_app.py
#   vivv56's picture
#   Update src/streamlit_app.py (#1)
#   430bc04 verified
import streamlit as st
import joblib
import torch
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
import numpy as np
# -------------------------------
# Load Logistic Regression model
# -------------------------------
# Text vectorizer fitted alongside the classifier -- must produce the exact
# feature layout the logistic model was trained on.
vectorizer = joblib.load('src/vectorizer.pkl')
# Binary classifier; predict_proba column order assumed [human, AI] --
# TODO confirm against the training script.
model = joblib.load('src/logistic_model.pkl')
# -------------------------------
# Load GPT-2 model and tokenizer
# -------------------------------
@st.cache_resource
def load_gpt2():
    """Load the local GPT-2 tokenizer and model once per Streamlit session.

    Cached with ``st.cache_resource`` so script reruns reuse the same
    objects instead of reloading the weights from disk on every interaction.

    Returns:
        tuple: ``(tokenizer, model)`` -- a ``GPT2TokenizerFast`` and a
        ``GPT2LMHeadModel`` on CPU in eval mode.
    """
    # local_files_only on BOTH loads keeps HF Spaces from ever hitting the
    # Hub (the original passed it only for the model, not the tokenizer).
    tokenizer = GPT2TokenizerFast.from_pretrained(
        "src/gpt2_local",
        local_files_only=True,
    )
    # NOTE(review): trust_remote_code=True executes arbitrary Python shipped
    # with the checkpoint. A stock GPT-2 checkpoint does not need it --
    # confirm the local snapshot really ships custom code, otherwise drop it.
    gpt2_model = GPT2LMHeadModel.from_pretrained(
        "src/gpt2_local",
        trust_remote_code=True,
        local_files_only=True,  # use only local files in HF Spaces
    )
    gpt2_model.to(torch.device("cpu"))  # switch to "cuda" on a GPU Space
    gpt2_model.eval()  # disable dropout so scoring is deterministic
    return tokenizer, gpt2_model


tokenizer, gpt2_model = load_gpt2()
# -------------------------------
# Calculate Perplexity
# -------------------------------
def calculate_perplexity(text):
    """Return the GPT-2 perplexity of ``text`` (lower = more predictable).

    The input is truncated to 512 tokens to bound memory and latency.
    Perplexity is ``exp`` of the language-modeling loss obtained by using
    the input ids as their own labels.

    Args:
        text: String to score. Assumed non-empty -- the UI guards against
            blank input before calling this (TODO confirm no other callers).

    Returns:
        float: Perplexity value.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():  # inference only -- no gradients needed
        outputs = gpt2_model(**inputs, labels=inputs["input_ids"])
    # outputs.loss is the mean cross-entropy; exp() converts it to perplexity.
    return torch.exp(outputs.loss).item()
# -------------------------------
# Combine Scores
# -------------------------------
def final_score(ai_prob, perplexity):
    """Blend the classifier probability with a perplexity-based score.

    Perplexity is mapped linearly onto [0, 1]: values below 10 score 1.0
    (highly predictable, AI-like), values above 300 score 0.0 (very
    unpredictable, human-like), and the 10-300 band interpolates linearly.
    The combined score weights the perplexity signal 0.7 and the
    logistic-regression AI probability 0.3.

    Args:
        ai_prob: P(AI) from the logistic-regression model, in [0, 1].
        perplexity: GPT-2 perplexity of the text (positive float).

    Returns:
        tuple: ``(final_ai_score, perp_score)`` -- the weighted combined
        AI score and the perplexity sub-score, both in [0, 1].
    """
    if perplexity > 300:
        perp_score = 0.0
    elif perplexity < 10:
        perp_score = 1.0
    else:
        perp_score = 1.0 - ((perplexity - 10) / (300 - 10))
        # Defensive clamp against float rounding at the interval edges.
        perp_score = max(0.0, min(1.0, perp_score))
    final_ai_score = (0.7 * perp_score) + (0.3 * ai_prob)
    return final_ai_score, perp_score
# -------------------------------
# Streamlit UI
# -------------------------------
# -------------------------------
# Streamlit UI
# -------------------------------
st.set_page_config(page_title="AI Text Detector", page_icon="🤖", layout="wide")

user_input = st.text_area("Enter your sentence here:", height=150)

if st.button("Check"):
    if user_input.strip() == "":
        st.warning("Please enter a sentence before submitting.")
    else:
        # Logistic-regression prediction: predict_proba row assumed to be
        # [P(human), P(AI)] -- TODO confirm against the training script.
        transformed_input = vectorizer.transform([user_input])
        prediction = model.predict_proba(transformed_input)
        ai_prob = prediction[0][1]
        human_prob = prediction[0][0]

        # GPT-2 perplexity of the input text.
        perplexity_score = calculate_perplexity(user_input)

        # Weighted blend of both signals (0.7 perplexity, 0.3 classifier).
        # Previously these values were computed but never shown.
        final_ai_score, perp_score = final_score(ai_prob, perplexity_score)

        st.subheader("🔍 Result:")
        if final_ai_score > 0.5:
            st.error("❗ This text is likely **AI-generated**.")
        else:
            st.success("✅ This text is likely **Human-written**.")
        st.markdown(f"**Logistic Model Confidence:** {ai_prob:.3f} AI vs {human_prob:.3f} Human")
        st.markdown(f"**Perplexity Score:** {perplexity_score:.2f}")
        st.markdown(f"**Combined AI Score:** {final_ai_score:.3f} (Weighted)")

        # Perplexity interpretation. The original covered only <30 and >100,
        # leaving the 30-100 band with no message at all.
        if perplexity_score < 30:
            st.error("🧠 Low perplexity suggests the text is highly predictable—possibly AI-generated.")
        elif perplexity_score > 100:
            st.success("🧠 High perplexity suggests human-like variation or complexity.")
        else:
            st.info("🧠 Perplexity is in an ambiguous range—no strong signal either way.")