# AI vs Human Text Detector — Streamlit app (Hugging Face Space)
import streamlit as st
import joblib
import torch
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
import numpy as np

# -------------------------------
# Load the classical detector: the TF-IDF vectorizer and the
# logistic-regression classifier, deserialized from the pickled
# artifacts shipped under src/.
# -------------------------------
vectorizer, model = (
    joblib.load(artifact_path)
    for artifact_path in ('src/vectorizer.pkl', 'src/logistic_model.pkl')
)
# -------------------------------
# Load GPT-2 model and tokenizer from the local snapshot
# -------------------------------
def load_gpt2():
    """Load the local GPT-2 checkpoint and its tokenizer for scoring.

    Returns:
        tuple: ``(tokenizer, model)`` where the model has been moved to
        CPU and switched to eval mode, i.e. inference-only.
    """
    local_dir = "src/gpt2_local"
    gpt2_tokenizer = GPT2TokenizerFast.from_pretrained(local_dir)
    language_model = GPT2LMHeadModel.from_pretrained(
        local_dir,
        trust_remote_code=True,
        local_files_only=True  # Use only local files in HF Spaces
    )
    language_model.to(torch.device("cpu"))  # Use "cuda" if on GPU space
    language_model.eval()
    return gpt2_tokenizer, language_model

tokenizer, gpt2_model = load_gpt2()
# -------------------------------
# Calculate Perplexity
# -------------------------------
def calculate_perplexity(text):
    """Return exp(cross-entropy loss) of *text* under the local GPT-2.

    The text is truncated to 512 tokens. Lower perplexity means the
    language model finds the text more predictable.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    # Feeding input_ids as labels makes the model report its own
    # next-token cross-entropy loss; no gradients are needed.
    with torch.no_grad():
        model_output = gpt2_model(**encoded, labels=encoded["input_ids"])
    return torch.exp(model_output.loss).item()
# -------------------------------
# Combine Scores
# -------------------------------
def final_score(ai_prob, perplexity, low=10.0, high=300.0,
                perp_weight=0.7, ai_weight=0.3):
    """Blend the classifier probability with a perplexity-based score.

    Perplexity is mapped linearly onto [0, 1]: values below ``low`` map
    to 1.0 (very predictable, AI-like), values above ``high`` map to
    0.0 (human-like), and values in between are interpolated. The final
    score is a weighted sum of the two signals. The previously
    hard-coded band (10, 300) and weights (0.7, 0.3) are now keyword
    parameters whose defaults preserve the original behavior.

    Args:
        ai_prob: probability of the "AI" class from the logistic model,
            expected in [0, 1].
        perplexity: GPT-2 perplexity of the text (positive float).
        low: perplexity at or below which perp_score saturates to 1.0.
        high: perplexity at or above which perp_score saturates to 0.0.
        perp_weight: weight of the perplexity signal in the blend.
        ai_weight: weight of the classifier signal in the blend.

    Returns:
        tuple: ``(final_ai_score, perp_score)``; both lie in [0, 1]
        when ``ai_prob`` is in [0, 1] and the weights sum to 1.
    """
    if perplexity > high:
        perp_score = 0.0
    elif perplexity < low:
        perp_score = 1.0
    else:
        # Linear interpolation across the band, clamped to guard
        # against float rounding at the edges.
        perp_score = 1.0 - ((perplexity - low) / (high - low))
        perp_score = max(0.0, min(1.0, perp_score))
    final_ai_score = (perp_weight * perp_score) + (ai_weight * ai_prob)
    return final_ai_score, perp_score
# -------------------------------
# Streamlit UI
# -------------------------------
st.set_page_config(page_title="AI Text Detector", page_icon="🤖", layout="wide")
# NOTE: a styled HTML header/intro (st.markdown with unsafe_allow_html)
# used to be rendered here; it is currently disabled.
user_input = st.text_area("Enter your sentence here:", height=150)
# Main interaction: score the input with both detectors when the user
# clicks "Check".
if st.button("Check"):
    if user_input.strip() == "":
        st.warning("Please enter a sentence before submitting.")
    else:
        # Logistic Regression Prediction.
        # predict_proba returns one row of class probabilities; index 1
        # is treated as "AI" and index 0 as "human" — presumably matches
        # the label encoding used at training time (TODO confirm against
        # the training script / model.classes_).
        transformed_input = vectorizer.transform([user_input])
        prediction = model.predict_proba(transformed_input)
        ai_prob = prediction[0][1]
        human_prob = prediction[0][0]
        # Perplexity Score under the local GPT-2 model.
        perplexity_score = calculate_perplexity(user_input)
        # NOTE(review): the combined-score display below is disabled, so
        # ai_prob/human_prob are currently computed but never shown.
        # # Combined Score
        # final_ai_score, perp_score = final_score(ai_prob, perplexity_score)
        # # Result Display
        # st.subheader("🔍 Result:")
        # if final_ai_score > 0.5:
        #     st.error("❗ This text is likely **AI-generated**.")
        # else:
        #     st.success("✅ This text is likely **Human-written**.")
        # st.markdown(f"**Logistic Model Confidence:** {ai_prob:.3f} AI vs {human_prob:.3f} Human")
        # st.markdown(f"**Perplexity Score:** {perplexity_score:.2f}")
        # st.markdown(f"**Combined AI Score:** {final_ai_score:.3f} (Weighted)")
        # Interpretation based on perplexity alone.
        # NOTE(review): perplexity values in [30, 100] produce no message
        # at all — consider an st.info branch for the ambiguous middle
        # range (unless handled by code below this view).
        if perplexity_score < 30:
            st.error("🧠 Low perplexity suggests the text is highly predictable—possibly AI-generated.")
        elif perplexity_score > 100:
            st.success("🧠 High perplexity suggests human-like variation or complexity.")