vivv56 committed on
Commit
700c4d8
·
verified ·
1 Parent(s): 0ce0b4e

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +89 -34
src/streamlit_app.py CHANGED
@@ -4,41 +4,96 @@ import torch
4
  from transformers import GPT2LMHeadModel, GPT2TokenizerFast
5
  import numpy as np
6
 
7
- # Load models
 
 
8
  vectorizer = joblib.load('src/vectorizer.pkl')
9
- clf = joblib.load('src/logistic_model.pkl')
10
- tokenizer = GPT2TokenizerFast.from_pretrained("src/gpt2_local")
11
- gpt2 = GPT2LMHeadModel.from_pretrained("src/gpt2_local").eval()
12
 
13
- # Calculate perplexity
14
- def get_perplexity(text):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
16
  with torch.no_grad():
17
- loss = gpt2(**inputs, labels=inputs["input_ids"]).loss
18
- return torch.exp(loss).item()
19
-
20
- # Combine both scores
21
- def final_ai_score(text, w_perp=0.7):
22
- perplexity = get_perplexity(text)
23
- model_prob = clf.predict_proba(vectorizer.transform([text]))[0][1]
24
- perp_score = np.clip(1 / (perplexity + 1), 0, 1)
25
- final_score = w_perp * perp_score + (1 - w_perp) * model_prob
26
- return final_score, perplexity
27
-
28
- # UI
29
- st.title("🤖 AI vs Human Text Detector")
30
- text = st.text_area("Enter your sentence here:", height=150)
31
-
32
- if st.button("Check") and text.strip():
33
- ai_score, perplexity = final_ai_score(text)
34
- is_ai = ai_score > 0.5
35
-
36
- st.subheader("🔍 Result:")
37
- st.error("❗ This text is likely **AI-generated**." if is_ai else "✅ This text is likely **Human-written**.")
38
- st.markdown(f"**Final AI Score:** {ai_score:.3f}")
39
- st.markdown(f"**Perplexity Score:** {perplexity:.2f}")
40
-
41
- if perplexity < 30:
42
- st.info("🧠 Low perplexity suggests the text is highly predictable—possibly AI-generated.")
43
- elif perplexity > 100:
44
- st.info("🧠 High perplexity suggests human-like variation or complexity.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  from transformers import GPT2LMHeadModel, GPT2TokenizerFast
5
  import numpy as np
6
 
7
# -------------------------------
# Load Logistic Regression model
# -------------------------------
@st.cache_resource
def load_classifier():
    """Load the text vectorizer and the classifier from their pickle files.

    Streamlit re-executes the whole script on every widget interaction, so
    the loader is wrapped in ``st.cache_resource`` to unpickle each artifact
    once per server process instead of on every rerun.

    Returns:
        tuple: ``(vectorizer, model)`` as unpickled from ``src/``.
    """
    # NOTE(review): joblib/pickle files execute arbitrary code when loaded;
    # only ship artifacts that were produced by this project.
    return (
        joblib.load('src/vectorizer.pkl'),
        joblib.load('src/logistic_model.pkl'),
    )


vectorizer, model = load_classifier()
 
 
12
 
13
+ # -------------------------------
14
+ # Load GPT-2 model and tokenizer
15
+ # -------------------------------
16
@st.cache_resource
def load_gpt2():
    """Load the local GPT-2 tokenizer and language model once per process.

    Both artifacts are read from ``src/gpt2_local`` with
    ``local_files_only=True`` so nothing is fetched over the network at
    runtime.

    Returns:
        tuple: ``(tokenizer, gpt2_model)``; the model is placed on the CPU
        and switched to eval mode.
    """
    model_dir = "src/gpt2_local"
    tokenizer = GPT2TokenizerFast.from_pretrained(model_dir, local_files_only=True)
    # ``trust_remote_code`` is deliberately not passed: GPT2LMHeadModel is a
    # built-in architecture, so opting in to repository-supplied code is
    # unnecessary.
    gpt2_model = GPT2LMHeadModel.from_pretrained(model_dir, local_files_only=True)
    gpt2_model.to(torch.device("cpu"))  # Use "cuda" if on GPU space
    gpt2_model.eval()
    return tokenizer, gpt2_model


tokenizer, gpt2_model = load_gpt2()
29
+
30
+ # -------------------------------
31
+ # Calculate Perplexity
32
+ # -------------------------------
33
def calculate_perplexity(text):
    """Return the GPT-2 perplexity of *text*.

    The text is tokenized (truncated to 512 tokens) and scored with the
    module-level ``gpt2_model``; perplexity is ``exp`` of the loss the model
    reports when the input ids are also used as labels.

    Args:
        text: The string to score.

    Returns:
        float: The perplexity, or ``nan`` when the text yields fewer than
        two tokens and the loss is therefore undefined.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

    # The language-modeling loss scores each token against the prediction
    # made from the tokens before it, so at least two tokens are required.
    # Bail out explicitly instead of running the model on such input.
    if inputs["input_ids"].shape[-1] < 2:
        return float("nan")

    with torch.no_grad():
        outputs = gpt2_model(**inputs, labels=inputs["input_ids"])
    return torch.exp(outputs.loss).item()
40
+
41
+ # -------------------------------
42
+ # Combine Scores
43
+ # -------------------------------
44
def final_score(ai_prob, perplexity, *, low=10.0, high=300.0):
    """Blend the classifier probability with a perplexity-derived score.

    Perplexity is mapped linearly onto ``[0, 1]``: values at or below *low*
    score 1.0 (most AI-like), values at or above *high* score 0.0, and
    values in between are interpolated. The result is combined with
    *ai_prob* using fixed weights of 0.7 (perplexity) and 0.3 (classifier).

    Args:
        ai_prob: Classifier probability that the text is AI-generated.
        perplexity: Language-model perplexity of the text.
        low: Perplexity at or below which the perplexity score is 1.0.
        high: Perplexity at or above which the perplexity score is 0.0.

    Returns:
        tuple: ``(final_ai_score, perp_score)``.

    Raises:
        ValueError: If ``high`` is not greater than ``low``.
    """
    import math

    if high <= low:
        raise ValueError("high must be greater than low")

    if math.isnan(perplexity):
        # An undefined perplexity carries no evidence either way. Without
        # this guard, min()/max() clamping silently turns NaN into 1.0
        # (the strongest "AI" signal); use a neutral score so the
        # classifier probability decides the outcome.
        perp_score = 0.5
    else:
        # Linear ramp from 1.0 at `low` down to 0.0 at `high`; the clamp
        # also covers values outside the range, including +inf.
        ramp = 1.0 - (perplexity - low) / (high - low)
        perp_score = min(1.0, max(0.0, ramp))

    final_ai_score = (0.7 * perp_score) + (0.3 * ai_prob)
    return final_ai_score, perp_score
55
+
56
+ # -------------------------------
57
+ # Streamlit UI
58
+ # -------------------------------
59
st.set_page_config(page_title="AI Text Detector", page_icon="🤖", layout="centered")

# Static header markup; no user-supplied text is interpolated into the HTML.
st.markdown("""
<h2 style='text-align: center; color: #4CAF50;'>🤖 AI vs Human Text Detector</h2>
<p style='text-align: center;'>Enter a sentence to check if it was written by a human or generated by AI.</p>
""", unsafe_allow_html=True)

user_input = st.text_area("Enter your sentence here:", height=150)

if st.button("Check"):
    if not user_input.strip():
        st.warning("Please enter a sentence before submitting.")
    else:
        # Logistic Regression Prediction
        transformed_input = vectorizer.transform([user_input])
        prediction = model.predict_proba(transformed_input)
        ai_prob = prediction[0][1]
        human_prob = prediction[0][0]

        # Perplexity Score
        perplexity_score = calculate_perplexity(user_input)

        # Combined Score
        final_ai_score, perp_score = final_score(ai_prob, perplexity_score)

        # Result Display: show the verdict and the numbers behind it, so the
        # user gets feedback for every input rather than only when the
        # perplexity falls outside the 30-100 band.
        st.subheader("🔍 Result:")
        if final_ai_score > 0.5:
            st.error("❗ This text is likely **AI-generated**.")
        else:
            st.success("✅ This text is likely **Human-written**.")

        st.markdown(f"**Logistic Model Confidence:** {ai_prob:.3f} AI vs {human_prob:.3f} Human")
        st.markdown(f"**Perplexity Score:** {perplexity_score:.2f}")
        st.markdown(f"**Combined AI Score:** {final_ai_score:.3f} (Weighted)")

        # Interpretation
        if perplexity_score < 30:
            st.info("🧠 Low perplexity suggests the text is highly predictable—possibly AI-generated.")
        elif perplexity_score > 100:
            st.info("🧠 High perplexity suggests human-like variation or complexity.")