changcheng967 committed on
Commit
3deb947
·
verified ·
1 Parent(s): f0e349a

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +82 -289
src/streamlit_app.py CHANGED
@@ -1,40 +1,32 @@
1
  import os
2
  os.environ["STREAMLIT_SERVER_ENABLE_WATCHER"] = "false"
3
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
4
- os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # Fix for PyTorch compatibility
5
 
6
  import streamlit as st
7
  import time
8
- import logging
9
  import re
10
- import numpy as np
11
  import random
12
- import torch # ✅ MISSING IMPORT FIXED
13
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
14
  from sentence_transformers import SentenceTransformer, util
15
 
16
- # Configure logging
17
- logging.basicConfig(level=logging.INFO)
18
- logger = logging.getLogger(__name__)
19
-
20
  st.set_page_config(page_title="AI Humanizer Pro", layout="wide")
21
  st.title("AI Humanizer Pro")
22
  st.subheader("Transform AI text to undetectable human content")
23
 
24
- # Enhanced configuration
25
  DETECTION_THRESHOLD = 0.65
26
  MAX_LENGTH = 256
27
  SIMILARITY_THRESHOLD = 0.75
28
  MAX_ITERATIONS = 3
29
 
30
- # Upgrade detection model
31
  MODELS = {
32
- "detection": "openai-detector/gpt3-detector", # upgraded detection
33
- "humanization": "t5-large",
34
- "similarity": "sentence-transformers/all-mpnet-base-v2"
35
  }
36
 
37
- # AI reduction styles (from the article)
38
  STYLES = [
39
  "casual conversation",
40
  "personal diary entry",
@@ -44,323 +36,124 @@ STYLES = [
44
  "technical documentation"
45
  ]
46
 
47
- if "logs" not in st.session_state:
48
- st.session_state.logs = []
49
- st.session_state.models_loaded = False
50
-
51
- def add_log(message):
52
- timestamp = time.strftime("%H:%M:%S")
53
- log_entry = f"[{timestamp}] {message}"
54
- st.session_state.logs.append(log_entry)
55
- logger.info(log_entry)
56
-
57
- def load_models():
58
- if not st.session_state.models_loaded:
59
- # Detection model
60
- add_log("Loading detection model...")
61
- detection_tokenizer = AutoTokenizer.from_pretrained(MODELS["detection"])
62
- detection_model = AutoModelForSequenceClassification.from_pretrained(MODELS["detection"])
63
-
64
- # Humanization pipeline
65
- add_log("Loading humanization system...")
66
- humanizer = pipeline(
67
- "text2text-generation",
68
- model=MODELS["humanization"],
69
- tokenizer=MODELS["humanization"],
70
- device=-1,
71
- framework="pt"
72
- )
73
-
74
- # Similarity model
75
- add_log("Loading semantic analyzer...")
76
- similarity_model = SentenceTransformer(MODELS["similarity"], device="cpu")
77
-
78
- add_log("All systems initialized")
79
- st.session_state.models_loaded = True
80
- return detection_tokenizer, detection_model, humanizer, similarity_model
81
-
82
- return (
83
- st.session_state.detection_tokenizer,
84
- st.session_state.detection_model,
85
- st.session_state.humanizer,
86
- st.session_state.similarity_model
87
  )
88
-
89
- # Load models with progress indicator
90
- if not st.session_state.get("models_initialized", False):
91
- progress_bar = st.progress(0)
92
- status_text = st.empty()
93
-
94
- status_text.text("Initializing systems (this may take 2-3 minutes)...")
95
- progress_bar.progress(10)
96
-
97
- try:
98
- detection_tokenizer, detection_model, humanizer, similarity_model = load_models()
99
- progress_bar.progress(60)
100
-
101
- # Store in session state
102
- st.session_state.detection_tokenizer = detection_tokenizer
103
- st.session_state.detection_model = detection_model
104
- st.session_state.humanizer = humanizer
105
- st.session_state.similarity_model = similarity_model
106
- st.session_state.models_initialized = True
107
-
108
- progress_bar.progress(100)
109
- time.sleep(0.5)
110
- progress_bar.empty()
111
- status_text.empty()
112
- except Exception as e:
113
- progress_bar.empty()
114
- status_text.error(f"Initialization failed: {str(e)}")
115
- st.stop()
116
-
117
- # Access models from session state
118
- detection_tokenizer = st.session_state.detection_tokenizer
119
- detection_model = st.session_state.detection_model
120
- humanizer = st.session_state.humanizer
121
- similarity_model = st.session_state.similarity_model
122
 
123
  def preprocess_text(text):
124
- """Clean text for better analysis"""
125
  text = re.sub(r'\s+', ' ', text)
126
  text = re.sub(r'[^\w\s.,;:!?\'-]', '', text)
127
  return text.strip()
128
 
129
  def detect_ai_probability(text):
130
- """Enhanced detection with full-text analysis"""
131
  text = preprocess_text(text)
132
- add_log("Running AI detection")
133
-
134
- try:
135
- # Process full text for better accuracy
136
- inputs = detection_tokenizer(
137
- text,
138
- return_tensors="pt",
139
- truncation=True,
140
- max_length=MAX_LENGTH,
141
- padding=True
142
- )
143
- with torch.no_grad():
144
- outputs = detection_model(**inputs)
145
- probs = torch.softmax(outputs.logits, dim=1)
146
- ai_prob = probs[0][1].item()
147
- add_log(f"AI probability: {ai_prob:.4f}")
148
- return ai_prob
149
- except Exception as e:
150
- add_log(f"Detection error: {str(e)}")
151
- return 0.95 # Assume AI if detection fails
152
 
153
- def calculate_semantic_similarity(original, humanized):
154
- """Measure meaning preservation"""
155
- embeddings = similarity_model.encode([original, humanized], convert_to_tensor=True)
156
- similarity = util.cos_sim(embeddings[0], embeddings[1]).item()
157
- return similarity
158
 
159
- def enhance_with_ai_reduction(text):
160
- """Apply AI rate reduction techniques from the article"""
161
- # 1. Increase perplexity and burstiness
162
- # 2. Apply style transfer
163
- # 3. Remove AI patterns
164
-
165
- # Select a random style for content mismatch
166
  style = random.choice(STYLES)
167
-
168
- # Apply style transfer prompt
169
  prompt = f"Rewrite this text in a {style} style while preserving the core meaning: {text}"
170
-
171
  try:
172
- result = humanizer(
173
  prompt,
174
  num_beams=3,
175
- num_return_sequences=1,
176
  max_new_tokens=MAX_LENGTH,
177
- temperature=1.7, # Higher temperature for more creativity
178
  repetition_penalty=2.5,
179
  do_sample=True
180
  )
181
  rewritten = result[0]["generated_text"]
182
  except:
183
  rewritten = text
184
-
185
- # Remove common AI patterns
186
- ai_patterns = [
187
  "Furthermore", "Moreover", "In conclusion",
188
  "it is important to", "plays a crucial role",
189
  "on an unprecedented scale", "as a result",
190
  "in today's world", "it is worth noting"
191
- ]
192
-
193
- for pattern in ai_patterns:
194
- rewritten = rewritten.replace(pattern, "")
195
-
196
- # Add human-like imperfections
197
  if random.random() > 0.7 and len(rewritten.split()) > 20:
198
  sentences = rewritten.split('. ')
199
  if len(sentences) > 3:
200
- # Add a short, abrupt sentence
201
  sentences.insert(random.randint(2, len(sentences)-1), "Let me think.")
202
  rewritten = '. '.join(sentences)
203
-
204
  return rewritten
205
 
206
- def transform_to_human(text, original_text):
207
- """Transform AI text to undetectable content"""
208
- add_log("Starting transformation")
209
-
210
  best_text = text
211
- best_ai_prob = detect_ai_probability(text)
212
- best_similarity = calculate_semantic_similarity(original_text, text)
213
-
214
- if best_ai_prob < DETECTION_THRESHOLD:
215
- return best_text, best_ai_prob, best_similarity, True
216
-
217
- for iteration in range(MAX_ITERATIONS):
218
- add_log(f"Transformation iteration #{iteration+1}")
219
-
220
- # Generate enhanced text with AI reduction techniques
221
- candidate = enhance_with_ai_reduction(best_text)
222
-
223
- # Calculate metrics
224
  try:
225
  ai_prob = detect_ai_probability(candidate)
226
- similarity = calculate_semantic_similarity(original_text, candidate)
227
-
228
- add_log(f"Candidate: AI={ai_prob:.4f}, Similarity={similarity:.4f}")
229
-
230
- # Accept candidate if it reduces AI probability
231
- if ai_prob < best_ai_prob and similarity >= SIMILARITY_THRESHOLD:
232
- best_text = candidate
233
- best_ai_prob = ai_prob
234
- best_similarity = similarity
235
-
236
- if best_ai_prob < DETECTION_THRESHOLD:
237
- add_log(f"✅ Achieved undetectable status")
238
- return best_text, best_ai_prob, best_similarity, True
239
- except Exception as e:
240
- add_log(f"⚠️ Error evaluating candidate: {str(e)}")
241
  continue
242
-
243
- return best_text, best_ai_prob, best_similarity, best_ai_prob < DETECTION_THRESHOLD
244
-
245
- def process_text(text):
246
- add_log("Starting text processing")
247
- original_text = text
248
-
249
- # Initial AI detection
250
- initial_ai_prob = detect_ai_probability(text)
251
- add_log(f"Initial AI probability: {initial_ai_prob:.4f}")
252
-
253
- # Humanization decision
254
- if initial_ai_prob > DETECTION_THRESHOLD:
255
- add_log("AI probability exceeds threshold - transforming")
256
- humanized, final_ai_prob, similarity, success = transform_to_human(text, original_text)
257
- return final_ai_prob, humanized, success, similarity, initial_ai_prob
258
  else:
259
- add_log("Text appears human-like - no transformation needed")
260
- return initial_ai_prob, text, False, 1.0, initial_ai_prob
261
 
262
- # UI Components
263
  with st.sidebar:
264
- st.header("Configuration")
265
- detection_threshold = st.slider("Detection Threshold", 0.1, 0.9, DETECTION_THRESHOLD, 0.05)
266
- similarity_threshold = st.slider("Meaning Preservation", 0.1, 0.9, SIMILARITY_THRESHOLD, 0.05)
267
- max_iterations = st.slider("Max Transformation Passes", 1, 5, MAX_ITERATIONS, 1)
268
-
269
- st.caption("Enhanced Models:")
270
- st.code(f"Detector: {MODELS['detection']}")
271
- st.code(f"Humanizer: {MODELS['humanization']}")
272
- st.code(f"Similarity: {MODELS['similarity']}")
273
-
274
- if st.button("Clear Logs"):
275
- st.session_state.logs = []
276
- st.rerun()
277
 
278
- st.subheader("Input")
279
- input_text = st.text_area("Paste AI-generated text to transform",
280
- placeholder="Enter text to humanize...",
281
- height=200,
282
- key="input_text")
283
 
284
- if st.button("Transform to Human-like Text", type="primary"):
285
  if not input_text.strip():
286
- st.warning("Please enter some text to transform")
287
  else:
288
- # Update parameters from UI
289
- DETECTION_THRESHOLD = detection_threshold
290
- SIMILARITY_THRESHOLD = similarity_threshold
291
- MAX_ITERATIONS = max_iterations
292
-
293
- with st.expander("Processing Logs", expanded=True):
294
- log_placeholder = st.empty()
295
-
296
- try:
297
- start_time = time.time()
298
- ai_prob, output, transformed, similarity, initial_ai = process_text(input_text)
299
- processing_time = time.time() - start_time
300
-
301
- log_text = "\n".join(st.session_state.logs[-20:])
302
- log_placeholder.code(log_text, language="log")
303
-
304
- add_log(f"Processing completed in {processing_time:.1f} seconds")
305
- except Exception as e:
306
- log_placeholder.error(f"Transformation failed: {str(e)}")
307
- st.stop()
308
-
309
- st.divider()
310
-
311
- # Results display
312
- col1, col2 = st.columns(2)
313
- with col1:
314
- st.subheader("Analysis Results")
315
-
316
- st.metric("Initial AI Probability", f"{initial_ai*100:.1f}%",
317
- delta="High AI" if initial_ai > 0.7 else "Medium AI" if initial_ai > 0.4 else "Low AI")
318
-
319
- st.metric("Final AI Probability", f"{ai_prob*100:.1f}%",
320
- delta="Undetectable" if ai_prob < DETECTION_THRESHOLD else "Detectable",
321
- delta_color="inverse")
322
-
323
- # Confidence indicator
324
- confidence_reduction = max(0, initial_ai - ai_prob)
325
- st.progress(int(confidence_reduction * 100),
326
- text=f"AI Detection Reduced by {confidence_reduction*100:.1f}%")
327
-
328
- st.subheader("Original Text")
329
- st.write(input_text)
330
-
331
- with col2:
332
- status = "Transformed" if transformed else "Original"
333
- color = "green" if transformed else "blue"
334
- st.subheader(f"Output Text ({status})")
335
- st.markdown(f'<div style="border-left: 4px solid {color}; padding: 10px;">{output}</div>',
336
- unsafe_allow_html=True)
337
-
338
- st.metric("Meaning Preservation", f"{similarity*100:.1f}%")
339
-
340
- if transformed:
341
- st.success("✅ Successfully transformed to human-like text")
342
- else:
343
- if ai_prob < DETECTION_THRESHOLD:
344
- st.info("✅ Text already human-like")
345
- else:
346
- st.warning("⚠️ Text may still be detectable")
347
-
348
- st.subheader("Quality Feedback")
349
- quality = st.slider("How human-like does this sound?", 1, 5, 4)
350
- if quality < 3:
351
- st.warning("Thanks for your feedback! We'll improve.")
352
-
353
- # Add sample texts for testing
354
- st.sidebar.divider()
355
- st.sidebar.subheader("Sample AI Texts")
356
- sample_texts = {
357
- "Academic": "The utilization of renewable energy sources is imperative for environmental sustainability and represents a critical pathway toward decarbonizing our global energy infrastructure.",
358
- "Business": "Leveraging synergistic paradigms, we can optimize scalable solutions to drive disruptive innovation in the marketplace.",
359
- "Technical": "Machine learning algorithms, particularly deep neural networks, require substantial computational resources during their training phases.",
360
- "Creative": "The city pulsed with predictable rhythms—lights changed on schedule, drones delivered packages, even rain fell by appointment."
361
- }
362
-
363
- for name, text in sample_texts.items():
364
- if st.sidebar.button(name, key=f"sample_{name}"):
365
- st.session_state.input_text = text
366
- st.rerun()
 
1
  import os
2
  os.environ["STREAMLIT_SERVER_ENABLE_WATCHER"] = "false"
3
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
4
+ os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
5
 
6
  import streamlit as st
7
  import time
 
8
  import re
 
9
  import random
10
+ import torch
11
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
12
  from sentence_transformers import SentenceTransformer, util
13
 
 
 
 
 
14
  st.set_page_config(page_title="AI Humanizer Pro", layout="wide")
15
  st.title("AI Humanizer Pro")
16
  st.subheader("Transform AI text to undetectable human content")
17
 
 
18
  DETECTION_THRESHOLD = 0.65
19
  MAX_LENGTH = 256
20
  SIMILARITY_THRESHOLD = 0.75
21
  MAX_ITERATIONS = 3
22
 
23
+ # Updated best models as of 2025
24
  MODELS = {
25
+ "detection": "roberta-large-openai-detector", # Strong RoBERTa-based AI detector
26
+ "humanization": "facebook/bart-large-cnn", # Strong generation model, good for rewriting
27
+ "similarity": "sentence-transformers/all-MiniLM-L12-v2" # Compact, very good semantic similarity
28
  }
29
 
 
30
  STYLES = [
31
  "casual conversation",
32
  "personal diary entry",
 
36
  "technical documentation"
37
  ]
38
 
39
+ if "models_loaded" not in st.session_state:
40
+ # Load models once
41
+ st.session_state.detection_tokenizer = AutoTokenizer.from_pretrained(MODELS["detection"])
42
+ st.session_state.detection_model = AutoModelForSequenceClassification.from_pretrained(MODELS["detection"])
43
+ st.session_state.humanizer = pipeline(
44
+ "text2text-generation",
45
+ model=MODELS["humanization"],
46
+ tokenizer=MODELS["humanization"],
47
+ device=-1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  )
49
+ st.session_state.similarity_model = SentenceTransformer(MODELS["similarity"], device="cpu")
50
+ st.session_state.models_loaded = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  def preprocess_text(text):
 
53
  text = re.sub(r'\s+', ' ', text)
54
  text = re.sub(r'[^\w\s.,;:!?\'-]', '', text)
55
  return text.strip()
56
 
57
  def detect_ai_probability(text):
 
58
  text = preprocess_text(text)
59
+ inputs = st.session_state.detection_tokenizer(
60
+ text, return_tensors="pt", truncation=True, max_length=MAX_LENGTH, padding=True
61
+ )
62
+ with torch.no_grad():
63
+ outputs = st.session_state.detection_model(**inputs)
64
+ probs = torch.softmax(outputs.logits, dim=1)
65
+ return probs[0][1].item()
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
+ def calculate_similarity(original, humanized):
68
+ embeddings = st.session_state.similarity_model.encode([original, humanized], convert_to_tensor=True)
69
+ return util.cos_sim(embeddings[0], embeddings[1]).item()
 
 
70
 
71
+ def enhance_text(text):
 
 
 
 
 
 
72
  style = random.choice(STYLES)
 
 
73
  prompt = f"Rewrite this text in a {style} style while preserving the core meaning: {text}"
 
74
  try:
75
+ result = st.session_state.humanizer(
76
  prompt,
77
  num_beams=3,
 
78
  max_new_tokens=MAX_LENGTH,
79
+ temperature=1.7,
80
  repetition_penalty=2.5,
81
  do_sample=True
82
  )
83
  rewritten = result[0]["generated_text"]
84
  except:
85
  rewritten = text
86
+
87
+ # Remove common AI phrases
88
+ for phrase in [
89
  "Furthermore", "Moreover", "In conclusion",
90
  "it is important to", "plays a crucial role",
91
  "on an unprecedented scale", "as a result",
92
  "in today's world", "it is worth noting"
93
+ ]:
94
+ rewritten = rewritten.replace(phrase, "")
95
+
96
+ # Add some human-like randomness
 
 
97
  if random.random() > 0.7 and len(rewritten.split()) > 20:
98
  sentences = rewritten.split('. ')
99
  if len(sentences) > 3:
 
100
  sentences.insert(random.randint(2, len(sentences)-1), "Let me think.")
101
  rewritten = '. '.join(sentences)
102
+
103
  return rewritten
104
 
105
+ def transform_text(text, original):
 
 
 
106
  best_text = text
107
+ best_prob = detect_ai_probability(text)
108
+ best_sim = calculate_similarity(original, text)
109
+ if best_prob < DETECTION_THRESHOLD:
110
+ return best_text, best_prob, best_sim, True
111
+
112
+ for _ in range(MAX_ITERATIONS):
113
+ candidate = enhance_text(best_text)
 
 
 
 
 
 
114
  try:
115
  ai_prob = detect_ai_probability(candidate)
116
+ sim = calculate_similarity(original, candidate)
117
+ if ai_prob < best_prob and sim >= SIMILARITY_THRESHOLD:
118
+ best_text, best_prob, best_sim = candidate, ai_prob, sim
119
+ if best_prob < DETECTION_THRESHOLD:
120
+ return best_text, best_prob, best_sim, True
121
+ except:
 
 
 
 
 
 
 
 
 
122
  continue
123
+ return best_text, best_prob, best_sim, best_prob < DETECTION_THRESHOLD
124
+
125
+ def process(text):
126
+ original = text
127
+ initial_prob = detect_ai_probability(text)
128
+ if initial_prob > DETECTION_THRESHOLD:
129
+ transformed, final_prob, similarity, success = transform_text(text, original)
130
+ return initial_prob, final_prob, transformed, success, similarity
 
 
 
 
 
 
 
 
131
  else:
132
+ return initial_prob, initial_prob, text, False, 1.0
 
133
 
134
+ # UI
135
  with st.sidebar:
136
+ st.header("Settings")
137
+ DETECTION_THRESHOLD = st.slider("Detection Threshold", 0.1, 0.9, DETECTION_THRESHOLD, 0.05)
138
+ SIMILARITY_THRESHOLD = st.slider("Meaning Preservation", 0.1, 0.9, SIMILARITY_THRESHOLD, 0.05)
139
+ MAX_ITERATIONS = st.slider("Max Transformation Passes", 1, 5, MAX_ITERATIONS, 1)
 
 
 
 
 
 
 
 
 
140
 
141
+ input_text = st.text_area("Paste AI-generated text", height=200)
 
 
 
 
142
 
143
+ if st.button("Transform"):
144
  if not input_text.strip():
145
+ st.warning("Please enter text")
146
  else:
147
+ st.session_state.logs = []
148
+ start = time.time()
149
+ init_prob, final_prob, output_text, transformed, similarity = process(input_text)
150
+ duration = time.time() - start
151
+
152
+ st.write(f"**Initial AI Probability:** {init_prob:.2%}")
153
+ st.write(f"**Final AI Probability:** {final_prob:.2%}")
154
+ st.write(f"**Meaning Preservation:** {similarity:.2%}")
155
+ st.write(f"**Transformation:** {'Yes' if transformed else 'No'}")
156
+ st.write(f"**Processing time:** {duration:.1f}s")
157
+
158
+ st.subheader("Output Text")
159
+ st.markdown(f'<div style="border-left:4px solid {"green" if transformed else "blue"}; padding:10px;">{output_text}</div>', unsafe_allow_html=True)