changcheng967 committed on
Commit
3deb947
·
verified ·
1 Parent(s): f0e349a

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +82 -289
src/streamlit_app.py CHANGED
@@ -1,40 +1,32 @@
1
  import os
2
  os.environ["STREAMLIT_SERVER_ENABLE_WATCHER"] = "false"
3
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
4
- os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # Fix for PyTorch compatibility
5
 
6
  import streamlit as st
7
  import time
8
- import logging
9
  import re
10
- import numpy as np
11
  import random
12
- import torch # ✅ MISSING IMPORT FIXED
13
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
14
  from sentence_transformers import SentenceTransformer, util
15
 
16
- # Configure logging
17
- logging.basicConfig(level=logging.INFO)
18
- logger = logging.getLogger(__name__)
19
-
20
  st.set_page_config(page_title="AI Humanizer Pro", layout="wide")
21
  st.title("AI Humanizer Pro")
22
  st.subheader("Transform AI text to undetectable human content")
23
 
24
- # Enhanced configuration
25
  DETECTION_THRESHOLD = 0.65
26
  MAX_LENGTH = 256
27
  SIMILARITY_THRESHOLD = 0.75
28
  MAX_ITERATIONS = 3
29
 
30
- # Upgrade detection model
31
  MODELS = {
32
- "detection": "openai-detector/gpt3-detector", # upgraded detection
33
- "humanization": "t5-large",
34
- "similarity": "sentence-transformers/all-mpnet-base-v2"
35
  }
36
 
37
- # AI reduction styles (from the article)
38
  STYLES = [
39
  "casual conversation",
40
  "personal diary entry",
@@ -44,323 +36,124 @@ STYLES = [
44
  "technical documentation"
45
  ]
46
 
47
- if "logs" not in st.session_state:
48
- st.session_state.logs = []
49
- st.session_state.models_loaded = False
50
-
51
- def add_log(message):
52
- timestamp = time.strftime("%H:%M:%S")
53
- log_entry = f"[{timestamp}] {message}"
54
- st.session_state.logs.append(log_entry)
55
- logger.info(log_entry)
56
-
57
- def load_models():
58
- if not st.session_state.models_loaded:
59
- # Detection model
60
- add_log("Loading detection model...")
61
- detection_tokenizer = AutoTokenizer.from_pretrained(MODELS["detection"])
62
- detection_model = AutoModelForSequenceClassification.from_pretrained(MODELS["detection"])
63
-
64
- # Humanization pipeline
65
- add_log("Loading humanization system...")
66
- humanizer = pipeline(
67
- "text2text-generation",
68
- model=MODELS["humanization"],
69
- tokenizer=MODELS["humanization"],
70
- device=-1,
71
- framework="pt"
72
- )
73
-
74
- # Similarity model
75
- add_log("Loading semantic analyzer...")
76
- similarity_model = SentenceTransformer(MODELS["similarity"], device="cpu")
77
-
78
- add_log("All systems initialized")
79
- st.session_state.models_loaded = True
80
- return detection_tokenizer, detection_model, humanizer, similarity_model
81
-
82
- return (
83
- st.session_state.detection_tokenizer,
84
- st.session_state.detection_model,
85
- st.session_state.humanizer,
86
- st.session_state.similarity_model
87
  )
88
-
89
- # Load models with progress indicator
90
- if not st.session_state.get("models_initialized", False):
91
- progress_bar = st.progress(0)
92
- status_text = st.empty()
93
-
94
- status_text.text("Initializing systems (this may take 2-3 minutes)...")
95
- progress_bar.progress(10)
96
-
97
- try:
98
- detection_tokenizer, detection_model, humanizer, similarity_model = load_models()
99
- progress_bar.progress(60)
100
-
101
- # Store in session state
102
- st.session_state.detection_tokenizer = detection_tokenizer
103
- st.session_state.detection_model = detection_model
104
- st.session_state.humanizer = humanizer
105
- st.session_state.similarity_model = similarity_model
106
- st.session_state.models_initialized = True
107
-
108
- progress_bar.progress(100)
109
- time.sleep(0.5)
110
- progress_bar.empty()
111
- status_text.empty()
112
- except Exception as e:
113
- progress_bar.empty()
114
- status_text.error(f"Initialization failed: {str(e)}")
115
- st.stop()
116
-
117
- # Access models from session state
118
- detection_tokenizer = st.session_state.detection_tokenizer
119
- detection_model = st.session_state.detection_model
120
- humanizer = st.session_state.humanizer
121
- similarity_model = st.session_state.similarity_model
122
 
123
  def preprocess_text(text):
124
- """Clean text for better analysis"""
125
  text = re.sub(r'\s+', ' ', text)
126
  text = re.sub(r'[^\w\s.,;:!?\'-]', '', text)
127
  return text.strip()
128
 
129
  def detect_ai_probability(text):
130
- """Enhanced detection with full-text analysis"""
131
  text = preprocess_text(text)
132
- add_log("Running AI detection")
133
-
134
- try:
135
- # Process full text for better accuracy
136
- inputs = detection_tokenizer(
137
- text,
138
- return_tensors="pt",
139
- truncation=True,
140
- max_length=MAX_LENGTH,
141
- padding=True
142
- )
143
- with torch.no_grad():
144
- outputs = detection_model(**inputs)
145
- probs = torch.softmax(outputs.logits, dim=1)
146
- ai_prob = probs[0][1].item()
147
- add_log(f"AI probability: {ai_prob:.4f}")
148
- return ai_prob
149
- except Exception as e:
150
- add_log(f"Detection error: {str(e)}")
151
- return 0.95 # Assume AI if detection fails
152
 
153
- def calculate_semantic_similarity(original, humanized):
154
- """Measure meaning preservation"""
155
- embeddings = similarity_model.encode([original, humanized], convert_to_tensor=True)
156
- similarity = util.cos_sim(embeddings[0], embeddings[1]).item()
157
- return similarity
158
 
159
- def enhance_with_ai_reduction(text):
160
- """Apply AI rate reduction techniques from the article"""
161
- # 1. Increase perplexity and burstiness
162
- # 2. Apply style transfer
163
- # 3. Remove AI patterns
164
-
165
- # Select a random style for content mismatch
166
  style = random.choice(STYLES)
167
-
168
- # Apply style transfer prompt
169
  prompt = f"Rewrite this text in a {style} style while preserving the core meaning: {text}"
170
-
171
  try:
172
- result = humanizer(
173
  prompt,
174
  num_beams=3,
175
- num_return_sequences=1,
176
  max_new_tokens=MAX_LENGTH,
177
- temperature=1.7, # Higher temperature for more creativity
178
  repetition_penalty=2.5,
179
  do_sample=True
180
  )
181
  rewritten = result[0]["generated_text"]
182
  except:
183
  rewritten = text
184
-
185
- # Remove common AI patterns
186
- ai_patterns = [
187
  "Furthermore", "Moreover", "In conclusion",
188
  "it is important to", "plays a crucial role",
189
  "on an unprecedented scale", "as a result",
190
  "in today's world", "it is worth noting"
191
- ]
192
-
193
- for pattern in ai_patterns:
194
- rewritten = rewritten.replace(pattern, "")
195
-
196
- # Add human-like imperfections
197
  if random.random() > 0.7 and len(rewritten.split()) > 20:
198
  sentences = rewritten.split('. ')
199
  if len(sentences) > 3:
200
- # Add a short, abrupt sentence
201
  sentences.insert(random.randint(2, len(sentences)-1), "Let me think.")
202
  rewritten = '. '.join(sentences)
203
-
204
  return rewritten
205
 
206
- def transform_to_human(text, original_text):
207
- """Transform AI text to undetectable content"""
208
- add_log("Starting transformation")
209
-
210
  best_text = text
211
- best_ai_prob = detect_ai_probability(text)
212
- best_similarity = calculate_semantic_similarity(original_text, text)
213
-
214
- if best_ai_prob < DETECTION_THRESHOLD:
215
- return best_text, best_ai_prob, best_similarity, True
216
-
217
- for iteration in range(MAX_ITERATIONS):
218
- add_log(f"Transformation iteration #{iteration+1}")
219
-
220
- # Generate enhanced text with AI reduction techniques
221
- candidate = enhance_with_ai_reduction(best_text)
222
-
223
- # Calculate metrics
224
  try:
225
  ai_prob = detect_ai_probability(candidate)
226
- similarity = calculate_semantic_similarity(original_text, candidate)
227
-
228
- add_log(f"Candidate: AI={ai_prob:.4f}, Similarity={similarity:.4f}")
229
-
230
- # Accept candidate if it reduces AI probability
231
- if ai_prob < best_ai_prob and similarity >= SIMILARITY_THRESHOLD:
232
- best_text = candidate
233
- best_ai_prob = ai_prob
234
- best_similarity = similarity
235
-
236
- if best_ai_prob < DETECTION_THRESHOLD:
237
- add_log(f"✅ Achieved undetectable status")
238
- return best_text, best_ai_prob, best_similarity, True
239
- except Exception as e:
240
- add_log(f"⚠️ Error evaluating candidate: {str(e)}")
241
  continue
242
-
243
- return best_text, best_ai_prob, best_similarity, best_ai_prob < DETECTION_THRESHOLD
244
-
245
- def process_text(text):
246
- add_log("Starting text processing")
247
- original_text = text
248
-
249
- # Initial AI detection
250
- initial_ai_prob = detect_ai_probability(text)
251
- add_log(f"Initial AI probability: {initial_ai_prob:.4f}")
252
-
253
- # Humanization decision
254
- if initial_ai_prob > DETECTION_THRESHOLD:
255
- add_log("AI probability exceeds threshold - transforming")
256
- humanized, final_ai_prob, similarity, success = transform_to_human(text, original_text)
257
- return final_ai_prob, humanized, success, similarity, initial_ai_prob
258
  else:
259
- add_log("Text appears human-like - no transformation needed")
260
- return initial_ai_prob, text, False, 1.0, initial_ai_prob
261
 
262
- # UI Components
263
  with st.sidebar:
264
- st.header("Configuration")
265
- detection_threshold = st.slider("Detection Threshold", 0.1, 0.9, DETECTION_THRESHOLD, 0.05)
266
- similarity_threshold = st.slider("Meaning Preservation", 0.1, 0.9, SIMILARITY_THRESHOLD, 0.05)
267
- max_iterations = st.slider("Max Transformation Passes", 1, 5, MAX_ITERATIONS, 1)
268
-
269
- st.caption("Enhanced Models:")
270
- st.code(f"Detector: {MODELS['detection']}")
271
- st.code(f"Humanizer: {MODELS['humanization']}")
272
- st.code(f"Similarity: {MODELS['similarity']}")
273
-
274
- if st.button("Clear Logs"):
275
- st.session_state.logs = []
276
- st.rerun()
277
 
278
- st.subheader("Input")
279
- input_text = st.text_area("Paste AI-generated text to transform",
280
- placeholder="Enter text to humanize...",
281
- height=200,
282
- key="input_text")
283
 
284
- if st.button("Transform to Human-like Text", type="primary"):
285
  if not input_text.strip():
286
- st.warning("Please enter some text to transform")
287
  else:
288
- # Update parameters from UI
289
- DETECTION_THRESHOLD = detection_threshold
290
- SIMILARITY_THRESHOLD = similarity_threshold
291
- MAX_ITERATIONS = max_iterations
292
-
293
- with st.expander("Processing Logs", expanded=True):
294
- log_placeholder = st.empty()
295
-
296
- try:
297
- start_time = time.time()
298
- ai_prob, output, transformed, similarity, initial_ai = process_text(input_text)
299
- processing_time = time.time() - start_time
300
-
301
- log_text = "\n".join(st.session_state.logs[-20:])
302
- log_placeholder.code(log_text, language="log")
303
-
304
- add_log(f"Processing completed in {processing_time:.1f} seconds")
305
- except Exception as e:
306
- log_placeholder.error(f"Transformation failed: {str(e)}")
307
- st.stop()
308
-
309
- st.divider()
310
-
311
- # Results display
312
- col1, col2 = st.columns(2)
313
- with col1:
314
- st.subheader("Analysis Results")
315
-
316
- st.metric("Initial AI Probability", f"{initial_ai*100:.1f}%",
317
- delta="High AI" if initial_ai > 0.7 else "Medium AI" if initial_ai > 0.4 else "Low AI")
318
-
319
- st.metric("Final AI Probability", f"{ai_prob*100:.1f}%",
320
- delta="Undetectable" if ai_prob < DETECTION_THRESHOLD else "Detectable",
321
- delta_color="inverse")
322
-
323
- # Confidence indicator
324
- confidence_reduction = max(0, initial_ai - ai_prob)
325
- st.progress(int(confidence_reduction * 100),
326
- text=f"AI Detection Reduced by {confidence_reduction*100:.1f}%")
327
-
328
- st.subheader("Original Text")
329
- st.write(input_text)
330
-
331
- with col2:
332
- status = "Transformed" if transformed else "Original"
333
- color = "green" if transformed else "blue"
334
- st.subheader(f"Output Text ({status})")
335
- st.markdown(f'<div style="border-left: 4px solid {color}; padding: 10px;">{output}</div>',
336
- unsafe_allow_html=True)
337
-
338
- st.metric("Meaning Preservation", f"{similarity*100:.1f}%")
339
-
340
- if transformed:
341
- st.success("✅ Successfully transformed to human-like text")
342
- else:
343
- if ai_prob < DETECTION_THRESHOLD:
344
- st.info("✅ Text already human-like")
345
- else:
346
- st.warning("⚠️ Text may still be detectable")
347
-
348
- st.subheader("Quality Feedback")
349
- quality = st.slider("How human-like does this sound?", 1, 5, 4)
350
- if quality < 3:
351
- st.warning("Thanks for your feedback! We'll improve.")
352
-
353
- # Add sample texts for testing
354
- st.sidebar.divider()
355
- st.sidebar.subheader("Sample AI Texts")
356
- sample_texts = {
357
- "Academic": "The utilization of renewable energy sources is imperative for environmental sustainability and represents a critical pathway toward decarbonizing our global energy infrastructure.",
358
- "Business": "Leveraging synergistic paradigms, we can optimize scalable solutions to drive disruptive innovation in the marketplace.",
359
- "Technical": "Machine learning algorithms, particularly deep neural networks, require substantial computational resources during their training phases.",
360
- "Creative": "The city pulsed with predictable rhythms—lights changed on schedule, drones delivered packages, even rain fell by appointment."
361
- }
362
-
363
- for name, text in sample_texts.items():
364
- if st.sidebar.button(name, key=f"sample_{name}"):
365
- st.session_state.input_text = text
366
- st.rerun()
 
1
  import os
2
  os.environ["STREAMLIT_SERVER_ENABLE_WATCHER"] = "false"
3
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
4
+ os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
5
 
6
  import streamlit as st
7
  import time
 
8
  import re
 
9
  import random
10
+ import torch
11
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
12
  from sentence_transformers import SentenceTransformer, util
13
 
 
 
 
 
14
  st.set_page_config(page_title="AI Humanizer Pro", layout="wide")
15
  st.title("AI Humanizer Pro")
16
  st.subheader("Transform AI text to undetectable human content")
17
 
 
18
  DETECTION_THRESHOLD = 0.65
19
  MAX_LENGTH = 256
20
  SIMILARITY_THRESHOLD = 0.75
21
  MAX_ITERATIONS = 3
22
 
23
+ # Updated best models as of 2025
24
  MODELS = {
25
+ "detection": "roberta-large-openai-detector", # Strong RoBERTa-based AI detector
26
+ "humanization": "facebook/bart-large-cnn", # Strong generation model, good for rewriting
27
+ "similarity": "sentence-transformers/all-MiniLM-L12-v2" # Compact, very good semantic similarity
28
  }
29
 
 
30
  STYLES = [
31
  "casual conversation",
32
  "personal diary entry",
 
36
  "technical documentation"
37
  ]
38
 
39
+ if "models_loaded" not in st.session_state:
40
+ # Load models once
41
+ st.session_state.detection_tokenizer = AutoTokenizer.from_pretrained(MODELS["detection"])
42
+ st.session_state.detection_model = AutoModelForSequenceClassification.from_pretrained(MODELS["detection"])
43
+ st.session_state.humanizer = pipeline(
44
+ "text2text-generation",
45
+ model=MODELS["humanization"],
46
+ tokenizer=MODELS["humanization"],
47
+ device=-1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  )
49
+ st.session_state.similarity_model = SentenceTransformer(MODELS["similarity"], device="cpu")
50
+ st.session_state.models_loaded = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  def preprocess_text(text):
 
53
  text = re.sub(r'\s+', ' ', text)
54
  text = re.sub(r'[^\w\s.,;:!?\'-]', '', text)
55
  return text.strip()
56
 
57
  def detect_ai_probability(text):
 
58
  text = preprocess_text(text)
59
+ inputs = st.session_state.detection_tokenizer(
60
+ text, return_tensors="pt", truncation=True, max_length=MAX_LENGTH, padding=True
61
+ )
62
+ with torch.no_grad():
63
+ outputs = st.session_state.detection_model(**inputs)
64
+ probs = torch.softmax(outputs.logits, dim=1)
65
+ return probs[0][1].item()
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
+ def calculate_similarity(original, humanized):
68
+ embeddings = st.session_state.similarity_model.encode([original, humanized], convert_to_tensor=True)
69
+ return util.cos_sim(embeddings[0], embeddings[1]).item()
 
 
70
 
71
+ def enhance_text(text):
 
 
 
 
 
 
72
  style = random.choice(STYLES)
 
 
73
  prompt = f"Rewrite this text in a {style} style while preserving the core meaning: {text}"
 
74
  try:
75
+ result = st.session_state.humanizer(
76
  prompt,
77
  num_beams=3,
 
78
  max_new_tokens=MAX_LENGTH,
79
+ temperature=1.7,
80
  repetition_penalty=2.5,
81
  do_sample=True
82
  )
83
  rewritten = result[0]["generated_text"]
84
  except:
85
  rewritten = text
86
+
87
+ # Remove common AI phrases
88
+ for phrase in [
89
  "Furthermore", "Moreover", "In conclusion",
90
  "it is important to", "plays a crucial role",
91
  "on an unprecedented scale", "as a result",
92
  "in today's world", "it is worth noting"
93
+ ]:
94
+ rewritten = rewritten.replace(phrase, "")
95
+
96
+ # Add some human-like randomness
 
 
97
  if random.random() > 0.7 and len(rewritten.split()) > 20:
98
  sentences = rewritten.split('. ')
99
  if len(sentences) > 3:
 
100
  sentences.insert(random.randint(2, len(sentences)-1), "Let me think.")
101
  rewritten = '. '.join(sentences)
102
+
103
  return rewritten
104
 
105
+ def transform_text(text, original):
 
 
 
106
  best_text = text
107
+ best_prob = detect_ai_probability(text)
108
+ best_sim = calculate_similarity(original, text)
109
+ if best_prob < DETECTION_THRESHOLD:
110
+ return best_text, best_prob, best_sim, True
111
+
112
+ for _ in range(MAX_ITERATIONS):
113
+ candidate = enhance_text(best_text)
 
 
 
 
 
 
114
  try:
115
  ai_prob = detect_ai_probability(candidate)
116
+ sim = calculate_similarity(original, candidate)
117
+ if ai_prob < best_prob and sim >= SIMILARITY_THRESHOLD:
118
+ best_text, best_prob, best_sim = candidate, ai_prob, sim
119
+ if best_prob < DETECTION_THRESHOLD:
120
+ return best_text, best_prob, best_sim, True
121
+ except:
 
 
 
 
 
 
 
 
 
122
  continue
123
+ return best_text, best_prob, best_sim, best_prob < DETECTION_THRESHOLD
124
+
125
+ def process(text):
126
+ original = text
127
+ initial_prob = detect_ai_probability(text)
128
+ if initial_prob > DETECTION_THRESHOLD:
129
+ transformed, final_prob, similarity, success = transform_text(text, original)
130
+ return initial_prob, final_prob, transformed, success, similarity
 
 
 
 
 
 
 
 
131
  else:
132
+ return initial_prob, initial_prob, text, False, 1.0
 
133
 
134
+ # UI
135
  with st.sidebar:
136
+ st.header("Settings")
137
+ DETECTION_THRESHOLD = st.slider("Detection Threshold", 0.1, 0.9, DETECTION_THRESHOLD, 0.05)
138
+ SIMILARITY_THRESHOLD = st.slider("Meaning Preservation", 0.1, 0.9, SIMILARITY_THRESHOLD, 0.05)
139
+ MAX_ITERATIONS = st.slider("Max Transformation Passes", 1, 5, MAX_ITERATIONS, 1)
 
 
 
 
 
 
 
 
 
140
 
141
+ input_text = st.text_area("Paste AI-generated text", height=200)
 
 
 
 
142
 
143
+ if st.button("Transform"):
144
  if not input_text.strip():
145
+ st.warning("Please enter text")
146
  else:
147
+ st.session_state.logs = []
148
+ start = time.time()
149
+ init_prob, final_prob, output_text, transformed, similarity = process(input_text)
150
+ duration = time.time() - start
151
+
152
+ st.write(f"**Initial AI Probability:** {init_prob:.2%}")
153
+ st.write(f"**Final AI Probability:** {final_prob:.2%}")
154
+ st.write(f"**Meaning Preservation:** {similarity:.2%}")
155
+ st.write(f"**Transformation:** {'Yes' if transformed else 'No'}")
156
+ st.write(f"**Processing time:** {duration:.1f}s")
157
+
158
+ st.subheader("Output Text")
159
+ st.markdown(f'<div style="border-left:4px solid {"green" if transformed else "blue"}; padding:10px;">{output_text}</div>', unsafe_allow_html=True)