File size: 2,586 Bytes
8826e0b
85cba87
3deb947
d16b402
df03841
8826e0b
d16b402
df03841
8826e0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c712a46
 
8826e0b
 
 
 
 
29632cc
8826e0b
 
 
d16b402
8826e0b
 
cea2c03
8826e0b
29632cc
8826e0b
d16b402
 
29632cc
8826e0b
 
d16b402
8826e0b
 
d16b402
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from transformers import pipeline, AutoTokenizer
import streamlit as st
import torch
import time

# Limit CPU threads so the transformer models don't saturate the host CPU
# (this app is explicitly targeted at CPU-only, shared environments).
torch.set_num_threads(2)

st.title("AI Humanizer Lite (CPU Friendly) — Unlimited Text Detection")

@st.cache_resource(show_spinner=False)
def load_models():
    """Load the AI-text classifier and its tokenizer once, cached across reruns.

    Returns:
        tuple: (text-classification pipeline on CPU, matching AutoTokenizer).
    """
    model_name = "roberta-base-openai-detector"
    classifier = pipeline("text-classification", model=model_name, device=-1)
    tok = AutoTokenizer.from_pretrained(model_name)
    return classifier, tok


detect_pipe, tokenizer = load_models()

def chunk_text(text, max_tokens=256):
    """Split *text* into pieces of at most *max_tokens* tokenizer tokens.

    Keeps each piece under the classifier's input limit so long inputs are
    scored chunk-by-chunk instead of being silently truncated.

    Args:
        text: The raw input string.
        max_tokens: Maximum number of tokens per chunk.

    Returns:
        list[str]: Detokenized chunk strings, in original order.
    """
    tokens = tokenizer.tokenize(text)
    chunks = []
    for start in range(0, len(tokens), max_tokens):
        # The original bound a local named `chunk_text`, shadowing this
        # function's own name — renamed to keep the function re-entrant-safe.
        piece = tokenizer.convert_tokens_to_string(tokens[start:start + max_tokens])
        chunks.append(piece)
    return chunks

def detect_ai(text):
    """Return the maximum per-chunk probability that *text* is AI-generated.

    Bug fix: roberta-base-openai-detector labels its outputs "Real" / "Fake",
    never "AI", so the original check ``out["label"].lower() == "ai"`` could
    never match and every score collapsed to 0.0 (the app always reported the
    text as human). "Fake" maps directly to the AI probability; "Real" is
    inverted.

    Args:
        text: The input string (arbitrary length; chunked internally).

    Returns:
        float: Highest AI-likeness score over all chunks, in [0.0, 1.0];
        0.0 for empty input.
    """
    chunks = chunk_text(text)
    scores = []
    for chunk in chunks:
        outputs = detect_pipe(chunk)
        ai_score = 0.0
        for out in outputs:
            label = out["label"].lower()
            if label == "fake":
                # Model's label for machine-generated text.
                ai_score = out["score"]
            elif label == "real":
                # Human label: complement gives the AI probability.
                ai_score = 1.0 - out["score"]
        scores.append(ai_score)
    # Aggregate with max: a single highly AI-like chunk flags the whole text.
    return max(scores) if scores else 0.0

@st.cache_resource(show_spinner=False)
def _load_humanizer():
    # One-line purpose: load the small CPU rewriting model exactly once.
    # The original re-instantiated this pipeline on every button press,
    # paying the multi-second model load each time; caching fixes that.
    return pipeline("text2text-generation", model="sshleifer/distilbart-cnn-12-6", device=-1)

def humanize_text(text):
    """Rewrite *text* with a small CPU-friendly seq2seq model.

    Args:
        text: Input string to be rephrased.

    Returns:
        str: The model's rewritten text.
    """
    humanizer_pipe = _load_humanizer()
    prompt = f"Rewrite this text naturally: {text}"
    # NOTE(review): max_length=128 silently truncates longer outputs — confirm
    # whether long inputs should be chunked like detect_ai does.
    result = humanizer_pipe(prompt, max_length=128, num_beams=3)
    return result[0]["generated_text"]

# --- Streamlit UI: input box plus the detect-then-rewrite workflow ---
text = st.text_area("Paste AI-generated text here", height=300)

if st.button("Detect & Humanize"):
    if not text.strip():
        # Guard: whitespace-only input gets a warning instead of a 0.0 score.
        st.warning("Please enter some text!")
    else:
        # Step 1: score the text chunk-by-chunk, reporting wall-clock timing.
        with st.spinner("Detecting AI content on chunks..."):
            start = time.time()
            score = detect_ai(text)
            duration = time.time() - start
            st.write(f"AI detection score: {score:.2%} (Processed in {duration:.1f}s)")

        # Step 2: rewrite only when the detector flags the text (score >= 0.5);
        # otherwise echo the original back unchanged.
        if score < 0.5:
            st.success("This text looks human already! No need to rewrite.")
            st.write(text)
        else:
            with st.spinner("Rewriting text for better human likeness..."):
                start = time.time()
                rewritten = humanize_text(text)
                duration = time.time() - start
            st.success(f"Rewritten text in {duration:.1f} seconds")
            st.write(rewritten)