# =============================================
#        HuForm AI Mini - Gradio UI
#   AI-generated text detection + humanisation
#   Uses an explicit GenerationConfig so recent
#   transformers releases emit no generation warnings
# =============================================

# ── 1. Install dependencies ───────────────────────────────────────
# !pip install -q gradio transformers torch accelerate

# ── 2. Imports ─────────────────────────────────────────────────────
import gradio as gr
import torch
import re
from transformers import (
    pipeline,
    AutoTokenizer,
    AutoModelForCausalLM,
    GenerationConfig
)

# ── 3. Configuration ───────────────────────────────────────────────
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE.upper()}")

# Detection model – good open-source choice
DETECTION_MODEL = "Hello-SimpleAI/chatgpt-detector-roberta"

# Humanisation model – fast and decent quality
HUMANISATION_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"

# ── 4. Lazy model loading ──────────────────────────────────────────
_detection_pipe = None
def get_detection():
    global _detection_pipe
    if _detection_pipe is None:
        print(f"Loading detector: {DETECTION_MODEL}")
        _detection_pipe = pipeline(
            "text-classification",
            model=DETECTION_MODEL,
            device=0 if DEVICE == "cuda" else -1,
            torch_dtype=torch.float16 if DEVICE == "cuda" else None
        )
    return _detection_pipe
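
# Illustrative detector output for a single sentence (label names assumed
# from the Hello-SimpleAI model card: "Human" / "ChatGPT"; score is made up):
#   get_detection()(["Some sentence."])
#   -> [{"label": "Human", "score": 0.98}]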

_humanisation_pipe = None
def get_humaniser():
    global _humanisation_pipe
    if _humanisation_pipe is None:
        print(f"Loading humaniser: {HUMANISATION_MODEL}")
        tokenizer = AutoTokenizer.from_pretrained(HUMANISATION_MODEL)
        model = AutoModelForCausalLM.from_pretrained(
            HUMANISATION_MODEL,
            torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
            device_map="auto" if DEVICE == "cuda" else None
        )
        _humanisation_pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer
        )
    return _humanisation_pipe

# ── 5. Helper functions ────────────────────────────────────────────
def split_sentences(text):
    if not text.strip():
        return []
    return [s.strip() for s in re.split(r'(?<=[.!?])\s+', text.strip()) if s.strip()]
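
# Example:
#   split_sentences("First one. Second one! A third?")
#   -> ["First one.", "Second one!", "A third?"]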

def detect_ai(text):
    if not text.strip():
        return "No text provided.", ""

    sentences = split_sentences(text)
    pipe = get_detection()

    results = []
    total_ai = 0.0

    preds = pipe(sentences, truncation=True, max_length=512)

    for sent, pred in zip(sentences, preds):
        label = pred['label'].lower()
        score = pred['score']

        # Normalize to an AI probability. This detector labels outputs
        # "Human" / "ChatGPT", so "chatgpt" must be in the match list.
        is_ai = any(x in label for x in ["chatgpt", "fake", "ai", "generated"])
        ai_prob = score * 100 if is_ai else (1 - score) * 100
        total_ai += ai_prob

        tag = "Very likely AI" if ai_prob > 85 else "Likely AI" if ai_prob > 60 else "Likely Human"
        color = "#dc2626" if ai_prob > 85 else "#d97706" if ai_prob > 60 else "#16a34a"

        results.append(
            f"<div style='padding:8px; margin:4px 0; border-left:4px solid {color};'>"
            f"<strong>{tag} ({ai_prob:.1f}%)</strong><br>{sent}</div>"
        )

    avg = total_ai / len(sentences) if sentences else 0
    summary = f"<h3>Overall AI probability: {avg:.1f}%</h3>"

    return summary + "".join(results), f"Overall: {avg:.1f}% AI"
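
# Illustrative usage (the percentage below is a made-up placeholder):
#   html, summary = detect_ai("Cats sleep a lot. They also purr.")
#   summary -> "Overall: 12.5% AI"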

def humanise(text, style="Natural", intensity=0.7):
    if not text.strip():
        return "Please enter some text."

    pipe = get_humaniser()

    style_prompts = {
        "Natural": "Rewrite this to sound completely natural and human-written: vary sentence length, use contractions, allow slight imperfections.",
        "Casual": "Rewrite this in a relaxed, friendly, conversational tone, like a real person chatting.",
        "Academic": "Rewrite this in clear, formal academic style with precise and sophisticated language.",
        "Professional": "Rewrite this in a crisp, professional business tone that is confident and authoritative."
    }

    tone = style_prompts.get(style, style_prompts["Natural"])

    prompt = f"""<|im_start|>system
You are an expert editor that removes AI stiffness and makes text feel authentically human.
Keep original meaning 100%. Improve flow, rhythm, vocabulary variety. Output ONLY the rewritten text.<|im_end|>
<|im_start|>user
{tone}
Text:
{text}<|im_end|>
<|im_start|>assistant
"""

    try:
        # Explicit GenerationConfig: setting pad_token_id and clearing
        # max_length silences the usual generation warnings
        gen_config = GenerationConfig(
            max_new_tokens=600,
            temperature=0.4 + float(intensity) * 0.5,
            top_p=0.92,
            repetition_penalty=1.08,
            do_sample=True,
            pad_token_id=pipe.tokenizer.eos_token_id,
            eos_token_id=pipe.tokenizer.eos_token_id
        )
        gen_config.max_length = None  # ← disables conflicting default max_length

        output = pipe(
            prompt,
            generation_config=gen_config,
            num_return_sequences=1
        )[0]["generated_text"]

        # Extract only the text generated after the assistant tag; splitting on
        # the full ChatML tag avoids matching the word "assistant" in the text
        if "<|im_start|>assistant" in output:
            rewritten = output.split("<|im_start|>assistant", 1)[-1]
        else:
            rewritten = output[len(prompt):]

        return rewritten.replace("<|im_end|>", "").strip()
    except Exception as e:
        return f"Error during generation: {str(e)}"
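
# Quick smoke test (downloads the ~1.5B model on first call):
#   print(humanise("The system leverages synergies across verticals.", style="Casual"))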

# ── 6. Gradio Interface ────────────────────────────────────────────
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# HuForm AI Mini\n**Sentence-level AI detection + style-controlled humanisation**")

    with gr.Row():
        with gr.Column(scale=1):
            input_text = gr.Textbox(
                label="Input Text (paragraph)",
                placeholder="Paste or type text here...",
                lines=8,
                max_lines=20
            )

            style_dropdown = gr.Dropdown(
                choices=["Natural", "Casual", "Academic", "Professional"],
                value="Natural",
                label="Humanisation Style"
            )

            intensity_slider = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.7, step=0.05,
                label="Rewrite Intensity (higher = more creative change)"
            )

            with gr.Row():
                detect_btn = gr.Button("Analyze (Detect AI)")
                humanise_btn = gr.Button("Rewrite / Humanise")

        with gr.Column(scale=1):
            detection_output = gr.HTML(label="Detection Result")
            humanised_output = gr.Textbox(label="Rewritten Text", lines=10)
            summary_output = gr.Textbox(visible=False)  # plain-text summary

    # ── Event handlers ─────────────────────────────────────────────
    detect_btn.click(
        fn=detect_ai,
        inputs=input_text,
        outputs=[detection_output, summary_output]
    )

    humanise_btn.click(
        fn=humanise,
        inputs=[input_text, style_dropdown, intensity_slider],
        outputs=humanised_output
    )

    # Example texts
    gr.Examples(
        examples=[
            ["The rapid advancement of artificial intelligence technologies has significantly transformed numerous industries and daily life."],
            ["Yo this new AI stuff is actually kinda wild, like it's everywhere now lol."],
            ["Machine learning algorithms demonstrate superior performance in pattern recognition tasks across diverse datasets."]
        ],
        inputs=input_text,
        label="Quick examples"
    )

# ── Launch ─────────────────────────────────────────────────────────
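# Note: share=True serves the app through a temporary public *.gradio.live
# link; set share=False to keep it local-only.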
demo.launch(debug=False, share=True)