Spaces:
Sleeping
Sleeping
File size: 8,429 Bytes
fea8d44 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 | # =============================================
# HuForm AI Mini - Gradio UI
# AI-generated text detection + humanisation
# Clean version — generation warnings removed
# Last updated for transformers 2025–2026
# =============================================
# ── 1. Install dependencies ───────────────────────────────────────
# !pip install -q gradio transformers torch accelerate
# ── 2. Imports ─────────────────────────────────────────────────────
import gradio as gr
import torch
import re
from transformers import (
pipeline,
AutoTokenizer,
AutoModelForCausalLM,
GenerationConfig
)
# ── 3. Configuration ───────────────────────────────────────────────
# Run on the GPU whenever torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(f"Using device: {DEVICE.upper()}")

# Hub id of the classifier loaded by get_detection().
DETECTION_MODEL = "Hello-SimpleAI/chatgpt-detector-roberta"

# Hub id of the causal language model loaded by get_humaniser().
HUMANISATION_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
# ── 4. Lazy model loading ──────────────────────────────────────────
# Module-level cache so the detector is only loaded once per process.
_detection_pipe = None


def get_detection():
    """Return the shared text-classification pipeline, loading it on first use.

    The pipeline is built for ``DETECTION_MODEL``. On CUDA it is placed on
    device 0 in float16; otherwise it is placed on the CPU (device -1) with
    no explicit dtype.
    """
    global _detection_pipe
    if _detection_pipe is not None:
        return _detection_pipe

    print(f"Loading detector: {DETECTION_MODEL}")
    on_gpu = DEVICE == "cuda"
    _detection_pipe = pipeline(
        "text-classification",
        model=DETECTION_MODEL,
        device=0 if on_gpu else -1,
        torch_dtype=torch.float16 if on_gpu else None,
    )
    return _detection_pipe
# Module-level cache so the rewriting model is only loaded once per process.
_humanisation_pipe = None


def get_humaniser():
    """Return the shared text-generation pipeline, loading it on first use.

    The tokenizer and model are both loaded from ``HUMANISATION_MODEL``.
    On CUDA the model is loaded in float16 with ``device_map="auto"``;
    otherwise it is loaded in float32 with no device map.
    """
    global _humanisation_pipe
    if _humanisation_pipe is not None:
        return _humanisation_pipe

    print(f"Loading humaniser: {HUMANISATION_MODEL}")
    if DEVICE == "cuda":
        dtype, placement = torch.float16, "auto"
    else:
        dtype, placement = torch.float32, None

    tokenizer = AutoTokenizer.from_pretrained(HUMANISATION_MODEL)
    model = AutoModelForCausalLM.from_pretrained(
        HUMANISATION_MODEL,
        torch_dtype=dtype,
        device_map=placement,
    )
    _humanisation_pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )
    return _humanisation_pipe
# ── 5. Helper functions ────────────────────────────────────────────
# A sentence boundary is whitespace that directly follows '.', '!' or '?'.
_SENTENCE_BOUNDARY = re.compile(r'(?<=[.!?])\s+')


def split_sentences(text):
    """Split *text* into a list of trimmed, non-empty sentences.

    Sentences are separated wherever whitespace follows ``.``, ``!`` or
    ``?``; the terminating punctuation stays attached to its sentence.

    Args:
        text: The text to split. ``None``, empty and whitespace-only values
            are all treated as "no text".

    Returns:
        A list of sentence strings, empty when there is no text.
    """
    # Guard against None as well as blank strings so callers never hit an
    # AttributeError on .strip().
    if not text or not text.strip():
        return []
    pieces = (piece.strip() for piece in _SENTENCE_BOUNDARY.split(text.strip()))
    return [piece for piece in pieces if piece]
def _ai_probability(label, score):
    """Convert one classifier prediction into a 0-100 "AI-written" percentage.

    The pipeline reports the winning label together with its confidence, so
    the score is used directly when the label denotes machine text and is
    inverted when it denotes human text.
    """
    normalised = str(label).lower()
    words = normalised.replace("-", " ").replace("_", " ").split()
    # "gpt" covers the "ChatGPT" label; "ai" is matched as a whole word so
    # that unrelated labels merely containing those two letters do not count.
    is_ai_label = "ai" in words or any(
        marker in normalised for marker in ("fake", "generated", "gpt", "machine")
    )
    return score * 100 if is_ai_label else (1 - score) * 100


def detect_ai(text):
    """Score every sentence of *text* for AI authorship and render the result.

    Args:
        text: The paragraph to analyse. ``None`` and blank strings are
            reported as missing input.

    Returns:
        A ``(html, summary)`` tuple: ``html`` is an overall heading followed
        by one coloured block per sentence, and ``summary`` is a one-line
        plain-text overall score. For missing input the tuple is
        ``("No text provided.", "")``.
    """
    # Local import keeps this block self-contained; html.escape is stdlib.
    from html import escape

    if not text or not text.strip():
        return "No text provided.", ""

    sentences = split_sentences(text)
    if not sentences:
        return "No text provided.", ""

    predictions = get_detection()(sentences, truncation=True, max_length=512)

    blocks = []
    total_ai = 0.0
    for sentence, prediction in zip(sentences, predictions):
        ai_prob = _ai_probability(prediction['label'], prediction['score'])
        total_ai += ai_prob

        if ai_prob > 85:
            tag, color = "Very likely AI", "#dc2626"
        elif ai_prob > 60:
            tag, color = "Likely AI", "#d97706"
        else:
            tag, color = "Likely Human", "#16a34a"

        # The sentence is user-supplied, so escape it before embedding it in
        # markup that the browser will render.
        blocks.append(
            f"<div style='padding:8px; margin:4px 0; border-left:4px solid {color};'>"
            f"<strong>{tag} ({ai_prob:.1f}%)</strong><br>{escape(sentence)}</div>"
        )

    avg = total_ai / len(sentences)
    summary = f"<h3>Overall AI probability: {avg:.1f}%</h3>"
    return summary + "".join(blocks), f"Overall: {avg:.1f}% AI"
def humanise(text, style="Natural", intensity=0.7):
    """Rewrite *text* in the requested style using the humaniser model.

    Args:
        text: The text to rewrite. ``None`` and blank strings are rejected
            with a prompt to enter text.
        style: One of "Natural", "Casual", "Academic" or "Professional";
            any other value falls back to "Natural".
        intensity: Number scaled into the sampling temperature as
            ``0.4 + intensity * 0.5``.

    Returns:
        The rewritten text, ``"Please enter some text."`` for missing input,
        or an ``"Error during generation: ..."`` message if generation fails.
    """
    if not text or not text.strip():
        return "Please enter some text."

    pipe = get_humaniser()

    style_prompts = {
        "Natural": "Rewrite this to sound completely natural, human-written — vary sentence length, use contractions, slight imperfections.",
        "Casual": "Rewrite this in a relaxed, friendly, conversational tone like a real person chatting.",
        "Academic": "Rewrite this in clear, formal academic style with precise and sophisticated language.",
        "Professional": "Rewrite this in a crisp, professional business tone — confident and authoritative.",
    }
    tone = style_prompts.get(style, style_prompts["Natural"])

    # Hand-built chat-format prompt using <|im_start|>/<|im_end|> markers.
    prompt = (
        "<|im_start|>system\n"
        "You are an expert editor that removes AI stiffness and makes text feel authentically human.\n"
        "Keep original meaning 100%. Improve flow, rhythm, vocabulary variety. Output ONLY the rewritten text.<|im_end|>\n"
        "<|im_start|>user\n"
        f"{tone}\n"
        "Text:\n"
        f"{text}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )

    try:
        # An explicit GenerationConfig carries all sampling settings in one
        # place.
        gen_config = GenerationConfig(
            max_new_tokens=600,
            temperature=0.4 + float(intensity) * 0.5,
            top_p=0.92,
            repetition_penalty=1.08,
            do_sample=True,
            pad_token_id=pipe.tokenizer.eos_token_id,
            eos_token_id=pipe.tokenizer.eos_token_id,
        )
        # Clear max_length so only max_new_tokens bounds the output.
        gen_config.max_length = None

        # return_full_text=False yields only the completion. Splitting the
        # full output on the word "assistant" would cut at the wrong place
        # whenever the user's own text contains that word.
        rewritten = pipe(
            prompt,
            generation_config=gen_config,
            num_return_sequences=1,
            return_full_text=False,
        )[0]["generated_text"]
    except Exception as e:
        # UI boundary: surface the failure in the output box instead of
        # raising into the event handler.
        return f"Error during generation: {str(e)}"

    # Defensive clean-up in case the prompt or an end marker is still present.
    if rewritten.startswith(prompt):
        rewritten = rewritten[len(prompt):]
    rewritten = rewritten.split("<|im_end|>", 1)[0]
    return rewritten.strip()
# ── 6. Gradio Interface ────────────────────────────────────────────
# Two-column layout: inputs and controls on the left, results on the right.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# HuForm AI Mini\n**Sentence-level AI detection + style-controlled humanisation**")

    with gr.Row():
        with gr.Column(scale=1):
            input_text = gr.Textbox(
                label="Input Text (paragraph)",
                placeholder="Paste or type text here...",
                lines=8,
                max_lines=20,
            )
            style_dropdown = gr.Dropdown(
                choices=["Natural", "Casual", "Academic", "Professional"],
                value="Natural",
                label="Humanisation Style",
            )
            intensity_slider = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.7, step=0.05,
                label="Rewrite Intensity (higher = more creative change)",
            )
            with gr.Row():
                detect_btn = gr.Button("Analyze (Detect AI)")
                humanise_btn = gr.Button("Rewrite / Humanise")

        with gr.Column(scale=1):
            detection_output = gr.HTML(label="Detection Result")
            humanised_output = gr.Textbox(label="Rewritten Text", lines=10)

    # ── Event handlers ─────────────────────────────────────────────
    # detect_ai returns (html, summary); the summary goes to a hidden,
    # named component so both return values have an explicit destination.
    detection_summary = gr.Textbox(visible=False)
    detect_btn.click(
        fn=detect_ai,
        inputs=input_text,
        outputs=[detection_output, detection_summary],
    )
    humanise_btn.click(
        fn=humanise,
        inputs=[input_text, style_dropdown, intensity_slider],
        outputs=humanised_output,
    )

    # Example texts
    gr.Examples(
        examples=[
            ["The rapid advancement of artificial intelligence technologies has significantly transformed numerous industries and daily life."],
            ["Yo this new AI stuff is actually kinda wild, like it's everywhere now lol."],
            ["Machine learning algorithms demonstrate superior performance in pattern recognition tasks across diverse datasets."],
        ],
        inputs=input_text,
        label="Quick examples",
    )

# ── Launch ─────────────────────────────────────────────────────────
# Guarded so that importing this module does not start a server.
if __name__ == "__main__":
    demo.launch(debug=False, share=True)