EditLens / app.py
multimodalart's picture
multimodalart HF Staff
Use real human/AI dataset rows as examples
0717dfc verified
Raw
History Blame Contribute Delete
5.38 kB
import os
import spaces
import torch
import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from preprocess import clean_text, count_words
# --- Model identifiers and input limits -------------------------------------
# Fine-tuned classifier weights are loaded from CHECKPOINT; the tokenizer is
# loaded from BASE_MODEL.
CHECKPOINT = "pangram/editlens_roberta-large"
BASE_MODEL = "FacebookAI/roberta-large"
# Token cap handed to the tokenizer (inputs are truncated to this length).
MAX_LENGTH = 512
# Below this word count the UI warns that the result is less reliable.
MIN_WORDS = 50
# Optional Hub token read from the environment; None when the variable is unset.
HF_TOKEN = os.getenv("HF_TOKEN")

# --- Load tokenizer and model once at import time ---------------------------
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
model = AutoModelForSequenceClassification.from_pretrained(
    CHECKPOINT,
    token=HF_TOKEN,
)
model.eval()  # inference only
model.to("cuda")

# Number of output classes ("buckets") reported by the checkpoint's config.
N_BUCKETS = model.config.num_labels
# Buckets are ordinal along an "extent of AI editing" axis: index 0 is fully
# human, index N-1 is fully AI-generated, and the ones in between stand for
# progressively heavier AI editing. Human-readable names are only defined for
# the 4-bucket layout; any other bucket count gets generic "Bucket i" names.
_NAMED_BUCKET_LAYOUTS = {
    4: [
        "Human-written",
        "Lightly AI-edited",
        "Heavily AI-edited",
        "Fully AI-generated",
    ],
}
if N_BUCKETS in _NAMED_BUCKET_LAYOUTS:
    BUCKET_NAMES = _NAMED_BUCKET_LAYOUTS[N_BUCKETS]
else:
    BUCKET_NAMES = [f"Bucket {i}" for i in range(N_BUCKETS)]
@spaces.GPU(duration=30)
def analyze(text):
    """Score a passage for the extent of AI editing.

    The text is cleaned, tokenized (truncated to ``MAX_LENGTH`` tokens) and
    run through the classifier. The bucket probabilities are then collapsed
    into a single score in [0, 1]: the expected bucket index divided by the
    highest bucket index.

    Args:
        text: Raw passage supplied by the user.

    Returns:
        A ``(summary, label_probs)`` tuple. ``summary`` is a Markdown string
        with the score and the most likely bucket; ``label_probs`` maps each
        bucket name to its softmax probability.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    if not text or not text.strip():
        raise gr.Error("Please paste some text to analyze.")

    n_words = count_words(text)
    if n_words < MIN_WORDS:
        # Short passages are still scored; the user is only warned.
        gr.Warning(
            f"Only {n_words} words detected. EditLens is most reliable on passages "
            f"of at least {MIN_WORDS} words; treat this result with caution."
        )

    cleaned = clean_text(text)
    inputs = tokenizer(
        cleaned,
        truncation=True,
        max_length=MAX_LENGTH,
        return_tensors="pt",
    ).to("cuda")

    with torch.no_grad():
        logits = model(**inputs).logits

    # Single-item batch: take row 0 and move it to the CPU for post-processing.
    probs = torch.softmax(logits.float(), dim=-1)[0].cpu()
    bucket_idx = int(torch.argmax(probs).item())

    # Expected bucket index, normalized by the top index so the score lies in
    # [0, 1]. The max() guard avoids a ZeroDivisionError when the checkpoint
    # exposes a single label; in that degenerate case the score is 0.0.
    bucket_labels = torch.arange(N_BUCKETS, dtype=torch.float32)
    top_index = max(N_BUCKETS - 1, 1)
    score = float((probs @ bucket_labels).item() / top_index)

    label_probs = dict(zip(BUCKET_NAMES, probs.tolist()))

    summary = (
        f"## AI editing score: {score:.2f} / 1.00\n"
        f"**Most likely:** {BUCKET_NAMES[bucket_idx]}\n\n"
        "0.00 means the text reads as fully human-written; "
        "1.00 means it reads as fully AI-generated. The score is the expected "
        "value over the bucket distribution below."
    )
    return summary, label_probs
# Both examples are real rows from the EditLens test set (pangram/editlens_iclr),
# not text written for this demo — a human_written review and an ai_generated review.
#
# HUMAN_EXAMPLE is the human_written sample. It is offered as a clickable
# example input in the UI. The wording (including its grammar slips) is kept
# exactly as written here so it stays a verbatim dataset row — do not "fix" it.
HUMAN_EXAMPLE = (
    "We enjoyed our first visit to enroute coffee. Place was very easy to find and "
    "had a very warm and cozy feel for the place. Walking in you quickly view the "
    "delicious looking pastries they have lined up along the counter. Trying those "
    "next visit. They had a large section of coffee choices and looks like they sell "
    "additional merchandise. Giving them a 5 for outstanding 1st impression.\n\n"
    "Note for owner: possible define the cash register station or separate it a bit "
    "more from coffee pickup station. Seemed a bit crowded in that area."
)
# AI-generated sample review, offered as a clickable example input in the UI.
# NOTE(review): the "’" sequences look like a UTF-8 right single quote (’)
# that was decoded as cp1252. Confirm whether the dataset row really contains
# these bytes before changing them — the text is meant to match the dataset.
AI_EXAMPLE = (
    "McDonald’s offers a consistent fast-food experience with widely recognized "
    "favorites like the Big Mac, fries, and McNuggets. The service is generally "
    "quick, and the menu provides options for breakfast, lunch, and dinner, as well "
    "as some healthier choices. While the food isn’t gourmet, it’s reliably "
    "tasty and convenient, making it a popular choice for people on the go. Prices "
    "are reasonable, but quality can vary slightly between locations. Overall, "
    "McDonald’s delivers on expectations for fast, accessible, and familiar meals."
)
# UI layout: an intro blurb, then a two-column row (input + examples on the
# left, results on the right), then a licensing/usage disclaimer.
with gr.Blocks(title="EditLens") as demo:
    # The word-count hint is interpolated from MIN_WORDS so it cannot drift
    # from the threshold that analyze() actually warns on.
    gr.Markdown(
        f"""
# EditLens — Quantifying the Extent of AI Editing in Text
EditLens scores text on a continuous scale according to *how much AI
intervention* it contains, rather than a simple human-vs-AI binary. This demo
runs the RoBERTa-large model from the
[EditLens ICLR 2026 paper](https://arxiv.org/abs/2510.03154)
([code](https://github.com/pangramlabs/EditLens) ·
[model](https://huggingface.co/pangram/editlens_roberta-large)).
Paste a passage (≥ {MIN_WORDS} words works best) and get an AI-editing score in [0, 1].
"""
    )

    with gr.Row():
        # Left column: text input, submit button and canned examples.
        with gr.Column():
            text_in = gr.Textbox(
                label="Text to analyze",
                placeholder="Paste a paragraph or more here…",
                lines=12,
            )
            btn = gr.Button("Analyze", variant="primary")
            gr.Examples(
                examples=[[HUMAN_EXAMPLE], [AI_EXAMPLE]],
                inputs=text_in,
                label="Examples (real human-written / AI-generated reviews from the EditLens test set)",
            )

        # Right column: Markdown summary plus the per-bucket probabilities.
        with gr.Column():
            summary_out = gr.Markdown()
            label_out = gr.Label(label="Bucket distribution", num_top_classes=N_BUCKETS)

    # Both the button click and the textbox submit event run the same analysis.
    for trigger in (btn.click, text_in.submit):
        trigger(analyze, inputs=text_in, outputs=[summary_out, label_out])

    gr.Markdown(
        "Model and dataset are licensed CC BY-NC-SA 4.0. EditLens is a research "
        "system and can be wrong — do not use it to make consequential judgments "
        "about real people."
    )
# Start the Gradio app; this runs at import time (no __main__ guard).
demo.launch()