"""Gradio demo for EditLens: score how much AI editing a passage contains.

The script loads a sequence-classification checkpoint once at import time,
exposes a single ``analyze`` callback, and launches a Gradio Blocks UI.
"""

# NOTE(review): import order is preserved from the original file. `spaces` is
# deliberately kept ahead of `torch`/`transformers`; ZeroGPU is understood to
# need importing before CUDA is initialised -- confirm before re-sorting.
import os
import spaces
import torch
import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from preprocess import clean_text, count_words

CHECKPOINT = "pangram/editlens_roberta-large"
BASE_MODEL = "FacebookAI/roberta-large"
MAX_LENGTH = 512  # tokenizer truncation limit; longer inputs are cut silently
MIN_WORDS = 50  # below this the UI warns that the result is less reliable
DEVICE = "cuda"

HF_TOKEN = os.environ.get("HF_TOKEN")

# The tokenizer comes from the base model while the weights come from the
# fine-tuned checkpoint (which may need HF_TOKEN to download).
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
model = AutoModelForSequenceClassification.from_pretrained(CHECKPOINT, token=HF_TOKEN)
model.eval().to(DEVICE)

N_BUCKETS = model.config.num_labels

# A 4-bucket model maps a continuous "extent of AI editing" axis onto ordinal
# buckets: 0 = fully human, N-1 = fully AI-generated, with intermediate buckets
# representing increasing amounts of AI editing.
BUCKET_NAMES = {
    4: [
        "Human-written",
        "Lightly AI-edited",
        "Heavily AI-edited",
        "Fully AI-generated",
    ]
}.get(N_BUCKETS, [f"Bucket {i}" for i in range(N_BUCKETS)])


@spaces.GPU(duration=30)
def _bucket_probabilities(text):
    """Clean and classify ``text``; return one probability per bucket.

    Only this helper is GPU-scheduled, so input validation in ``analyze`` can
    reject a request without waiting for a GPU. The result is a plain list of
    Python floats (softmax over the logits of the single input sequence) so it
    carries no device tensors back to the caller.
    """
    cleaned = clean_text(text)
    inputs = tokenizer(
        cleaned,
        truncation=True,
        max_length=MAX_LENGTH,
        return_tensors="pt",
    ).to(DEVICE)
    with torch.no_grad():
        logits = model(**inputs).logits
    return torch.softmax(logits.float(), dim=-1)[0].cpu().tolist()


def analyze(text: str):
    """Score ``text`` and describe the result for the UI.

    Args:
        text: Raw passage pasted by the user.

    Returns:
        A ``(summary, label_probs)`` pair: ``summary`` is a Markdown string
        with the overall score and the most likely bucket, and ``label_probs``
        maps each bucket name to its probability (for ``gr.Label``).

    Raises:
        gr.Error: If ``text`` is empty or only whitespace.
    """
    # Validate before touching the GPU-scheduled helper.
    if not text or not text.strip():
        raise gr.Error("Please paste some text to analyze.")

    n_words = count_words(text)
    if n_words < MIN_WORDS:
        gr.Warning(
            f"Only {n_words} words detected. EditLens is most reliable on passages "
            f"of at least {MIN_WORDS} words; treat this result with caution."
        )

    prob_list = _bucket_probabilities(text)
    # Rebuild a float32 tensor so the score arithmetic matches the dtype the
    # model output was reduced in.
    probs = torch.tensor(prob_list, dtype=torch.float32)

    bucket_idx = int(torch.argmax(probs).item())
    bucket_labels = torch.arange(N_BUCKETS, dtype=torch.float32)
    # Expected bucket index rescaled to [0, 1]. The denominator is clamped so
    # a single-label checkpoint yields 0.0 instead of dividing by zero.
    score = float((probs @ bucket_labels).item() / max(N_BUCKETS - 1, 1))

    label_probs = dict(zip(BUCKET_NAMES, prob_list))

    summary = (
        f"## AI editing score: {score:.2f} / 1.00\n"
        f"**Most likely:** {BUCKET_NAMES[bucket_idx]}\n\n"
        f"0.00 means the text reads as fully human-written; "
        f"1.00 means it reads as fully AI-generated. The score is the expected "
        f"value over the bucket distribution below."
    )
    return summary, label_probs


# Both examples are real rows from the EditLens test set (pangram/editlens_iclr),
# not text written for this demo — a human_written review and an ai_generated review.
HUMAN_EXAMPLE = (
    "We enjoyed our first visit to enroute coffee. Place was very easy to find and "
    "had a very warm and cozy feel for the place. Walking in you quickly view the "
    "delicious looking pastries they have lined up along the counter. Trying those "
    "next visit. They had a large section of coffee choices and looks like they sell "
    "additional merchandise. Giving them a 5 for outstanding 1st impression.\n\n"
    "Note for owner: possible define the cash register station or separate it a bit "
    "more from coffee pickup station. Seemed a bit crowded in that area."
)

AI_EXAMPLE = (
    "McDonald’s offers a consistent fast-food experience with widely recognized "
    "favorites like the Big Mac, fries, and McNuggets. The service is generally "
    "quick, and the menu provides options for breakfast, lunch, and dinner, as well "
    "as some healthier choices. While the food isn’t gourmet, it’s reliably "
    "tasty and convenient, making it a popular choice for people on the go. Prices "
    "are reasonable, but quality can vary slightly between locations. Overall, "
    "McDonald’s delivers on expectations for fast, accessible, and familiar meals."
)

# Intro copy for the page header. The word threshold is interpolated from
# MIN_WORDS so the text cannot drift from the check in `analyze`.
INTRO_MARKDOWN = f"""
# EditLens — Quantifying the Extent of AI Editing in Text

EditLens scores text on a continuous scale according to *how much AI intervention*
it contains, rather than a simple human-vs-AI binary. This demo runs the
RoBERTa-large model from the
[EditLens ICLR 2026 paper](https://arxiv.org/abs/2510.03154)
([code](https://github.com/pangramlabs/EditLens) ·
[model](https://huggingface.co/pangram/editlens_roberta-large)).

Paste a passage (≥ {MIN_WORDS} words works best) and get an AI-editing score in [0, 1].
"""

with gr.Blocks(title="EditLens") as demo:
    gr.Markdown(INTRO_MARKDOWN)

    with gr.Row():
        # Left column: input box, trigger button, and canned examples.
        with gr.Column():
            text_in = gr.Textbox(
                label="Text to analyze",
                placeholder="Paste a paragraph or more here…",
                lines=12,
            )
            btn = gr.Button("Analyze", variant="primary")
            gr.Examples(
                examples=[[HUMAN_EXAMPLE], [AI_EXAMPLE]],
                inputs=text_in,
                label="Examples (real human-written / AI-generated reviews from the EditLens test set)",
            )
        # Right column: textual summary plus the per-bucket distribution.
        with gr.Column():
            summary_out = gr.Markdown()
            label_out = gr.Label(label="Bucket distribution", num_top_classes=N_BUCKETS)

    # The button and pressing Enter in the textbox trigger the same analysis.
    btn.click(analyze, inputs=text_in, outputs=[summary_out, label_out])
    text_in.submit(analyze, inputs=text_in, outputs=[summary_out, label_out])

    gr.Markdown(
        "Model and dataset are licensed CC BY-NC-SA 4.0. EditLens is a research "
        "system and can be wrong — do not use it to make consequential judgments "
        "about real people."
    )

demo.launch()