# GigaCheck / app.py
# Hugging Face file-viewer residue (not part of the program): uploaded by
# iitolstykh, commit "update app" (0cf2685), file size 7.88 kB.
"""GigaCheck demo: classify text and localize AI-written spans.
Two tabs back two Mistral-7B models from the LLMTrace / GigaCheck project:
a binary human/AI classifier and a fine-grained AI-span detector.
"""
from __future__ import annotations
import gradio as gr
from loguru import logger
from models import classify_text, detect_intervals
from rendering import build_classifier_card, build_detector_card
from styles import CSS, THEME
from validation import CONFIG, validate_text
# HTML banner stating the supported languages and the accepted word range.
# The bounds are read from CONFIG once, when this module is imported.
LANG_NOTE = (
    '<div class="gc-lang-note">'
    "⚠️ These models work with <b>English</b> and <b>Russian</b> text only. "
    f"Enter between {CONFIG.min_words} and {CONFIG.max_words} words."
    "</div>"
)
# Classifier samples as (label, text) pairs. Both public lists below are
# derived from this single table so labels and texts always line up.
_CLASSIFIER_SAMPLES = (
    (
        "EN · human",
        "Netflix has laid off around 300 people across a number of departments. The company also laid off 150 workers back in May.",
    ),
    (
        "EN · AI",
        "@GOP @PamBondi Maybe we should talk about Gordon Sondland, who had ZERO experience or qualifications to be an Ambassador to the EU but since he donated a million dollars to trump he got the job anyway. I'm outraged!",
    ),
    (
        "RU · AI",
        "В прекрасное утро, поразмыслив о том, как множество радостей приносит нам природа, я вспомнила о своей милой питомице, мадам Леске, и о ее необъяснимой страсти к дыне. \"Как же,\" подумала я, \"этот нежный плод вмещает в себе не только великолепие вкуса, но и целую кладезь полезных веществ\". И вдруг осенило меня: разве не заслуживает моя преданная спутница опробовать хоть крохотный кусочек этого плода?",
    ),
)
# One single-cell row per sample: each example fills exactly one input.
CLASSIFIER_EXAMPLES = [[sample_text] for _, sample_text in _CLASSIFIER_SAMPLES]
CLASSIFIER_EXAMPLE_LABELS = [sample_label for sample_label, _ in _CLASSIFIER_SAMPLES]
# Detector samples as (label, text) pairs. Both public lists below are
# derived from this single table so labels and texts always line up.
_DETECTOR_SAMPLES = (
    (
        "EN · example",
        "The critic's review of the recent publication was scathing. The book failed miserably in portraying the harmful subjective discourses associated with the hegemony of the political system.",
    ),
    (
        "RU · example",
        "Университет Шеффилд Холлем имеет в Шеффилде два кампуса. Один из них — это Городской кампус, который находится в центре города. Второй кампус, Коллегиальный, расположен на юго-западе Шеффилда. Оба кампуса предлагают современные учебные здания и ресурсы для студентов. История университета Шеффилд Холлем началась в 1843 году с основания Шеффилдской школы дизайна. В 1960-х годах несколько независимых колледжей (включая Школу дизайна) объединились в Шеффилдский Политехникум, с 1976 года — Шеффилдский городской политехникум, с 1992 года — Университет Шеффилд Холлем.",
    ),
)
# One single-cell row per sample: each example fills exactly one input.
DETECTOR_EXAMPLES = [[sample_text] for _, sample_text in _DETECTOR_SAMPLES]
DETECTOR_EXAMPLE_LABELS = [sample_label for sample_label, _ in _DETECTOR_SAMPLES]
def update_counter(text: str) -> str:
    """Build the HTML word counter shown under a textbox.

    The text is run through ``validate_text``. When validation fails and the
    text contains at least one word, the counter gets the extra ``gc-bad``
    CSS class and the validation message is appended. Zero-word input is
    never flagged, even when validation fails.

    Args:
        text: Current textbox content.

    Returns:
        A ``<div>`` snippet of the form ``"<count> / <max> words"``, with the
        validation message appended when the input is flagged.
    """
    check = validate_text(text)
    if check.ok or not check.word_count > 0:
        css_class, suffix = "gc-counter", ""
    else:
        css_class, suffix = "gc-counter gc-bad", f" — {check.message}"
    return (
        f'<div class="{css_class}">'
        f"{check.word_count} / {CONFIG.max_words} words{suffix}"
        "</div>"
    )
def run_classifier(text: str) -> str:
    """Classify ``text`` and render the outcome as an HTML card.

    Args:
        text: User-supplied text.

    Returns:
        The HTML produced by ``build_classifier_card`` for the prediction.

    Raises:
        gr.Error: If ``validate_text`` rejects the text; the error carries
            the validation message.
    """
    check = validate_text(text)
    if not check.ok:
        # Reject before invoking the model.
        raise gr.Error(check.message)

    prediction = classify_text(text)
    label, p_human, p_ai = prediction
    logger.info("classifier: label={} p_human={:.3f}", label, p_human)
    card_html = build_classifier_card(label, p_human, p_ai)
    return card_html
def run_detector(text: str, conf_threshold: float) -> str:
    """Run AI-span detection on ``text`` and render the outcome as an HTML card.

    Args:
        text: User-supplied text.
        conf_threshold: Confidence threshold forwarded to ``detect_intervals``.

    Returns:
        The HTML produced by ``build_detector_card`` for the detected intervals.

    Raises:
        gr.Error: If ``validate_text`` rejects the text; the error carries
            the validation message.
    """
    check = validate_text(text)
    if not check.ok:
        # Reject before invoking the model.
        raise gr.Error(check.message)

    spans = detect_intervals(text, conf_threshold)
    span_count = len(spans)
    logger.info("detector: {} interval(s) at thresh={}", span_count, conf_threshold)
    card_html = build_detector_card(text, spans)
    return card_html
def build_classifier_tab() -> None:
    """Create the classifier tab's components and connect their events.

    Components are created in display order: language note, textbox, word
    counter, button, result area, examples. Intended to be called inside an
    active Gradio layout context.
    """
    gr.HTML(LANG_NOTE)

    textbox = gr.Textbox(
        label="Text",
        placeholder="Paste English or Russian text to classify…",
        lines=8,
    )
    # Seed the counter with the rendering for an empty textbox.
    word_counter = gr.HTML(update_counter(""))
    run_button = gr.Button("Analyze", variant="primary")
    result_card = gr.HTML()

    gr.Examples(
        examples=CLASSIFIER_EXAMPLES,
        inputs=[textbox],
        label="Examples",
        example_labels=CLASSIFIER_EXAMPLE_LABELS,
    )

    # Refresh the counter on every textbox change; classify on button click.
    textbox.change(update_counter, inputs=[textbox], outputs=[word_counter])
    run_button.click(run_classifier, inputs=[textbox], outputs=[result_card])
def build_detector_tab() -> None:
    """Create the detector tab's components and connect their events.

    Components are created in display order: language note, textbox, word
    counter, confidence slider, button, result area, examples. Intended to be
    called inside an active Gradio layout context.
    """
    gr.HTML(LANG_NOTE)

    textbox = gr.Textbox(
        label="Text",
        placeholder="Paste English or Russian text to scan for AI-written spans…",
        lines=8,
    )
    # Seed the counter with the rendering for an empty textbox.
    word_counter = gr.HTML(update_counter(""))
    threshold_slider = gr.Slider(
        label="Confidence threshold",
        minimum=0.0,
        maximum=1.0,
        value=CONFIG.default_conf_threshold,
        step=0.05,
    )
    run_button = gr.Button("Detect", variant="primary")
    result_card = gr.HTML()

    gr.Examples(
        examples=DETECTOR_EXAMPLES,
        inputs=[textbox],
        label="Examples",
        example_labels=DETECTOR_EXAMPLE_LABELS,
    )

    # Refresh the counter on every textbox change; detect on button click,
    # passing the current slider value as the threshold.
    textbox.change(update_counter, inputs=[textbox], outputs=[word_counter])
    run_button.click(
        run_detector,
        inputs=[textbox, threshold_slider],
        outputs=[result_card],
    )
def build_demo() -> gr.Blocks:
    """Create the GigaCheck app: a header followed by two tabs.

    The "Classifier" and "Detector" tabs each start with a short Markdown
    blurb linking to the corresponding model page, followed by the tab's
    own UI.

    Returns:
        The assembled :class:`gradio.Blocks` application (not launched).
    """
    # Static markup is prepared up front so the layout below reads as a tree.
    header_html = (
        '<div id="gc-header"><h1>GigaCheck</h1>'
        "<p>Detect AI-generated text — binary classification and "
        "fine-grained span localization</p></div>"
    )
    classifier_blurb = (
        "Binary **human / AI** classifier — "
        "[GigaCheck-Classifier-Multi]"
        "(https://huggingface.co/iitolstykh/GigaCheck-Classifier-Multi)"
    )
    detector_blurb = (
        "Fine-grained **AI-span detector** — "
        "[GigaCheck-Detector-Multi]"
        "(https://huggingface.co/iitolstykh/GigaCheck-Detector-Multi)"
    )

    with gr.Blocks(theme=THEME, css=CSS, title="GigaCheck") as app:
        gr.HTML(header_html)
        with gr.Tabs():
            with gr.Tab("Classifier"):
                gr.Markdown(classifier_blurb)
                build_classifier_tab()
            with gr.Tab("Detector"):
                gr.Markdown(detector_blurb)
                build_detector_tab()
    return app
# Script entry point: build the app, enable a request queue capped at 50
# pending jobs, then start the server.
if __name__ == "__main__":
    app = build_demo()
    queued_app = app.queue(max_size=50)
    queued_app.launch()