GigaCheck / validation.py
iitolstykh's picture
initial commit
7c51531
Raw
History Blame Contribute Delete
2.74 kB
"""Shared configuration and input validation for the GigaCheck demo."""
from __future__ import annotations
from pydantic import BaseModel
class DemoConfig(BaseModel):
    """Settings shared by every part of the GigaCheck demo.

    All fields carry defaults, so ``DemoConfig()`` yields a ready-to-use
    configuration; individual values can be overridden by keyword.

    Attributes:
        classifier_model_id: Hub id of the human/AI classifier model.
        detector_model_id: Hub id of the AI-span detector model.
        min_words: Smallest number of words an input must contain.
        max_words: Largest number of words an input may contain.
        human_color: Hex color for the "human" portion of the bar.
        ai_color: Hex color for the "AI" portion of the bar and for span
            highlights.
        default_conf_threshold: Default confidence threshold for the detector.
    """

    # --- Models -----------------------------------------------------------
    classifier_model_id: str = "iitolstykh/GigaCheck-Classifier-Multi"
    detector_model_id: str = "iitolstykh/GigaCheck-Detector-Multi"

    # --- Input size limits (counted in whitespace-separated words) ---------
    min_words: int = 15
    max_words: int = 512

    # --- Presentation -----------------------------------------------------
    human_color: str = "#2FA66B"
    ai_color: str = "#E5533C"

    # --- Detector ---------------------------------------------------------
    default_conf_threshold: float = 0.5
# Module-level configuration built from the DemoConfig defaults; it is the
# default value of the ``config`` parameter of ``validate_text``.
CONFIG = DemoConfig()
class ValidationResult(BaseModel):
    """Result of checking an input text against the word-count bounds.

    Attributes:
        ok: ``True`` when the text satisfies the word-count bounds.
        message: Human-readable explanation when ``ok`` is ``False``.
        word_count: Number of words found in the text.
    """

    # Verdict first, then the explanation, then the measured size.
    ok: bool
    message: str
    word_count: int
def count_words(text: str) -> int:
    """Return how many words ``text`` contains.

    A word is any run of characters delimited by whitespace, as produced by
    ``str.split()`` with no arguments: leading, trailing and repeated
    whitespace never yields empty words.

    Args:
        text: Arbitrary user input.

    Returns:
        The number of whitespace-separated tokens (``0`` for an empty or
        whitespace-only string).
    """
    tokens = text.split()
    return len(tokens)
def validate_text(text: str, config: DemoConfig = CONFIG) -> ValidationResult:
    """Validate that ``text`` falls within the configured word bounds.

    Both bounds are inclusive: a text with exactly ``config.min_words`` or
    exactly ``config.max_words`` words is accepted.

    Args:
        text: User-supplied text to analyze.
        config: Demo configuration providing the word bounds. Defaults to the
            module-level ``CONFIG``.

    Returns:
        A :class:`ValidationResult`. ``ok`` is ``True`` when the word count is
        within bounds, in which case ``message`` is the empty string;
        otherwise ``message`` states which bound was violated. ``word_count``
        is populated in every case.
    """
    word_count = count_words(text)

    if word_count < config.min_words:
        return ValidationResult(
            ok=False,
            message=(
                f"Text is too short: {word_count} word(s). "
                f"Please enter at least {config.min_words} words."
            ),
            word_count=word_count,
        )

    if word_count > config.max_words:
        # Exactly ``max_words`` words is accepted, so the hint must say
        # "at most" rather than "under" to match the check above.
        return ValidationResult(
            ok=False,
            message=(
                f"Text is too long: {word_count} words. "
                f"Please keep it to at most {config.max_words} words."
            ),
            word_count=word_count,
        )

    return ValidationResult(ok=True, message="", word_count=word_count)