File size: 2,737 Bytes
7c51531
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
"""Shared configuration and input validation for the GigaCheck demo."""

from __future__ import annotations

from pydantic import BaseModel


class DemoConfig(BaseModel):
    """Static, default-valued settings shared across the demo.

    Every field has a default, so ``DemoConfig()`` can be constructed with no
    arguments.

    Attributes:
        classifier_model_id: Hub id of the human/AI classifier model.
        detector_model_id: Hub id of the AI-span detector model.
        min_words: Minimum number of words an input must contain (inclusive:
            ``validate_text`` rejects only counts strictly below it).
        max_words: Maximum number of words an input may contain (inclusive:
            ``validate_text`` rejects only counts strictly above it).
        human_color: Hex color used for the "human" portion of the bar.
        ai_color: Hex color used for the "AI" portion of the bar and span highlights.
        default_conf_threshold: Default confidence threshold for the detector.
    """

    # Model identifiers.
    classifier_model_id: str = "iitolstykh/GigaCheck-Classifier-Multi"
    detector_model_id: str = "iitolstykh/GigaCheck-Detector-Multi"
    # Word-count bounds consumed by ``validate_text``.
    min_words: int = 15
    max_words: int = 512
    # Presentation colors as "#RRGGBB" strings.
    human_color: str = "#2FA66B"
    ai_color: str = "#E5533C"
    # NOTE(review): presumably a probability-like value in [0, 1]; no range is enforced here.
    default_conf_threshold: float = 0.5


# Module-level configuration instance built entirely from the field defaults.
# ``validate_text`` binds this object as the default for its ``config`` argument.
CONFIG = DemoConfig()


class ValidationResult(BaseModel):
    """Outcome of validating a piece of input text.

    Attributes:
        ok: Whether the text satisfies the word-count bounds.
        message: Human-readable explanation when ``ok`` is ``False``;
            ``validate_text`` sets it to the empty string on success.
        word_count: Number of whitespace-separated words found in the text.
    """

    # All three fields are required; none has a default.
    ok: bool
    message: str
    word_count: int


def count_words(text: str) -> int:
    """Return how many whitespace-delimited tokens ``text`` contains.

    The text is split with ``str.split()`` and no explicit separator, so any
    run of whitespace acts as a single delimiter and leading or trailing
    whitespace produces no empty tokens.

    Args:
        text: Arbitrary user input.

    Returns:
        The token count; ``0`` for an empty or whitespace-only string.
    """
    tokens = text.split()
    return len(tokens)


def validate_text(text: str, config: DemoConfig = CONFIG) -> ValidationResult:
    """Validate that ``text`` falls within the configured word bounds.

    Both bounds are inclusive: a text is accepted when
    ``config.min_words <= word_count <= config.max_words``.

    Args:
        text: User-supplied text to analyze.
        config: Demo configuration providing the word bounds.

    Returns:
        A :class:`ValidationResult` carrying the word count. ``ok`` is ``True``
        with an empty ``message`` when the text is within bounds; otherwise
        ``ok`` is ``False`` and ``message`` explains which bound was violated.
    """
    word_count = count_words(text)

    if word_count < config.min_words:
        message = (
            f"Text is too short: {word_count} word(s). "
            f"Please enter at least {config.min_words} words."
        )
    elif word_count > config.max_words:
        # Exactly ``max_words`` words is accepted, so the hint must say
        # "at most" rather than "under" to match the actual bound.
        message = (
            f"Text is too long: {word_count} words. "
            f"Please keep it to at most {config.max_words} words."
        )
    else:
        message = ""

    # A non-empty message is produced only on the two failure branches above.
    return ValidationResult(ok=not message, message=message, word_count=word_count)