"""
Validation module constants.

This module defines all magic numbers used across the validation system,
providing clear documentation and a single source of truth for tunable
parameters.
"""

# =============================================================================
# Text Quality Thresholds
# =============================================================================

# Lower bound on the share of ASCII characters in valid text output.
# Output that falls under this ratio is treated as corrupted or non-English.
MIN_ASCII_RATIO: float = 0.7

# Upper bound on the share of repeated n-grams in a text.
# Output that exceeds this ratio is treated as too repetitive.
MAX_REPETITION_RATIO: float = 0.5

# Lower bound on the average sample length, measured in characters.
# Very short output may point to model collapse or truncation issues.
MIN_SAMPLE_LENGTH: float = 5.0

# Grammar score aimed for in production-ready outputs.
# Aspirational quality level (>95% grammatically correct).
TARGET_GRAMMAR_SCORE: float = 0.95

# Grammar score cutoff used for quality alerts.
# Validation callbacks emit warnings for scores under this value.
LOW_GRAMMAR_SCORE_THRESHOLD: float = 0.5

# =============================================================================
# LanguageTool API Configuration
# =============================================================================

# Character cap on text submitted to the LanguageTool API.
# Anything longer is truncated to avoid timeouts and rate limit issues.
MAX_API_TEXT_LENGTH: int = 500

# Spacing enforced between consecutive API requests, in seconds.
# Free tier rate limit: 20 req/min, i.e. one request every 3s.
API_MIN_INTERVAL: float = 3.0

# Per-request timeout for LanguageTool API calls, in seconds.
# Keeps a slow or unresponsive API from hanging the caller.
API_TIMEOUT_SECONDS: float = 2.0

# Timeout for the LanguageTool availability health check, in seconds.
API_HEALTH_CHECK_TIMEOUT: float = 1.0

# =============================================================================
# Text Generation Parameters
# =============================================================================

# --- Validation sample generation ---

# Token cap for generated validation samples.
# Kept short so validation stays fast (<1s overhead).
VALIDATION_MAX_LENGTH: int = 30

# Sampling temperature for validation samples.
# A moderate value that balances diversity and coherence.
VALIDATION_TEMPERATURE: float = 0.8

# --- Knowledge validation ---

# Token cap for knowledge validation answers.
# Knowledge questions are expected to have concise answers.
KNOWLEDGE_MAX_LENGTH: int = 15

# Sampling temperature for knowledge validation (more deterministic).
# Lowered because the answers are fact-based.
KNOWLEDGE_TEMPERATURE: float = 0.5

# =============================================================================
# History and Caching
# =============================================================================

# Capacity of the sample-output history (deque size).
# A fixed bound keeps memory from growing without limit.
SAMPLE_HISTORY_SIZE: int = 20

# Capacity of the grammar-score history used for trend analysis.
# A larger window allows longer-term trends to be detected.
GRAMMAR_HISTORY_SIZE: int = 50

# Default number of most recent scores examined by trend analysis.
TREND_ANALYSIS_WINDOW: int = 5

# Capacity of the timestamp history used for rate limiting.
TIMESTAMP_HISTORY_SIZE: int = 50

# =============================================================================
# Validation Callback Configuration
# =============================================================================

# Fast validation runs every N steps by default; this is N.
FAST_VALIDATION_FREQUENCY: int = 100

# Grammar validation runs every N steps by default; this is N.
GRAMMAR_VALIDATION_FREQUENCY: int = 200

# Cap on how many samples are displayed in TensorBoard logs.
MAX_TENSORBOARD_SAMPLES: int = 3

# =============================================================================
# N-gram Analysis
# =============================================================================

# Size of the n-grams used when detecting repetition.
# 3-grams trade off sensitivity to repetition against false positives.
NGRAM_SIZE: int = 3

# Shortest text, in characters, that gets n-gram analysis.
# Anything shorter receives the fallback repetition score instead.
MIN_NGRAM_TEXT_LENGTH: int = 10

# Repetition score assigned to texts that are too short to analyze.
# Conservative choice: short texts are assumed maximally repetitive.
FALLBACK_REPETITION_SCORE: float = 1.0

# =============================================================================
# Fallback/Error Values
# =============================================================================

# Grammar score reported when the API is unavailable.
# 0.0 is the worst score, so quality gates fail while LanguageTool is unavailable.
FALLBACK_GRAMMAR_SCORE: float = 0.0

# Error count reported for validations that failed.
FALLBACK_ERROR_COUNT: int = 999

# Number of characters kept when truncating text for error logging.
ERROR_LOG_TRUNCATE_LENGTH: int = 50