# DEPENDENCIES
import re
from collections import Counter
from typing import Any
from typing import Dict
from typing import List

import numpy as np
from loguru import logger

from config.threshold_config import Domain
from config.threshold_config import get_threshold_for_domain
from metrics.base_metric import MetricResult
from metrics.base_metric import StatisticalMetric


class StructuralMetric(StatisticalMetric):
    """
    Structural analysis of text patterns with domain-aware thresholds

    Analyzes various structural features including:
    - Sentence length distribution and variance
    - Word length distribution
    - Punctuation patterns
    - Vocabulary richness
    - Burstiness (variation in patterns)
    """
    def __init__(self):
        super().__init__(name        = "structural",
                         description = "Structural and pattern analysis of the text",
                         )

    def compute(self, text: str, **kwargs) -> MetricResult:
        """
        Compute structural features with domain aware thresholds

        Arguments:
        ----------
            text { str }  : Input text to analyze

            **kwargs      : Additional parameters including 'domain'

        Returns:
        --------
            { MetricResult } : MetricResult with AI/Human probabilities
        """
        try:
            # Get domain-specific thresholds
            domain                = kwargs.get('domain', Domain.GENERAL)
            domain_thresholds     = get_threshold_for_domain(domain)
            structural_thresholds = domain_thresholds.structural

            # Extract all structural features
            features = self._extract_features(text)

            # Calculate raw AI probability based on features
            raw_ai_prob, confidence = self._calculate_ai_probability(features)

            # Apply domain-specific thresholds to convert raw score to probabilities
            ai_prob, human_prob, mixed_prob = self._apply_domain_thresholds(raw_ai_prob, structural_thresholds, features)

            # Apply confidence multiplier from domain thresholds, then clamp to [0, 1]
            confidence *= structural_thresholds.confidence_multiplier
            confidence  = max(0.0, min(1.0, confidence))

            return MetricResult(metric_name       = self.name,
                                ai_probability    = ai_prob,
                                human_probability = human_prob,
                                mixed_probability = mixed_prob,
                                confidence        = confidence,
                                details           = {**features,
                                                     'domain_used'     : domain.value,
                                                     'ai_threshold'    : structural_thresholds.ai_threshold,
                                                     'human_threshold' : structural_thresholds.human_threshold,
                                                     'raw_score'       : raw_ai_prob,
                                                     },
                                )

        except Exception as e:
            # Boundary handler: never propagate — return a neutral, zero-confidence result
            logger.error(f"Error in {self.name} computation: {repr(e)}")
            return MetricResult(metric_name       = self.name,
                                ai_probability    = 0.5,
                                human_probability = 0.5,
                                mixed_probability = 0.0,
                                confidence        = 0.0,
                                error             = str(e),
                                )

    def _apply_domain_thresholds(self, raw_score: float, thresholds: Any, features: Dict[str, Any]) -> tuple:
        """
        Apply domain-specific thresholds to convert raw score to probabilities

        Arguments:
        ----------
            raw_score  { float }         : Raw AI score in [0, 1] from feature heuristics

            thresholds { Any }           : Domain threshold object with `ai_threshold`
                                           and `human_threshold` attributes

            features   { Dict[str, Any] }: Extracted structural features (used for the
                                           mixed-content estimate)

        Returns:
        --------
            { tuple } : (ai_prob, human_prob, mixed_prob), normalized to sum to 1.0
        """
        ai_threshold    = thresholds.ai_threshold     # Domain-specific
        human_threshold = thresholds.human_threshold  # Domain-specific

        # Calculate probabilities based on threshold distances
        if (raw_score >= ai_threshold):
            # Above AI threshold - strongly AI
            distance_from_threshold = raw_score - ai_threshold
            ai_prob                 = 0.7 + (distance_from_threshold * 0.3)  # 0.7 to 1.0
            human_prob              = 0.3 - (distance_from_threshold * 0.3)  # 0.3 to 0.0

        elif (raw_score <= human_threshold):
            # Below human threshold - strongly human
            distance_from_threshold = human_threshold - raw_score
            ai_prob                 = 0.3 - (distance_from_threshold * 0.3)  # 0.3 to 0.0
            human_prob              = 0.7 + (distance_from_threshold * 0.3)  # 0.7 to 1.0

        else:
            # Between thresholds - uncertain zone: interpolate linearly across the band
            range_width = ai_threshold - human_threshold

            if (range_width > 0):
                position_in_range = (raw_score - human_threshold) / range_width
                ai_prob           = 0.3 + (position_in_range * 0.4)  # 0.3 to 0.7
                human_prob        = 0.7 - (position_in_range * 0.4)  # 0.7 to 0.3

            else:
                # Degenerate configuration (thresholds crossed/equal): stay neutral
                ai_prob    = 0.5
                human_prob = 0.5

        # Ensure probabilities are valid
        ai_prob    = float(max(0.0, min(1.0, ai_prob)))
        human_prob = float(max(0.0, min(1.0, human_prob)))

        # Calculate mixed probability based on statistical patterns
        mixed_prob = self._calculate_mixed_probability(features)

        # Normalize to sum to 1.0
        total = ai_prob + human_prob + mixed_prob

        if (total > 0):
            ai_prob    /= total
            human_prob /= total
            mixed_prob /= total

        return ai_prob, human_prob, mixed_prob

    def _extract_features(self, text: str) -> Dict[str, Any]:
        """
        Extract all structural features from text

        Arguments:
        ----------
            text { str } : Input text to analyze

        Returns:
        --------
            { Dict[str, Any] } : Feature name -> value (floats rounded, counts as int)
        """
        # Basic tokenization
        sentences = self._split_sentences(text)
        words     = self._tokenize_words(text)

        # Sentence-level features (float() casts strip NumPy scalar types so the
        # details dict stays JSON-serializable)
        sentence_lengths    = [len(s.split()) for s in sentences]
        avg_sentence_length = float(np.mean(sentence_lengths)) if sentence_lengths else 0
        std_sentence_length = float(np.std(sentence_lengths)) if len(sentence_lengths) > 1 else 0

        # Word-level features
        word_lengths    = [len(w) for w in words]
        avg_word_length = float(np.mean(word_lengths)) if word_lengths else 0
        std_word_length = float(np.std(word_lengths)) if len(word_lengths) > 1 else 0

        # Vocabulary richness
        vocabulary_size  = len(set(words))
        type_token_ratio = vocabulary_size / len(words) if words else 0

        # Punctuation analysis
        punctuation_density = self._calculate_punctuation_density(text)
        comma_frequency     = text.count(',') / len(words) if words else 0

        # Burstiness (variation in patterns)
        burstiness = self._calculate_burstiness(sentence_lengths)

        # Uniformity scores: 1.0 means perfectly uniform sentence lengths
        length_uniformity = 1.0 - (std_sentence_length / avg_sentence_length) if avg_sentence_length > 0 else 0
        length_uniformity = max(0, min(1, length_uniformity))

        # Readability approximation (simplified)
        readability = self._calculate_readability(text, sentences, words)

        # Pattern detection
        repetition_score = self._detect_repetitive_patterns(words)

        # N-gram analysis
        bigram_diversity  = self._calculate_ngram_diversity(words, n = 2)
        trigram_diversity = self._calculate_ngram_diversity(words, n = 3)

        return {"avg_sentence_length" : round(avg_sentence_length, 2),
                "std_sentence_length" : round(std_sentence_length, 2),
                "avg_word_length"     : round(avg_word_length, 2),
                "std_word_length"     : round(std_word_length, 2),
                "vocabulary_size"     : vocabulary_size,
                "type_token_ratio"    : round(type_token_ratio, 4),
                "punctuation_density" : round(punctuation_density, 4),
                "comma_frequency"     : round(comma_frequency, 4),
                "burstiness_score"    : round(burstiness, 4),
                "length_uniformity"   : round(length_uniformity, 4),
                "readability_score"   : round(readability, 2),
                "repetition_score"    : round(repetition_score, 4),
                "bigram_diversity"    : round(bigram_diversity, 4),
                "trigram_diversity"   : round(trigram_diversity, 4),
                "num_sentences"       : len(sentences),
                "num_words"           : len(words),
                }

    def _split_sentences(self, text: str) -> List[str]:
        """
        Split text into sentences

        Simple split on terminal punctuation runs; empty fragments are dropped.
        """
        sentences = re.split(r'[.!?]+', text)
        return [s.strip() for s in sentences if s.strip()]

    def _tokenize_words(self, text: str) -> List[str]:
        """
        Tokenize text into lowercase words

        Word = maximal run of word characters (\\w) — punctuation is discarded.
        """
        words = re.findall(r'\b\w+\b', text.lower())
        return words

    def _calculate_punctuation_density(self, text: str) -> float:
        """
        Calculate punctuation density: non-word, non-space chars per character
        """
        punctuation = re.findall(r'[^\w\s]', text)
        total_chars = len(text)
        return len(punctuation) / total_chars if total_chars > 0 else 0

    def _calculate_burstiness(self, values: List[float]) -> float:
        """
        Calculate burstiness score (variation in patterns)
        Higher burstiness typically indicates human writing

        Returns the coefficient of variation (std/mean), scaled by 1/2 and
        capped at 1.0; 0.0 for fewer than two values or a zero mean.
        """
        if (len(values) < 2):
            return 0.0

        mean_val = np.mean(values)
        std_val  = np.std(values)

        if (mean_val == 0):
            return 0.0

        # Coefficient of variation
        cv = std_val / mean_val

        # Normalize to 0-1 range
        burstiness = float(min(1.0, cv / 2.0))
        return burstiness

    def _calculate_readability(self, text: str, sentences: List[str], words: List[str]) -> float:
        """
        Calculate simplified readability score
        (Approximation of Flesch Reading Ease)

        Returns a score clamped to [0, 100]; 50.0 (neutral) when counts are unusable,
        0.0 when sentences or words are empty.
        """
        if not sentences or not words:
            return 0.0

        total_sentences = len(sentences)
        total_words     = len(words)
        total_syllables = sum(self._count_syllables(word) for word in words)

        # Flesch Reading Ease approximation:
        # 206.835 - 1.015 * (words/sentence) - 84.6 * (syllables/word)
        if ((total_sentences > 0) and (total_words > 0)):
            score = 206.835 - 1.015 * (total_words / total_sentences) - 84.6 * (total_syllables / total_words)
            return max(0, min(100, score))

        # Neutral score
        return 50.0

    def _count_syllables(self, word: str) -> int:
        """
        Approximate syllable count for a word

        Counts vowel groups ('aeiouy'), subtracts one for a trailing silent 'e',
        and floors the result at 1.
        """
        word           = word.lower()
        vowels         = 'aeiouy'
        syllable_count = 0

        previous_was_vowel = False

        for char in word:
            is_vowel = char in vowels

            if is_vowel and not previous_was_vowel:
                syllable_count += 1

            previous_was_vowel = is_vowel

        # Adjust for silent 'e'
        if (word.endswith('e')):
            syllable_count -= 1

        # Ensure at least one syllable
        if (syllable_count == 0):
            syllable_count = 1

        return syllable_count

    def _detect_repetitive_patterns(self, words: List[str]) -> float:
        """
        Detect repetitive patterns in text
        AI text sometimes shows more repetition

        Slides a fixed-size window over the word list and counts words repeated
        within each window; the count is normalized by the theoretical maximum.

        Returns 0.0 for fewer than 10 words.
        """
        if (len(words) < 10):
            return 0.0

        # Check for repeated words in close proximity
        window_size = 10
        repetitions = 0
        num_windows = len(words) - window_size + 1  # +1: include the final window

        for i in range(num_windows):
            window      = words[i:i + window_size]
            word_counts = Counter(window)

            # Count words that appear more than once
            repetitions += sum(1 for count in word_counts.values() if count > 1)

        # Normalize by the maximum possible repetition count
        max_repetitions  = num_windows * window_size
        repetition_score = repetitions / max_repetitions if max_repetitions > 0 else 0

        return repetition_score

    def _calculate_ngram_diversity(self, words: List[str], n: int = 2) -> float:
        """
        Calculate n-gram diversity
        Higher diversity often indicates human writing

        Diversity = unique n-grams / total n-grams; 0.0 when fewer than n words.
        """
        if (len(words) < n):
            return 0.0

        # Generate n-grams
        ngrams = [tuple(words[i:i+n]) for i in range(len(words) - n + 1)]

        # Calculate diversity as ratio of unique n-grams to total n-grams
        unique_ngrams = len(set(ngrams))
        total_ngrams  = len(ngrams)
        diversity     = unique_ngrams / total_ngrams if total_ngrams > 0 else 0

        return diversity

    def _calculate_ai_probability(self, features: Dict[str, Any]) -> tuple:
        """
        Calculate AI probability based on structural features
        Returns raw score and confidence

        Each heuristic votes a value in [0, 1]; the raw score is the vote mean
        and the confidence shrinks with vote disagreement (std of votes).
        """
        ai_indicators = list()

        # Low burstiness suggests AI (AI is more consistent)
        if (features['burstiness_score'] < 0.3):
            # Strong AI indicator
            ai_indicators.append(0.7)

        elif (features['burstiness_score'] < 0.5):
            # Moderate AI indicator
            ai_indicators.append(0.5)

        else:
            # Weak AI indicator
            ai_indicators.append(0.3)

        # High length uniformity suggests AI
        if (features['length_uniformity'] > 0.7):
            # Strong AI indicator
            ai_indicators.append(0.7)

        elif (features['length_uniformity'] > 0.5):
            # Moderate AI indicator
            ai_indicators.append(0.5)

        else:
            # Weak AI indicator
            ai_indicators.append(0.3)

        # Low n-gram diversity suggests AI
        if (features['bigram_diversity'] < 0.7):
            # Moderate AI indicator
            ai_indicators.append(0.6)

        else:
            # Weak AI indicator
            ai_indicators.append(0.4)

        # Moderate readability suggests AI (AI often produces "perfect" readability)
        if (60 <= features['readability_score'] <= 75):
            # Moderate AI indicator
            ai_indicators.append(0.6)

        else:
            # Weak AI indicator
            ai_indicators.append(0.4)

        # Low repetition suggests AI (AI avoids excessive repetition)
        if (features['repetition_score'] < 0.1):
            # Moderate AI indicator
            ai_indicators.append(0.6)

        elif (features['repetition_score'] < 0.2):
            # Neutral
            ai_indicators.append(0.5)

        else:
            # Weak AI indicator
            ai_indicators.append(0.3)

        # Calculate raw score and confidence (cast off NumPy scalar types)
        raw_score  = float(np.mean(ai_indicators)) if ai_indicators else 0.5
        confidence = float(1.0 - (np.std(ai_indicators) / 0.5)) if ai_indicators else 0.5
        confidence = max(0.1, min(0.9, confidence))

        return raw_score, confidence

    def _calculate_mixed_probability(self, features: Dict[str, Any]) -> float:
        """
        Calculate probability of mixed AI/Human content based on structural patterns

        Returns a value capped at 0.3; 0.0 when no mixing indicator fires.
        """
        mixed_indicators = []

        # High burstiness suggests mixed content (inconsistent patterns)
        if features['burstiness_score'] > 0.6:
            mixed_indicators.append(0.4)

        # Inconsistent sentence lengths might indicate mixing
        if (features['std_sentence_length'] > features['avg_sentence_length'] * 0.8):
            mixed_indicators.append(0.3)

        # Extreme values in multiple features might indicate mixing
        extreme_features = 0

        if (features['type_token_ratio'] < 0.3) or (features['type_token_ratio'] > 0.9):
            extreme_features += 1

        if (features['readability_score'] < 20) or (features['readability_score'] > 90):
            extreme_features += 1

        if (extreme_features >= 2):
            mixed_indicators.append(0.3)

        return float(min(0.3, np.mean(mixed_indicators))) if mixed_indicators else 0.0


# Export
__all__ = ["StructuralMetric"]