# Source: Hugging Face Spaces - satyakimitra / text_auth_ai (status: Running)
# File size: 16,946 Bytes
# Revision: 8205d6b

# DEPENDENCIES
import re
import numpy as np
from typing import Any
from typing import Dict
from typing import List
from loguru import logger
from collections import Counter
from metrics.base_metric import MetricResult
from metrics.base_metric import StatisticalMetric 
from config.threshold_config import Domain
from config.threshold_config import get_threshold_for_domain


class StructuralMetric(StatisticalMetric):
    """
    Structural analysis of text patterns with domain-aware thresholds
    
    Analyzes various structural features including:
    - Sentence length distribution and variance
    - Word length distribution  
    - Punctuation patterns
    - Vocabulary richness
    - Burstiness (variation in patterns)

    The extracted features are combined by fixed heuristics into a raw AI score,
    which is then mapped to AI / Human / Mixed probabilities using the thresholds
    returned by `get_threshold_for_domain` for the requested domain
    """
    def __init__(self):
        super().__init__(name        = "structural",
                         description = "Structural and pattern analysis of the text",
                        )
    

    def compute(self, text: str, **kwargs) -> MetricResult:
        """
        Compute structural features with domain aware thresholds
        
        Arguments:
        ----------
            text     { str } : Input text to analyze

            **kwargs         : Additional parameters including 'domain'
                               (missing or None falls back to Domain.GENERAL)
            
        Returns:
        --------
            { MetricResult } : MetricResult with AI/Human probabilities; a neutral
                               0.5 / 0.5 result with zero confidence and `error` set
                               is returned when the text contains no words or when
                               any exception is raised during computation
        """
        try:
            # Get domain-specific thresholds (an explicit None is treated like a missing key)
            domain                          = kwargs.get('domain')

            if domain is None:
                domain = Domain.GENERAL

            domain_thresholds               = get_threshold_for_domain(domain)
            structural_thresholds           = domain_thresholds.structural
            
            # Extract all structural features
            features                        = self._extract_features(text)

            # Without any words every feature is zero, and the heuristics below would
            # still produce a fairly confident score out of nothing: report "no signal"
            if (features['num_words'] == 0):
                logger.warning(f"{self.name}: no analyzable words in input text")
                return MetricResult(metric_name       = self.name,
                                    ai_probability    = 0.5,
                                    human_probability = 0.5,
                                    mixed_probability = 0.0,
                                    confidence        = 0.0,
                                    error             = "No analyzable words in input text",
                                   )
            
            # Calculate raw AI probability based on features
            raw_ai_prob, confidence         = self._calculate_ai_probability(features)
            
            # Apply domain-specific thresholds to convert raw score to probabilities
            ai_prob, human_prob, mixed_prob = self._apply_domain_thresholds(raw_ai_prob, structural_thresholds, features)
            
            # Apply confidence multiplier from domain thresholds, then clamp to [0, 1]
            confidence                     *= structural_thresholds.confidence_multiplier
            confidence                      = max(0.0, min(1.0, confidence))
            
            return MetricResult(metric_name       = self.name,
                                ai_probability    = ai_prob,
                                human_probability = human_prob,
                                mixed_probability = mixed_prob,
                                confidence        = confidence,
                                details           = {**features, 
                                                     # Fall back to the object itself when it has no `.value`
                                                     'domain_used'     : getattr(domain, 'value', domain),
                                                     'ai_threshold'    : structural_thresholds.ai_threshold,
                                                     'human_threshold' : structural_thresholds.human_threshold,
                                                     'raw_score'       : raw_ai_prob,
                                                    },
                               )
            
        except Exception as e:
            # Metric-level boundary: never propagate, return a neutral zero-confidence result
            logger.error(f"Error in {self.name} computation: {repr(e)}")
            return MetricResult(metric_name       = self.name,
                                ai_probability    = 0.5,
                                human_probability = 0.5,
                                mixed_probability = 0.0,
                                confidence        = 0.0,
                                error             = str(e),
                               )
    


    def _apply_domain_thresholds(self, raw_score: float, thresholds: Any, features: Dict[str, Any]) -> tuple:
        """
        Apply domain-specific thresholds to convert raw score to probabilities

        Arguments:
        ----------
            raw_score  { float } : Raw AI score from `_calculate_ai_probability`

            thresholds { Any }   : Object exposing `ai_threshold` and `human_threshold`

            features   { dict }  : Feature dictionary from `_extract_features`
                                   (used for the mixed-content probability)

        Returns:
        --------
            { tuple } : (ai_prob, human_prob, mixed_prob), normalized to sum to 1.0
        """
        ai_threshold    = thresholds.ai_threshold    # Domain-specific
        human_threshold = thresholds.human_threshold # Domain-specific
        
        # Calculate probabilities based on threshold distances
        if (raw_score >= ai_threshold):
            # At or above AI threshold: AI starts at 0.7 and grows with the distance above it
            distance_from_threshold = raw_score - ai_threshold
            ai_prob                 = 0.7 + (distance_from_threshold * 0.3)
            human_prob              = 0.3 - (distance_from_threshold * 0.3)
        
        elif (raw_score <= human_threshold):
            # At or below human threshold: Human starts at 0.7 and grows with the distance below it
            distance_from_threshold = human_threshold - raw_score
            ai_prob                 = 0.3 - (distance_from_threshold * 0.3)
            human_prob              = 0.7 + (distance_from_threshold * 0.3)
        
        else:
            # Between thresholds - uncertain zone, interpolate linearly
            range_width = ai_threshold - human_threshold
            
            if (range_width > 0):
                position_in_range = (raw_score - human_threshold) / range_width
                ai_prob           = 0.3 + (position_in_range * 0.4)  # 0.3 to 0.7
                human_prob        = 0.7 - (position_in_range * 0.4)  # 0.7 to 0.3
            
            else:
                # Defensive fallback for a degenerate (non-positive width) band
                ai_prob    = 0.5
                human_prob = 0.5
        
        # Ensure probabilities are valid
        ai_prob    = max(0.0, min(1.0, ai_prob))
        human_prob = max(0.0, min(1.0, human_prob))
        
        # Calculate mixed probability based on statistical patterns
        mixed_prob = self._calculate_mixed_probability(features)
        
        # Normalize to sum to 1.0
        total      = ai_prob + human_prob + mixed_prob
        
        if (total > 0):
            ai_prob    /= total
            human_prob /= total
            mixed_prob /= total
        
        # Plain Python floats, regardless of whether the inputs were NumPy scalars
        return float(ai_prob), float(human_prob), float(mixed_prob)

    
    def _extract_features(self, text: str) -> Dict[str, Any]:
        """
        Extract all structural features from text

        Arguments:
        ----------
            text { str } : Input text to analyze

        Returns:
        --------
            { dict } : Rounded feature values keyed by feature name, plus the
                       sentence and word counts; every value is 0 for empty text
        """
        # Basic tokenization
        sentences           = self._split_sentences(text)
        words               = self._tokenize_words(text)
        
        # Sentence-level features (lengths measured in whitespace-separated tokens)
        sentence_lengths    = [len(s.split()) for s in sentences]
        avg_sentence_length = np.mean(sentence_lengths) if sentence_lengths else 0
        std_sentence_length = np.std(sentence_lengths) if len(sentence_lengths) > 1 else 0
        
        # Word-level features
        word_lengths        = [len(w) for w in words]
        avg_word_length     = np.mean(word_lengths) if word_lengths else 0
        std_word_length     = np.std(word_lengths) if len(word_lengths) > 1 else 0
        
        # Vocabulary richness
        vocabulary_size     = len(set(words))
        type_token_ratio    = vocabulary_size / len(words) if words else 0
        
        # Punctuation analysis
        punctuation_density = self._calculate_punctuation_density(text)
        comma_frequency     = text.count(',') / len(words) if words else 0
        
        # Burstiness (variation in patterns)
        burstiness          = self._calculate_burstiness(sentence_lengths)
        
        # Uniformity score: 1 - coefficient of variation, clamped to [0, 1]
        length_uniformity   = 1.0 - (std_sentence_length / avg_sentence_length) if avg_sentence_length > 0 else 0
        length_uniformity   = max(0, min(1, length_uniformity))
        
        # Readability approximation (simplified)
        readability         = self._calculate_readability(text, sentences, words)
        
        # Pattern detection
        repetition_score    = self._detect_repetitive_patterns(words)
        
        # N-gram analysis
        bigram_diversity    = self._calculate_ngram_diversity(words, n = 2)
        trigram_diversity   = self._calculate_ngram_diversity(words, n = 3)
        
        # float(...) turns NumPy scalars into plain Python floats without changing the value
        return {"avg_sentence_length" : float(round(avg_sentence_length, 2)),
                "std_sentence_length" : float(round(std_sentence_length, 2)),
                "avg_word_length"     : float(round(avg_word_length, 2)),
                "std_word_length"     : float(round(std_word_length, 2)),
                "vocabulary_size"     : vocabulary_size,
                "type_token_ratio"    : float(round(type_token_ratio, 4)),
                "punctuation_density" : float(round(punctuation_density, 4)),
                "comma_frequency"     : float(round(comma_frequency, 4)),
                "burstiness_score"    : float(round(burstiness, 4)),
                "length_uniformity"   : float(round(length_uniformity, 4)),
                "readability_score"   : float(round(readability, 2)),
                "repetition_score"    : float(round(repetition_score, 4)),
                "bigram_diversity"    : float(round(bigram_diversity, 4)),
                "trigram_diversity"   : float(round(trigram_diversity, 4)),
                "num_sentences"       : len(sentences),
                "num_words"           : len(words),
               }

    
    def _split_sentences(self, text: str) -> List[str]:
        """
        Split text into sentences

        Splits on runs of '.', '!' and '?' and drops empty fragments, so any
        period (decimals, abbreviations) is treated as a sentence boundary
        """
        # Simple sentence splitting
        fragments = re.split(r'[.!?]+', text)
        
        return [stripped for stripped in (fragment.strip() for fragment in fragments) if stripped]
    

    def _tokenize_words(self, text: str) -> List[str]:
        """
        Tokenize text into lower-cased words (runs of `\\w` characters)
        """
        # Simple word tokenization
        return re.findall(r'\b\w+\b', text.lower())
    

    def _calculate_punctuation_density(self, text: str) -> float:
        """
        Calculate punctuation density

        Ratio of characters that are neither word characters nor whitespace
        to the total number of characters; 0 for empty text
        """
        total_chars = len(text)

        if (total_chars == 0):
            return 0

        punctuation = re.findall(r'[^\w\s]', text)
        
        return len(punctuation) / total_chars
    

    def _calculate_burstiness(self, values: List[float]) -> float:
        """
        Calculate burstiness score (variation in patterns)
        Higher burstiness typically indicates human writing

        Returns half the coefficient of variation (std / mean), capped at 1.0;
        0.0 when fewer than two values are given or their mean is zero
        """
        if (len(values) < 2):
            return 0.0
        
        mean_val   = np.mean(values)
        
        if (mean_val == 0):
            return 0.0
        
        # Coefficient of variation
        cv         = np.std(values) / mean_val
        
        # Normalize to 0-1 range
        return float(min(1.0, cv / 2.0))
    

    def _calculate_readability(self, text: str, sentences: List[str], words: List[str]) -> float:
        """
        Calculate simplified readability score
        (Approximation of Flesch Reading Ease)

        Arguments:
        ----------
            text      { str }  : Original text (unused, kept for interface compatibility)

            sentences { list } : Sentences from `_split_sentences`

            words     { list } : Words from `_tokenize_words`

        Returns:
        --------
            { float } : Score clamped to [0, 100]; 0.0 when there are no sentences or no words
        """
        if not sentences or not words:
            return 0.0
        
        total_sentences = len(sentences)
        total_words     = len(words)
        total_syllables = sum(self._count_syllables(word) for word in words)
        
        # Flesch Reading Ease approximation (both counts are non-zero past the guard above)
        score           = 206.835 - 1.015 * (total_words / total_sentences) - 84.6 * (total_syllables / total_words)

        return max(0, min(100, score))
    

    def _count_syllables(self, word: str) -> int:
        """
        Approximate syllable count for a word

        Counts groups of consecutive vowels (a, e, i, o, u, y), subtracts one
        for a trailing 'e', and never returns less than 1
        """
        word               = word.lower()
        vowels             = 'aeiouy'
        syllable_count     = 0
        previous_was_vowel = False
        
        for char in word:
            is_vowel = char in vowels

            # Only the first vowel of a consecutive run starts a new syllable
            if is_vowel and not previous_was_vowel:
                syllable_count += 1
            
            previous_was_vowel = is_vowel
        
        # Adjust for silent 'e'
        if (word.endswith('e')):
            syllable_count -= 1
        
        # Ensure at least one syllable
        return max(1, syllable_count)
    

    def _detect_repetitive_patterns(self, words: List[str]) -> float:
        """
        Detect repetitive patterns in text
        AI text sometimes shows more repetition

        Slides a 10-word window over the text and counts, per window, how many
        distinct words occur more than once; the total is divided by
        (number of windows * window size)

        Returns 0.0 when there are fewer words than one full window
        """
        window_size = 10

        if (len(words) < window_size):
            return 0.0
        
        # A text of n words contains n - window_size + 1 full windows; the "+ 1"
        # includes the final window, so a text of exactly one window is scored too
        num_windows = len(words) - window_size + 1
        repetitions = 0
        
        for start in range(num_windows):
            word_counts  = Counter(words[start:start + window_size])
            # Count words that appear more than once
            repetitions += sum(1 for count in word_counts.values() if count > 1)
        
        # Normalize (num_windows >= 1 here, so the denominator is never zero)
        return repetitions / (num_windows * window_size)

    
    def _calculate_ngram_diversity(self, words: List[str], n: int = 2) -> float:
        """
        Calculate n-gram diversity
        Higher diversity often indicates human writing

        Returns the ratio of unique n-grams to total n-grams, or 0.0 when
        there are fewer than `n` words
        """
        if (len(words) < n):
            return 0.0
        
        # Generate n-grams
        ngrams = [tuple(words[i:i+n]) for i in range(len(words) - n + 1)]
        
        # Calculate diversity as ratio of unique n-grams to total n-grams
        return len(set(ngrams)) / len(ngrams) if ngrams else 0
    

    def _calculate_ai_probability(self, features: Dict[str, Any]) -> tuple:
        """
        Calculate AI probability based on structural features
        Returns raw score and confidence

        Five features each contribute one indicator value; the raw score is
        their mean and the confidence reflects how closely they agree
        (1 - std / 0.5, clamped to [0.1, 0.9])
        """
        ai_indicators = list()
        
        # Low burstiness suggests AI (AI is more consistent)
        if (features['burstiness_score'] < 0.3):
            # Strong AI indicator
            ai_indicators.append(0.7)

        elif (features['burstiness_score'] < 0.5):
            # Moderate AI indicator
            ai_indicators.append(0.5)

        else:
            # Weak AI indicator
            ai_indicators.append(0.3)
        
        # High length uniformity suggests AI
        if (features['length_uniformity'] > 0.7):
            # Strong AI indicator
            ai_indicators.append(0.7)

        elif (features['length_uniformity'] > 0.5):
            # Moderate AI indicator
            ai_indicators.append(0.5)

        else:
            # Weak AI indicator
            ai_indicators.append(0.3)
        
        # Low n-gram diversity suggests AI
        if (features['bigram_diversity'] < 0.7):
            # Moderate AI indicator
            ai_indicators.append(0.6)

        else:
            # Weak AI indicator
            ai_indicators.append(0.4)
        
        # Moderate readability suggests AI (AI often produces "perfect" readability)
        if (60 <= features['readability_score'] <= 75):
            # Moderate AI indicator
            ai_indicators.append(0.6)

        else:
            # Weak AI indicator
            ai_indicators.append(0.4)
        
        # Low repetition suggests AI (AI avoids excessive repetition)
        if (features['repetition_score'] < 0.1):
            # Moderate AI indicator
            ai_indicators.append(0.6)

        elif (features['repetition_score'] < 0.2):
            # Neutral
            ai_indicators.append(0.5)

        else:
            # Weak AI indicator
            ai_indicators.append(0.3)
        
        # Calculate raw score and confidence (the list always holds five indicators here)
        raw_score  = float(np.mean(ai_indicators))
        confidence = 1.0 - (float(np.std(ai_indicators)) / 0.5)
        confidence = max(0.1, min(0.9, confidence))
        
        return raw_score, confidence
    

    def _calculate_mixed_probability(self, features: Dict[str, Any]) -> float:
        """
        Calculate probability of mixed AI/Human content based on structural patterns

        Returns the mean of the triggered indicators, capped at 0.3, or 0.0
        when no indicator is triggered
        """
        mixed_indicators = []
        
        # High burstiness suggests mixed content (inconsistent patterns)
        if features['burstiness_score'] > 0.6:
            mixed_indicators.append(0.4)
        
        # Inconsistent sentence lengths might indicate mixing
        if (features['std_sentence_length'] > features['avg_sentence_length'] * 0.8):
            mixed_indicators.append(0.3)
        
        # Extreme values in multiple features might indicate mixing
        extreme_features = 0
        if (features['type_token_ratio'] < 0.3) or (features['type_token_ratio'] > 0.9):
            extreme_features += 1
        if (features['readability_score'] < 20) or (features['readability_score'] > 90):
            extreme_features += 1
        
        if (extreme_features >= 2):
            mixed_indicators.append(0.3)

        if not mixed_indicators:
            return 0.0
        
        return float(min(0.3, np.mean(mixed_indicators)))


# Export: limit `from <module> import *` to the metric class
__all__ = ["StructuralMetric"]