""" Essay-level dyslexic writing pattern profiling. This module aggregates sentence-level dyslexic writing patterns into dominance-based essay profiles. """ import pandas as pd def assign_essay_ids(df: pd.DataFrame, essay_size: int = 5) -> pd.DataFrame: """ Assign essay IDs to sentence-level data using fixed-size grouping. Parameters ---------- df : pd.DataFrame DataFrame containing sentence-level patterns. essay_size : int Number of sentences per essay abstraction. Returns ------- pd.DataFrame DataFrame with an added 'essay_id' column. """ df = df.copy() df["essay_id"] = df.index // essay_size return df def profile_essays(df: pd.DataFrame) -> pd.DataFrame: """ Aggregate sentence-level patterns into essay-level dominance profiles. Parameters ---------- df : pd.DataFrame DataFrame containing 'essay_id' and 'writing_pattern'. Returns ------- pd.DataFrame Essay-level pattern profiles with dominance and confidence. """ # Count patterns per essay pattern_counts = ( df .groupby("essay_id")["writing_pattern"] .value_counts() .unstack(fill_value=0) ) essay_summary = pattern_counts.copy() # Dominant pattern essay_summary["dominant_pattern"] = essay_summary.idxmax(axis=1) # Compute dominance metrics pattern_columns = pattern_counts.columns essay_summary["max_count"] = essay_summary[pattern_columns].max(axis=1) essay_summary["total_sentences"] = essay_summary[pattern_columns].sum(axis=1) essay_summary["confidence"] = ( essay_summary["max_count"] / essay_summary["total_sentences"] ) # Dominance strength categorization essay_summary["dominance_strength"] = essay_summary["confidence"].apply( dominance_strength ) return essay_summary.reset_index() def dominance_strength(confidence: float) -> str: """ Categorize dominance strength based on confidence score. """ if confidence >= 0.6: return "Strong" elif confidence >= 0.4: return "Moderate" else: return "Weak / Mixed"