| """ |
| Essay-level dyslexic writing pattern profiling. |
| |
| This module aggregates sentence-level dyslexic writing patterns |
| into dominance-based essay profiles. |
| """ |
|
|
| import pandas as pd |
|
|
|
|
def assign_essay_ids(df: pd.DataFrame, essay_size: int = 5) -> pd.DataFrame:
    """
    Assign essay IDs to sentence-level data using fixed-size grouping.

    Rows are grouped by their position in ``df``: the first ``essay_size``
    rows become essay 0, the next ``essay_size`` rows essay 1, and so on.
    The last essay may contain fewer rows. The input frame is not modified.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing sentence-level patterns.
    essay_size : int
        Number of sentences per essay abstraction. Must be at least 1.

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with an added 'essay_id' column.

    Raises
    ------
    ValueError
        If ``essay_size`` is less than 1.
    """
    if essay_size < 1:
        raise ValueError(
            f"essay_size must be a positive integer, got {essay_size!r}"
        )

    result = df.copy()
    # Use row positions instead of index labels: after filtering, sorting or
    # concatenation the labels are no longer 0..n-1 (and may not even be
    # numeric), which would produce uneven groups or a TypeError.
    result["essay_id"] = pd.RangeIndex(len(result)) // essay_size
    return result
|
|
|
|
def profile_essays(df: pd.DataFrame) -> pd.DataFrame:
    """
    Build one dominance profile per essay from sentence-level patterns.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing 'essay_id' and 'writing_pattern'.

    Returns
    -------
    pd.DataFrame
        One row per essay: 'essay_id', one count column per writing
        pattern, then 'dominant_pattern', 'max_count', 'total_sentences',
        'confidence' (max_count / total_sentences) and
        'dominance_strength'.
    """
    # Essay x pattern table of sentence counts; absent combinations are 0.
    per_pattern = df.groupby("essay_id")["writing_pattern"].value_counts()
    counts_by_essay = per_pattern.unstack(fill_value=0)

    # Row-wise statistics computed on the pure count table, before any
    # derived column is attached.
    top_pattern = counts_by_essay.idxmax(axis=1)
    top_count = counts_by_essay.max(axis=1)
    sentence_total = counts_by_essay.sum(axis=1)
    share = top_count / sentence_total

    profile = counts_by_essay.assign(
        dominant_pattern=top_pattern,
        max_count=top_count,
        total_sentences=sentence_total,
        confidence=share,
        dominance_strength=share.apply(dominance_strength),
    )
    return profile.reset_index()
|
|
|
|
def dominance_strength(confidence: float) -> str:
    """
    Map a confidence score to a dominance label.

    Scores of at least 0.6 are "Strong", scores of at least 0.4 are
    "Moderate", and everything else is "Weak / Mixed".
    """
    if confidence >= 0.6:
        return "Strong"
    if confidence >= 0.4:
        return "Moderate"
    return "Weak / Mixed"
|
|