| """ |
| End-to-end pipeline for Sinhala dyslexic writing pattern analysis. |
| |
| This module orchestrates sentence-level feature extraction, |
| pattern inference, and essay-level profiling. |
| """ |
|
|
| import pandas as pd |
|
|
| from .feature_extraction import extract_surface_features |
| from .pattern_rules import infer_pattern |
| from .essay_profile import assign_essay_ids, profile_essays |
|
|
|
|
def run_pattern_analysis(
    df: pd.DataFrame,
    essay_size: int = 5
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Run the complete dyslexic writing pattern analysis pipeline.

    The pipeline has three stages:

    1. Call ``extract_surface_features`` on each
       ('clean_sentence', 'dyslexic_sentence') pair and append the
       expanded result as new columns.
    2. Call ``infer_pattern`` on each resulting row and store its return
       value in a new 'writing_pattern' column.
    3. Pass the sentence-level frame through ``assign_essay_ids`` and
       then ``profile_essays`` to build the essay-level frame.

    The caller's DataFrame is not modified.

    Parameters
    ----------
    df : pd.DataFrame
        Input DataFrame containing:
        - 'clean_sentence'
        - 'dyslexic_sentence'
        Its index may be arbitrary (e.g. left over from filtering); it
        is discarded and replaced by a 0..n-1 range index.
    essay_size : int
        Number of sentences per essay abstraction. Forwarded unchanged
        to ``assign_essay_ids``.

    Returns
    -------
    tuple (sentence_df, essay_df)
        sentence_df : pd.DataFrame
            Sentence-level features and inferred patterns, as returned
            by ``assign_essay_ids``.
        essay_df : pd.DataFrame
            Essay-level dominance profiles, as returned by
            ``profile_essays``.

    Raises
    ------
    KeyError
        If a required column is missing when a row is processed.
    """
    # Normalise the index BEFORE extracting features. pd.concat(axis=1)
    # aligns on index labels, so the source rows and the feature rows must
    # share the same labels; resetting only one side would pair features
    # with the wrong sentences (or add NaN rows) for any input whose index
    # is not already 0..n-1. reset_index returns a new frame, so the
    # caller's DataFrame is left untouched.
    df = df.reset_index(drop=True)

    # Stage 1: per-sentence surface features.
    surface_features = df.apply(
        lambda row: extract_surface_features(
            row["clean_sentence"],
            row["dyslexic_sentence"]
        ),
        axis=1
    )

    # Expand each per-row result into columns and attach them to the
    # original columns; both frames now share the same index.
    feature_df = pd.concat(
        [df, surface_features.apply(pd.Series)],
        axis=1
    )

    # Stage 2: rule-based pattern label, computed from the full row.
    feature_df["writing_pattern"] = feature_df.apply(infer_pattern, axis=1)

    # Stage 3: group sentences into essays and profile each essay.
    feature_df = assign_essay_ids(feature_df, essay_size=essay_size)
    essay_df = profile_essays(feature_df)

    return feature_df, essay_df
|
|