"""
End-to-end pipeline for Sinhala dyslexic writing pattern analysis.

This module orchestrates sentence-level feature extraction,
pattern inference, and essay-level profiling.
"""

import pandas as pd

from .feature_extraction import extract_surface_features
from .pattern_rules import infer_pattern
from .essay_profile import assign_essay_ids, profile_essays


def run_pattern_analysis(
    df: pd.DataFrame,
    essay_size: int = 5
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Run the complete dyslexic writing pattern analysis pipeline.

    The pipeline has three stages: per-sentence surface features are
    extracted from each (clean, dyslexic) sentence pair, a writing pattern
    is inferred for every row, and the rows are then handed to the
    essay-level helpers for id assignment and profiling.

    Parameters
    ----------
    df : pd.DataFrame
        Input DataFrame containing:
        - 'clean_sentence'
        - 'dyslexic_sentence'
        The caller's frame is not modified. Its index is discarded; the
        returned sentence-level frame is indexed 0..n-1 in input row order.
    essay_size : int
        Number of sentences per essay abstraction (forwarded unchanged to
        ``assign_essay_ids``).

    Returns
    -------
    tuple
        (sentence_df, essay_df)

        sentence_df : pd.DataFrame
            Sentence-level features and inferred patterns
            (includes a 'writing_pattern' column).
        essay_df : pd.DataFrame
            Essay-level dominance profiles, as produced by
            ``profile_essays``.
    """
    # Reset the index *before* extracting features. ``pd.concat(axis=1)``
    # aligns on index labels, so the feature rows must carry the same
    # 0..n-1 labels as the sentence rows; otherwise an input with a
    # non-default index (e.g. a filtered frame) yields misaligned,
    # NaN-padded rows. ``reset_index`` returns a new object, so the
    # caller's DataFrame is left untouched.
    df = df.reset_index(drop=True)

    # --- Sentence-level feature extraction ---
    surface_features = df.apply(
        lambda row: extract_surface_features(
            row["clean_sentence"],
            row["dyslexic_sentence"]
        ),
        axis=1
    )

    # Expand each per-row feature result into its own columns and attach
    # them next to the original sentence columns.
    feature_df = pd.concat(
        [df, surface_features.apply(pd.Series)],
        axis=1
    )

    # --- Sentence-level pattern inference ---
    # ``infer_pattern`` receives the full row (sentences + features).
    feature_df["writing_pattern"] = feature_df.apply(infer_pattern, axis=1)

    # --- Essay-level profiling ---
    feature_df = assign_essay_ids(feature_df, essay_size=essay_size)
    essay_df = profile_essays(feature_df)

    return feature_df, essay_df