# Uploaded by tinykavi (commit 5548ff6):
# "Add writing_pattern_classifier package for live demo"
"""
End-to-end pipeline for Sinhala dyslexic writing pattern analysis.
This module orchestrates sentence-level feature extraction,
pattern inference, and essay-level profiling.
"""
import pandas as pd
from .feature_extraction import extract_surface_features
from .pattern_rules import infer_pattern
from .essay_profile import assign_essay_ids, profile_essays
def run_pattern_analysis(
    df: pd.DataFrame,
    essay_size: int = 5
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Run the complete dyslexic writing pattern analysis pipeline.

    The pipeline has three stages: per-sentence surface feature
    extraction, per-sentence pattern inference, and essay-level
    profiling. The caller's DataFrame is never modified.

    Parameters
    ----------
    df : pd.DataFrame
        Input DataFrame containing the columns:
        - 'clean_sentence'
        - 'dyslexic_sentence'
        Any index is accepted; it is discarded and replaced by a fresh
        0..n-1 index in the returned sentence-level frame.
    essay_size : int
        Number of sentences per essay abstraction (forwarded to
        ``assign_essay_ids``).

    Returns
    -------
    tuple (sentence_df, essay_df)
        sentence_df : pd.DataFrame
            The input rows plus the extracted surface features, a
            'writing_pattern' column, and whatever ``assign_essay_ids``
            adds.
        essay_df : pd.DataFrame
            Essay-level profiles as returned by ``profile_essays``.

    Raises
    ------
    KeyError
        If a required input column is missing.
    """
    required_columns = ("clean_sentence", "dyslexic_sentence")
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        raise KeyError(f"Input DataFrame is missing required columns: {missing}")

    # Reset the index *before* extracting features. pd.concat(axis=1)
    # aligns on index labels, so the feature frame must carry the same
    # index as the sentence frame; otherwise inputs with a non-default
    # index (filtered / sorted / sliced frames) produce misaligned,
    # NaN-padded rows. reset_index returns a new object, so the caller's
    # DataFrame is left untouched.
    df = df.reset_index(drop=True)

    # --- Sentence-level feature extraction ---
    surface_features = df.apply(
        lambda row: extract_surface_features(
            row["clean_sentence"],
            row["dyslexic_sentence"]
        ),
        axis=1
    )

    # Expand each per-row result into its own columns and attach them
    # to the sentences (both sides now share the same RangeIndex).
    feature_df = pd.concat(
        [df, surface_features.apply(pd.Series)],
        axis=1
    )

    # --- Sentence-level pattern inference ---
    # infer_pattern receives the whole row (sentences plus features).
    feature_df["writing_pattern"] = feature_df.apply(infer_pattern, axis=1)

    # --- Essay-level profiling ---
    feature_df = assign_essay_ids(feature_df, essay_size=essay_size)
    essay_df = profile_essays(feature_df)

    return feature_df, essay_df