# Repository page header (not part of the module):
#   tinykavi's picture
#   Add writing_pattern_classifier package for live demo
#   5548ff6
"""
Essay-level dyslexic writing pattern profiling.
This module aggregates sentence-level dyslexic writing patterns
into dominance-based essay profiles.
"""
import pandas as pd
def assign_essay_ids(df: pd.DataFrame, essay_size: int = 5) -> pd.DataFrame:
    """
    Assign essay IDs to sentence-level data using fixed-size grouping.

    Rows are grouped by their position in ``df`` (the first ``essay_size``
    rows form essay 0, the next ``essay_size`` rows essay 1, and so on),
    not by their index labels. The grouping therefore stays correct when
    the frame has been filtered, re-ordered, or carries a non-integer index.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing sentence-level patterns.
    essay_size : int
        Number of sentences per essay abstraction. Must be positive.

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with an added 'essay_id' column; the input frame is
        left unmodified.

    Raises
    ------
    ValueError
        If ``essay_size`` is not a positive number.
    """
    if essay_size <= 0:
        raise ValueError(
            f"essay_size must be a positive integer, got {essay_size!r}"
        )

    df = df.copy()
    # Use 0..n-1 row positions instead of df.index: index labels may have
    # gaps, start at an offset, or not be numeric at all.
    row_positions = pd.RangeIndex(len(df)).to_numpy()
    df["essay_id"] = row_positions // essay_size
    return df
def profile_essays(df: pd.DataFrame) -> pd.DataFrame:
    """
    Build one dominance profile per essay from sentence-level patterns.

    Parameters
    ----------
    df : pd.DataFrame
        Sentence-level data with 'essay_id' and 'writing_pattern' columns.

    Returns
    -------
    pd.DataFrame
        One row per essay: 'essay_id', one count column per pattern label,
        then 'dominant_pattern', 'max_count', 'total_sentences',
        'confidence' (max_count / total_sentences) and
        'dominance_strength'.
    """
    # Essay x pattern table of sentence counts; absent combinations become 0.
    per_essay = df.groupby("essay_id")["writing_pattern"]
    counts = per_essay.value_counts().unstack(fill_value=0)

    # All statistics are derived from the raw count table, so the summary
    # columns appended below never feed back into the calculations.
    summary = counts.copy()
    summary["dominant_pattern"] = counts.idxmax(axis=1)
    summary["max_count"] = counts.max(axis=1)
    summary["total_sentences"] = counts.sum(axis=1)

    # Share of the essay's sentences that carry the most frequent pattern.
    share = summary["max_count"] / summary["total_sentences"]
    summary["confidence"] = share
    summary["dominance_strength"] = share.apply(dominance_strength)

    return summary.reset_index()
def dominance_strength(confidence: float) -> str:
    """
    Map a confidence score to a dominance-strength label.

    Returns "Strong" for scores of 0.6 and above, "Moderate" for scores
    from 0.4 up to (but not including) 0.6, and "Weak / Mixed" otherwise.
    """
    # Lower bounds are checked from highest to lowest; the first one met wins.
    tiers = ((0.6, "Strong"), (0.4, "Moderate"))
    for lower_bound, label in tiers:
        if confidence >= lower_bound:
            return label
    return "Weak / Mixed"