{
  "model_type": "Enhanced Random Forest",
  "model_version": "enhanced",
  "task": "regression",
  "domain": "readability assessment",
  "performance": {
    "test_mae": 0.4219855672279393,
    "test_r2": 0.8460916091361399,
    "cv_mae": 0.41789318171271833,
    "cv_std": 0.011257100569327851,
    "oob_score": 0.8415221144777265
  },
  "features": {
    "total_features": 36,
    "selected_features": 25,
    "selection_method": "N/A",
    "feature_categories": [
      "traditional_readability",
      "age_of_acquisition",
      "syntactic_complexity",
      "lexical_diversity",
      "morphological_features",
      "semantic_features",
      "corpus_indicators"
    ]
  },
  "training_data": {
    "primary": "WeeBit corpus",
    "secondary": "CLEAR corpus",
    "samples": 2500
  },
  "architecture": {
    "algorithm": "Random Forest",
    "n_estimators": 200,
    "feature_selection": true,
    "scaling": "RobustScaler"
  },
  "improvements": [
    "Enhanced feature engineering",
    "Robust scaling and selection",
    "Improved generalization",
    "Multi-dataset validation"
  ]
}