File size: 1,098 Bytes
1154fa3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
{
  "model_type": "Enhanced Random Forest",
  "model_version": "enhanced",
  "task": "regression",
  "domain": "readability assessment",
  "performance": {
    "test_mae": 0.4219855672279393,
    "test_r2": 0.8460916091361399,
    "cv_mae": 0.41789318171271833,
    "cv_std": 0.011257100569327851,
    "oob_score": 0.8415221144777265
  },
  "features": {
    "total_features": 36,
    "selected_features": 25,
    "selection_method": "N/A",
    "feature_categories": [
      "traditional_readability",
      "age_of_acquisition",
      "syntactic_complexity",
      "lexical_diversity",
      "morphological_features",
      "semantic_features",
      "corpus_indicators"
    ]
  },
  "training_data": {
    "primary": "WeeBit corpus",
    "secondary": "CLEAR corpus",
    "samples": 2500
  },
  "architecture": {
    "algorithm": "Random Forest",
    "n_estimators": 200,
    "feature_selection": true,
    "scaling": "RobustScaler"
  },
  "improvements": [
    "Enhanced feature engineering",
    "Robust scaling and selection",
    "Improved generalization",
    "Multi-dataset validation"
  ]
}