CEFR_Bert_Fine-tuned / config.json
SNALYF's picture
Upload ACE-CEFR BERT regression model (reproduction)
486b7ab verified
{
"architectures": [
"BertRegressor"
],
"base_model": "bert-base-uncased",
"num_hidden_layers": 3,
"hidden_size": 768,
"head": "Linear(768, 1)",
"task": "regression",
"output_range": [
1.0,
6.0
],
"cefr_mapping": {
"A1": 1,
"A2": 2,
"B1": 3,
"B2": 4,
"C1": 5,
"C2": 6
},
"max_length": 128,
"tokenizer": "bert-base-uncased",
"training_config": {
"csv_path": "data/processed/ace_cefr_labeled.csv",
"output_dir": "checkpoints/reproduce",
"model_name": "bert-base-uncased",
"num_layers": 3,
"max_length": 128,
"lr": 6e-05,
"epochs": 12,
"batch_size": 32,
"warmup_ratio": 0.1,
"weight_decay": 0.01,
"max_grad_norm": 1.0,
"num_workers": 2,
"seed": 42
},
"test_results": {
"final_epoch_test_mse": 0.5775573253631592,
"final_epoch_test_mae": 0.5508898496627808,
"best_test_mse": 0.5665906071662903,
"history": [
{
"epoch": 1,
"train_loss": 12.465281147903271,
"test_mse": 6.588264465332031,
"test_mae": 2.1838321685791016
},
{
"epoch": 2,
"train_loss": 2.5425199029150973,
"test_mse": 1.0636351108551025,
"test_mae": 0.8281134366989136
},
{
"epoch": 3,
"train_loss": 0.9577709433737766,
"test_mse": 1.0986764430999756,
"test_mae": 0.8498026132583618
},
{
"epoch": 4,
"train_loss": 0.6925251134995664,
"test_mse": 0.7558661699295044,
"test_mae": 0.6341950297355652
},
{
"epoch": 5,
"train_loss": 0.4300207313526882,
"test_mse": 0.573773205280304,
"test_mae": 0.5825716257095337
},
{
"epoch": 6,
"train_loss": 0.34610338934351886,
"test_mse": 0.5665906071662903,
"test_mae": 0.5687209367752075
},
{
"epoch": 7,
"train_loss": 0.25567558910069843,
"test_mse": 0.6220540404319763,
"test_mae": 0.5755833983421326
},
{
"epoch": 8,
"train_loss": 0.17715133244401954,
"test_mse": 0.6116251945495605,
"test_mae": 0.5671263337135315
},
{
"epoch": 9,
"train_loss": 0.1541851587509841,
"test_mse": 0.6381506323814392,
"test_mae": 0.5819261074066162
},
{
"epoch": 10,
"train_loss": 0.13355727959214972,
"test_mse": 0.5858347415924072,
"test_mae": 0.5533825755119324
},
{
"epoch": 11,
"train_loss": 0.1009212305371681,
"test_mse": 0.5986077189445496,
"test_mae": 0.5595420002937317
},
{
"epoch": 12,
"train_loss": 0.08693857780668172,
"test_mse": 0.5775573253631592,
"test_mae": 0.5508898496627808
}
],
"paper_targets": {
"bert_baseline": 0.44,
"bert_with_llm_pretrain": 0.37,
"human_expert": 0.75
}
},
"selected_state": "best_test_mse_epoch"
}