HTR-ConvText / config.json
k0ry's picture
Update config.json
3892aba verified
{
"model": "DAIR-Group/HTR-ConvText",
"generated_at": "2025-12-17T08:57:42.912579",
"artifacts": [
{
"name": "iam.pth",
"path": "checkpoints\\iam.pth",
"exists": true,
"size_bytes": 529444565,
"size_mb": 504.918,
"sha256": "9b2aba60e18d922a615562e23f4079cef4ba4bd559a38d1d6e53de8d7495c122",
"modified": "2025-12-16T07:24:08.462236",
"created": "2025-12-16T08:20:24.508913",
"dataset": "iam",
"best_cer": 0.02582327045571924,
"best_wer": 0.08769689229459345,
"nb_iter": 54000,
"args": {
"out_dir": "./output",
"exp_name": "iam",
"seed": 123,
"use_wandb": true,
"wandb_project": "iam",
"print_iter": 100,
"eval_iter": 1000,
"dataset": "iam",
"data_path": "/root/ws/dataset/iam/lines/",
"train_data_list": "/root/ws/dataset/iam/train.ln",
"val_data_list": "/root/ws/dataset/iam/val.ln",
"test_data_list": "/root/ws/dataset/iam/test.ln",
"nb_cls": 80,
"num_workers": 4,
"img_size": [
512,
64
],
"patch_size": [
4,
32
],
"train_bs": 128,
"accum_steps": 1,
"val_bs": 16,
"total_iter": 100001,
"warm_up_iter": 1000,
"max_lr": 0.001,
"weight_decay": 0.05,
"ema_decay": 0.9999,
"alpha": 1.0,
"model_type": "ctc",
"cos_temp": 8,
"proj": 8.0,
"attn_mask_ratio": 0.1,
"use_masking": false,
"mask_ratio": 0.4,
"max_span_length": 8,
"spacing": 0,
"r_rand": 0.6,
"r_block": 0.6,
"block_span": 4,
"r_span": 0.4,
"max_span": 8,
"dpi_min_factor": 0.5,
"dpi_max_factor": 1.5,
"perspective_low": 0.0,
"perspective_high": 0.4,
"elastic_distortion_min_kernel_size": 3,
"elastic_distortion_max_kernel_size": 3,
"elastic_distortion_max_magnitude": 20,
"elastic_distortion_min_alpha": 0.5,
"elastic_distortion_max_alpha": 1,
"elastic_distortion_min_sigma": 1,
"elastic_distortion_max_sigma": 10,
"dila_ero_max_kernel": 2,
"dila_ero_iter": 1,
"jitter_contrast": 0.4,
"jitter_brightness": 0.4,
"jitter_saturation": 0.4,
"jitter_hue": 0.2,
"blur_min_kernel": 3,
"blur_max_kernel": 5,
"blur_min_sigma": 3,
"blur_max_sigma": 5,
"sharpen_min_alpha": 0,
"sharpen_max_alpha": 1,
"sharpen_min_strength": 0,
"sharpen_max_strength": 1,
"zoom_min_h": 0.8,
"zoom_max_h": 1,
"zoom_min_w": 0.99,
"zoom_max_w": 1,
"proba": 0.5,
"decoder_layers": 6,
"decoder_heads": 8,
"max_seq_len": 256,
"label_smoothing": 0.1,
"beam_size": 5,
"generation_method": "nucleus",
"generation_temperature": 0.7,
"repetition_penalty": 1.3,
"top_p": 0.9,
"tcm_enable": true,
"tcm_lambda": 1.0,
"ctc_lambda": 0.1,
"tcm_sub_len": 5,
"tcm_warmup_iters": 0,
"resume": null,
"load_model": null,
"load_encoder_only": false,
"strict_loading": true,
"save_dir": "./output/iam"
},
"load_error": null
},
{
"name": "hands-vnondb.pth",
"path": "checkpoints\\hands-vnondb.pth",
"exists": true,
"size_bytes": 530033301,
"size_mb": 505.479,
"sha256": "7208c41f3e58cf02bc228e29aa669836393bb1c08ae33a077e42dfa119645a61",
"modified": "2025-12-16T07:25:45.907545",
"created": "2025-12-16T08:20:25.225462",
"dataset": "hands-vnondb",
"best_cer": 0.035116817411641174,
"best_wer": 0.0871608236974248,
"nb_iter": 65000,
"args": {
"out_dir": "./output",
"exp_name": "vnondb",
"seed": 123,
"use_wandb": true,
"wandb_project": "vnondb",
"print_iter": 100,
"eval_iter": 1000,
"dataset": "vnondb",
"data_path": "/root/ws/dataset/vnondb/lines/",
"train_data_list": "/root/ws/dataset/vnondb/train.ln",
"val_data_list": "/root/ws/dataset/vnondb/valid.ln",
"test_data_list": "/root/ws/dataset/vnondb/test.ln",
"nb_cls": 162,
"num_workers": 4,
"img_size": [
512,
64
],
"patch_size": [
4,
32
],
"train_bs": 128,
"accum_steps": 1,
"val_bs": 16,
"total_iter": 100001,
"warm_up_iter": 1000,
"max_lr": 0.001,
"weight_decay": 0.05,
"ema_decay": 0.9999,
"alpha": 1.0,
"model_type": "ctc",
"cos_temp": 8,
"proj": 8.0,
"attn_mask_ratio": 0.1,
"use_masking": false,
"mask_ratio": 0.4,
"max_span_length": 8,
"spacing": 0,
"r_rand": 0.6,
"r_block": 0.6,
"block_span": 4,
"r_span": 0.4,
"max_span": 8,
"dpi_min_factor": 0.5,
"dpi_max_factor": 1.5,
"perspective_low": 0.0,
"perspective_high": 0.4,
"elastic_distortion_min_kernel_size": 3,
"elastic_distortion_max_kernel_size": 3,
"elastic_distortion_max_magnitude": 20,
"elastic_distortion_min_alpha": 0.5,
"elastic_distortion_max_alpha": 1,
"elastic_distortion_min_sigma": 1,
"elastic_distortion_max_sigma": 10,
"dila_ero_max_kernel": 2,
"dila_ero_iter": 1,
"jitter_contrast": 0.4,
"jitter_brightness": 0.4,
"jitter_saturation": 0.4,
"jitter_hue": 0.2,
"blur_min_kernel": 3,
"blur_max_kernel": 5,
"blur_min_sigma": 3,
"blur_max_sigma": 5,
"sharpen_min_alpha": 0,
"sharpen_max_alpha": 1,
"sharpen_min_strength": 0,
"sharpen_max_strength": 1,
"zoom_min_h": 0.8,
"zoom_max_h": 1,
"zoom_min_w": 0.99,
"zoom_max_w": 1,
"proba": 0.5,
"decoder_layers": 6,
"decoder_heads": 8,
"max_seq_len": 256,
"label_smoothing": 0.1,
"beam_size": 5,
"generation_method": "nucleus",
"generation_temperature": 0.7,
"repetition_penalty": 1.3,
"top_p": 0.9,
"tcm_enable": true,
"tcm_lambda": 1.0,
"ctc_lambda": 0.1,
"tcm_sub_len": 5,
"tcm_warmup_iters": 0,
"resume": null,
"load_model": null,
"load_encoder_only": false,
"strict_loading": true,
"save_dir": "./output/vnondb"
},
"load_error": null
},
{
"name": "read.pth",
"path": "checkpoints\\read.pth",
"exists": true,
"size_bytes": 529516245,
"size_mb": 504.986,
"sha256": "01d3552f42356eecb1625dfc8692306f49efab1c7a3e116d819c9cc42d317d1f",
"modified": "2025-12-16T07:24:15.937379",
"created": "2025-12-16T08:20:24.959519",
"dataset": "read",
"best_cer": 0.039916476472587774,
"best_wer": 0.17772215269086358,
"nb_iter": 40000,
"args": {
"out_dir": "./output",
"exp_name": "read",
"seed": 123,
"use_wandb": true,
"wandb_project": "read2016",
"print_iter": 100,
"eval_iter": 1000,
"dataset": "read2016",
"data_path": "/root/ws/dataset/read/lines/",
"train_data_list": "/root/ws/dataset/read/train.ln",
"val_data_list": "/root/ws/dataset/read/val.ln",
"test_data_list": "/root/ws/dataset/read/test.ln",
"nb_cls": 90,
"num_workers": 4,
"img_size": [
512,
64
],
"patch_size": [
4,
32
],
"train_bs": 128,
"accum_steps": 1,
"val_bs": 16,
"total_iter": 100001,
"warm_up_iter": 1000,
"max_lr": 0.001,
"weight_decay": 0.05,
"ema_decay": 0.9999,
"alpha": 1.0,
"model_type": "ctc",
"cos_temp": 8,
"proj": 8.0,
"attn_mask_ratio": 0.1,
"use_masking": false,
"mask_ratio": 0.4,
"max_span_length": 8,
"spacing": 0,
"r_rand": 0.6,
"r_block": 0.6,
"block_span": 4,
"r_span": 0.4,
"max_span": 8,
"dpi_min_factor": 0.5,
"dpi_max_factor": 1.5,
"perspective_low": 0.0,
"perspective_high": 0.4,
"elastic_distortion_min_kernel_size": 3,
"elastic_distortion_max_kernel_size": 3,
"elastic_distortion_max_magnitude": 20,
"elastic_distortion_min_alpha": 0.5,
"elastic_distortion_max_alpha": 1,
"elastic_distortion_min_sigma": 1,
"elastic_distortion_max_sigma": 10,
"dila_ero_max_kernel": 2,
"dila_ero_iter": 1,
"jitter_contrast": 0.4,
"jitter_brightness": 0.4,
"jitter_saturation": 0.4,
"jitter_hue": 0.2,
"blur_min_kernel": 3,
"blur_max_kernel": 5,
"blur_min_sigma": 3,
"blur_max_sigma": 5,
"sharpen_min_alpha": 0,
"sharpen_max_alpha": 1,
"sharpen_min_strength": 0,
"sharpen_max_strength": 1,
"zoom_min_h": 0.8,
"zoom_max_h": 1,
"zoom_min_w": 0.99,
"zoom_max_w": 1,
"proba": 0.5,
"decoder_layers": 6,
"decoder_heads": 8,
"max_seq_len": 256,
"label_smoothing": 0.1,
"beam_size": 5,
"generation_method": "nucleus",
"generation_temperature": 0.7,
"repetition_penalty": 1.3,
"top_p": 0.9,
"tcm_enable": true,
"tcm_lambda": 1.0,
"ctc_lambda": 0.1,
"tcm_sub_len": 5,
"tcm_warmup_iters": 0,
"resume": null,
"load_model": null,
"load_encoder_only": false,
"strict_loading": true,
"save_dir": "./output/read"
},
"load_error": null
},
{
"name": "lam.pth",
"path": "checkpoints\\lam.pth",
"exists": true,
"size_bytes": 529523413,
"size_mb": 504.993,
"sha256": "ac1117ff37ba4ffc4282c5ba31574472be63cb2344f6b59e16f264df02d24973",
"modified": "2025-12-17T08:55:03.999301",
"created": "2025-12-17T08:54:55.089015",
"dataset": "lam",
"best_cer": 0.024121379310344828,
"best_wer": 0.06445365282406991,
"nb_iter": 62000,
"args": {
"out_dir": "./output",
"exp_name": "lam",
"seed": 123,
"use_wandb": true,
"wandb_project": "lam",
"print_iter": 100,
"eval_iter": 1000,
"dataset": "lam",
"data_path": "/root/ws/dataset/lam/lines/",
"train_data_list": "/root/ws/dataset/lam/train.ln",
"val_data_list": "/root/ws/dataset/lam/val.ln",
"test_data_list": "/root/ws/dataset/lam/test.ln",
"nb_cls": 91,
"num_workers": 4,
"img_size": [
512,
64
],
"patch_size": [
4,
32
],
"train_bs": 96,
"accum_steps": 1,
"val_bs": 16,
"total_iter": 100001,
"warm_up_iter": 1000,
"max_lr": 0.001,
"weight_decay": 0.05,
"ema_decay": 0.9999,
"alpha": 1.0,
"model_type": "ctc",
"cos_temp": 8,
"proj": 8.0,
"attn_mask_ratio": 0.1,
"use_masking": false,
"mask_ratio": 0.4,
"max_span_length": 8,
"spacing": 0,
"r_rand": 0.6,
"r_block": 0.6,
"block_span": 4,
"r_span": 0.4,
"max_span": 8,
"dpi_min_factor": 0.5,
"dpi_max_factor": 1.5,
"perspective_low": 0.0,
"perspective_high": 0.4,
"elastic_distortion_min_kernel_size": 3,
"elastic_distortion_max_kernel_size": 3,
"elastic_distortion_max_magnitude": 20,
"elastic_distortion_min_alpha": 0.5,
"elastic_distortion_max_alpha": 1,
"elastic_distortion_min_sigma": 1,
"elastic_distortion_max_sigma": 10,
"dila_ero_max_kernel": 2,
"dila_ero_iter": 1,
"jitter_contrast": 0.4,
"jitter_brightness": 0.4,
"jitter_saturation": 0.4,
"jitter_hue": 0.2,
"blur_min_kernel": 3,
"blur_max_kernel": 5,
"blur_min_sigma": 3,
"blur_max_sigma": 5,
"sharpen_min_alpha": 0,
"sharpen_max_alpha": 1,
"sharpen_min_strength": 0,
"sharpen_max_strength": 1,
"zoom_min_h": 0.8,
"zoom_max_h": 1,
"zoom_min_w": 0.99,
"zoom_max_w": 1,
"proba": 0.5,
"decoder_layers": 6,
"decoder_heads": 8,
"max_seq_len": 256,
"label_smoothing": 0.1,
"beam_size": 5,
"generation_method": "nucleus",
"generation_temperature": 0.7,
"repetition_penalty": 1.3,
"top_p": 0.9,
"tcm_enable": true,
"tcm_lambda": 1.0,
"ctc_lambda": 0.1,
"tcm_sub_len": 5,
"tcm_warmup_iters": 0,
"resume": null,
"load_model": null,
"load_encoder_only": false,
"strict_loading": true,
"save_dir": "./output/lam"
},
"load_error": null
}
]
}