{ "model": "DAIR-Group/HTR-ConvText", "generated_at": "2025-12-17T08:57:42.912579", "artifacts": [ { "name": "iam.pth", "path": "checkpoints\\iam.pth", "exists": true, "size_bytes": 529444565, "size_mb": 504.918, "sha256": "9b2aba60e18d922a615562e23f4079cef4ba4bd559a38d1d6e53de8d7495c122", "modified": "2025-12-16T07:24:08.462236", "created": "2025-12-16T08:20:24.508913", "dataset": "iam", "best_cer": 0.02582327045571924, "best_wer": 0.08769689229459345, "nb_iter": 54000, "args": { "out_dir": "./output", "exp_name": "iam", "seed": 123, "use_wandb": true, "wandb_project": "iam", "print_iter": 100, "eval_iter": 1000, "dataset": "iam", "data_path": "/root/ws/dataset/iam/lines/", "train_data_list": "/root/ws/dataset/iam/train.ln", "val_data_list": "/root/ws/dataset/iam/val.ln", "test_data_list": "/root/ws/dataset/iam/test.ln", "nb_cls": 80, "num_workers": 4, "img_size": [ 512, 64 ], "patch_size": [ 4, 32 ], "train_bs": 128, "accum_steps": 1, "val_bs": 16, "total_iter": 100001, "warm_up_iter": 1000, "max_lr": 0.001, "weight_decay": 0.05, "ema_decay": 0.9999, "alpha": 1.0, "model_type": "ctc", "cos_temp": 8, "proj": 8.0, "attn_mask_ratio": 0.1, "use_masking": false, "mask_ratio": 0.4, "max_span_length": 8, "spacing": 0, "r_rand": 0.6, "r_block": 0.6, "block_span": 4, "r_span": 0.4, "max_span": 8, "dpi_min_factor": 0.5, "dpi_max_factor": 1.5, "perspective_low": 0.0, "perspective_high": 0.4, "elastic_distortion_min_kernel_size": 3, "elastic_distortion_max_kernel_size": 3, "elastic_distortion_max_magnitude": 20, "elastic_distortion_min_alpha": 0.5, "elastic_distortion_max_alpha": 1, "elastic_distortion_min_sigma": 1, "elastic_distortion_max_sigma": 10, "dila_ero_max_kernel": 2, "dila_ero_iter": 1, "jitter_contrast": 0.4, "jitter_brightness": 0.4, "jitter_saturation": 0.4, "jitter_hue": 0.2, "blur_min_kernel": 3, "blur_max_kernel": 5, "blur_min_sigma": 3, "blur_max_sigma": 5, "sharpen_min_alpha": 0, "sharpen_max_alpha": 1, "sharpen_min_strength": 0, "sharpen_max_strength": 1, "zoom_min_h": 0.8, "zoom_max_h": 1, "zoom_min_w": 0.99, "zoom_max_w": 1, "proba": 0.5, "decoder_layers": 6, "decoder_heads": 8, "max_seq_len": 256, "label_smoothing": 0.1, "beam_size": 5, "generation_method": "nucleus", "generation_temperature": 0.7, "repetition_penalty": 1.3, "top_p": 0.9, "tcm_enable": true, "tcm_lambda": 1.0, "ctc_lambda": 0.1, "tcm_sub_len": 5, "tcm_warmup_iters": 0, "resume": null, "load_model": null, "load_encoder_only": false, "strict_loading": true, "save_dir": "./output/iam" }, "load_error": null }, { "name": "hands-vnondb.pth", "path": "checkpoints\\hands-vnondb.pth", "exists": true, "size_bytes": 530033301, "size_mb": 505.479, "sha256": "7208c41f3e58cf02bc228e29aa669836393bb1c08ae33a077e42dfa119645a61", "modified": "2025-12-16T07:25:45.907545", "created": "2025-12-16T08:20:25.225462", "dataset": "hands-vnondb", "best_cer": 0.035116817411641174, "best_wer": 0.0871608236974248, "nb_iter": 65000, "args": { "out_dir": "./output", "exp_name": "vnondb", "seed": 123, "use_wandb": true, "wandb_project": "vnondb", "print_iter": 100, "eval_iter": 1000, "dataset": "vnondb", "data_path": "/root/ws/dataset/vnondb/lines/", "train_data_list": "/root/ws/dataset/vnondb/train.ln", "val_data_list": "/root/ws/dataset/vnondb/valid.ln", "test_data_list": "/root/ws/dataset/vnondb/test.ln", "nb_cls": 162, "num_workers": 4, "img_size": [ 512, 64 ], "patch_size": [ 4, 32 ], "train_bs": 128, "accum_steps": 1, "val_bs": 16, "total_iter": 100001, "warm_up_iter": 1000, "max_lr": 0.001, "weight_decay": 0.05, "ema_decay": 0.9999, "alpha": 1.0, "model_type": "ctc", "cos_temp": 8, "proj": 8.0, "attn_mask_ratio": 0.1, "use_masking": false, "mask_ratio": 0.4, "max_span_length": 8, "spacing": 0, "r_rand": 0.6, "r_block": 0.6, "block_span": 4, "r_span": 0.4, "max_span": 8, "dpi_min_factor": 0.5, "dpi_max_factor": 1.5, "perspective_low": 0.0, "perspective_high": 0.4, "elastic_distortion_min_kernel_size": 3, "elastic_distortion_max_kernel_size": 3, "elastic_distortion_max_magnitude": 20, "elastic_distortion_min_alpha": 0.5, "elastic_distortion_max_alpha": 1, "elastic_distortion_min_sigma": 1, "elastic_distortion_max_sigma": 10, "dila_ero_max_kernel": 2, "dila_ero_iter": 1, "jitter_contrast": 0.4, "jitter_brightness": 0.4, "jitter_saturation": 0.4, "jitter_hue": 0.2, "blur_min_kernel": 3, "blur_max_kernel": 5, "blur_min_sigma": 3, "blur_max_sigma": 5, "sharpen_min_alpha": 0, "sharpen_max_alpha": 1, "sharpen_min_strength": 0, "sharpen_max_strength": 1, "zoom_min_h": 0.8, "zoom_max_h": 1, "zoom_min_w": 0.99, "zoom_max_w": 1, "proba": 0.5, "decoder_layers": 6, "decoder_heads": 8, "max_seq_len": 256, "label_smoothing": 0.1, "beam_size": 5, "generation_method": "nucleus", "generation_temperature": 0.7, "repetition_penalty": 1.3, "top_p": 0.9, "tcm_enable": true, "tcm_lambda": 1.0, "ctc_lambda": 0.1, "tcm_sub_len": 5, "tcm_warmup_iters": 0, "resume": null, "load_model": null, "load_encoder_only": false, "strict_loading": true, "save_dir": "./output/vnondb" }, "load_error": null }, { "name": "read.pth", "path": "checkpoints\\read.pth", "exists": true, "size_bytes": 529516245, "size_mb": 504.986, "sha256": "01d3552f42356eecb1625dfc8692306f49efab1c7a3e116d819c9cc42d317d1f", "modified": "2025-12-16T07:24:15.937379", "created": "2025-12-16T08:20:24.959519", "dataset": "read", "best_cer": 0.039916476472587774, "best_wer": 0.17772215269086358, "nb_iter": 40000, "args": { "out_dir": "./output", "exp_name": "read", "seed": 123, "use_wandb": true, "wandb_project": "read2016", "print_iter": 100, "eval_iter": 1000, "dataset": "read2016", "data_path": "/root/ws/dataset/read/lines/", "train_data_list": "/root/ws/dataset/read/train.ln", "val_data_list": "/root/ws/dataset/read/val.ln", "test_data_list": "/root/ws/dataset/read/test.ln", "nb_cls": 90, "num_workers": 4, "img_size": [ 512, 64 ], "patch_size": [ 4, 32 ], "train_bs": 128, "accum_steps": 1, "val_bs": 16, "total_iter": 100001, "warm_up_iter": 1000, "max_lr": 0.001, "weight_decay": 0.05, "ema_decay": 0.9999, "alpha": 1.0, "model_type": "ctc", "cos_temp": 8, "proj": 8.0, "attn_mask_ratio": 0.1, "use_masking": false, "mask_ratio": 0.4, "max_span_length": 8, "spacing": 0, "r_rand": 0.6, "r_block": 0.6, "block_span": 4, "r_span": 0.4, "max_span": 8, "dpi_min_factor": 0.5, "dpi_max_factor": 1.5, "perspective_low": 0.0, "perspective_high": 0.4, "elastic_distortion_min_kernel_size": 3, "elastic_distortion_max_kernel_size": 3, "elastic_distortion_max_magnitude": 20, "elastic_distortion_min_alpha": 0.5, "elastic_distortion_max_alpha": 1, "elastic_distortion_min_sigma": 1, "elastic_distortion_max_sigma": 10, "dila_ero_max_kernel": 2, "dila_ero_iter": 1, "jitter_contrast": 0.4, "jitter_brightness": 0.4, "jitter_saturation": 0.4, "jitter_hue": 0.2, "blur_min_kernel": 3, "blur_max_kernel": 5, "blur_min_sigma": 3, "blur_max_sigma": 5, "sharpen_min_alpha": 0, "sharpen_max_alpha": 1, "sharpen_min_strength": 0, "sharpen_max_strength": 1, "zoom_min_h": 0.8, "zoom_max_h": 1, "zoom_min_w": 0.99, "zoom_max_w": 1, "proba": 0.5, "decoder_layers": 6, "decoder_heads": 8, "max_seq_len": 256, "label_smoothing": 0.1, "beam_size": 5, "generation_method": "nucleus", "generation_temperature": 0.7, "repetition_penalty": 1.3, "top_p": 0.9, "tcm_enable": true, "tcm_lambda": 1.0, "ctc_lambda": 0.1, "tcm_sub_len": 5, "tcm_warmup_iters": 0, "resume": null, "load_model": null, "load_encoder_only": false, "strict_loading": true, "save_dir": "./output/read" }, "load_error": null }, { "name": "lam.pth", "path": "checkpoints\\lam.pth", "exists": true, "size_bytes": 529523413, "size_mb": 504.993, "sha256": "ac1117ff37ba4ffc4282c5ba31574472be63cb2344f6b59e16f264df02d24973", "modified": "2025-12-17T08:55:03.999301", "created": "2025-12-17T08:54:55.089015", "dataset": "lam", "best_cer": 0.024121379310344828, "best_wer": 0.06445365282406991, "nb_iter": 62000, "args": { "out_dir": "./output", "exp_name": "lam", "seed": 123, "use_wandb": true, "wandb_project": "lam", "print_iter": 100, "eval_iter": 1000, "dataset": "lam", "data_path": "/root/ws/dataset/lam/lines/", "train_data_list": "/root/ws/dataset/lam/train.ln", "val_data_list": "/root/ws/dataset/lam/val.ln", "test_data_list": "/root/ws/dataset/lam/test.ln", "nb_cls": 91, "num_workers": 4, "img_size": [ 512, 64 ], "patch_size": [ 4, 32 ], "train_bs": 96, "accum_steps": 1, "val_bs": 16, "total_iter": 100001, "warm_up_iter": 1000, "max_lr": 0.001, "weight_decay": 0.05, "ema_decay": 0.9999, "alpha": 1.0, "model_type": "ctc", "cos_temp": 8, "proj": 8.0, "attn_mask_ratio": 0.1, "use_masking": false, "mask_ratio": 0.4, "max_span_length": 8, "spacing": 0, "r_rand": 0.6, "r_block": 0.6, "block_span": 4, "r_span": 0.4, "max_span": 8, "dpi_min_factor": 0.5, "dpi_max_factor": 1.5, "perspective_low": 0.0, "perspective_high": 0.4, "elastic_distortion_min_kernel_size": 3, "elastic_distortion_max_kernel_size": 3, "elastic_distortion_max_magnitude": 20, "elastic_distortion_min_alpha": 0.5, "elastic_distortion_max_alpha": 1, "elastic_distortion_min_sigma": 1, "elastic_distortion_max_sigma": 10, "dila_ero_max_kernel": 2, "dila_ero_iter": 1, "jitter_contrast": 0.4, "jitter_brightness": 0.4, "jitter_saturation": 0.4, "jitter_hue": 0.2, "blur_min_kernel": 3, "blur_max_kernel": 5, "blur_min_sigma": 3, "blur_max_sigma": 5, "sharpen_min_alpha": 0, "sharpen_max_alpha": 1, "sharpen_min_strength": 0, "sharpen_max_strength": 1, "zoom_min_h": 0.8, "zoom_max_h": 1, "zoom_min_w": 0.99, "zoom_max_w": 1, "proba": 0.5, "decoder_layers": 6, "decoder_heads": 8, "max_seq_len": 256, "label_smoothing": 0.1, "beam_size": 5, "generation_method": "nucleus", "generation_temperature": 0.7, "repetition_penalty": 1.3, "top_p": 0.9, "tcm_enable": true, "tcm_lambda": 1.0, "ctc_lambda": 0.1, "tcm_sub_len": 5, "tcm_warmup_iters": 0, "resume": null, "load_model": null, "load_encoder_only": false, "strict_loading": true, "save_dir": "./output/lam" }, "load_error": null } ] }