{ "seed": 42, "data": { "train_files": [ "/kaggle/working/data/processed/processed.txt" ], "val_files": null, "test_files": null, "batch_size": 256, "num_workers": 4, "cache_dir": ".cache/", "cache_format": "npz", "max_length": 512, "val_split": 0.01, "test_split": 0, "diacritic_keep_probs": [ 0.25, 0.0, 0.0 ] }, "modeling_config": { "architecture": { "name": "bigru", "embedding_dim": 128, "hidden_dim": 128, "num_layers": 3, "dropout": 0.3 }, "loss": { "name": "cross_entropy", "label_smoothing": 0.1, "ignore_index": 0 }, "optimizer": { "name": "adamw", "lr": 0.0007, "weight_decay": 0.01 }, "scheduler": { "name": "cosine", "t_max": 50 } }, "trainer": { "max_epochs": 40, "accelerator": "auto", "devices": 1, "precision": "32", "log_every_n_steps": 100, "gradient_clip_val": 1.0, "loggers": [ { "logger_name": "tensorboard", "save_dir": "lightning_logs/" }, { "logger_name": "wandb", "project": "Arabic-Diacritizer", "log_model": "all", "name": "exp/M-bigru-hinted_20251001-070659" } ], "callbacks": [ { "name": "model_checkpoint", "monitor": "val_der", "mode": "min", "save_top_k": 3, "filename": "diacritizer-epoch={epoch:02d}-val-der={val_der:.4f}" }, { "name": "early_stopping", "monitor": "val_loss", "mode": "min", "patience": 10 }, { "name": "lr_monitor", "logging_interval": "step" } ] }, "export": { "output_dir": "artifacts/", "dummy_input_length": 50, "onnx_opset_version": 15, "use_torch_dynamo": false }, "git": { "branch": "exp/M-bigru-hinted", "commit_hash": "b9a4ec5cde7dddbe75df258765143566e0452b5c" } }