| { | |
| "task": "B", | |
| "train_result": { | |
| "train_runtime": 63.5451, | |
| "train_samples_per_second": 54.922, | |
| "train_steps_per_second": 3.462, | |
| "total_flos": 998021268971520.0, | |
| "train_loss": 1.1348177476362749, | |
| "epoch": 5.0 | |
| }, | |
| "eval_result": { | |
| "eval_loss": 0.8116088509559631, | |
| "eval_accuracy": 0.7535816618911175, | |
| "eval_precision": 0.6650326670418545, | |
| "eval_recall": 0.7535816618911175, | |
| "eval_specificity": null, | |
| "eval_f1": 0.6942659022778552, | |
| "eval_auc": null, | |
| "eval_pr_auc": null, | |
| "eval_mcc": 0.4826800602055998, | |
| "eval_balanced_accuracy": 0.20106890227872526, | |
| "eval_runtime": 2.8095, | |
| "eval_samples_per_second": 248.44, | |
| "eval_steps_per_second": 7.83, | |
| "epoch": 5.0 | |
| }, | |
| "config": { | |
| "task": "B", | |
| "base_model": "microsoft/codebert-base", | |
| "num_labels": 11, | |
| "stylometric_dim": 32, | |
| "num_gpad_layers": 1, | |
| "entropy_alpha": 0.1, | |
| "max_length": 512, | |
| "use_stylometric": true, | |
| "num_epochs": 5, | |
| "train_batch_size": 16, | |
| "eval_batch_size": 32, | |
| "learning_rate": 2e-05, | |
| "weight_decay": 0.01, | |
| "warmup_ratio": 0.1, | |
| "max_grad_norm": 1.0, | |
| "gradient_accumulation_steps": 1, | |
| "use_mixed_precision": true, | |
| "use_gradient_checkpointing": true, | |
| "val_split": 0.2, | |
| "sample_size": null, | |
| "output_dir": "./gpad2_results_taskB_taskb", | |
| "log_interval": 100, | |
| "save_strategy": "steps", | |
| "save_steps": 500, | |
| "eval_strategy": "steps", | |
| "eval_steps": 500, | |
| "early_stopping_patience": 3, | |
| "seed": 42, | |
| "use_wandb": false, | |
| "wandb_project": "gpad2_taskb", | |
| "wandb_run_name": null, | |
| "push_to_hub": true, | |
| "hub_model_id": "ranjan56cse/gpad2-taskb_smpl", | |
| "hub_token": null, | |
| "hub_private_repo": false | |
| } | |
| } |