| { | |
| "experiment": "exp_3_higher_capacity_lora", | |
| "model_name": "deepseek-ai/Janus-Pro-1B", | |
| "train_size": 1368, | |
| "val_size": 152, | |
| "prompt": "Read the handwritten text in this image and transcribe it exactly.", | |
| "hyperparameters": { | |
| "name": "exp_3_higher_capacity_lora", | |
| "num_train_epochs": 2, | |
| "learning_rate": 0.0002, | |
| "gradient_accumulation_steps": 8, | |
| "lora_r": 32, | |
| "lora_alpha": 64, | |
| "lora_dropout": 0.1, | |
| "per_device_train_batch_size": 1, | |
| "per_device_eval_batch_size": 1, | |
| "weight_decay": 0.01, | |
| "warmup_ratio": 0.03 | |
| }, | |
| "eval_metrics": { | |
| "eval_loss": 3.1364641189575195, | |
| "eval_runtime": 190.1631, | |
| "eval_samples_per_second": 0.799, | |
| "eval_steps_per_second": 0.799, | |
| "epoch": 2.0 | |
| }, | |
| "log_summary": { | |
| "best_eval_loss": 3.1364641189575195, | |
| "last_train_loss": 3.133, | |
| "last_learning_rate": 1.2084592145015106e-06 | |
| } | |
| } |