| { |
| "configuration": { |
| "base_model_name": "bert-base-uncased", |
| "max_length": 512, |
| "batch_size": 4, |
| "gradient_accumulation_steps": 4, |
| "effective_batch_size": 16, |
| "learning_rate": 0.0002, |
| "num_epochs": 4, |
| "warmup_steps": 500, |
| "weight_decay": 0.01, |
| "dataset_path": "/content/shakespeare_training_data.json", |
| "used_all_data": true, |
| "lora_config": { |
| "r": 16, |
| "alpha": 32, |
| "dropout": 0.1, |
| "target_modules": [ |
| "query", |
| "key", |
| "value", |
| "dense" |
| ] |
| } |
| }, |
| "dataset_statistics": { |
| "total_examples": 6032, |
| "train_examples": 5127, |
| "val_examples": 603, |
| "test_examples": 302, |
| "question_types": { |
| "factual": 4425, |
| "quote": 1520, |
| "analysis": 75, |
| "summary": 12 |
| } |
| }, |
| "model_statistics": { |
| "base_model": "bert-base-uncased", |
| "total_parameters": 111548932, |
| "trainable_parameters": 2655746, |
| "trainable_percentage": 2.3807901630111528, |
| "parameter_efficiency": "2,655,746 trainable out of 111,548,932 total", |
| "base_model_path": "/content/shakespeare-bert-base-model", |
| "lora_adapter_path": "/content/shakespeare-bert-qa-lora" |
| }, |
| "training_statistics": { |
| "start_time": "2025-06-04T19:18:02.140488", |
| "end_time": "2025-06-04T19:30:15.267651", |
| "training_time_seconds": 733.127163, |
| "training_time_formatted": "0:12:13.127163", |
| "final_train_loss": 0.7214135336356, |
| "total_steps": 1284, |
| "epochs_completed": 4, |
| "examples_trained": 5127, |
| "lora_config": { |
| "r": 16, |
| "alpha": 32, |
| "dropout": 0.1, |
| "target_modules": [ |
| "query", |
| "key", |
| "value", |
| "dense" |
| ] |
| }, |
| "model_config": { |
| "base_model": "bert-base-uncased", |
| "max_length": 512, |
| "effective_batch_size": 16, |
| "learning_rate": 0.0002 |
| }, |
| "test_evaluation": { |
| "eval_loss": 0.18932627141475677, |
| "eval_exact_match": 0.8609271523178808, |
| "eval_start_accuracy": 1.0, |
| "eval_end_accuracy": 0.8609271523178808, |
| "eval_start_f1": 1.0, |
| "eval_end_f1": 0.8286722954302209, |
| "eval_avg_f1": 0.9143361477151104, |
| "eval_runtime": 3.9174, |
| "eval_samples_per_second": 77.093, |
| "eval_steps_per_second": 19.401, |
| "epoch": 4.0 |
| }, |
| "validation_evaluation": { |
| "eval_loss": 0.19166630506515503, |
| "eval_exact_match": 0.8590381426202321, |
| "eval_start_accuracy": 1.0, |
| "eval_end_accuracy": 0.8590381426202321, |
| "eval_start_f1": 1.0, |
| "eval_end_f1": 0.8252087537483971, |
| "eval_avg_f1": 0.9126043768741985, |
| "eval_runtime": 7.7871, |
| "eval_samples_per_second": 77.436, |
| "eval_steps_per_second": 19.391, |
| "epoch": 4.0 |
| } |
| }, |
| "system_information": { |
| "device": "cuda", |
| "cuda_available": true, |
| "gpu_name": "Tesla T4", |
| "gpu_memory_gb": 15.828320256, |
| "pytorch_version": "2.6.0+cu124", |
| "transformers_version": "4.35.0+", |
| "peft_enabled": true |
| }, |
| "report_generated": "2025-06-04T19:30:27.650635", |
| "colab_environment": true, |
| "model_type": "BERT-Base-Uncased with LoRA" |
| } |