{
"model_type": "codi",
"base_model": "gpt2",
"architecture": "CODI",
"description": "Custom CODI wrapper around GPT-2 with LoRA, projection layers, and distillation for latent chain-of-thought reasoning on ProntoQA.",
"training_details": {
"dataset": "ProntoQA",
"epochs": 50,
"learning_rate": 0.003,
"seed": 11,
"num_latent": 5,
"final_ce_loss": 0.1202,
"final_distill_loss": 0.0759,
"final_ref_ce_loss": 0.0113,
"final_total_loss": 0.1931
}
}