codi-gpt2-prontoqa-latent / codi_config.json
simon-pltk's picture
Upload folder using huggingface_hub
17bde88 verified
{
"model_type": "codi",
"base_model": "gpt2",
"architecture": "CODI",
"description": "Custom CODI wrapper around GPT-2 with LoRA, projection layers, and distillation for latent chain-of-thought reasoning on ProntoQA.",
"training_details": {
"dataset": "ProntoQA",
"epochs": 50,
"learning_rate": 0.003,
"seed": 11,
"num_latent": 5,
"final_ce_loss": 0.1202,
"final_distill_loss": 0.0759,
"final_ref_ce_loss": 0.0113,
"final_total_loss": 0.1931
}
}