{
  "model_type": "codi",
  "base_model": "gpt2",
  "architecture": "CODI",
  "description": "Custom CODI wrapper around GPT-2 with LoRA, projection layers, and distillation for latent chain-of-thought reasoning on ProntoQA.",
  "training_details": {
    "dataset": "ProntoQA",
    "epochs": 50,
    "learning_rate": 0.003,
    "seed": 11,
    "num_latent": 5,
    "final_ce_loss": 0.1202,
    "final_distill_loss": 0.0759,
    "final_ref_ce_loss": 0.0113,
    "final_total_loss": 0.1931
  }
}