File size: 493 Bytes
17bde88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
{
  "model_type": "codi",
  "base_model": "gpt2",
  "architecture": "CODI",
  "description": "Custom CODI wrapper around GPT-2 with LoRA, projection layers, and distillation for latent chain-of-thought reasoning on ProntoQA.",
  "training_details": {
    "dataset": "ProntoQA",
    "epochs": 50,
    "learning_rate": 0.003,
    "seed": 11,
    "num_latent": 5,
    "final_ce_loss": 0.1202,
    "final_distill_loss": 0.0759,
    "final_ref_ce_loss": 0.0113,
    "final_total_loss": 0.1931
  }
}