lllezd committed on
Commit
c7e3000
·
verified ·
1 Parent(s): 8557934

Upload metrics.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. metrics.json +45 -0
metrics.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "final_model": "small + GQA + RoPE",
3
+ "main_repo": "lllezd/dl-course-hw13",
4
+ "variant_repo": "lllezd/dl-course-hw13-small-gqa-rope",
5
+ "eval_loss": 2.4694972038269043,
6
+ "eval_perplexity": 11.816504064743178,
7
+ "experiments": {
8
+ "1_small_gqa_baseline": {
9
+ "description": "small + GQA baseline",
10
+ "val_loss": 2.565,
11
+ "perplexity": 13.0
12
+ },
13
+ "2_small_gqa_rope": {
14
+ "description": "small + GQA + RoPE",
15
+ "val_loss": 2.4694972038269043,
16
+ "perplexity": 11.816504064743178
17
+ },
18
+ "3_mini_mla_rope": {
19
+ "description": "mini + MLA + RoPE",
20
+ "val_loss": 2.7306,
21
+ "perplexity": 15.34
22
+ },
23
+ "4_small_mla_rope": {
24
+ "description": "small + MLA + RoPE",
25
+ "val_loss": 2.4905,
26
+ "perplexity": 12.07
27
+ }
28
+ },
29
+ "config": {
30
+ "n_layer": 12,
31
+ "n_head": 12,
32
+ "n_kv_head": 6,
33
+ "hidden_dim": 768,
34
+ "intermediate_dim": 2048,
35
+ "dropout": 0.1,
36
+ "vocab_size": 1024,
37
+ "max_seq_len": 128,
38
+ "use_rope": true,
39
+ "rope_base": 10000.0,
40
+ "attention_type": "gqa",
41
+ "q_latent_dim": null,
42
+ "kv_latent_dim": null
43
+ },
44
+ "created_at": "2026-05-22T10:01:36.637143Z"
45
+ }