AbstractPhil committed on
Commit
ddc9d55
·
verified ·
1 Parent(s): b50dd1c

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +63 -0
config.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "bert_model": "google-bert/bert-large-uncased",
4
+ "hidden_size": 1024,
5
+ "freeze_bert": true,
6
+ "n_memory_tokens": 16,
7
+ "bank_size": 128,
8
+ "anchor_dim": 1024,
9
+ "n_bank_heads": 8,
10
+ "bank_cross_layers": 2,
11
+ "gate_type": "gru",
12
+ "extract_layers": [
13
+ 2,
14
+ 5,
15
+ 8,
16
+ 11,
17
+ 14,
18
+ 17,
19
+ 20,
20
+ 23
21
+ ],
22
+ "layer_fusion": "learned",
23
+ "max_content_tokens": 480,
24
+ "segment_overlap": 64,
25
+ "max_position": 512,
26
+ "n_teachers": 2,
27
+ "teacher_hidden": 1024,
28
+ "cv_target": 0.2
29
+ },
30
+ "training": {
31
+ "max_documents": 50000,
32
+ "max_val_documents": 500,
33
+ "segment_length": 480,
34
+ "segment_overlap": 64,
35
+ "target_chain_segments": 16,
36
+ "max_segments": 16,
37
+ "min_segments": 6,
38
+ "modern_bert_model": "answerdotai/ModernBERT-large",
39
+ "longformer_model": "allenai/longformer-large-4096",
40
+ "modern_max_len": 8192,
41
+ "longformer_max_len": 4096,
42
+ "procrustes_n_samples": 500,
43
+ "epochs": 10,
44
+ "batch_size": 4,
45
+ "lr_bank": 0.002,
46
+ "lr_output": 0.0005,
47
+ "lr_proj": 0.001,
48
+ "min_lr": 1e-06,
49
+ "weight_decay": 0.01,
50
+ "grad_clip": 1.0,
51
+ "warmup_steps": 300,
52
+ "tbptt_segments": 0,
53
+ "modern_weight": 1.0,
54
+ "longformer_weight": 0.5,
55
+ "cv_weight": 0.05,
56
+ "temperature": 0.07,
57
+ "checkpoint_dir": "/home/claude/deep_bert_v3_checkpoints",
58
+ "tensorboard_dir": "/home/claude/deep_bert_v3_tb",
59
+ "log_every": 20,
60
+ "eval_every": 200,
61
+ "save_every_epoch": true
62
+ }
63
+ }