AbstractPhil committed on
Commit
55fbc3d
·
verified ·
1 Parent(s): 63fe4a1

Upload checkpoints/phase2_e04/config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. checkpoints/phase2_e04/config.json +89 -0
checkpoints/phase2_e04/config.json ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{
  "model": {
    "clip_model": "openai/clip-vit-large-patch14",
    "clip_hidden": 768,
    "clip_layers": 12,
    "clip_max_tokens": 77,
    "freeze_clip": true,
    "n_memory_tokens": 8,
    "bank_size": 64,
    "anchor_dim": 768,
    "n_bank_heads": 8,
    "bank_cross_layers": 2,
    "gate_type": "gru",
    "extract_layers": [
      1,
      3,
      5,
      7,
      9,
      11
    ],
    "layer_fusion": "learned",
    "max_content_tokens": 18,
    "segment_overlap": 4,
    "max_segments": 32,
    "cv_target": 0.2,
    "sequence_output": true,
    "sequence_len": 77,
    "sequence_recon_layers": 2,
    "sequence_recon_heads": 8,
    "collect_content_tokens": true,
    "max_content_positions": 256,
    "teacher_model": "answerdotai/ModernBERT-large",
    "teacher_hidden": 1024,
    "return_dict": true,
    "output_hidden_states": false,
    "dtype": null,
    "chunk_size_feed_forward": 0,
    "is_encoder_decoder": false,
    "architectures": null,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "problem_type": null,
    "_name_or_path": "",
    "transformers_version": "5.0.0",
    "model_type": "memory_clip_seq",
    "output_attentions": false
  },
  "training": {
    "max_train_samples": 50000,
    "max_val_samples": 2000,
    "min_caption_length": 100,
    "phase1_epochs": 5,
    "phase1_lr_seq": 0.002,
    "phase1_lr_proj": 0.001,
    "phase2_epochs": 5,
    "phase2_lr_bank": 0.0005,
    "phase2_lr_output": 0.0002,
    "phase2_lr_proj": 0.0005,
    "phase2_lr_seq": 0.001,
    "batch_size": 64,
    "min_lr": 1e-06,
    "weight_decay": 0.01,
    "grad_clip": 1.0,
    "warmup_steps": 200,
    "modern_weight": 1.0,
    "procrustes_weight": 0.3,
    "cv_weight": 0.05,
    "temperature": 0.07,
    "sequence_weight": 1.0,
    "sequence_cosine_weight": 0.5,
    "modern_max_len": 4096,
    "procrustes_n_samples": 300,
    "v1_checkpoint": "",
    "v1_repo_id": "AbstractPhil/geolip-clip-vit-large-patch14-ctx576",
    "v1_filename": "model.safetensors",
    "checkpoint_dir": "/home/claude/memory_clip_seq_checkpoints",
    "tensorboard_dir": "/home/claude/memory_clip_seq_tb",
    "metrics_file": "/home/claude/memory_clip_seq_checkpoints/metrics.json",
    "log_every": 20,
    "eval_every": 200
  }
}