Upload checkpoints/phase2_e04/config.json with huggingface_hub

Browse files

Files changed (1) hide show

checkpoints/phase2_e04/config.json +89 -0

checkpoints/phase2_e04/config.json ADDED Viewed

	@@ -0,0 +1,89 @@

+{
+  "model": {
+    "clip_model": "openai/clip-vit-large-patch14",
+    "clip_hidden": 768,
+    "clip_layers": 12,
+    "clip_max_tokens": 77,
+    "freeze_clip": true,
+    "n_memory_tokens": 8,
+    "bank_size": 64,
+    "anchor_dim": 768,
+    "n_bank_heads": 8,
+    "bank_cross_layers": 2,
+    "gate_type": "gru",
+    "extract_layers": [
+      1,
+      3,
+      5,
+      7,
+      9,
+      11
+    ],
+    "layer_fusion": "learned",
+    "max_content_tokens": 18,
+    "segment_overlap": 4,
+    "max_segments": 32,
+    "cv_target": 0.2,
+    "sequence_output": true,
+    "sequence_len": 77,
+    "sequence_recon_layers": 2,
+    "sequence_recon_heads": 8,
+    "collect_content_tokens": true,
+    "max_content_positions": 256,
+    "teacher_model": "answerdotai/ModernBERT-large",
+    "teacher_hidden": 1024,
+    "return_dict": true,
+    "output_hidden_states": false,
+    "dtype": null,
+    "chunk_size_feed_forward": 0,
+    "is_encoder_decoder": false,
+    "architectures": null,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "problem_type": null,
+    "_name_or_path": "",
+    "transformers_version": "5.0.0",
+    "model_type": "memory_clip_seq",
+    "output_attentions": false
+  },
+  "training": {
+    "max_train_samples": 50000,
+    "max_val_samples": 2000,
+    "min_caption_length": 100,
+    "phase1_epochs": 5,
+    "phase1_lr_seq": 0.002,
+    "phase1_lr_proj": 0.001,
+    "phase2_epochs": 5,
+    "phase2_lr_bank": 0.0005,
+    "phase2_lr_output": 0.0002,
+    "phase2_lr_proj": 0.0005,
+    "phase2_lr_seq": 0.001,
+    "batch_size": 64,
+    "min_lr": 1e-06,
+    "weight_decay": 0.01,
+    "grad_clip": 1.0,
+    "warmup_steps": 200,
+    "modern_weight": 1.0,
+    "procrustes_weight": 0.3,
+    "cv_weight": 0.05,
+    "temperature": 0.07,
+    "sequence_weight": 1.0,
+    "sequence_cosine_weight": 0.5,
+    "modern_max_len": 4096,
+    "procrustes_n_samples": 300,
+    "v1_checkpoint": "",
+    "v1_repo_id": "AbstractPhil/geolip-clip-vit-large-patch14-ctx576",
+    "v1_filename": "model.safetensors",
+    "checkpoint_dir": "/home/claude/memory_clip_seq_checkpoints",
+    "tensorboard_dir": "/home/claude/memory_clip_seq_tb",
+    "metrics_file": "/home/claude/memory_clip_seq_checkpoints/metrics.json",
+    "log_every": 20,
+    "eval_every": 200
+  }
+}