AbstractPhil's picture
Upload checkpoints/phase2_e05/config.json with huggingface_hub
5c9568e verified
{
"model": {
"clip_model": "openai/clip-vit-large-patch14",
"clip_hidden": 768,
"clip_layers": 12,
"clip_max_tokens": 77,
"freeze_clip": true,
"n_memory_tokens": 8,
"bank_size": 64,
"anchor_dim": 768,
"n_bank_heads": 8,
"bank_cross_layers": 2,
"gate_type": "gru",
"extract_layers": [
1,
3,
5,
7,
9,
11
],
"layer_fusion": "learned",
"max_content_tokens": 18,
"segment_overlap": 4,
"max_segments": 32,
"cv_target": 0.2,
"sequence_output": true,
"sequence_len": 77,
"sequence_recon_layers": 2,
"sequence_recon_heads": 8,
"collect_content_tokens": true,
"max_content_positions": 256,
"teacher_model": "answerdotai/ModernBERT-large",
"teacher_hidden": 1024,
"return_dict": true,
"output_hidden_states": false,
"dtype": null,
"chunk_size_feed_forward": 0,
"is_encoder_decoder": false,
"architectures": null,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"problem_type": null,
"_name_or_path": "",
"transformers_version": "5.0.0",
"model_type": "memory_clip_seq",
"output_attentions": false
},
"training": {
"max_train_samples": 50000,
"max_val_samples": 2000,
"min_caption_length": 100,
"phase1_epochs": 5,
"phase1_lr_seq": 0.002,
"phase1_lr_proj": 0.001,
"phase2_epochs": 5,
"phase2_lr_bank": 0.0005,
"phase2_lr_output": 0.0002,
"phase2_lr_proj": 0.0005,
"phase2_lr_seq": 0.001,
"batch_size": 64,
"min_lr": 1e-06,
"weight_decay": 0.01,
"grad_clip": 1.0,
"warmup_steps": 200,
"modern_weight": 1.0,
"procrustes_weight": 0.3,
"cv_weight": 0.05,
"temperature": 0.07,
"sequence_weight": 1.0,
"sequence_cosine_weight": 0.5,
"modern_max_len": 4096,
"procrustes_n_samples": 300,
"v1_checkpoint": "",
"v1_repo_id": "AbstractPhil/geolip-clip-vit-large-patch14-ctx576",
"v1_filename": "model.safetensors",
"checkpoint_dir": "/home/claude/memory_clip_seq_checkpoints",
"tensorboard_dir": "/home/claude/memory_clip_seq_tb",
"metrics_file": "/home/claude/memory_clip_seq_checkpoints/metrics.json",
"log_every": 20,
"eval_every": 200
}
}