checkpoints/phase2_e05/config.json · AbstractPhil/geolip-clip-vit-large-patch14-ctx576-seq77 at main

geolip-clip-vit-large-patch14-ctx576-seq77 / checkpoints /phase2_e05 /config.json

Upload checkpoints/phase2_e05/config.json with huggingface_hub

5c9568e verified about 1 month ago

2.32 kB

	{
	"model": {
	"clip_model": "openai/clip-vit-large-patch14",
	"clip_hidden": 768,
	"clip_layers": 12,
	"clip_max_tokens": 77,
	"freeze_clip": true,
	"n_memory_tokens": 8,
	"bank_size": 64,
	"anchor_dim": 768,
	"n_bank_heads": 8,
	"bank_cross_layers": 2,
	"gate_type": "gru",
	"extract_layers": [
	1,
	3,
	5,
	7,
	9,
	11
	],
	"layer_fusion": "learned",
	"max_content_tokens": 18,
	"segment_overlap": 4,
	"max_segments": 32,
	"cv_target": 0.2,
	"sequence_output": true,
	"sequence_len": 77,
	"sequence_recon_layers": 2,
	"sequence_recon_heads": 8,
	"collect_content_tokens": true,
	"max_content_positions": 256,
	"teacher_model": "answerdotai/ModernBERT-large",
	"teacher_hidden": 1024,
	"return_dict": true,
	"output_hidden_states": false,
	"dtype": null,
	"chunk_size_feed_forward": 0,
	"is_encoder_decoder": false,
	"architectures": null,
	"id2label": {
	"0": "LABEL_0",
	"1": "LABEL_1"
	},
	"label2id": {
	"LABEL_0": 0,
	"LABEL_1": 1
	},
	"problem_type": null,
	"_name_or_path": "",
	"transformers_version": "5.0.0",
	"model_type": "memory_clip_seq",
	"output_attentions": false
	},
	"training": {
	"max_train_samples": 50000,
	"max_val_samples": 2000,
	"min_caption_length": 100,
	"phase1_epochs": 5,
	"phase1_lr_seq": 0.002,
	"phase1_lr_proj": 0.001,
	"phase2_epochs": 5,
	"phase2_lr_bank": 0.0005,
	"phase2_lr_output": 0.0002,
	"phase2_lr_proj": 0.0005,
	"phase2_lr_seq": 0.001,
	"batch_size": 64,
	"min_lr": 1e-06,
	"weight_decay": 0.01,
	"grad_clip": 1.0,
	"warmup_steps": 200,
	"modern_weight": 1.0,
	"procrustes_weight": 0.3,
	"cv_weight": 0.05,
	"temperature": 0.07,
	"sequence_weight": 1.0,
	"sequence_cosine_weight": 0.5,
	"modern_max_len": 4096,
	"procrustes_n_samples": 300,
	"v1_checkpoint": "",
	"v1_repo_id": "AbstractPhil/geolip-clip-vit-large-patch14-ctx576",
	"v1_filename": "model.safetensors",
	"checkpoint_dir": "/home/claude/memory_clip_seq_checkpoints",
	"tensorboard_dir": "/home/claude/memory_clip_seq_tb",
	"metrics_file": "/home/claude/memory_clip_seq_checkpoints/metrics.json",
	"log_every": 20,
	"eval_every": 200
	}
	}