---
# Run configuration for the two-tower-retrieval project.
# Keys are kept in alphabetical order; nested sections use 2-space indents.

batch_size: 256
checkpoint_dir: checkpoints
data: data/processed/classic_triplets.parquet
device: cpu

embedding:
  embedding_dim: 64
  type: lookup

encoder:
  arch: mean
  hidden_dim: 128
  tied_weights: true

epochs: 3

huggingface:
  private: false
  push_to_hub: true
  repo_id: mlx7-two-tower-retrieval

# Written with a decimal point on purpose: YAML 1.1 loaders (e.g. PyYAML)
# resolve a bare `1e-3` as the *string* "1e-3", not as a float.
# NOTE(review): same value as optimizer.lr below -- confirm which of the two
# the consumer actually reads and keep them in sync.
learning_rate: 0.001

loss:
  margin: 0.2
  type: triplet

# NOTE(review): same value as tokeniser.max_len below -- presumably these
# must agree; verify against the code that loads this file.
max_sequence_length: 64

optimizer:
  lr: 0.001
  type: adamw

tokeniser:
  max_len: 64
  type: char

use_wandb: true

wandb:
  entity: azuremis
  project: two-tower-retrieval