# Stella_Gte_diff_seed / config.toml
# DornierDo17's picture
# Upload folder using huggingface_hub
# a8d47f4 verified
# Flat run configuration: every key lives in the root table. The comments
# below group related keys; anything not evident from the values themselves
# is marked as an assumption to confirm against the code that loads this file.
#
# Random seeds. The general and sampling seeds are both 6, while the
# train/validation dataset seeds differ (11 and 43).
seed = 6
sampling_seed = 6
train_dataset_seed = 11
val_dataset_seed = 43
# Data, embedding and numeric setup.
# NOTE(review): "unsup"/"sup" presumably name the two embedding models being
# paired ("stella" and "gte") — confirm against the loader.
normalize_embeddings = true
mixed_precision = "bf16"
weight_init = "kaiming"
dataset = "nq"
# presumably measured in tokens — TODO confirm
max_seq_length = 32
unsup_emb = "stella"
sup_emb = "gte"
n_embs_per_batch = 1
# NOTE(review): finetune_mode is false even though load_dir is set further
# down — verify whether load_dir is read in this mode.
finetune_mode = false
noise_level = 0.0
# Model architecture. All three widths (adapter, hidden, transform) are 1024.
style = "res_mlp"
norm_style = "batch"
depth = 3
transform_depth = 4
d_adapter = 1024
d_hidden = 1024
d_transform = 1024
use_small_output_adapters = false
use_residual_adapters = true
# Adversarial setup; "disc" presumably abbreviates "discriminator" — confirm.
gan_style = "least_squares"
disc_depth = 5
disc_dim = 1024
use_residual = true
# Optimisation. With gradient_accumulation_steps = 1 there is no accumulation
# beyond a single batch of size `bs`.
bs = 256
gradient_accumulation_steps = 1
# disc_lr (set further down) is half this value.
lr = 2e-5
no_scheduler = true
max_grad_norm = 1.0
# Loss-term weights. Two terms (reverse_rec, cc_rec) are weighted 0.0, which
# presumably disables them — confirm in the loss code. The cc_trans and cc_vsp
# terms carry the largest weights (10.0).
loss_coefficient_reverse_rec = 0.0
loss_coefficient_rec = 1.0
loss_coefficient_vsp = 1.0
loss_coefficient_cc_trans = 10.0
loss_coefficient_cc_rec = 0.0
loss_coefficient_cc_vsp = 10.0
loss_coefficient_r1_penalty = 0.01
# Warm-up and stopping criteria.
# NOTE(review): units of warmup_length (steps vs. samples) are not visible
# here. min_epochs (80) is close to the total `epochs` (100) set below, so
# patience-based stopping presumably can only trigger late in the run — confirm.
warmup_length = 2000
patience = 20
min_delta = 0.0
min_epochs = 80
# Discriminator/generator optimisation and loss weights.
# NOTE(review): eps is presumably an optimizer epsilon and smooth a
# label-smoothing target — both are assumptions; verify against usage.
disc_lr = 1e-5
eps = 6.25e-10
smooth = 0.9
loss_coefficient_disc = 1.0
loss_coefficient_gen = 1.0
loss_coefficient_latent_gen = 1.0
# Weighted 0.0, like the two zeroed loss terms above.
loss_coefficient_similarity_gen = 0.0
# Validation / evaluation sizes. val_size is exactly 4 * val_bs, and
# top_k_size * top_k_batches also equals 4096.
val_size = 4096
val_bs = 1024
top_k_size = 1024
top_k_batches = 4
k = 16
heatmap_size = 64
# Experiment tracking. wandb_name encodes the same values as `epochs` and
# `num_points` below ("e:100,n:400000"); keep them in sync when editing.
use_wandb = true
wandb_project = "unsupervised_disc"
wandb_name = "e:100,n:400000"
# Checkpoint paths. The load_dir name encodes a different run
# ("n:100000,e:10,...") from this one. TOML performs no interpolation, so the
# "{}" in save_dir is a literal placeholder that the consuming code must fill
# (presumably with the run name — confirm).
load_dir = "./finetuning_unsupervised/n:100000,e:10,s:n_double,d:4,d:4,d:64/"
save_dir = "./finetuning_unsupervised/{}/"
force_dump = true
# Run length and dataset size.
epochs = 100
num_points = 400000
# Parameter counts. NOTE(review): these look like values recorded by the
# training code rather than tunable settings — confirm before editing by hand.
# num_disc_params and num_sup_disc_params are identical (3943425).
num_params = 21004800
num_disc_params = 3943425
num_sup_disc_params = 3943425
num_latent_disc_params = 4205569
num_similarity_disc_params = 3419137