LaughingLogits committed on
Commit
9895e62
·
verified ·
1 Parent(s): 7111c15

Upload APMAE

Browse files
Files changed (2) hide show
  1. config.json +36 -1
  2. model.safetensors +1 -1
config.json CHANGED
@@ -1,12 +1,23 @@
1
  {
 
2
  "architectures": [
3
  "APMAE"
4
  ],
 
 
 
 
 
 
 
 
 
5
  "decoder_dim": 512,
6
  "decoder_dim_head": 64,
7
  "decoder_heads": 8,
8
  "decoder_layers": 8,
9
  "decoder_mlp_dim": 2048,
 
10
  "encoder_dim": 512,
11
  "encoder_dim_head": 64,
12
  "encoder_dropout": 0.0,
@@ -15,13 +26,37 @@
15
  "encoder_layers": 24,
16
  "encoder_mlp_dim": 2048,
17
  "encoder_pool": "cls",
 
 
 
 
 
 
 
18
  "hidden_act": "gelu",
 
 
 
19
  "layer_norm_eps": 1e-12,
20
  "mask_ratio": 0.5,
 
21
  "max_length": 256,
 
22
  "model_type": "ap_mae",
 
 
23
  "patch_size": 32,
24
  "qkv_bias": false,
 
 
 
 
 
 
25
  "torch_dtype": "float32",
26
- "transformers_version": "4.42.4"
 
 
 
 
27
  }
 
1
  {
2
+ "_name_or_path": "AISE-TUDelft/ViTFT_256t_3bsc2_20240725",
3
  "architectures": [
4
  "APMAE"
5
  ],
6
+ "attention_scaler": "log_normalize",
7
+ "base_learning_rate": 0.00015,
8
+ "batch_size": 60,
9
+ "correct_only": true,
10
+ "dataset_location": "LaughingLogits/Stackless_Java_V2",
11
+ "dataset_name": "JavaNearDedupFull",
12
+ "dataset_split": "train",
13
+ "dataset_split_seed": 42,
14
+ "decoder_device": "cuda:0",
15
  "decoder_dim": 512,
16
  "decoder_dim_head": 64,
17
  "decoder_heads": 8,
18
  "decoder_layers": 8,
19
  "decoder_mlp_dim": 2048,
20
+ "encoder_device": "cuda:0",
21
  "encoder_dim": 512,
22
  "encoder_dim_head": 64,
23
  "encoder_dropout": 0.0,
 
26
  "encoder_layers": 24,
27
  "encoder_mlp_dim": 2048,
28
  "encoder_pool": "cls",
29
+ "head_selection_strategy": [
30
+ "layerwise",
31
+ 0.25
32
+ ],
33
+ "hf_datasets_cache": "./huggingface/datasets",
34
+ "hf_home": "./huggingface",
35
+ "hf_transformers_cache": "./huggingface/models",
36
  "hidden_act": "gelu",
37
+ "initial_seed": 45,
38
+ "iter_loader_workers": 8,
39
+ "lang": "java",
40
  "layer_norm_eps": 1e-12,
41
  "mask_ratio": 0.5,
42
+ "max_epochs": 1,
43
  "max_length": 256,
44
+ "min_length": 256,
45
  "model_type": "ap_mae",
46
+ "num_channels": 1,
47
+ "num_classes": 2,
48
  "patch_size": 32,
49
  "qkv_bias": false,
50
+ "queries": [
51
+ "random"
52
+ ],
53
+ "save_model_frequency": 15000,
54
+ "target_model_device": "cuda:0",
55
+ "target_model_name": "bigcode/starcoder2-3b",
56
  "torch_dtype": "float32",
57
+ "train_batches": 150000,
58
+ "transformers_version": "4.48.1",
59
+ "val_batches": 120,
60
+ "visualize_frequency": 2000,
61
+ "vitmae_preload_name": null
62
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1b115bf013e6ccb0ecafd5bcfe4edd7df10d87c3c25897d9951dd113ed5a018
3
  size 508504960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bacd20bb38580bda9601d8c0d38e16f996868f40edea112a56488747ce4f40fa
3
  size 508504960