Add models
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- README.md +62 -0
- models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/command-log.txt +67 -0
- models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/config.json +60 -0
- models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/eval2da.pkl +3 -0
- models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/eval2da.png +0 -0
- models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/eval2da1.pkl +3 -0
- models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/eval2da1.png +0 -0
- models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/pytorch_model.bin +3 -0
- models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/results/ltl-35-10ap-val-b3/size_hist.png +0 -0
- models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/results/ltl-35-10ap-val-b3/size_hist.svg +1208 -0
- models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/results/ltl-35-10ap-val-b3/summary.json +6 -0
- models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/results/ltl-35-10ap-val-b3/summary.txt +8 -0
- models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/results/ltl-35-10ap-val-b3/trace_times.png +0 -0
- models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/results/ltl-35-val-b3/size_hist.png +0 -0
- models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/results/ltl-35-val-b3/size_hist.svg +1316 -0
- models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/results/ltl-35-val-b3/summary.json +7 -0
- models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/results/ltl-35-val-b3/summary.txt +9 -0
- models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/results/ltl-35-val-b3/trace_times.png +0 -0
- models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/runs/Jan30_11-14-48_as01r5b15/events.out.tfevents.1738232090.as01r5b15.163555.0 +3 -0
- models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/command-log.txt +67 -0
- models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/config.json +55 -0
- models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/eval2da.pkl +3 -0
- models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/eval2da.png +0 -0
- models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/eval2da1.pkl +3 -0
- models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/eval2da1.png +0 -0
- models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/pytorch_model.bin +3 -0
- models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/results/ltl-35-test-b3/size_hist.png +0 -0
- models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/results/ltl-35-test-b3/size_hist.svg +1208 -0
- models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/results/ltl-35-test-b3/summary.json +6 -0
- models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/results/ltl-35-test-b3/summary.txt +8 -0
- models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/results/ltl-35-test-b3/trace_times.png +0 -0
- models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/results/ltl-35-val-b3/size_hist.png +0 -0
- models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/results/ltl-35-val-b3/size_hist.svg +1208 -0
- models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/results/ltl-35-val-b3/summary.json +6 -0
- models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/results/ltl-35-val-b3/summary.txt +8 -0
- models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/results/ltl-35-val-b3/trace_times.png +0 -0
- models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/runs/Jan28_01-12-19_as05r4b20/events.out.tfevents.1738023140.as05r4b20.327756.0 +3 -0
- models-prop/5ap/base-rop-bn0-fn1-ada1-s44/command-log.txt +67 -0
- models-prop/5ap/base-rop-bn0-fn1-ada1-s44/config.json +55 -0
- models-prop/5ap/base-rop-bn0-fn1-ada1-s44/pytorch_model.bin +3 -0
- models-prop/5ap/base-rop-bn0-fn1-ada1-s44/results/ltl-35-val-b3/size_hist.png +0 -0
- models-prop/5ap/base-rop-bn0-fn1-ada1-s44/results/ltl-35-val-b3/size_hist.svg +1316 -0
- models-prop/5ap/base-rop-bn0-fn1-ada1-s44/results/ltl-35-val-b3/summary.json +7 -0
- models-prop/5ap/base-rop-bn0-fn1-ada1-s44/results/ltl-35-val-b3/summary.txt +9 -0
- models-prop/5ap/base-rop-bn0-fn1-ada1-s44/results/ltl-35-val-b3/trace_times.png +0 -0
- models-prop/5ap/base-rop-bn0-fn1-ada1-s44/runs/Jan28_01-12-17_as03r3b29/events.out.tfevents.1738023138.as03r3b29.2568593.0 +3 -0
- models-prop/5ap/d020-rop-bn0-fn1-ada1-s42/command-log.txt +67 -0
- models-prop/5ap/d020-rop-bn0-fn1-ada1-s42/config.json +55 -0
- models-prop/5ap/d020-rop-bn0-fn1-ada1-s42/eval2da.pkl +3 -0
- models-prop/5ap/d020-rop-bn0-fn1-ada1-s42/eval2da.png +0 -0
README.md
CHANGED
|
@@ -1,3 +1,65 @@
|
|
| 1 |
---
|
| 2 |
license: apache-2.0
|
| 3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
license: apache-2.0
|
| 3 |
---
|
| 4 |
+
|
| 5 |
+
# Interchangeable Token Embeddings
|
| 6 |
+
|
| 7 |
+
This repository contains the models for the ICML 2025 paper, [Interchangeable Token Embeddings for Extendable Vocabulary and Alpha-Equivalence](https://arxiv.org/abs/2410.17161).
|
| 8 |
+
|
| 9 |
+
Links:
|
| 10 |
+
1. [arXiv](https://arxiv.org/abs/2410.17161)
|
| 11 |
+
2. [Project Page](https://necrashter.github.io/interchangeable-token-embeddings/)
|
| 12 |
+
3. [Code](https://github.com/necrashter/interchangeable-token-embeddings)
|
| 13 |
+
|
| 14 |
+
## Models
|
| 15 |
+
|
| 16 |
+
Figure 3a (LTL):
|
| 17 |
+
| Model | Path |
|
| 18 |
+
| ----- | ---- |
|
| 19 |
+
| Proposed Method | `models/ltl-5/d005-rop-bn1-fn1-ada1-s42` |
|
| 20 |
+
| Baseline | `models/adacos/nondyn-rope` |
|
| 21 |
+
| Full-Vocabulary | `models/10ap/adacos-nondyn` |
|
| 22 |
+
| Alpha-Renaming | `models/ltl-5/s010-rop-bn1-fn1-ada1-s46` |
|
| 23 |
+
|
| 24 |
+
Figure 3b (Propositional Logic):
|
| 25 |
+
| Model | Path |
|
| 26 |
+
| ----- | ---- |
|
| 27 |
+
| Proposed Method | `models-prop/5ap/d020-rop-bn1-fn1-ada1-s44` |
|
| 28 |
+
| Baseline | `models-prop/5ap/0000-rop-bn0-fn1-ada1-s42` |
|
| 29 |
+
| Full-Vocabulary | `models-prop/10ap/0000-rop-bn0-fn1-ada1-s43` |
|
| 30 |
+
| Alpha-Renaming | `models-prop/5ap/s010-rop-bn1-fn1-ada1-s44` |
|
| 31 |
+
|
| 32 |
+
Figure 5a (Ablation - LTL):
|
| 33 |
+
| Model | Path |
|
| 34 |
+
| ----- | ---- |
|
| 35 |
+
| w/o AdaCos | `models/ltl-5/d005-rop-bn1-fn1-ada0-s42` |
|
| 36 |
+
| w/o $f_{fn}$ & AdaCos | `models/ltl-5/d005-rop-bn1-fn0-ada0-s42` |
|
| 37 |
+
| w/o $f_{bn}$ | `models/ltl-5/d005-rop-bn0-fn1-ada1-s42` |
|
| 38 |
+
|
| 39 |
+
Figure 5b (Ablation - Propositional Logic):
|
| 40 |
+
| Model | Path |
|
| 41 |
+
| ----- | ---- |
|
| 42 |
+
| w/o AdaCos | `models-prop/5ap/d020-rop-bn1-fn1-ada0-s43` |
|
| 43 |
+
| w/o $f_{fn}$ & AdaCos | `models-prop/5ap/d020-rop-bn1-fn0-ada0-s44` |
|
| 44 |
+
| w/o $f_{bn}$ | `models-prop/5ap/d020-rop-bn0-fn1-ada1-s42` |
|
| 45 |
+
|
| 46 |
+
### Perturbation Experiments
|
| 47 |
+
|
| 48 |
+
Tables 2 and 7 (LTL):
|
| 49 |
+
| Model | Path |
|
| 50 |
+
| ----- | ---- |
|
| 51 |
+
| Normal Baseline | `models/adacos/nondyn-rope` |
|
| 52 |
+
| Perturbed Baseline | `models/adacos/nondyn-rope-perturbed` |
|
| 53 |
+
| Perturbed Alpha-Renaming | `models/adacos/s005-rope-perturbed-s42` |
|
| 54 |
+
| Perturbed Proposed | `models/adacos/diagbor4-rope-perturbed` |
|
| 55 |
+
| Limited Baseline | `models/adacos/nondyn-rope-limited` |
|
| 56 |
+
| Limited Alpha-Renaming | `models/adacos/s005-rope-limited-s42` |
|
| 57 |
+
| Limited Proposed | `models/adacos/diagbor4-rope-limited` |
|
| 58 |
+
|
| 59 |
+
Table 8 (Propositional Logic):
|
| 60 |
+
| Model | Path |
|
| 61 |
+
| ----- | ---- |
|
| 62 |
+
| Normal Baseline | `models-prop/5ap/0000-rop-bn0-fn1-ada1-s42` |
|
| 63 |
+
| Perturbed Baseline | `models-prop/5ap/base-rop-bn0-fn1-ada1-s44` |
|
| 64 |
+
| Perturbed Alpha-Renaming | `models-prop/5ap/s005-rop-bn1-fn1-ada1-s43` |
|
| 65 |
+
| Perturbed Proposed | `models-prop/5ap/d020-rop-bn1-fn1-ada1-s42` |
|
models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/command-log.txt
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Number of parameters: 2_904_336
|
| 2 |
+
Arguments:
|
| 3 |
+
{
|
| 4 |
+
"model_path": "models-prop/10ap/0000-rop-bn0-fn1-ada1-s43",
|
| 5 |
+
"device": "cuda",
|
| 6 |
+
"seed": 43,
|
| 7 |
+
"subparser": "train-ted",
|
| 8 |
+
"data_dir": "data-prop",
|
| 9 |
+
"ds_name": "ltl-35-10ap",
|
| 10 |
+
"max_trace_length": -1,
|
| 11 |
+
"min_aps": null,
|
| 12 |
+
"max_aps": null,
|
| 13 |
+
"exact_aps": null,
|
| 14 |
+
"vocab_aps": null,
|
| 15 |
+
"val_split": "val",
|
| 16 |
+
"merged_vocab": true,
|
| 17 |
+
"merge_tokens": "all",
|
| 18 |
+
"dynamic_aps": false,
|
| 19 |
+
"learning_rate": 0.001,
|
| 20 |
+
"lr_scheduler_type": "cosine",
|
| 21 |
+
"warmup_steps": 1000,
|
| 22 |
+
"weight_decay": 0.1,
|
| 23 |
+
"adam_beta1": 0.9,
|
| 24 |
+
"adam_beta2": 0.95,
|
| 25 |
+
"max_grad_norm": 1.0,
|
| 26 |
+
"epochs": 64,
|
| 27 |
+
"batch_size": 1024,
|
| 28 |
+
"grad_acc_steps": 1,
|
| 29 |
+
"logging_steps": 500,
|
| 30 |
+
"eval_steps": 3000,
|
| 31 |
+
"train_max_samples": null,
|
| 32 |
+
"val_max_samples": 10000,
|
| 33 |
+
"trace_max_samples": 100,
|
| 34 |
+
"dry": false,
|
| 35 |
+
"eval": false,
|
| 36 |
+
"resume": false,
|
| 37 |
+
"loss_fct": "adacos",
|
| 38 |
+
"num_heads": 6,
|
| 39 |
+
"d_embed_enc": 132,
|
| 40 |
+
"d_embed_dec": null,
|
| 41 |
+
"d_ff": 512,
|
| 42 |
+
"ff_activation": "relu",
|
| 43 |
+
"num_layers": 6,
|
| 44 |
+
"dropout": 0.1,
|
| 45 |
+
"layer_norm_eps": 1e-06,
|
| 46 |
+
"enc_pe": "sinusoid",
|
| 47 |
+
"dec_pe": "rope",
|
| 48 |
+
"no_pe_cross_keys": false,
|
| 49 |
+
"tree_pos_enc": true,
|
| 50 |
+
"d_ap": 0,
|
| 51 |
+
"ap_embed": "randn",
|
| 52 |
+
"embed_base_normalization": "disabled",
|
| 53 |
+
"embed_ap_normalization": "disabled",
|
| 54 |
+
"embed_final_normalization": "l2",
|
| 55 |
+
"feature_normalization": "l2",
|
| 56 |
+
"embed_scaling": "sqrtd",
|
| 57 |
+
"shuffle_aps": null,
|
| 58 |
+
"action": "train",
|
| 59 |
+
"model_type": "ted",
|
| 60 |
+
"decoder_only": false
|
| 61 |
+
}
|
| 62 |
+
Using CUDA device: NVIDIA H100
|
| 63 |
+
Python version: 3.10.13 (main, Sep 11 2023, 13:44:35) [GCC 11.2.0]
|
| 64 |
+
numpy version: 1.26.2
|
| 65 |
+
torch version: 2.1.1+cu121
|
| 66 |
+
transformers version: 4.39.3
|
| 67 |
+
|
models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/config.json
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"vocab": {
|
| 3 |
+
"aps": [
|
| 4 |
+
"a",
|
| 5 |
+
"b",
|
| 6 |
+
"c",
|
| 7 |
+
"d",
|
| 8 |
+
"e",
|
| 9 |
+
"f",
|
| 10 |
+
"g",
|
| 11 |
+
"h",
|
| 12 |
+
"i",
|
| 13 |
+
"j"
|
| 14 |
+
],
|
| 15 |
+
"consts": [
|
| 16 |
+
"0",
|
| 17 |
+
"1"
|
| 18 |
+
],
|
| 19 |
+
"trace_ops": [],
|
| 20 |
+
"ltl_ops": [
|
| 21 |
+
"!",
|
| 22 |
+
"&",
|
| 23 |
+
"|",
|
| 24 |
+
"=",
|
| 25 |
+
"^"
|
| 26 |
+
],
|
| 27 |
+
"merge_tokens": "all",
|
| 28 |
+
"dynamic_aps": false,
|
| 29 |
+
"use_start_token": true,
|
| 30 |
+
"use_pad_token": true,
|
| 31 |
+
"use_eos_token": true
|
| 32 |
+
},
|
| 33 |
+
"d_embed_enc": 132,
|
| 34 |
+
"d_embed_dec": 132,
|
| 35 |
+
"d_ff": 512,
|
| 36 |
+
"ff_activation": "relu",
|
| 37 |
+
"dropout": 0.1,
|
| 38 |
+
"num_heads": 6,
|
| 39 |
+
"num_layers": 6,
|
| 40 |
+
"layer_norm_eps": 1e-06,
|
| 41 |
+
"merged_embedder": {
|
| 42 |
+
"tie_embeddings": true,
|
| 43 |
+
"pad_vocab_size_multiple": 8,
|
| 44 |
+
"d_ap": 0,
|
| 45 |
+
"ap_embed": "randn",
|
| 46 |
+
"base_normalization": "disabled",
|
| 47 |
+
"ap_normalization": "disabled",
|
| 48 |
+
"final_normalization": "l2",
|
| 49 |
+
"feature_normalization": "l2",
|
| 50 |
+
"embed_scaling": "sqrtd",
|
| 51 |
+
"shuffle_aps": null
|
| 52 |
+
},
|
| 53 |
+
"max_encode_length": 1024,
|
| 54 |
+
"max_decode_length": 1024,
|
| 55 |
+
"tree_pos_enc": true,
|
| 56 |
+
"datatype": "float32",
|
| 57 |
+
"enc_pe": "sinusoid",
|
| 58 |
+
"dec_pe": "rope",
|
| 59 |
+
"no_pe_cross_keys": false
|
| 60 |
+
}
|
models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/eval2da.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ae22f58ed2164dbda8a13f51c013c5c39e50583c3415e795d5c017144064187
|
| 3 |
+
size 2682581
|
models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/eval2da.png
ADDED
|
models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/eval2da1.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4772a6943f13f92b797149e7e6f206c3ee4c411cf7681fd32fc7be3f669330ae
|
| 3 |
+
size 3218204
|
models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/eval2da1.png
ADDED
|
models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91f72633f1aa61db03ec9e1aeeee6e2179f700dda5f7a8cdfc1780dc7af73a49
|
| 3 |
+
size 11717438
|
models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/results/ltl-35-10ap-val-b3/size_hist.png
ADDED
|
models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/results/ltl-35-10ap-val-b3/size_hist.svg
ADDED
|
|
models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/results/ltl-35-10ap-val-b3/summary.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"exact match": 29523,
|
| 3 |
+
"semantically correct": 58346,
|
| 4 |
+
"incorrect": 12131,
|
| 5 |
+
"correct": 87869
|
| 6 |
+
}
|
models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/results/ltl-35-10ap-val-b3/summary.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Command Line Arguments:
|
| 2 |
+
--model-path=models-prop/10ap/0000-rop-bn0-fn1-ada1-s43 --seed=42 eval-ted --ds-name=ltl-35-10ap --beam-size=3
|
| 3 |
+
|
| 4 |
+
EVALUATION SUMMARY
|
| 5 |
+
Correct: 87869/100000, 87.869000%
|
| 6 |
+
Exact match: 29523/100000, 29.523000%
|
| 7 |
+
Semantically correct: 58346/100000, 58.346000%
|
| 8 |
+
Incorrect: 12131/100000, 12.131000%
|
models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/results/ltl-35-10ap-val-b3/trace_times.png
ADDED
|
models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/results/ltl-35-val-b3/size_hist.png
ADDED
|
models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/results/ltl-35-val-b3/size_hist.svg
ADDED
|
|
models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/results/ltl-35-val-b3/summary.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"exact match": 41493,
|
| 3 |
+
"semantically correct": 49374,
|
| 4 |
+
"incorrect": 9132,
|
| 5 |
+
"invalid": 1,
|
| 6 |
+
"correct": 90867
|
| 7 |
+
}
|
models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/results/ltl-35-val-b3/summary.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Command Line Arguments:
|
| 2 |
+
--model-path=models-prop/10ap/0000-rop-bn0-fn1-ada1-s43 eval-ted --beam-size=3
|
| 3 |
+
|
| 4 |
+
EVALUATION SUMMARY
|
| 5 |
+
Correct: 90867/100000, 90.867000%
|
| 6 |
+
Exact match: 41493/100000, 41.493000%
|
| 7 |
+
Semantically correct: 49374/100000, 49.374000%
|
| 8 |
+
Incorrect: 9132/100000, 9.132000%
|
| 9 |
+
Invalid: 1/100000, 0.001000%
|
models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/results/ltl-35-val-b3/trace_times.png
ADDED
|
models-prop/10ap/0000-rop-bn0-fn1-ada1-s43/runs/Jan30_11-14-48_as01r5b15/events.out.tfevents.1738232090.as01r5b15.163555.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd098aa2760321cfd10e6d4a2d47755cc572b65b170aa2bca67dd13dff15b3fe
|
| 3 |
+
size 44797
|
models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/command-log.txt
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Number of parameters: 2_903_676
|
| 2 |
+
Arguments:
|
| 3 |
+
{
|
| 4 |
+
"model_path": "models-prop/5ap/0000-rop-bn0-fn1-ada1-s42",
|
| 5 |
+
"device": "cuda",
|
| 6 |
+
"seed": 42,
|
| 7 |
+
"subparser": "train-ted",
|
| 8 |
+
"data_dir": "data-prop",
|
| 9 |
+
"ds_name": "ltl-35",
|
| 10 |
+
"max_trace_length": -1,
|
| 11 |
+
"min_aps": null,
|
| 12 |
+
"max_aps": null,
|
| 13 |
+
"exact_aps": null,
|
| 14 |
+
"vocab_aps": null,
|
| 15 |
+
"val_split": "val",
|
| 16 |
+
"merged_vocab": true,
|
| 17 |
+
"merge_tokens": "all",
|
| 18 |
+
"dynamic_aps": false,
|
| 19 |
+
"learning_rate": 0.001,
|
| 20 |
+
"lr_scheduler_type": "cosine",
|
| 21 |
+
"warmup_steps": 1000,
|
| 22 |
+
"weight_decay": 0.1,
|
| 23 |
+
"adam_beta1": 0.9,
|
| 24 |
+
"adam_beta2": 0.95,
|
| 25 |
+
"max_grad_norm": 1.0,
|
| 26 |
+
"epochs": 64,
|
| 27 |
+
"batch_size": 1024,
|
| 28 |
+
"grad_acc_steps": 1,
|
| 29 |
+
"logging_steps": 500,
|
| 30 |
+
"eval_steps": 3000,
|
| 31 |
+
"train_max_samples": null,
|
| 32 |
+
"val_max_samples": 10000,
|
| 33 |
+
"trace_max_samples": 100,
|
| 34 |
+
"dry": false,
|
| 35 |
+
"eval": false,
|
| 36 |
+
"resume": false,
|
| 37 |
+
"loss_fct": "adacos",
|
| 38 |
+
"num_heads": 6,
|
| 39 |
+
"d_embed_enc": 132,
|
| 40 |
+
"d_embed_dec": null,
|
| 41 |
+
"d_ff": 512,
|
| 42 |
+
"ff_activation": "relu",
|
| 43 |
+
"num_layers": 6,
|
| 44 |
+
"dropout": 0.1,
|
| 45 |
+
"layer_norm_eps": 1e-06,
|
| 46 |
+
"enc_pe": "sinusoid",
|
| 47 |
+
"dec_pe": "rope",
|
| 48 |
+
"no_pe_cross_keys": false,
|
| 49 |
+
"tree_pos_enc": true,
|
| 50 |
+
"d_ap": 0,
|
| 51 |
+
"ap_embed": "randn",
|
| 52 |
+
"embed_base_normalization": "disabled",
|
| 53 |
+
"embed_ap_normalization": "disabled",
|
| 54 |
+
"embed_final_normalization": "l2",
|
| 55 |
+
"feature_normalization": "l2",
|
| 56 |
+
"embed_scaling": "sqrtd",
|
| 57 |
+
"shuffle_aps": null,
|
| 58 |
+
"action": "train",
|
| 59 |
+
"model_type": "ted",
|
| 60 |
+
"decoder_only": false
|
| 61 |
+
}
|
| 62 |
+
Using CUDA device: NVIDIA H100
|
| 63 |
+
Python version: 3.10.13 (main, Sep 11 2023, 13:44:35) [GCC 11.2.0]
|
| 64 |
+
numpy version: 1.26.2
|
| 65 |
+
torch version: 2.1.1+cu121
|
| 66 |
+
transformers version: 4.39.3
|
| 67 |
+
|
models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/config.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"vocab": {
|
| 3 |
+
"aps": [
|
| 4 |
+
"a",
|
| 5 |
+
"b",
|
| 6 |
+
"c",
|
| 7 |
+
"d",
|
| 8 |
+
"e"
|
| 9 |
+
],
|
| 10 |
+
"consts": [
|
| 11 |
+
"0",
|
| 12 |
+
"1"
|
| 13 |
+
],
|
| 14 |
+
"trace_ops": [],
|
| 15 |
+
"ltl_ops": [
|
| 16 |
+
"!",
|
| 17 |
+
"&",
|
| 18 |
+
"|",
|
| 19 |
+
"=",
|
| 20 |
+
"^"
|
| 21 |
+
],
|
| 22 |
+
"merge_tokens": "all",
|
| 23 |
+
"dynamic_aps": false,
|
| 24 |
+
"use_start_token": true,
|
| 25 |
+
"use_pad_token": true,
|
| 26 |
+
"use_eos_token": true
|
| 27 |
+
},
|
| 28 |
+
"d_embed_enc": 132,
|
| 29 |
+
"d_embed_dec": 132,
|
| 30 |
+
"d_ff": 512,
|
| 31 |
+
"ff_activation": "relu",
|
| 32 |
+
"dropout": 0.1,
|
| 33 |
+
"num_heads": 6,
|
| 34 |
+
"num_layers": 6,
|
| 35 |
+
"layer_norm_eps": 1e-06,
|
| 36 |
+
"merged_embedder": {
|
| 37 |
+
"tie_embeddings": true,
|
| 38 |
+
"pad_vocab_size_multiple": 8,
|
| 39 |
+
"d_ap": 0,
|
| 40 |
+
"ap_embed": "randn",
|
| 41 |
+
"base_normalization": "disabled",
|
| 42 |
+
"ap_normalization": "disabled",
|
| 43 |
+
"final_normalization": "l2",
|
| 44 |
+
"feature_normalization": "l2",
|
| 45 |
+
"embed_scaling": "sqrtd",
|
| 46 |
+
"shuffle_aps": null
|
| 47 |
+
},
|
| 48 |
+
"max_encode_length": 1024,
|
| 49 |
+
"max_decode_length": 1024,
|
| 50 |
+
"tree_pos_enc": true,
|
| 51 |
+
"datatype": "float32",
|
| 52 |
+
"enc_pe": "sinusoid",
|
| 53 |
+
"dec_pe": "rope",
|
| 54 |
+
"no_pe_cross_keys": false
|
| 55 |
+
}
|
models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/eval2da.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7bd6c2548b27305d6dbe17bac2e7c913388bb22e33c371b5e7318395a5c8854e
|
| 3 |
+
size 1041085
|
models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/eval2da.png
ADDED
|
models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/eval2da1.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6a0b2e7b6aa98a075b70b8031f36fa48ffe6b58777437a44f405e192ab608ee
|
| 3 |
+
size 1110745
|
models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/eval2da1.png
ADDED
|
models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f9de0a6bbd108c71c720a4706b4dd1b2877fc2a70183cb28da63cd873d80fa5
|
| 3 |
+
size 11714750
|
models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/results/ltl-35-test-b3/size_hist.png
ADDED
|
models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/results/ltl-35-test-b3/size_hist.svg
ADDED
|
|
models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/results/ltl-35-test-b3/summary.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"semantically correct": 37680,
|
| 3 |
+
"exact match": 57941,
|
| 4 |
+
"incorrect": 4379,
|
| 5 |
+
"correct": 95621
|
| 6 |
+
}
|
models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/results/ltl-35-test-b3/summary.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Command Line Arguments:
|
| 2 |
+
--model-path=models-prop/5ap/0000-rop-bn0-fn1-ada1-s42 eval-ted --split=test --beam-size=3
|
| 3 |
+
|
| 4 |
+
EVALUATION SUMMARY
|
| 5 |
+
Correct: 95621/100000, 95.621000%
|
| 6 |
+
Exact match: 57941/100000, 57.941000%
|
| 7 |
+
Semantically correct: 37680/100000, 37.680000%
|
| 8 |
+
Incorrect: 4379/100000, 4.379000%
|
models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/results/ltl-35-test-b3/trace_times.png
ADDED
|
models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/results/ltl-35-val-b3/size_hist.png
ADDED
|
models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/results/ltl-35-val-b3/size_hist.svg
ADDED
|
|
models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/results/ltl-35-val-b3/summary.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"exact match": 57872,
|
| 3 |
+
"semantically correct": 37801,
|
| 4 |
+
"incorrect": 4327,
|
| 5 |
+
"correct": 95673
|
| 6 |
+
}
|
models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/results/ltl-35-val-b3/summary.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Command Line Arguments:
|
| 2 |
+
--model-path=models-prop/5ap/0000-rop-bn0-fn1-ada1-s42 eval-ted --beam-size=3
|
| 3 |
+
|
| 4 |
+
EVALUATION SUMMARY
|
| 5 |
+
Correct: 95673/100000, 95.673000%
|
| 6 |
+
Exact match: 57872/100000, 57.872000%
|
| 7 |
+
Semantically correct: 37801/100000, 37.801000%
|
| 8 |
+
Incorrect: 4327/100000, 4.327000%
|
models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/results/ltl-35-val-b3/trace_times.png
ADDED
|
models-prop/5ap/0000-rop-bn0-fn1-ada1-s42/runs/Jan28_01-12-19_as05r4b20/events.out.tfevents.1738023140.as05r4b20.327756.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8088afad1ed66b245a7af4280334a473b80b364e97bc3bc8f480b76b2fb9b4ab
|
| 3 |
+
size 44769
|
models-prop/5ap/base-rop-bn0-fn1-ada1-s44/command-log.txt
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Number of parameters: 2_903_676
|
| 2 |
+
Arguments:
|
| 3 |
+
{
|
| 4 |
+
"model_path": "models-prop/5ap/base-rop-bn0-fn1-ada1-s44",
|
| 5 |
+
"device": "cuda",
|
| 6 |
+
"seed": 44,
|
| 7 |
+
"subparser": "train-ted",
|
| 8 |
+
"data_dir": "data-prop",
|
| 9 |
+
"ds_name": "ltl-35-perturbed",
|
| 10 |
+
"max_trace_length": -1,
|
| 11 |
+
"min_aps": null,
|
| 12 |
+
"max_aps": null,
|
| 13 |
+
"exact_aps": null,
|
| 14 |
+
"vocab_aps": null,
|
| 15 |
+
"val_split": "val",
|
| 16 |
+
"merged_vocab": true,
|
| 17 |
+
"merge_tokens": "all",
|
| 18 |
+
"dynamic_aps": false,
|
| 19 |
+
"learning_rate": 0.001,
|
| 20 |
+
"lr_scheduler_type": "cosine",
|
| 21 |
+
"warmup_steps": 1000,
|
| 22 |
+
"weight_decay": 0.1,
|
| 23 |
+
"adam_beta1": 0.9,
|
| 24 |
+
"adam_beta2": 0.95,
|
| 25 |
+
"max_grad_norm": 1.0,
|
| 26 |
+
"epochs": 64,
|
| 27 |
+
"batch_size": 1024,
|
| 28 |
+
"grad_acc_steps": 1,
|
| 29 |
+
"logging_steps": 500,
|
| 30 |
+
"eval_steps": 3000,
|
| 31 |
+
"train_max_samples": null,
|
| 32 |
+
"val_max_samples": 10000,
|
| 33 |
+
"trace_max_samples": 100,
|
| 34 |
+
"dry": false,
|
| 35 |
+
"eval": false,
|
| 36 |
+
"resume": false,
|
| 37 |
+
"loss_fct": "adacos",
|
| 38 |
+
"num_heads": 6,
|
| 39 |
+
"d_embed_enc": 132,
|
| 40 |
+
"d_embed_dec": null,
|
| 41 |
+
"d_ff": 512,
|
| 42 |
+
"ff_activation": "relu",
|
| 43 |
+
"num_layers": 6,
|
| 44 |
+
"dropout": 0.1,
|
| 45 |
+
"layer_norm_eps": 1e-06,
|
| 46 |
+
"enc_pe": "sinusoid",
|
| 47 |
+
"dec_pe": "rope",
|
| 48 |
+
"no_pe_cross_keys": false,
|
| 49 |
+
"tree_pos_enc": true,
|
| 50 |
+
"d_ap": 0,
|
| 51 |
+
"ap_embed": "randn",
|
| 52 |
+
"embed_base_normalization": "disabled",
|
| 53 |
+
"embed_ap_normalization": "disabled",
|
| 54 |
+
"embed_final_normalization": "l2",
|
| 55 |
+
"feature_normalization": "l2",
|
| 56 |
+
"embed_scaling": "sqrtd",
|
| 57 |
+
"shuffle_aps": null,
|
| 58 |
+
"action": "train",
|
| 59 |
+
"model_type": "ted",
|
| 60 |
+
"decoder_only": false
|
| 61 |
+
}
|
| 62 |
+
Using CUDA device: NVIDIA H100
|
| 63 |
+
Python version: 3.10.13 (main, Sep 11 2023, 13:44:35) [GCC 11.2.0]
|
| 64 |
+
numpy version: 1.26.2
|
| 65 |
+
torch version: 2.1.1+cu121
|
| 66 |
+
transformers version: 4.39.3
|
| 67 |
+
|
models-prop/5ap/base-rop-bn0-fn1-ada1-s44/config.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"vocab": {
|
| 3 |
+
"aps": [
|
| 4 |
+
"a",
|
| 5 |
+
"b",
|
| 6 |
+
"c",
|
| 7 |
+
"d",
|
| 8 |
+
"e"
|
| 9 |
+
],
|
| 10 |
+
"consts": [
|
| 11 |
+
"0",
|
| 12 |
+
"1"
|
| 13 |
+
],
|
| 14 |
+
"trace_ops": [],
|
| 15 |
+
"ltl_ops": [
|
| 16 |
+
"!",
|
| 17 |
+
"&",
|
| 18 |
+
"|",
|
| 19 |
+
"=",
|
| 20 |
+
"^"
|
| 21 |
+
],
|
| 22 |
+
"merge_tokens": "all",
|
| 23 |
+
"dynamic_aps": false,
|
| 24 |
+
"use_start_token": true,
|
| 25 |
+
"use_pad_token": true,
|
| 26 |
+
"use_eos_token": true
|
| 27 |
+
},
|
| 28 |
+
"d_embed_enc": 132,
|
| 29 |
+
"d_embed_dec": 132,
|
| 30 |
+
"d_ff": 512,
|
| 31 |
+
"ff_activation": "relu",
|
| 32 |
+
"dropout": 0.1,
|
| 33 |
+
"num_heads": 6,
|
| 34 |
+
"num_layers": 6,
|
| 35 |
+
"layer_norm_eps": 1e-06,
|
| 36 |
+
"merged_embedder": {
|
| 37 |
+
"tie_embeddings": true,
|
| 38 |
+
"pad_vocab_size_multiple": 8,
|
| 39 |
+
"d_ap": 0,
|
| 40 |
+
"ap_embed": "randn",
|
| 41 |
+
"base_normalization": "disabled",
|
| 42 |
+
"ap_normalization": "disabled",
|
| 43 |
+
"final_normalization": "l2",
|
| 44 |
+
"feature_normalization": "l2",
|
| 45 |
+
"embed_scaling": "sqrtd",
|
| 46 |
+
"shuffle_aps": null
|
| 47 |
+
},
|
| 48 |
+
"max_encode_length": 1024,
|
| 49 |
+
"max_decode_length": 1024,
|
| 50 |
+
"tree_pos_enc": true,
|
| 51 |
+
"datatype": "float32",
|
| 52 |
+
"enc_pe": "sinusoid",
|
| 53 |
+
"dec_pe": "rope",
|
| 54 |
+
"no_pe_cross_keys": false
|
| 55 |
+
}
|
models-prop/5ap/base-rop-bn0-fn1-ada1-s44/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ede6d600c5e51399f68ddde2ba86c617ba15f9873b4509e54dd399fbb98e3214
|
| 3 |
+
size 11714750
|
models-prop/5ap/base-rop-bn0-fn1-ada1-s44/results/ltl-35-val-b3/size_hist.png
ADDED
|
models-prop/5ap/base-rop-bn0-fn1-ada1-s44/results/ltl-35-val-b3/size_hist.svg
ADDED
|
|
models-prop/5ap/base-rop-bn0-fn1-ada1-s44/results/ltl-35-val-b3/summary.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"incorrect": 58216,
|
| 3 |
+
"semantically correct": 32531,
|
| 4 |
+
"exact match": 9035,
|
| 5 |
+
"invalid": 218,
|
| 6 |
+
"correct": 41566
|
| 7 |
+
}
|
models-prop/5ap/base-rop-bn0-fn1-ada1-s44/results/ltl-35-val-b3/summary.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Command Line Arguments:
|
| 2 |
+
--model-path=models-prop/5ap/base-rop-bn0-fn1-ada1-s44 eval-ted --beam-size=3
|
| 3 |
+
|
| 4 |
+
EVALUATION SUMMARY
|
| 5 |
+
Correct: 41566/100000, 41.566000%
|
| 6 |
+
Exact match: 9035/100000, 9.035000%
|
| 7 |
+
Semantically correct: 32531/100000, 32.531000%
|
| 8 |
+
Incorrect: 58216/100000, 58.216000%
|
| 9 |
+
Invalid: 218/100000, 0.218000%
|
models-prop/5ap/base-rop-bn0-fn1-ada1-s44/results/ltl-35-val-b3/trace_times.png
ADDED
|
models-prop/5ap/base-rop-bn0-fn1-ada1-s44/runs/Jan28_01-12-17_as03r3b29/events.out.tfevents.1738023138.as03r3b29.2568593.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c929f62de6a1da0bfbca8f120193478a2bd087579deb16faae4eeb4231729ee
|
| 3 |
+
size 44769
|
models-prop/5ap/d020-rop-bn0-fn1-ada1-s42/command-log.txt
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Number of parameters: 2_902_928
|
| 2 |
+
Arguments:
|
| 3 |
+
{
|
| 4 |
+
"model_path": "models-prop/5ap/d020-rop-bn0-fn1-ada1-s42",
|
| 5 |
+
"device": "cuda",
|
| 6 |
+
"seed": 42,
|
| 7 |
+
"subparser": "train-ted",
|
| 8 |
+
"data_dir": "data-prop",
|
| 9 |
+
"ds_name": "ltl-35-perturbed",
|
| 10 |
+
"max_trace_length": -1,
|
| 11 |
+
"min_aps": null,
|
| 12 |
+
"max_aps": null,
|
| 13 |
+
"exact_aps": null,
|
| 14 |
+
"vocab_aps": null,
|
| 15 |
+
"val_split": "val",
|
| 16 |
+
"merged_vocab": true,
|
| 17 |
+
"merge_tokens": "all",
|
| 18 |
+
"dynamic_aps": true,
|
| 19 |
+
"learning_rate": 0.001,
|
| 20 |
+
"lr_scheduler_type": "cosine",
|
| 21 |
+
"warmup_steps": 1000,
|
| 22 |
+
"weight_decay": 0.1,
|
| 23 |
+
"adam_beta1": 0.9,
|
| 24 |
+
"adam_beta2": 0.95,
|
| 25 |
+
"max_grad_norm": 1.0,
|
| 26 |
+
"epochs": 64,
|
| 27 |
+
"batch_size": 1024,
|
| 28 |
+
"grad_acc_steps": 1,
|
| 29 |
+
"logging_steps": 500,
|
| 30 |
+
"eval_steps": 3000,
|
| 31 |
+
"train_max_samples": null,
|
| 32 |
+
"val_max_samples": 10000,
|
| 33 |
+
"trace_max_samples": 100,
|
| 34 |
+
"dry": false,
|
| 35 |
+
"eval": false,
|
| 36 |
+
"resume": false,
|
| 37 |
+
"loss_fct": "adacos",
|
| 38 |
+
"num_heads": 6,
|
| 39 |
+
"d_embed_enc": 132,
|
| 40 |
+
"d_embed_dec": null,
|
| 41 |
+
"d_ff": 512,
|
| 42 |
+
"ff_activation": "relu",
|
| 43 |
+
"num_layers": 6,
|
| 44 |
+
"dropout": 0.1,
|
| 45 |
+
"layer_norm_eps": 1e-06,
|
| 46 |
+
"enc_pe": "sinusoid",
|
| 47 |
+
"dec_pe": "rope",
|
| 48 |
+
"no_pe_cross_keys": false,
|
| 49 |
+
"tree_pos_enc": true,
|
| 50 |
+
"d_ap": 20,
|
| 51 |
+
"ap_embed": "diagbor",
|
| 52 |
+
"embed_base_normalization": "disabled",
|
| 53 |
+
"embed_ap_normalization": "disabled",
|
| 54 |
+
"embed_final_normalization": "l2",
|
| 55 |
+
"feature_normalization": "l2",
|
| 56 |
+
"embed_scaling": "sqrtd",
|
| 57 |
+
"shuffle_aps": null,
|
| 58 |
+
"action": "train",
|
| 59 |
+
"model_type": "ted",
|
| 60 |
+
"decoder_only": false
|
| 61 |
+
}
|
| 62 |
+
Using CUDA device: NVIDIA H100
|
| 63 |
+
Python version: 3.10.13 (main, Sep 11 2023, 13:44:35) [GCC 11.2.0]
|
| 64 |
+
numpy version: 1.26.2
|
| 65 |
+
torch version: 2.1.1+cu121
|
| 66 |
+
transformers version: 4.39.3
|
| 67 |
+
|
models-prop/5ap/d020-rop-bn0-fn1-ada1-s42/config.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"vocab": {
|
| 3 |
+
"aps": [
|
| 4 |
+
"a",
|
| 5 |
+
"b",
|
| 6 |
+
"c",
|
| 7 |
+
"d",
|
| 8 |
+
"e"
|
| 9 |
+
],
|
| 10 |
+
"consts": [
|
| 11 |
+
"0",
|
| 12 |
+
"1"
|
| 13 |
+
],
|
| 14 |
+
"trace_ops": [],
|
| 15 |
+
"ltl_ops": [
|
| 16 |
+
"!",
|
| 17 |
+
"&",
|
| 18 |
+
"|",
|
| 19 |
+
"=",
|
| 20 |
+
"^"
|
| 21 |
+
],
|
| 22 |
+
"merge_tokens": "all",
|
| 23 |
+
"dynamic_aps": true,
|
| 24 |
+
"use_start_token": true,
|
| 25 |
+
"use_pad_token": true,
|
| 26 |
+
"use_eos_token": true
|
| 27 |
+
},
|
| 28 |
+
"d_embed_enc": 132,
|
| 29 |
+
"d_embed_dec": 132,
|
| 30 |
+
"d_ff": 512,
|
| 31 |
+
"ff_activation": "relu",
|
| 32 |
+
"dropout": 0.1,
|
| 33 |
+
"num_heads": 6,
|
| 34 |
+
"num_layers": 6,
|
| 35 |
+
"layer_norm_eps": 1e-06,
|
| 36 |
+
"merged_embedder": {
|
| 37 |
+
"tie_embeddings": true,
|
| 38 |
+
"pad_vocab_size_multiple": 8,
|
| 39 |
+
"d_ap": 20,
|
| 40 |
+
"ap_embed": "diagbor",
|
| 41 |
+
"base_normalization": "disabled",
|
| 42 |
+
"ap_normalization": "disabled",
|
| 43 |
+
"final_normalization": "l2",
|
| 44 |
+
"feature_normalization": "l2",
|
| 45 |
+
"embed_scaling": "sqrtd",
|
| 46 |
+
"shuffle_aps": null
|
| 47 |
+
},
|
| 48 |
+
"max_encode_length": 1024,
|
| 49 |
+
"max_decode_length": 1024,
|
| 50 |
+
"tree_pos_enc": true,
|
| 51 |
+
"datatype": "float32",
|
| 52 |
+
"enc_pe": "sinusoid",
|
| 53 |
+
"dec_pe": "rope",
|
| 54 |
+
"no_pe_cross_keys": false
|
| 55 |
+
}
|
models-prop/5ap/d020-rop-bn0-fn1-ada1-s42/eval2da.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83c8fc3ec07546be0401ab55376327937250b593ce774b6eadb8471c4b6b0e78
|
| 3 |
+
size 2545486
|
models-prop/5ap/d020-rop-bn0-fn1-ada1-s42/eval2da.png
ADDED
|