BabyLM-community/ace-baseline-small
Browse files- README.md +15 -1
- merges.txt +1 -1
- model.safetensors +1 -1
- tokenizer.json +56 -56
- training_args.bin +1 -1
- vocab.json +0 -0
README.md
CHANGED
|
@@ -13,6 +13,8 @@ should probably proofread and complete it, then remove this comment. -->
|
|
| 13 |
# ace-baseline-small
|
| 14 |
|
| 15 |
This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
|
|
|
|
|
|
|
| 16 |
|
| 17 |
## Model description
|
| 18 |
|
|
@@ -32,7 +34,7 @@ More information needed
|
|
| 32 |
|
| 33 |
The following hyperparameters were used during training:
|
| 34 |
- learning_rate: 0.0001
|
| 35 |
-
- train_batch_size:
|
| 36 |
- eval_batch_size: 8
|
| 37 |
- seed: 42
|
| 38 |
- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
|
@@ -41,6 +43,18 @@ The following hyperparameters were used during training:
|
|
| 41 |
|
| 42 |
### Training results
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
|
| 46 |
### Framework versions
|
|
|
|
| 13 |
# ace-baseline-small
|
| 14 |
|
| 15 |
This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
|
| 16 |
+
It achieves the following results on the evaluation set:
|
| 17 |
+
- Loss: 3.3775
|
| 18 |
|
| 19 |
## Model description
|
| 20 |
|
|
|
|
| 34 |
|
| 35 |
The following hyperparameters were used during training:
|
| 36 |
- learning_rate: 0.0001
|
| 37 |
+
- train_batch_size: 8
|
| 38 |
- eval_batch_size: 8
|
| 39 |
- seed: 42
|
| 40 |
- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
|
|
|
| 43 |
|
| 44 |
### Training results
|
| 45 |
|
| 46 |
+
| Training Loss | Epoch | Step | Validation Loss |
|
| 47 |
+
|:-------------:|:-----:|:-----:|:---------------:|
|
| 48 |
+
| 4.827 | 1.0 | 1282 | 4.1363 |
|
| 49 |
+
| 4.0453 | 2.0 | 2564 | 3.8261 |
|
| 50 |
+
| 3.7125 | 3.0 | 3846 | 3.6729 |
|
| 51 |
+
| 3.5361 | 4.0 | 5128 | 3.5691 |
|
| 52 |
+
| 3.3578 | 5.0 | 6410 | 3.4945 |
|
| 53 |
+
| 3.2356 | 6.0 | 7692 | 3.4469 |
|
| 54 |
+
| 3.0982 | 7.0 | 8974 | 3.4051 |
|
| 55 |
+
| 3.0001 | 8.0 | 10256 | 3.3922 |
|
| 56 |
+
| 2.9559 | 9.0 | 11538 | 3.3814 |
|
| 57 |
+
| 2.8719 | 10.0 | 12820 | 3.3775 |
|
| 58 |
|
| 59 |
|
| 60 |
### Framework versions
|
merges.txt
CHANGED
|
@@ -3446,7 +3446,6 @@ f an
|
|
| 3446 |
h ôn
|
| 3447 |
t er
|
| 3448 |
v e
|
| 3449 |
-
Ġ Ċ
|
| 3450 |
ng at
|
| 3451 |
Ġs ujud
|
| 3452 |
Ġb id
|
|
@@ -3498,6 +3497,7 @@ m i
|
|
| 3498 |
o si
|
| 3499 |
p a
|
| 3500 |
s h
|
|
|
|
| 3501 |
Ġd udoë
|
| 3502 |
Ġs el
|
| 3503 |
Ġb euj
|
|
|
|
| 3446 |
h ôn
|
| 3447 |
t er
|
| 3448 |
v e
|
|
|
|
| 3449 |
ng at
|
| 3450 |
Ġs ujud
|
| 3451 |
Ġb id
|
|
|
|
| 3497 |
o si
|
| 3498 |
p a
|
| 3499 |
s h
|
| 3500 |
+
Ġ Ċ
|
| 3501 |
Ġd udoë
|
| 3502 |
Ġs el
|
| 3503 |
Ġb euj
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 68273200
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6531f656690f65d08ac00198aba5670b0b27eb80cd1b5b231e117c0186dd42cb
|
| 3 |
size 68273200
|
tokenizer.json
CHANGED
|
@@ -3800,58 +3800,58 @@
|
|
| 3800 |
"hôn": 3705,
|
| 3801 |
"ter": 3706,
|
| 3802 |
"ve": 3707,
|
| 3803 |
-
"
|
| 3804 |
-
"
|
| 3805 |
-
"
|
| 3806 |
-
"
|
| 3807 |
-
"
|
| 3808 |
-
"
|
| 3809 |
-
"
|
| 3810 |
-
"
|
| 3811 |
-
"
|
| 3812 |
-
"
|
| 3813 |
-
"
|
| 3814 |
-
"
|
| 3815 |
-
"
|
| 3816 |
-
"
|
| 3817 |
-
"
|
| 3818 |
-
"
|
| 3819 |
-
"
|
| 3820 |
-
"
|
| 3821 |
-
"
|
| 3822 |
-
"
|
| 3823 |
-
"
|
| 3824 |
-
"
|
| 3825 |
-
"
|
| 3826 |
-
"
|
| 3827 |
-
"
|
| 3828 |
-
"
|
| 3829 |
-
"
|
| 3830 |
-
"
|
| 3831 |
-
"
|
| 3832 |
-
"
|
| 3833 |
-
"
|
| 3834 |
-
"
|
| 3835 |
-
"
|
| 3836 |
-
"
|
| 3837 |
-
"
|
| 3838 |
-
"
|
| 3839 |
-
"
|
| 3840 |
-
"
|
| 3841 |
-
"
|
| 3842 |
-
"
|
| 3843 |
-
"
|
| 3844 |
-
"
|
| 3845 |
-
"
|
| 3846 |
-
"
|
| 3847 |
-
"
|
| 3848 |
-
"
|
| 3849 |
-
"
|
| 3850 |
-
"
|
| 3851 |
-
"
|
| 3852 |
-
"
|
| 3853 |
-
"
|
| 3854 |
-
"
|
| 3855 |
"Ġdudoë": 3760,
|
| 3856 |
"Ġsel": 3761,
|
| 3857 |
"Ġbeuj": 3762,
|
|
@@ -22074,10 +22074,6 @@
|
|
| 22074 |
"v",
|
| 22075 |
"e"
|
| 22076 |
],
|
| 22077 |
-
[
|
| 22078 |
-
"Ġ",
|
| 22079 |
-
"Ċ"
|
| 22080 |
-
],
|
| 22081 |
[
|
| 22082 |
"ng",
|
| 22083 |
"at"
|
|
@@ -22282,6 +22278,10 @@
|
|
| 22282 |
"s",
|
| 22283 |
"h"
|
| 22284 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22285 |
[
|
| 22286 |
"Ġd",
|
| 22287 |
"udoë"
|
|
|
|
| 3800 |
"hôn": 3705,
|
| 3801 |
"ter": 3706,
|
| 3802 |
"ve": 3707,
|
| 3803 |
+
"ngat": 3708,
|
| 3804 |
+
"Ġsujud": 3709,
|
| 3805 |
+
"Ġbid": 3710,
|
| 3806 |
+
"Ġbeukah": 3711,
|
| 3807 |
+
"Ġpol": 3712,
|
| 3808 |
+
"Ġpujoe": 3713,
|
| 3809 |
+
"Ġturi": 3714,
|
| 3810 |
+
"abu": 3715,
|
| 3811 |
+
"Ġkama": 3716,
|
| 3812 |
+
"Ġkalim": 3717,
|
| 3813 |
+
"ĠBag": 3718,
|
| 3814 |
+
"Ġjing": 3719,
|
| 3815 |
+
"ĠKak": 3720,
|
| 3816 |
+
"ĠSungai": 3721,
|
| 3817 |
+
"Ġreumbang": 3722,
|
| 3818 |
+
"ĠLila": 3723,
|
| 3819 |
+
"agak": 3724,
|
| 3820 |
+
"ikri": 3725,
|
| 3821 |
+
"eusoh": 3726,
|
| 3822 |
+
"ĠUj": 3727,
|
| 3823 |
+
"inge": 3728,
|
| 3824 |
+
"Ġdumpeue": 3729,
|
| 3825 |
+
"tic": 3730,
|
| 3826 |
+
"ĠIndah": 3731,
|
| 3827 |
+
"Ġjipoh": 3732,
|
| 3828 |
+
"Ġsigeutu": 3733,
|
| 3829 |
+
"Ġkuala": 3734,
|
| 3830 |
+
"2031": 3735,
|
| 3831 |
+
"2032": 3736,
|
| 3832 |
+
"istem": 3737,
|
| 3833 |
+
"Ġkeumawé": 3738,
|
| 3834 |
+
"Ġdairah": 3739,
|
| 3835 |
+
"ĠNeutanyong": 3740,
|
| 3836 |
+
"Ġbeunagi": 3741,
|
| 3837 |
+
"Ġpeungeuh": 3742,
|
| 3838 |
+
"ĠTuruki": 3743,
|
| 3839 |
+
"Lhôk": 3744,
|
| 3840 |
+
"Ġkafe": 3745,
|
| 3841 |
+
"ĠKeupeue": 3746,
|
| 3842 |
+
"Ġriwayat": 3747,
|
| 3843 |
+
"Ġrancak": 3748,
|
| 3844 |
+
"ĠGandapura": 3749,
|
| 3845 |
+
"76": 3750,
|
| 3846 |
+
"98": 3751,
|
| 3847 |
+
"Keud": 3752,
|
| 3848 |
+
"Lamp": 3753,
|
| 3849 |
+
"iang": 3754,
|
| 3850 |
+
"mi": 3755,
|
| 3851 |
+
"osi": 3756,
|
| 3852 |
+
"pa": 3757,
|
| 3853 |
+
"sh": 3758,
|
| 3854 |
+
"ĠĊ": 3759,
|
| 3855 |
"Ġdudoë": 3760,
|
| 3856 |
"Ġsel": 3761,
|
| 3857 |
"Ġbeuj": 3762,
|
|
|
|
| 22074 |
"v",
|
| 22075 |
"e"
|
| 22076 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22077 |
[
|
| 22078 |
"ng",
|
| 22079 |
"at"
|
|
|
|
| 22278 |
"s",
|
| 22279 |
"h"
|
| 22280 |
],
|
| 22281 |
+
[
|
| 22282 |
+
"Ġ",
|
| 22283 |
+
"Ċ"
|
| 22284 |
+
],
|
| 22285 |
[
|
| 22286 |
"Ġd",
|
| 22287 |
"udoë"
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5777
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:278a0d0a5a746282b60d311fd8ced2785f557a06825fd810cb303b4101b860f0
|
| 3 |
size 5777
|
vocab.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|