AlonKellner-Jounce commited on
Commit
42e1b9e
·
verified ·
1 Parent(s): 835dfcd

phi tokenizer

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce086d93d685cbfd962b5517f44d739122c1478d294ec51e8853ad0be1ef80d4
3
  size 8328624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fa21b14a9c4cbcc0bd141d83cfe99a35de4544d5001db5a1239ebe65ef78b07
3
  size 8328624
special_tokens_map.json CHANGED
@@ -2,21 +2,28 @@
2
  "bos_token": {
3
  "content": "<s>",
4
  "lstrip": false,
5
- "normalized": true,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "eos_token": {
10
- "content": "</s>",
11
  "lstrip": false,
12
- "normalized": true,
 
 
 
 
 
 
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
  "unk_token": {
17
  "content": "<unk>",
18
  "lstrip": false,
19
- "normalized": true,
20
  "rstrip": false,
21
  "single_word": false
22
  }
 
2
  "bos_token": {
3
  "content": "<s>",
4
  "lstrip": false,
5
+ "normalized": false,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "eos_token": {
10
+ "content": "<|endoftext|>",
11
  "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
  "rstrip": false,
21
  "single_word": false
22
  },
23
  "unk_token": {
24
  "content": "<unk>",
25
  "lstrip": false,
26
+ "normalized": false,
27
  "rstrip": false,
28
  "single_word": false
29
  }
tokenizer.json CHANGED
@@ -9,7 +9,7 @@
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
- "normalized": true,
13
  "special": true
14
  },
15
  {
@@ -18,7 +18,7 @@
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
21
- "normalized": true,
22
  "special": true
23
  },
24
  {
@@ -26,8 +26,107 @@
26
  "content": "</s>",
27
  "single_word": false,
28
  "lstrip": false,
 
 
 
 
 
 
 
 
 
29
  "rstrip": false,
30
- "normalized": true,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  "special": true
32
  }
33
  ],
@@ -51,12 +150,6 @@
51
  "post_processor": {
52
  "type": "TemplateProcessing",
53
  "single": [
54
- {
55
- "SpecialToken": {
56
- "id": "<s>",
57
- "type_id": 0
58
- }
59
- },
60
  {
61
  "Sequence": {
62
  "id": "A",
@@ -65,24 +158,12 @@
65
  }
66
  ],
67
  "pair": [
68
- {
69
- "SpecialToken": {
70
- "id": "<s>",
71
- "type_id": 0
72
- }
73
- },
74
  {
75
  "Sequence": {
76
  "id": "A",
77
  "type_id": 0
78
  }
79
  },
80
- {
81
- "SpecialToken": {
82
- "id": "<s>",
83
- "type_id": 1
84
- }
85
- },
86
  {
87
  "Sequence": {
88
  "id": "B",
@@ -90,17 +171,7 @@
90
  }
91
  }
92
  ],
93
- "special_tokens": {
94
- "<s>": {
95
- "id": "<s>",
96
- "ids": [
97
- 1
98
- ],
99
- "tokens": [
100
- "<s>"
101
- ]
102
- }
103
- }
104
  },
105
  "decoder": {
106
  "type": "Sequence",
@@ -276655,484 +276726,484 @@
276655
  "ub"
276656
  ],
276657
  [
276658
- "▁▁",
276659
- "▁▁"
276660
  ],
276661
  [
276662
  "▁▁",
276663
- "▁▁▁▁"
276664
  ],
276665
  [
276666
- "▁▁",
276667
- "▁▁▁▁▁▁▁▁"
276668
  ],
276669
  [
276670
- "▁▁",
276671
- "▁▁▁▁▁"
276672
  ],
276673
  [
276674
  "▁▁",
276675
  "▁▁▁▁▁▁"
276676
  ],
276677
  [
276678
- "▁▁",
276679
- "▁▁▁▁▁▁▁▁▁▁▁▁"
276680
- ],
276681
- [
276682
- "▁▁",
276683
- "▁▁▁▁▁▁▁▁▁▁▁▁▁"
276684
  ],
276685
  [
276686
- "▁▁",
276687
- "▁▁▁▁▁▁▁▁▁▁"
276688
  ],
276689
  [
276690
- "▁▁",
276691
- "▁▁▁▁▁▁▁▁▁▁▁▁▁▁"
276692
  ],
276693
  [
276694
- "▁▁",
276695
- "▁▁▁"
276696
  ],
276697
  [
276698
- "▁▁",
276699
- "▁▁▁▁▁▁▁▁▁"
276700
  ],
276701
  [
276702
- "▁▁",
276703
  "▁▁▁▁▁▁▁"
276704
  ],
276705
  [
276706
  "▁▁",
276707
- "▁▁▁▁▁▁▁▁▁▁▁"
276708
  ],
276709
  [
276710
- "▁▁",
276711
  "▁"
276712
  ],
276713
  [
276714
- "▁▁▁▁",
276715
  "▁▁"
276716
  ],
276717
  [
276718
- "▁▁▁▁",
276719
  "▁▁▁▁"
276720
  ],
276721
  [
276722
- "▁▁▁▁",
276723
- "▁▁▁▁▁▁▁▁"
276724
- ],
276725
- [
276726
- "▁▁▁▁",
276727
- "▁▁▁▁▁"
276728
- ],
276729
- [
276730
- "▁▁▁▁",
276731
- "▁▁▁▁▁▁"
276732
  ],
276733
  [
276734
  "▁▁▁▁",
276735
  "▁▁▁▁▁▁▁▁▁▁▁▁"
276736
  ],
276737
  [
276738
- "▁▁▁▁",
276739
- "▁▁▁▁▁▁▁▁▁▁"
276740
  ],
276741
  [
276742
- "▁▁▁▁",
276743
- "▁▁▁"
276744
  ],
276745
  [
276746
- "▁▁▁▁",
276747
- "▁▁▁▁▁▁▁▁▁"
276748
  ],
276749
  [
276750
- "▁▁▁▁",
276751
- "▁▁▁▁▁▁▁"
276752
  ],
276753
  [
276754
- "▁▁▁▁",
276755
- "▁▁▁▁▁▁▁▁▁▁▁"
276756
  ],
276757
  [
276758
- "▁▁▁▁",
276759
- ""
276760
  ],
276761
  [
276762
- "▁▁▁▁▁▁▁▁",
276763
  "▁▁"
276764
  ],
276765
  [
276766
- "▁▁▁▁▁▁▁▁",
276767
- "▁▁▁▁"
276768
  ],
276769
  [
276770
- "▁▁▁▁▁▁▁▁",
276771
- "▁▁▁▁▁▁▁▁"
276772
  ],
276773
  [
276774
- "▁▁▁▁▁▁▁▁",
276775
- "▁▁▁▁▁"
276776
  ],
276777
  [
276778
- "▁▁▁▁▁▁▁▁",
276779
- "▁▁▁▁▁▁"
276780
  ],
276781
  [
276782
- "▁▁▁▁▁▁▁▁",
276783
- "▁▁▁"
276784
  ],
276785
  [
276786
- "▁▁▁▁▁▁▁▁",
276787
- "▁▁▁▁▁▁▁"
276788
  ],
276789
  [
276790
- "▁▁▁▁▁▁▁▁",
276791
- ""
276792
  ],
276793
  [
276794
- "▁▁▁▁▁",
276795
  "▁▁"
276796
  ],
276797
  [
276798
  "▁▁▁▁▁",
276799
- "▁▁▁▁"
276800
  ],
276801
  [
276802
- "▁▁▁▁▁",
276803
- "▁▁▁▁▁▁▁▁"
276804
  ],
276805
  [
276806
- "▁▁▁▁▁",
276807
  "▁▁▁▁▁"
276808
  ],
276809
  [
276810
- "▁▁▁▁▁",
276811
- "▁▁▁▁▁▁"
276812
- ],
276813
- [
276814
- "▁▁▁▁▁",
276815
  "▁▁▁▁▁▁▁▁▁▁"
276816
  ],
276817
  [
276818
- "▁▁▁▁▁",
276819
- "▁▁▁"
276820
  ],
276821
  [
276822
- "▁▁▁▁▁",
276823
- "▁▁▁▁▁▁▁▁▁"
276824
  ],
276825
  [
276826
  "▁▁▁▁▁",
276827
  "▁▁▁▁▁▁▁"
276828
  ],
276829
  [
276830
- "▁▁▁▁▁",
276831
- "▁▁▁▁▁▁▁▁▁▁▁"
276832
- ],
276833
- [
276834
- "▁▁▁▁▁",
276835
- "▁"
276836
  ],
276837
  [
276838
- "▁▁▁▁▁▁",
276839
  "▁▁"
276840
  ],
276841
  [
276842
- "▁▁▁▁▁▁",
276843
- "▁▁▁▁"
276844
  ],
276845
  [
276846
- "▁▁▁▁▁▁",
276847
- "▁▁▁▁▁▁▁▁"
276848
  ],
276849
  [
276850
- "▁▁▁▁▁▁",
276851
  "▁▁▁▁▁"
276852
  ],
276853
  [
276854
- "▁▁▁▁▁▁",
276855
- "▁▁▁▁▁▁"
276856
  ],
276857
  [
276858
- "▁▁▁▁▁▁",
276859
- "▁▁▁▁▁▁▁▁▁▁"
276860
  ],
276861
  [
276862
- "▁▁▁▁▁▁",
276863
- "▁▁▁"
276864
  ],
276865
  [
276866
- "▁▁▁▁▁▁",
276867
  "▁▁▁▁▁▁▁▁▁"
276868
  ],
 
 
 
 
 
 
 
 
276869
  [
276870
  "▁▁▁▁▁▁",
276871
  "▁▁▁▁▁▁▁"
276872
  ],
276873
  [
276874
- "▁▁▁▁▁▁",
276875
  "▁"
276876
  ],
276877
  [
276878
- "▁▁▁▁▁▁▁▁▁▁▁▁",
276879
- "▁▁"
276880
  ],
276881
  [
276882
- "▁▁▁▁▁▁▁▁▁▁▁▁",
276883
- "▁▁▁▁"
276884
  ],
276885
  [
276886
- "▁▁▁▁▁▁▁▁▁▁▁▁",
276887
- "▁▁▁"
276888
  ],
276889
  [
276890
- "▁▁▁▁▁▁▁▁▁▁▁▁",
276891
- ""
276892
  ],
276893
  [
276894
- "▁▁▁▁▁▁▁▁▁▁▁▁▁",
276895
  "▁▁"
276896
  ],
276897
  [
276898
- "▁▁▁▁▁▁▁▁▁▁▁▁▁",
276899
- "▁▁▁"
276900
  ],
276901
  [
276902
- "▁▁▁▁▁▁▁▁▁▁▁▁▁",
276903
- ""
276904
  ],
276905
  [
276906
- "▁▁▁▁▁▁▁▁▁▁",
276907
- "▁▁"
276908
  ],
276909
  [
276910
- "▁▁▁▁▁▁▁▁▁▁",
276911
- "▁▁▁▁"
276912
  ],
276913
  [
276914
- "▁▁▁▁▁▁▁▁▁▁",
276915
  "▁▁▁▁▁"
276916
  ],
276917
  [
276918
- "▁▁▁▁▁▁▁▁▁▁",
276919
- "▁▁▁▁▁▁"
276920
  ],
276921
  [
276922
- "▁▁▁▁▁▁▁▁▁▁",
276923
- "▁▁▁"
276924
  ],
276925
  [
276926
- "▁▁▁▁▁▁▁▁▁▁",
276927
  "▁"
276928
  ],
276929
  [
276930
- "▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
276931
- "▁▁"
276932
  ],
276933
  [
276934
- "▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
276935
- ""
276936
  ],
276937
  [
276938
- "▁▁▁",
276939
- "▁▁"
276940
  ],
276941
  [
276942
- "▁▁▁",
276943
- "▁▁▁▁"
276944
  ],
276945
  [
276946
- "▁▁▁",
276947
- "▁▁▁▁▁▁▁▁"
276948
  ],
276949
  [
276950
- "▁▁▁",
276951
- "▁▁▁▁▁"
276952
  ],
276953
  [
276954
- "▁▁▁",
276955
- "▁▁▁▁▁▁"
276956
  ],
276957
  [
276958
- "▁▁▁",
276959
- "▁▁▁▁▁▁▁▁▁▁▁▁"
276960
  ],
276961
  [
276962
- "▁▁▁",
276963
- "▁▁▁▁▁▁▁▁▁▁▁▁▁"
276964
  ],
276965
  [
276966
- "▁▁▁",
276967
- "▁▁▁▁▁▁▁▁▁▁"
276968
  ],
276969
  [
276970
  "▁▁▁",
276971
- "▁▁▁"
276972
  ],
276973
  [
276974
- "▁▁▁",
276975
- "▁▁▁▁▁▁▁▁▁"
276976
  ],
276977
  [
276978
- "▁▁▁",
276979
  "▁▁▁▁▁▁▁"
276980
  ],
276981
  [
276982
- "▁▁▁",
276983
- "▁▁▁▁▁▁▁▁▁▁▁"
276984
  ],
276985
  [
276986
- "▁▁▁",
 
 
 
 
276987
  "▁"
276988
  ],
276989
  [
276990
- "▁▁▁▁▁▁▁▁▁",
276991
  "▁▁"
276992
  ],
276993
  [
276994
- "▁▁▁▁▁▁▁▁▁",
276995
- "▁▁▁▁"
276996
  ],
276997
  [
276998
- "▁▁▁▁▁▁▁▁▁",
276999
  "▁▁▁▁▁"
277000
  ],
277001
  [
277002
- "▁▁▁▁▁▁▁▁▁",
277003
- "▁▁▁▁▁▁"
277004
  ],
277005
  [
277006
- "▁▁▁▁▁▁▁▁▁",
277007
- "▁▁▁"
277008
  ],
277009
  [
277010
- "▁▁▁▁▁▁▁▁▁",
277011
- "▁▁▁▁▁▁▁"
277012
  ],
277013
  [
277014
- "▁▁▁▁▁▁▁▁▁",
277015
- ""
277016
  ],
277017
  [
277018
  "▁▁▁▁▁▁▁",
277019
  "▁▁"
277020
  ],
277021
  [
277022
- "▁▁▁▁▁▁▁",
277023
- "▁▁▁▁"
277024
- ],
277025
- [
277026
- "▁▁▁▁▁▁▁",
277027
  "▁▁▁▁▁▁▁▁"
277028
  ],
277029
  [
277030
- "▁▁▁▁▁▁▁",
277031
  "▁▁▁▁▁"
277032
  ],
277033
  [
277034
- "▁▁▁▁▁▁▁",
277035
- "▁▁▁▁▁▁"
277036
- ],
277037
- [
277038
- "▁▁▁▁▁▁▁",
277039
  "▁▁▁"
277040
  ],
277041
  [
277042
- "▁▁▁▁▁▁▁",
277043
- "▁▁▁▁▁▁▁▁▁"
277044
  ],
277045
  [
277046
- "▁▁▁▁▁▁▁",
277047
- "▁▁▁▁▁▁▁"
277048
  ],
277049
  [
277050
- "▁▁▁▁▁▁▁",
277051
- ""
277052
  ],
277053
  [
277054
- "▁▁▁▁▁▁▁▁▁▁▁",
277055
- "▁▁"
277056
  ],
277057
  [
277058
- "▁▁▁▁▁▁▁▁▁▁▁",
277059
- "▁▁▁▁"
277060
  ],
277061
  [
277062
- "▁▁▁▁▁▁▁▁▁▁▁",
277063
- "▁▁▁▁▁"
277064
  ],
277065
  [
277066
- "▁▁▁▁▁▁▁▁▁▁▁",
277067
  "▁▁▁"
277068
  ],
277069
  [
277070
- "▁▁▁▁▁▁▁▁▁▁▁",
277071
- ""
277072
  ],
277073
  [
277074
- "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
 
 
 
 
277075
  "▁"
277076
  ],
277077
  [
277078
- "",
277079
- "▁▁"
277080
  ],
277081
  [
277082
- "",
277083
- "▁▁▁▁"
277084
  ],
277085
  [
277086
- "",
277087
- "▁▁▁▁▁▁▁▁"
277088
  ],
277089
  [
277090
  "▁",
277091
- "▁▁▁▁▁"
277092
  ],
277093
  [
277094
- "",
277095
- "▁▁▁▁▁▁"
277096
  ],
277097
  [
277098
- "",
277099
- "▁▁▁▁▁▁▁▁▁▁▁▁"
277100
  ],
277101
  [
277102
- "",
277103
- "▁▁▁▁▁▁▁▁▁▁▁▁▁"
277104
  ],
277105
  [
277106
- "",
277107
  "▁▁▁▁▁▁▁▁▁▁"
277108
  ],
277109
  [
277110
- "",
277111
- "▁▁▁▁▁▁▁▁▁▁▁▁▁▁"
277112
  ],
277113
  [
277114
- "",
277115
  "▁▁▁"
277116
  ],
277117
  [
277118
- "",
277119
- "▁▁▁▁▁▁▁▁▁"
277120
  ],
277121
  [
277122
- "",
277123
- "▁▁▁▁▁▁▁"
277124
  ],
277125
  [
277126
- "",
277127
- "▁▁▁▁▁▁▁▁▁▁▁"
277128
  ],
277129
  [
277130
- "",
277131
- "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁"
 
 
 
 
 
 
 
 
 
 
 
 
277132
  ],
277133
  [
277134
  "▁",
277135
- ""
277136
  ]
277137
  ]
277138
  }
 
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
+ "normalized": false,
13
  "special": true
14
  },
15
  {
 
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
21
+ "normalized": false,
22
  "special": true
23
  },
24
  {
 
26
  "content": "</s>",
27
  "single_word": false,
28
  "lstrip": false,
29
+ "rstrip": true,
30
+ "normalized": false,
31
+ "special": false
32
+ },
33
+ {
34
+ "id": 32000,
35
+ "content": "<|endoftext|>",
36
+ "single_word": false,
37
+ "lstrip": false,
38
  "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 32001,
44
+ "content": "<|assistant|>",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": true,
48
+ "normalized": false,
49
+ "special": true
50
+ },
51
+ {
52
+ "id": 32002,
53
+ "content": "<|placeholder1|>",
54
+ "single_word": false,
55
+ "lstrip": false,
56
+ "rstrip": true,
57
+ "normalized": false,
58
+ "special": true
59
+ },
60
+ {
61
+ "id": 32003,
62
+ "content": "<|placeholder2|>",
63
+ "single_word": false,
64
+ "lstrip": false,
65
+ "rstrip": true,
66
+ "normalized": false,
67
+ "special": true
68
+ },
69
+ {
70
+ "id": 32004,
71
+ "content": "<|placeholder3|>",
72
+ "single_word": false,
73
+ "lstrip": false,
74
+ "rstrip": true,
75
+ "normalized": false,
76
+ "special": true
77
+ },
78
+ {
79
+ "id": 32005,
80
+ "content": "<|placeholder4|>",
81
+ "single_word": false,
82
+ "lstrip": false,
83
+ "rstrip": true,
84
+ "normalized": false,
85
+ "special": true
86
+ },
87
+ {
88
+ "id": 32006,
89
+ "content": "<|system|>",
90
+ "single_word": false,
91
+ "lstrip": false,
92
+ "rstrip": true,
93
+ "normalized": false,
94
+ "special": true
95
+ },
96
+ {
97
+ "id": 32007,
98
+ "content": "<|end|>",
99
+ "single_word": false,
100
+ "lstrip": false,
101
+ "rstrip": true,
102
+ "normalized": false,
103
+ "special": true
104
+ },
105
+ {
106
+ "id": 32008,
107
+ "content": "<|placeholder5|>",
108
+ "single_word": false,
109
+ "lstrip": false,
110
+ "rstrip": true,
111
+ "normalized": false,
112
+ "special": true
113
+ },
114
+ {
115
+ "id": 32009,
116
+ "content": "<|placeholder6|>",
117
+ "single_word": false,
118
+ "lstrip": false,
119
+ "rstrip": true,
120
+ "normalized": false,
121
+ "special": true
122
+ },
123
+ {
124
+ "id": 32010,
125
+ "content": "<|user|>",
126
+ "single_word": false,
127
+ "lstrip": false,
128
+ "rstrip": true,
129
+ "normalized": false,
130
  "special": true
131
  }
132
  ],
 
150
  "post_processor": {
151
  "type": "TemplateProcessing",
152
  "single": [
 
 
 
 
 
 
153
  {
154
  "Sequence": {
155
  "id": "A",
 
158
  }
159
  ],
160
  "pair": [
 
 
 
 
 
 
161
  {
162
  "Sequence": {
163
  "id": "A",
164
  "type_id": 0
165
  }
166
  },
 
 
 
 
 
 
167
  {
168
  "Sequence": {
169
  "id": "B",
 
171
  }
172
  }
173
  ],
174
+ "special_tokens": {}
 
 
 
 
 
 
 
 
 
 
175
  },
176
  "decoder": {
177
  "type": "Sequence",
 
276726
  "ub"
276727
  ],
276728
  [
276729
+ "",
276730
+ ""
276731
  ],
276732
  [
276733
  "▁▁",
276734
+ "▁▁"
276735
  ],
276736
  [
276737
+ "▁▁▁",
276738
+ ""
276739
  ],
276740
  [
276741
+ "",
276742
+ "▁▁▁"
276743
  ],
276744
  [
276745
  "▁▁",
276746
  "▁▁▁▁▁▁"
276747
  ],
276748
  [
276749
+ "▁▁▁▁",
276750
+ "▁▁▁▁"
 
 
 
 
276751
  ],
276752
  [
276753
+ "▁▁▁▁▁",
276754
+ "▁▁▁"
276755
  ],
276756
  [
276757
+ "▁▁▁▁▁▁",
276758
+ "▁▁"
276759
  ],
276760
  [
276761
+ "▁▁▁",
276762
+ "▁▁▁▁▁"
276763
  ],
276764
  [
276765
+ "▁▁▁▁▁▁▁",
276766
+ ""
276767
  ],
276768
  [
276769
+ "",
276770
  "▁▁▁▁▁▁▁"
276771
  ],
276772
  [
276773
  "▁▁",
276774
+ "▁▁▁"
276775
  ],
276776
  [
276777
+ "▁▁▁▁",
276778
  "▁"
276779
  ],
276780
  [
276781
+ "▁▁▁",
276782
  "▁▁"
276783
  ],
276784
  [
276785
+ "",
276786
  "▁▁▁▁"
276787
  ],
276788
  [
276789
+ "▁▁",
276790
+ "▁▁▁▁▁▁▁▁▁▁▁▁▁▁"
 
 
 
 
 
 
 
 
276791
  ],
276792
  [
276793
  "▁▁▁▁",
276794
  "▁▁▁▁▁▁▁▁▁▁▁▁"
276795
  ],
276796
  [
276797
+ "▁▁▁▁▁▁▁▁",
276798
+ "▁▁▁▁▁▁▁▁"
276799
  ],
276800
  [
276801
+ "▁▁▁▁▁",
276802
+ "▁▁▁▁▁▁▁▁▁▁▁"
276803
  ],
276804
  [
276805
+ "▁▁▁▁▁▁",
276806
+ "▁▁▁▁▁▁▁▁▁▁"
276807
  ],
276808
  [
276809
+ "▁▁▁▁▁▁▁▁▁▁▁▁",
276810
+ "▁▁▁▁"
276811
  ],
276812
  [
276813
+ "▁▁▁▁▁▁▁▁▁▁▁▁▁",
276814
+ "▁▁▁"
276815
  ],
276816
  [
276817
+ "▁▁▁▁▁▁▁▁▁▁",
276818
+ "▁▁▁▁▁▁"
276819
  ],
276820
  [
276821
+ "▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
276822
  "▁▁"
276823
  ],
276824
  [
276825
+ "▁▁▁",
276826
+ "▁▁▁▁▁▁▁▁▁▁▁▁▁"
276827
  ],
276828
  [
276829
+ "▁▁▁▁▁▁▁▁▁",
276830
+ "▁▁▁▁▁▁▁"
276831
  ],
276832
  [
276833
+ "▁▁▁▁▁▁▁",
276834
+ "▁▁▁▁▁▁▁▁▁"
276835
  ],
276836
  [
276837
+ "▁▁▁▁▁▁▁▁▁▁▁",
276838
+ "▁▁▁▁▁"
276839
  ],
276840
  [
276841
+ "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
276842
+ ""
276843
  ],
276844
  [
276845
+ "",
276846
+ "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁"
276847
  ],
276848
  [
276849
+ "▁▁",
276850
+ "▁▁▁▁"
276851
  ],
276852
  [
276853
+ "▁▁▁▁",
276854
  "▁▁"
276855
  ],
276856
  [
276857
  "▁▁▁▁▁",
276858
+ ""
276859
  ],
276860
  [
276861
+ "▁▁▁",
276862
+ "▁▁▁"
276863
  ],
276864
  [
276865
+ "",
276866
  "▁▁▁▁▁"
276867
  ],
276868
  [
276869
+ "▁▁",
 
 
 
 
276870
  "▁▁▁▁▁▁▁▁▁▁"
276871
  ],
276872
  [
276873
+ "▁▁▁▁",
276874
+ "▁▁▁▁▁▁▁▁"
276875
  ],
276876
  [
276877
+ "▁▁▁▁▁▁▁▁",
276878
+ "▁▁▁▁"
276879
  ],
276880
  [
276881
  "▁▁▁▁▁",
276882
  "▁▁▁▁▁▁▁"
276883
  ],
276884
  [
276885
+ "▁▁▁▁▁▁",
276886
+ "▁▁▁▁▁▁"
 
 
 
 
276887
  ],
276888
  [
276889
+ "▁▁▁▁▁▁▁▁▁▁",
276890
  "▁▁"
276891
  ],
276892
  [
276893
+ "▁▁▁",
276894
+ "▁▁▁▁▁▁▁▁▁"
276895
  ],
276896
  [
276897
+ "▁▁▁▁▁▁▁▁▁",
276898
+ "▁▁▁"
276899
  ],
276900
  [
276901
+ "▁▁▁▁▁▁▁",
276902
  "▁▁▁▁▁"
276903
  ],
276904
  [
276905
+ "▁▁▁▁▁▁▁▁▁▁▁",
276906
+ ""
276907
  ],
276908
  [
276909
+ "",
276910
+ "▁▁▁▁▁▁▁▁▁▁▁"
276911
  ],
276912
  [
276913
+ "▁▁",
276914
+ "▁▁▁▁▁▁▁▁▁▁▁"
276915
  ],
276916
  [
276917
+ "▁▁▁▁",
276918
  "▁▁▁▁▁▁▁▁▁"
276919
  ],
276920
+ [
276921
+ "▁▁▁▁▁▁▁▁",
276922
+ "▁▁▁▁▁"
276923
+ ],
276924
+ [
276925
+ "▁▁▁▁▁",
276926
+ "▁▁▁▁▁▁▁▁"
276927
+ ],
276928
  [
276929
  "▁▁▁▁▁▁",
276930
  "▁▁▁▁▁▁▁"
276931
  ],
276932
  [
276933
+ "▁▁▁▁▁▁▁▁▁▁▁▁",
276934
  "▁"
276935
  ],
276936
  [
276937
+ "▁▁▁▁▁▁▁▁▁▁",
276938
+ "▁▁▁"
276939
  ],
276940
  [
276941
+ "▁▁▁",
276942
+ "▁▁▁▁▁▁▁▁▁▁"
276943
  ],
276944
  [
276945
+ "▁▁▁▁▁▁▁▁▁",
276946
+ "▁▁▁▁"
276947
  ],
276948
  [
276949
+ "▁▁▁▁▁▁▁",
276950
+ "▁▁▁▁▁▁"
276951
  ],
276952
  [
276953
+ "▁▁▁▁▁▁▁▁▁▁▁",
276954
  "▁▁"
276955
  ],
276956
  [
276957
+ "",
276958
+ "▁▁▁▁▁▁▁▁▁▁▁▁"
276959
  ],
276960
  [
276961
+ "▁▁",
276962
+ "▁▁▁▁▁▁▁▁"
276963
  ],
276964
  [
276965
+ "▁▁▁▁",
276966
+ "▁▁▁▁▁▁"
276967
  ],
276968
  [
276969
+ "▁▁▁▁▁▁▁▁",
276970
+ "▁▁"
276971
  ],
276972
  [
276973
+ "▁▁▁▁▁",
276974
  "▁▁▁▁▁"
276975
  ],
276976
  [
276977
+ "▁▁▁▁▁▁",
276978
+ "▁▁▁▁"
276979
  ],
276980
  [
276981
+ "▁▁▁",
276982
+ "▁▁▁▁▁▁▁"
276983
  ],
276984
  [
276985
+ "▁▁▁▁▁▁▁▁▁",
276986
  "▁"
276987
  ],
276988
  [
276989
+ "▁▁▁▁▁▁▁",
276990
+ "▁▁▁"
276991
  ],
276992
  [
276993
+ "",
276994
+ "▁▁▁▁▁▁▁▁▁"
276995
  ],
276996
  [
276997
+ "▁▁",
276998
+ "▁▁▁▁▁▁▁▁▁▁▁▁"
276999
  ],
277000
  [
277001
+ "▁▁▁▁",
277002
+ "▁▁▁▁▁▁▁▁▁▁"
277003
  ],
277004
  [
277005
+ "▁▁▁▁▁▁▁▁",
277006
+ "▁▁▁▁▁▁"
277007
  ],
277008
  [
277009
+ "▁▁▁▁▁",
277010
+ "▁▁▁▁▁▁▁▁▁"
277011
  ],
277012
  [
277013
+ "▁▁▁▁▁▁",
277014
+ "▁▁▁▁▁▁▁▁"
277015
  ],
277016
  [
277017
+ "▁▁▁▁▁▁▁▁▁▁▁▁",
277018
+ "▁▁"
277019
  ],
277020
  [
277021
+ "▁▁▁▁▁▁▁▁▁▁▁▁▁",
277022
+ ""
277023
  ],
277024
  [
277025
+ "▁▁▁▁▁▁▁▁▁▁",
277026
+ "▁▁▁▁"
277027
  ],
277028
  [
277029
  "▁▁▁",
277030
+ "▁▁▁▁▁▁▁▁▁▁▁"
277031
  ],
277032
  [
277033
+ "▁▁▁▁▁▁▁▁▁",
277034
+ "▁▁▁▁▁"
277035
  ],
277036
  [
277037
+ "▁▁▁▁▁▁▁",
277038
  "▁▁▁▁▁▁▁"
277039
  ],
277040
  [
277041
+ "▁▁▁▁▁▁▁▁▁▁▁",
277042
+ "▁▁▁"
277043
  ],
277044
  [
277045
+ "",
277046
+ "▁▁▁▁▁▁▁▁▁▁▁▁▁"
277047
+ ],
277048
+ [
277049
+ "▁▁",
277050
  "▁"
277051
  ],
277052
  [
277053
+ "",
277054
  "▁▁"
277055
  ],
277056
  [
277057
+ "▁▁",
277058
+ "▁▁▁▁▁▁▁"
277059
  ],
277060
  [
277061
+ "▁▁▁▁",
277062
  "▁▁▁▁▁"
277063
  ],
277064
  [
277065
+ "▁▁▁▁▁▁▁▁",
277066
+ ""
277067
  ],
277068
  [
277069
+ "▁▁▁▁▁",
277070
+ "▁▁▁▁"
277071
  ],
277072
  [
277073
+ "▁▁▁▁▁▁",
277074
+ "▁▁▁"
277075
  ],
277076
  [
277077
+ "▁▁▁",
277078
+ "▁▁▁▁▁▁"
277079
  ],
277080
  [
277081
  "▁▁▁▁▁▁▁",
277082
  "▁▁"
277083
  ],
277084
  [
277085
+ "",
 
 
 
 
277086
  "▁▁▁▁▁▁▁▁"
277087
  ],
277088
  [
277089
+ "▁▁",
277090
  "▁▁▁▁▁"
277091
  ],
277092
  [
277093
+ "▁▁▁▁",
 
 
 
 
277094
  "▁▁▁"
277095
  ],
277096
  [
277097
+ "▁▁▁▁▁",
277098
+ "▁▁"
277099
  ],
277100
  [
277101
+ "▁▁▁▁▁▁",
277102
+ ""
277103
  ],
277104
  [
277105
+ "▁▁▁",
277106
+ "▁▁▁▁"
277107
  ],
277108
  [
277109
+ "",
277110
+ "▁▁▁▁▁▁"
277111
  ],
277112
  [
277113
+ "▁▁",
277114
+ "▁▁▁▁▁▁▁▁▁"
277115
  ],
277116
  [
277117
+ "▁▁▁▁",
277118
+ "▁▁▁▁▁▁▁"
277119
  ],
277120
  [
277121
+ "▁▁▁▁▁▁▁▁",
277122
  "▁▁▁"
277123
  ],
277124
  [
277125
+ "▁▁▁▁▁",
277126
+ "▁▁▁▁▁▁"
277127
  ],
277128
  [
277129
+ "▁▁▁▁▁▁",
277130
+ "▁▁▁▁▁"
277131
+ ],
277132
+ [
277133
+ "▁▁▁▁▁▁▁▁▁▁",
277134
  "▁"
277135
  ],
277136
  [
277137
+ "▁▁▁",
277138
+ "▁▁▁▁▁▁▁▁"
277139
  ],
277140
  [
277141
+ "▁▁▁▁▁▁▁▁▁",
277142
+ "▁▁"
277143
  ],
277144
  [
277145
+ "▁▁▁▁▁▁▁",
277146
+ "▁▁▁▁"
277147
  ],
277148
  [
277149
  "▁",
277150
+ "▁▁▁▁▁▁▁▁▁▁"
277151
  ],
277152
  [
277153
+ "▁▁",
277154
+ "▁▁▁▁▁▁▁▁▁▁▁▁▁"
277155
  ],
277156
  [
277157
+ "▁▁▁▁",
277158
+ "▁▁▁▁▁▁▁▁▁▁▁"
277159
  ],
277160
  [
277161
+ "▁▁▁▁▁▁▁▁",
277162
+ "▁▁▁▁▁▁▁"
277163
  ],
277164
  [
277165
+ "▁▁▁▁▁",
277166
  "▁▁▁▁▁▁▁▁▁▁"
277167
  ],
277168
  [
277169
+ "▁▁▁▁▁▁",
277170
+ "▁▁▁▁▁▁▁▁▁"
277171
  ],
277172
  [
277173
+ "▁▁▁▁▁▁▁▁▁▁▁▁",
277174
  "▁▁▁"
277175
  ],
277176
  [
277177
+ "▁▁▁▁▁▁▁▁▁▁▁▁▁",
277178
+ "▁▁"
277179
  ],
277180
  [
277181
+ "▁▁▁▁▁▁▁▁▁▁",
277182
+ "▁▁▁▁▁"
277183
  ],
277184
  [
277185
+ "▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
277186
+ ""
277187
  ],
277188
  [
277189
+ "▁▁▁",
277190
+ "▁▁▁▁▁▁▁▁▁▁▁▁"
277191
+ ],
277192
+ [
277193
+ "▁▁▁▁▁▁▁▁▁",
277194
+ "▁▁▁▁▁▁"
277195
+ ],
277196
+ [
277197
+ "▁▁▁▁▁▁▁",
277198
+ "▁▁▁▁▁▁▁▁"
277199
+ ],
277200
+ [
277201
+ "▁▁▁▁▁▁▁▁▁▁▁",
277202
+ "▁▁▁▁"
277203
  ],
277204
  [
277205
  "▁",
277206
+ "▁▁▁▁▁▁▁▁▁▁▁▁▁▁"
277207
  ]
277208
  ]
277209
  }
tokenizer_config.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "add_bos_token": true,
3
  "add_eos_token": false,
4
  "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "0": {
7
  "content": "<unk>",
8
  "lstrip": false,
9
- "normalized": true,
10
  "rstrip": false,
11
  "single_word": false,
12
  "special": true
@@ -14,7 +14,7 @@
14
  "1": {
15
  "content": "<s>",
16
  "lstrip": false,
17
- "normalized": true,
18
  "rstrip": false,
19
  "single_word": false,
20
  "special": true
@@ -22,18 +22,108 @@
22
  "2": {
23
  "content": "</s>",
24
  "lstrip": false,
25
- "normalized": true,
 
 
 
 
 
 
 
 
26
  "rstrip": false,
27
  "single_word": false,
28
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  }
30
  },
31
  "bos_token": "<s>",
 
32
  "clean_up_tokenization_spaces": false,
33
- "eos_token": "</s>",
34
- "legacy": true,
35
- "model_max_length": 2048,
36
- "pad_token": null,
 
37
  "sp_model_kwargs": {},
38
  "tokenizer_class": "LlamaTokenizer",
39
  "unk_token": "<unk>",
 
1
  {
2
+ "add_bos_token": false,
3
  "add_eos_token": false,
4
  "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "0": {
7
  "content": "<unk>",
8
  "lstrip": false,
9
+ "normalized": false,
10
  "rstrip": false,
11
  "single_word": false,
12
  "special": true
 
14
  "1": {
15
  "content": "<s>",
16
  "lstrip": false,
17
+ "normalized": false,
18
  "rstrip": false,
19
  "single_word": false,
20
  "special": true
 
22
  "2": {
23
  "content": "</s>",
24
  "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": true,
27
+ "single_word": false,
28
+ "special": false
29
+ },
30
+ "32000": {
31
+ "content": "<|endoftext|>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
  "rstrip": false,
35
  "single_word": false,
36
  "special": true
37
+ },
38
+ "32001": {
39
+ "content": "<|assistant|>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": true,
43
+ "single_word": false,
44
+ "special": true
45
+ },
46
+ "32002": {
47
+ "content": "<|placeholder1|>",
48
+ "lstrip": false,
49
+ "normalized": false,
50
+ "rstrip": true,
51
+ "single_word": false,
52
+ "special": true
53
+ },
54
+ "32003": {
55
+ "content": "<|placeholder2|>",
56
+ "lstrip": false,
57
+ "normalized": false,
58
+ "rstrip": true,
59
+ "single_word": false,
60
+ "special": true
61
+ },
62
+ "32004": {
63
+ "content": "<|placeholder3|>",
64
+ "lstrip": false,
65
+ "normalized": false,
66
+ "rstrip": true,
67
+ "single_word": false,
68
+ "special": true
69
+ },
70
+ "32005": {
71
+ "content": "<|placeholder4|>",
72
+ "lstrip": false,
73
+ "normalized": false,
74
+ "rstrip": true,
75
+ "single_word": false,
76
+ "special": true
77
+ },
78
+ "32006": {
79
+ "content": "<|system|>",
80
+ "lstrip": false,
81
+ "normalized": false,
82
+ "rstrip": true,
83
+ "single_word": false,
84
+ "special": true
85
+ },
86
+ "32007": {
87
+ "content": "<|end|>",
88
+ "lstrip": false,
89
+ "normalized": false,
90
+ "rstrip": true,
91
+ "single_word": false,
92
+ "special": true
93
+ },
94
+ "32008": {
95
+ "content": "<|placeholder5|>",
96
+ "lstrip": false,
97
+ "normalized": false,
98
+ "rstrip": true,
99
+ "single_word": false,
100
+ "special": true
101
+ },
102
+ "32009": {
103
+ "content": "<|placeholder6|>",
104
+ "lstrip": false,
105
+ "normalized": false,
106
+ "rstrip": true,
107
+ "single_word": false,
108
+ "special": true
109
+ },
110
+ "32010": {
111
+ "content": "<|user|>",
112
+ "lstrip": false,
113
+ "normalized": false,
114
+ "rstrip": true,
115
+ "single_word": false,
116
+ "special": true
117
  }
118
  },
119
  "bos_token": "<s>",
120
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
121
  "clean_up_tokenization_spaces": false,
122
+ "eos_token": "<|endoftext|>",
123
+ "legacy": false,
124
+ "model_max_length": 131072,
125
+ "pad_token": "<|endoftext|>",
126
+ "padding_side": "left",
127
  "sp_model_kwargs": {},
128
  "tokenizer_class": "LlamaTokenizer",
129
  "unk_token": "<unk>",