Add new SentenceTransformer model.
Browse files
- 1_Pooling/config.json +10 -0
- README.md +460 -0
- config.json +24 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +14 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +51 -0
- tokenizer.json +0 -0
- tokenizer_config.json +72 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 768,
|
| 3 |
+
"pooling_mode_cls_token": false,
|
| 4 |
+
"pooling_mode_mean_tokens": true,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
| 7 |
+
"pooling_mode_weightedmean_tokens": false,
|
| 8 |
+
"pooling_mode_lasttoken": false,
|
| 9 |
+
"include_prompt": true
|
| 10 |
+
}
|
README.md
ADDED
|
@@ -0,0 +1,460 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- sentence-transformers
|
| 4 |
+
- sentence-similarity
|
| 5 |
+
- feature-extraction
|
| 6 |
+
- generated_from_trainer
|
| 7 |
+
- dataset_size:20108
|
| 8 |
+
- loss:MultipleNegativesRankingLoss
|
| 9 |
+
base_model: sentence-transformers/all-mpnet-base-v2
|
| 10 |
+
widget:
|
| 11 |
+
- source_sentence: I'm in search of a durable cover that can safeguard my cycling
|
| 12 |
+
device while still allowing easy access to its features. It should be lightweight
|
| 13 |
+
and available in different color options.
|
| 14 |
+
sentences:
|
| 15 |
+
- 'Title: Portable Radio AM FM KAVNLON-001 Descripion: [''1'']'
|
| 16 |
+
- 'Title: GE WB27T11078 Keypanel Supt Asm Wh Descripion: [''This is an O.E.M. Authorized
|
| 17 |
+
part . This is an authorized aftermarket product. Fits with various WB27T11078
|
| 18 |
+
brand models.'']'
|
| 19 |
+
- 'Title: Garmin 010-12791-00 Silicone Case, Edge 530, Black Descripion: [''Protect
|
| 20 |
+
your Edge® 830 with this form-fitting, removable silicone case. Available in other
|
| 21 |
+
colors (sold separately).'']'
|
| 22 |
+
- source_sentence: I'm looking for a stylish handbag with a textured finish that offers
|
| 23 |
+
a central storage area. It should have a chic design suitable for everyday use.
|
| 24 |
+
sentences:
|
| 25 |
+
- "Title: LINERY Ski Gloves Men Waterproof Snowboard Gloves for Cold Weather Skiing\
|
| 26 |
+
\ & Snowboarding Gloves Fits Both Men & Women Descripion: ['Our' 'LINERY'\n 'ski\
|
| 27 |
+
\ glove will be the only glove you need for a whole season at the ski resort.\
|
| 28 |
+
\ Thanks to water-repellent shells, fleece liner and durable PU palms, these insulated\
|
| 29 |
+
\ gloves keep your hands dry and cozy while you shred all season long.'\n '4 Size\
|
| 30 |
+
\ for You to Choose( Glove Size ):'\n 'LADY SMALL: 3.25-3.45 inches across palm\
|
| 31 |
+
\ of gloves, 2.75-2.95 inch length of middle finger, 10.25 inches total length.'\n\
|
| 32 |
+
\ 'LADY MEDIUM: 3.45-3.65 inches across palm of gloves, 2.95-3.15 inch length\
|
| 33 |
+
\ of middle finger, 10.63 inches total length.'\n 'MEN MEDIUM: 3.55-3.75 inches\
|
| 34 |
+
\ across palm of gloves, 3.15-3.35 inch length of middle finger, 10.95 inches\
|
| 35 |
+
\ total length.'\n 'MEN LARGE: 3.85-4.05 inches across palm of gloves, 3.35-3.54\
|
| 36 |
+
\ inch length of middle finger, 11.25 inches total length.'\n 'Note: It’s suggested\
|
| 37 |
+
\ to test the palm circumference and middle finger length, take the size chart\
|
| 38 |
+
\ as reference to choose the right size. Manual measured, 0.4-0.8 inches deviation\
|
| 39 |
+
\ allowed.']"
|
| 40 |
+
- 'Title: GUESS Colette Girlfriend Satchel Descripion: [''Colette Girlfriend Satchel
|
| 41 |
+
in pebble pu with center zip compartment'']'
|
| 42 |
+
- 'Title: French Toast Girls'' Stretch Skinny Pull-on Capri Pant Descripion: ["Easy
|
| 43 |
+
to wear and even easier to love! French Toast''s classroom capri features a simple
|
| 44 |
+
navy and white elastic stripe on the waistband, functional front and back pockets
|
| 45 |
+
and pull-on styling, making it even to easier to get her dressed and out the door."]'
|
| 46 |
+
- source_sentence: I'm looking for a versatile bag that’s perfect for daily use, with
|
| 47 |
+
ample room for my essentials. It should be made from durable materials, have a
|
| 48 |
+
secure closure, and offer comfortable carrying options like adjustable straps.
|
| 49 |
+
sentences:
|
| 50 |
+
- 'Title: COVERGIRL Katy Kat Gift Set with Matte Lipsticks in Crimson Cat, Kitty
|
| 51 |
+
Purry, Magenta Minx and Maroon Meow (packaging may vary) Descripion: [''The Purrrfect
|
| 52 |
+
holiday gift. Deck yourself or someone you love with the new Katy Kat Collection.
|
| 53 |
+
This gift was created by Katy Perry with 4 shades of Katy Kat Matte Lipstick in
|
| 54 |
+
Crimson Cat, Kitty Purry, Magenta Minx, and Maroon Meow. You get Katy Kat Matte
|
| 55 |
+
Lipstick that keeps lips pillowy soft. So enjoy! And have a beautiful holiday
|
| 56 |
+
season!'']'
|
| 57 |
+
- 'Title: Bewahly Vertical Laptop Stand[Adjustable Size],Aluminum Adjustable Laptop
|
| 58 |
+
Holder, Saving Space, Suitable for MacBook Pro/Air, iPad, Samsung, Huawei, Surface,
|
| 59 |
+
Dell, HP, Lenovo and Others (Gray) Descripion: [''Colour:grey Adjustable width:
|
| 60 |
+
Adjustable for laptop width from 0.6 inch to 2.6 inch. Supplied Allen key to freely
|
| 61 |
+
adjust to the most suitable width, suitable for all types of laptops. Aluminium
|
| 62 |
+
alloy. The vertical laptop stand is made of superior aluminum alloy that resembles
|
| 63 |
+
the material of the MacBook. Non-slip design: The excellent notebook stand provides
|
| 64 |
+
many silicone pads such as in the U-type groove and under the notebook dock to
|
| 65 |
+
prevent your device from being scratched and stay on the desk. Wide compatibility:
|
| 66 |
+
Compatible with iPad/iPhone, MacBook Pro/Air, Microsoft Surface, Laptops, Notebooks,
|
| 67 |
+
Dell Samsung Lenovo Acer Sony Asus XPS HP ENVY Series and even books. Space-saving:
|
| 68 |
+
Keep your laptop vertical and make your desktop well organized to save more space
|
| 69 |
+
quickly.'']'
|
| 70 |
+
- 'Title: Peak Design Everyday Messenger 13L Descripion: [''The Peak Design(tm)
|
| 71 |
+
13 L Everyday Messenger V2 bag offers adequate space to keep your essentials handy.
|
| 72 |
+
It will be your go-to everyday carry. Made of recycled 400D nylon, polyester,
|
| 73 |
+
EVA, and Hypalon. MagLatch closure. Removable and adjustable padded shoulder straps.
|
| 74 |
+
Crossbody strap and two hidden external straps.'']'
|
| 75 |
+
- source_sentence: I'm looking for a special keychain gift that celebrates a love
|
| 76 |
+
for dogs and is perfect for a grandmother. It should be made from quality materials
|
| 77 |
+
and have a meaningful design that conveys affection.
|
| 78 |
+
sentences:
|
| 79 |
+
- 'Title: Bosch XR7LDC Super Spark Plug, (Pack of 1) Descripion: [''Item Name:'',
|
| 80 |
+
''Spark Plug 1pcs'', ''Part Brand:'', ''BOSCH'', ''OEM Numbers:'', ''12 12 1 465
|
| 81 |
+
104 / XX XX 1 464 104'', ''Electrode Gap [mm]:'', ''0,8'', ''Tightening Torque
|
| 82 |
+
[Nm]:'', ''23'', ''Tightening angle [degrees]:'', ''90'', ''Outer Thread [mm]:'',
|
| 83 |
+
''12'', ''Thread Pitch [mm]:'', ''1,25'', ''Thread Length [mm]:'', ''19'', ''Spanner
|
| 84 |
+
Size:'', ''17,5'', ''Spark Position [mm]:'', ''3'', ''Number of Poles:'', ''2'']'
|
| 85 |
+
- 'Title: Aloe Vera Microwave Wax - Home Hair Removal Hard Wax - Brazilian Microwaveable
|
| 86 |
+
Wax - Hot Stripless Wax for Body Waxing (Face, Eyebrows, Upper lip, Underarms,
|
| 87 |
+
Legs and Arms) - Self Waxing Pot 200g Descripion: []'
|
| 88 |
+
- "Title: Kivosliviz Dog Grandma Gifts Keychain Dog Paw Print Jewelry Best Dog Grandma\
|
| 89 |
+
\ Ever Dog Lover Keychain Dog Grandmother Keychains Descripion: ['❤PRODUCT NAME❤'\n\
|
| 90 |
+
\ 'Kivosliviz Dog Grandma Gifts Dog Paw Print Jewelry Best Dog Lover Keychain\
|
| 91 |
+
\ This jewelry is made of high quality 316L stainless steel, it is processed by\
|
| 92 |
+
\ precise polishing and made of environmental-friendly material.'\n '❤KIVOSLIVIZ\
|
| 93 |
+
\ JEWELRY❤'\n 'Love can Change the World.You are more beautiful and confident\
|
| 94 |
+
\ than you imagine.'\n '❤Maintenance Instruction❤'\n 'a. Keep it dry and avoid\
|
| 95 |
+
\ wearing it when bathing, doing housework and swimming. b. Perfume is also corrosive\
|
| 96 |
+
\ to jewelry. Wear your perfume before putting on the piece. c. Grease and sweat\
|
| 97 |
+
\ on hands will caused corrosion. Be careful. d. Rub your Jewelry with baby wipes,they\
|
| 98 |
+
\ perform miracles on jewelry without damaging the surface, then rub with a clean\
|
| 99 |
+
\ dry soft cotton cloth to bring back the shine.']"
|
| 100 |
+
- source_sentence: I'm looking for a casual short-sleeve top with a fun and stylish
|
| 101 |
+
design for women. It should have a round neck and a playful message, perfect for
|
| 102 |
+
everyday wear. I'd prefer something that feels comfortable and has a unique print.
|
| 103 |
+
sentences:
|
| 104 |
+
- "Title: 2 Pack USB-C Female to for Garmin Watch Charger Connector Male Adapter,\
|
| 105 |
+
\ Type C to Charging Adapter for Garmin Instinct 2 Solar/Fenix 5/6/7X/7/6X/Venu\
|
| 106 |
+
\ 2 Plus/EPIX/Forerunner 955 255 Descripion: ['Notice:'\n '-- Charing Connector\
|
| 107 |
+
\ Adapter only, Charging Cable not included.'\n \"-- This adapter can only be\
|
| 108 |
+
\ used for USB-A at one end and USB-C at the other end. BUT it can't be used for\
|
| 109 |
+
\ USB-C at both ends !!! It can charge and transmit data, Please pay attention\
|
| 110 |
+
\ that!\"\n 'From Abanen, Smart Watch Accessories Brand'\n '-- An ideal solution\
|
| 111 |
+
\ to charge and sync Garmin watchs by a USB-C cable, saving the trouble of carrying\
|
| 112 |
+
\ extra cable around.'\n 'Compatible with Garmin Smartwatch:'\n '-- Fenix 7X Solar\
|
| 113 |
+
\ / Fenix 7X Sapphire Solar'\n '-- Fenix 7S Solar / Fenix 7S Sapphire Solar'\n\
|
| 114 |
+
\ '-- Fenix 7 Solar / Fenix 7 Sapphire Solar' '-- EPIX (Gen 2)'\n '-- Tactix 7\
|
| 115 |
+
\ Pro' '-- Instinct 2 / Instinct 2S'\n '-- Fenix 6X Pro / Fenix 6X Sapphire' '--\
|
| 116 |
+
\ Fenix 5X /Fenix 5X Plus'\n '-- Tactix Delta/Tactix Charlie/Tactix Bravo' '--\
|
| 117 |
+
\ Quatix 3'\n '-- D2 Charlie/D2 Delta PX' '-- Fenix 5/Fenix 5 Plus,'\n '-- Fenix\
|
| 118 |
+
\ 6 Pro/Sapphire' '-- Forerunner 935/945,'\n '-- Instinct /Tactical Solar' '--\
|
| 119 |
+
\ Tactix Charlie /Delta,'\n '-- Quatix 5 / Quatix 6,' '-- Fenix 5S/Fenix 5S Plus,'\n\
|
| 120 |
+
\ '-- Fenix 6S Pro/Sapphire,' '-- Forerunner 245/645,'\n '-- Approach S10 / S40\
|
| 121 |
+
\ /S60 ,' '-- Vivoactive 3 / 3Music'\n '-- Vivoactive 4 / 4S,']"
|
| 122 |
+
- "Title: HONTOUTE Women Mom Boss T Shirt Funny Leopard Letter Print Shirts with\
|
| 123 |
+
\ Saying Vintage O Neck Short Sleeve Tees Casual Tops Descripion: ['Women Mom\
|
| 124 |
+
\ Boss T-Shirt Funny Leopard Letters Printed Shirts with Saying Vintage Round\
|
| 125 |
+
\ Neck Short Sleeve Tees Cute Casual Tops'\n 'Size Chart:(1inch=2.54cm)'\n 'Size\
|
| 126 |
+
\ S: Length 66cm/25.98\" Bust 94cm/37.01\" Size M: Length 67cm/26.38\" Bust 98cm/38.58\"\
|
| 127 |
+
\ Size L: Length 68cm/26.77\" Bust 102cm/40.16\" Size XL: Length 69cm/27.17\"\
|
| 128 |
+
\ Bust 110cm/43.31\" Please allow slight (±3cm)manual measurement deviation for\
|
| 129 |
+
\ the data The real color of the item may be slightly different from the pictures\
|
| 130 |
+
\ shown on website,caused by many factors such as brightness of your monitor and\
|
| 131 |
+
\ light brightness'\n 'Two Ways About Delivery:' 'FBM:'\n 'Ship from China,88%\
|
| 132 |
+
\ customers will receive within 2 weeks,9.9% lucky dog will receive within 1 week,and\
|
| 133 |
+
\ others will receive within 3-4 weeks'\n 'FBA:' 'Customers will receive within\
|
| 134 |
+
\ 1-3 days' 'Service Guarantee:'\n 'We endeavors 100% customer satisfaction service\
|
| 135 |
+
\ and experience If you receive damaged or wrong items Please contact us with\
|
| 136 |
+
\ attached pictures about the problem We will provide you a satisfactory solution\
|
| 137 |
+
\ within 24 hours You may find that someone sells at a lower price than us But\
|
| 138 |
+
\ they cannot guarantee the same quality and service as we do If you are satisfied\
|
| 139 |
+
\ with our product or service Hope you can leave your positive feedback']"
|
| 140 |
+
- 'Title: Batman: Gotham By Gaslight Descripion: ["It''s Batman vs. Jack the Ripper
|
| 141 |
+
in an Elseworld''s adventure that imagines the Dark Knight over a hundred years
|
| 142 |
+
ago in a turn-of-the-century Gotham."]'
|
| 143 |
+
pipeline_tag: sentence-similarity
|
| 144 |
+
library_name: sentence-transformers
|
| 145 |
+
---
|
| 146 |
+
|
| 147 |
+
# SentenceTransformer based on sentence-transformers/all-mpnet-base-v2
|
| 148 |
+
|
| 149 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
| 150 |
+
|
| 151 |
+
## Model Details
|
| 152 |
+
|
| 153 |
+
### Model Description
|
| 154 |
+
- **Model Type:** Sentence Transformer
|
| 155 |
+
- **Base model:** [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) <!-- at revision 9a3225965996d404b775526de6dbfe85d3368642 -->
|
| 156 |
+
- **Maximum Sequence Length:** 128 tokens
|
| 157 |
+
- **Output Dimensionality:** 768 dimensions
|
| 158 |
+
- **Similarity Function:** Cosine Similarity
|
| 159 |
+
<!-- - **Training Dataset:** Unknown -->
|
| 160 |
+
<!-- - **Language:** Unknown -->
|
| 161 |
+
<!-- - **License:** Unknown -->
|
| 162 |
+
|
| 163 |
+
### Model Sources
|
| 164 |
+
|
| 165 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
| 166 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
| 167 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
| 168 |
+
|
| 169 |
+
### Full Model Architecture
|
| 170 |
+
|
| 171 |
+
```
|
| 172 |
+
SentenceTransformer(
|
| 173 |
+
(0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: MPNetModel
|
| 174 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
| 175 |
+
)
|
| 176 |
+
```
|
| 177 |
+
|
| 178 |
+
## Usage
|
| 179 |
+
|
| 180 |
+
### Direct Usage (Sentence Transformers)
|
| 181 |
+
|
| 182 |
+
First install the Sentence Transformers library:
|
| 183 |
+
|
| 184 |
+
```bash
|
| 185 |
+
pip install -U sentence-transformers
|
| 186 |
+
```
|
| 187 |
+
|
| 188 |
+
Then you can load this model and run inference.
|
| 189 |
+
```python
|
| 190 |
+
from sentence_transformers import SentenceTransformer
|
| 191 |
+
|
| 192 |
+
# Download from the 🤗 Hub
|
| 193 |
+
model = SentenceTransformer("knguyennguyen/mpnet_20k")
|
| 194 |
+
# Run inference
|
| 195 |
+
sentences = [
|
| 196 |
+
"I'm looking for a casual short-sleeve top with a fun and stylish design for women. It should have a round neck and a playful message, perfect for everyday wear. I'd prefer something that feels comfortable and has a unique print.",
|
| 197 |
+
'Title: HONTOUTE Women Mom Boss T Shirt Funny Leopard Letter Print Shirts with Saying Vintage O Neck Short Sleeve Tees Casual Tops Descripion: [\'Women Mom Boss T-Shirt Funny Leopard Letters Printed Shirts with Saying Vintage Round Neck Short Sleeve Tees Cute Casual Tops\'\n \'Size Chart:(1inch=2.54cm)\'\n \'Size S: Length 66cm/25.98" Bust 94cm/37.01" Size M: Length 67cm/26.38" Bust 98cm/38.58" Size L: Length 68cm/26.77" Bust 102cm/40.16" Size XL: Length 69cm/27.17" Bust 110cm/43.31" Please allow slight (±3cm)manual measurement deviation for the data The real color of the item may be slightly different from the pictures shown on website,caused by many factors such as brightness of your monitor and light brightness\'\n \'Two Ways About Delivery:\' \'FBM:\'\n \'Ship from China,88% customers will receive within 2 weeks,9.9% lucky dog will receive within 1 week,and others will receive within 3-4 weeks\'\n \'FBA:\' \'Customers will receive within 1-3 days\' \'Service Guarantee:\'\n \'We endeavors 100% customer satisfaction service and experience If you receive damaged or wrong items Please contact us with attached pictures about the problem We will provide you a satisfactory solution within 24 hours You may find that someone sells at a lower price than us But they cannot guarantee the same quality and service as we do If you are satisfied with our product or service Hope you can leave your positive feedback\']',
|
| 198 |
+
'Title: Batman: Gotham By Gaslight Descripion: ["It\'s Batman vs. Jack the Ripper in an Elseworld\'s adventure that imagines the Dark Knight over a hundred years ago in a turn-of-the-century Gotham."]',
|
| 199 |
+
]
|
| 200 |
+
embeddings = model.encode(sentences)
|
| 201 |
+
print(embeddings.shape)
|
| 202 |
+
# (3, 768)
|
| 203 |
+
|
| 204 |
+
# Get the similarity scores for the embeddings
|
| 205 |
+
similarities = model.similarity(embeddings, embeddings)
|
| 206 |
+
print(similarities.shape)
|
| 207 |
+
# torch.Size([3, 3])
|
| 208 |
+
```
|
| 209 |
+
|
| 210 |
+
<!--
|
| 211 |
+
### Direct Usage (Transformers)
|
| 212 |
+
|
| 213 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
| 214 |
+
|
| 215 |
+
</details>
|
| 216 |
+
-->
|
| 217 |
+
|
| 218 |
+
<!--
|
| 219 |
+
### Downstream Usage (Sentence Transformers)
|
| 220 |
+
|
| 221 |
+
You can finetune this model on your own dataset.
|
| 222 |
+
|
| 223 |
+
<details><summary>Click to expand</summary>
|
| 224 |
+
|
| 225 |
+
</details>
|
| 226 |
+
-->
|
| 227 |
+
|
| 228 |
+
<!--
|
| 229 |
+
### Out-of-Scope Use
|
| 230 |
+
|
| 231 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 232 |
+
-->
|
| 233 |
+
|
| 234 |
+
<!--
|
| 235 |
+
## Bias, Risks and Limitations
|
| 236 |
+
|
| 237 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
| 238 |
+
-->
|
| 239 |
+
|
| 240 |
+
<!--
|
| 241 |
+
### Recommendations
|
| 242 |
+
|
| 243 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
| 244 |
+
-->
|
| 245 |
+
|
| 246 |
+
## Training Details
|
| 247 |
+
|
| 248 |
+
### Training Dataset
|
| 249 |
+
|
| 250 |
+
#### Unnamed Dataset
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
* Size: 20,108 training samples
|
| 254 |
+
* Columns: <code>sentence_0</code> and <code>sentence_1</code>
|
| 255 |
+
* Approximate statistics based on the first 1000 samples:
|
| 256 |
+
| | sentence_0 | sentence_1 |
|
| 257 |
+
|:--------|:-----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
|
| 258 |
+
| type | string | string |
|
| 259 |
+
| details | <ul><li>min: 11 tokens</li><li>mean: 36.23 tokens</li><li>max: 73 tokens</li></ul> | <ul><li>min: 13 tokens</li><li>mean: 88.42 tokens</li><li>max: 128 tokens</li></ul> |
|
| 260 |
+
* Samples:
|
| 261 |
+
| sentence_0 | sentence_1 |
|
| 262 |
+
|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| 263 |
+
| <code>I'm looking for a stylish pair of eyewear with a luxurious touch. They should have a unique color combination and come with a special case and cleaning accessory.</code> | <code>Title: Sunglasses Gucci GG 0528 S- 008 GOLD/BROWN CRYSTAL, 63-14-150 Descripion: ['Authentic Gucci GG0528 S 008 Gold Crystal/Brown Sunglasses. Comes with a matching satin flannel pouch and ivory microfiber cloth and Authenticity card.']</code> |
|
| 264 |
+
| <code>I'm looking for comfortable and stylish capri pants for girls that are easy to wear and have a stretchy fit.</code> | <code>Title: French Toast Girls' Stretch Skinny Pull-on Capri Pant Descripion: ["Easy to wear and even easier to love! French Toast's classroom capri features a simple navy and white elastic stripe on the waistband, functional front and back pockets and pull-on styling, making it even to easier to get her dressed and out the door."]</code> |
|
| 265 |
+
| <code>I'm in need of a replacement screen for a laptop that offers clear visuals and fits a specific model. It should provide high-definition quality for general use.</code> | <code>Title: BRIGHTFOCAL New Screen Replacement for HP 14-CF0006DX HD 1366x768 LCD LED Display Panel Descripion: ['BRIGHTFOCAL New Screen Replacement for HP 14-CF0006DX HD 1366x768 LCD LED Display Panel']</code> |
|
| 266 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
| 267 |
+
```json
|
| 268 |
+
{
|
| 269 |
+
"scale": 20.0,
|
| 270 |
+
"similarity_fct": "cos_sim"
|
| 271 |
+
}
|
| 272 |
+
```
|
| 273 |
+
|
| 274 |
+
### Training Hyperparameters
|
| 275 |
+
#### Non-Default Hyperparameters
|
| 276 |
+
|
| 277 |
+
- `per_device_train_batch_size`: 128
|
| 278 |
+
- `per_device_eval_batch_size`: 128
|
| 279 |
+
- `num_train_epochs`: 5
|
| 280 |
+
- `multi_dataset_batch_sampler`: round_robin
|
| 281 |
+
|
| 282 |
+
#### All Hyperparameters
|
| 283 |
+
<details><summary>Click to expand</summary>
|
| 284 |
+
|
| 285 |
+
- `overwrite_output_dir`: False
|
| 286 |
+
- `do_predict`: False
|
| 287 |
+
- `eval_strategy`: no
|
| 288 |
+
- `prediction_loss_only`: True
|
| 289 |
+
- `per_device_train_batch_size`: 128
|
| 290 |
+
- `per_device_eval_batch_size`: 128
|
| 291 |
+
- `per_gpu_train_batch_size`: None
|
| 292 |
+
- `per_gpu_eval_batch_size`: None
|
| 293 |
+
- `gradient_accumulation_steps`: 1
|
| 294 |
+
- `eval_accumulation_steps`: None
|
| 295 |
+
- `torch_empty_cache_steps`: None
|
| 296 |
+
- `learning_rate`: 5e-05
|
| 297 |
+
- `weight_decay`: 0.0
|
| 298 |
+
- `adam_beta1`: 0.9
|
| 299 |
+
- `adam_beta2`: 0.999
|
| 300 |
+
- `adam_epsilon`: 1e-08
|
| 301 |
+
- `max_grad_norm`: 1
|
| 302 |
+
- `num_train_epochs`: 5
|
| 303 |
+
- `max_steps`: -1
|
| 304 |
+
- `lr_scheduler_type`: linear
|
| 305 |
+
- `lr_scheduler_kwargs`: {}
|
| 306 |
+
- `warmup_ratio`: 0.0
|
| 307 |
+
- `warmup_steps`: 0
|
| 308 |
+
- `log_level`: passive
|
| 309 |
+
- `log_level_replica`: warning
|
| 310 |
+
- `log_on_each_node`: True
|
| 311 |
+
- `logging_nan_inf_filter`: True
|
| 312 |
+
- `save_safetensors`: True
|
| 313 |
+
- `save_on_each_node`: False
|
| 314 |
+
- `save_only_model`: False
|
| 315 |
+
- `restore_callback_states_from_checkpoint`: False
|
| 316 |
+
- `no_cuda`: False
|
| 317 |
+
- `use_cpu`: False
|
| 318 |
+
- `use_mps_device`: False
|
| 319 |
+
- `seed`: 42
|
| 320 |
+
- `data_seed`: None
|
| 321 |
+
- `jit_mode_eval`: False
|
| 322 |
+
- `use_ipex`: False
|
| 323 |
+
- `bf16`: False
|
| 324 |
+
- `fp16`: False
|
| 325 |
+
- `fp16_opt_level`: O1
|
| 326 |
+
- `half_precision_backend`: auto
|
| 327 |
+
- `bf16_full_eval`: False
|
| 328 |
+
- `fp16_full_eval`: False
|
| 329 |
+
- `tf32`: None
|
| 330 |
+
- `local_rank`: 0
|
| 331 |
+
- `ddp_backend`: None
|
| 332 |
+
- `tpu_num_cores`: None
|
| 333 |
+
- `tpu_metrics_debug`: False
|
| 334 |
+
- `debug`: []
|
| 335 |
+
- `dataloader_drop_last`: False
|
| 336 |
+
- `dataloader_num_workers`: 0
|
| 337 |
+
- `dataloader_prefetch_factor`: None
|
| 338 |
+
- `past_index`: -1
|
| 339 |
+
- `disable_tqdm`: False
|
| 340 |
+
- `remove_unused_columns`: True
|
| 341 |
+
- `label_names`: None
|
| 342 |
+
- `load_best_model_at_end`: False
|
| 343 |
+
- `ignore_data_skip`: False
|
| 344 |
+
- `fsdp`: []
|
| 345 |
+
- `fsdp_min_num_params`: 0
|
| 346 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 347 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
| 348 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
| 349 |
+
- `deepspeed`: None
|
| 350 |
+
- `label_smoothing_factor`: 0.0
|
| 351 |
+
- `optim`: adamw_torch
|
| 352 |
+
- `optim_args`: None
|
| 353 |
+
- `adafactor`: False
|
| 354 |
+
- `group_by_length`: False
|
| 355 |
+
- `length_column_name`: length
|
| 356 |
+
- `ddp_find_unused_parameters`: None
|
| 357 |
+
- `ddp_bucket_cap_mb`: None
|
| 358 |
+
- `ddp_broadcast_buffers`: False
|
| 359 |
+
- `dataloader_pin_memory`: True
|
| 360 |
+
- `dataloader_persistent_workers`: False
|
| 361 |
+
- `skip_memory_metrics`: True
|
| 362 |
+
- `use_legacy_prediction_loop`: False
|
| 363 |
+
- `push_to_hub`: False
|
| 364 |
+
- `resume_from_checkpoint`: None
|
| 365 |
+
- `hub_model_id`: None
|
| 366 |
+
- `hub_strategy`: every_save
|
| 367 |
+
- `hub_private_repo`: False
|
| 368 |
+
- `hub_always_push`: False
|
| 369 |
+
- `gradient_checkpointing`: False
|
| 370 |
+
- `gradient_checkpointing_kwargs`: None
|
| 371 |
+
- `include_inputs_for_metrics`: False
|
| 372 |
+
- `eval_do_concat_batches`: True
|
| 373 |
+
- `fp16_backend`: auto
|
| 374 |
+
- `push_to_hub_model_id`: None
|
| 375 |
+
- `push_to_hub_organization`: None
|
| 376 |
+
- `mp_parameters`:
|
| 377 |
+
- `auto_find_batch_size`: False
|
| 378 |
+
- `full_determinism`: False
|
| 379 |
+
- `torchdynamo`: None
|
| 380 |
+
- `ray_scope`: last
|
| 381 |
+
- `ddp_timeout`: 1800
|
| 382 |
+
- `torch_compile`: False
|
| 383 |
+
- `torch_compile_backend`: None
|
| 384 |
+
- `torch_compile_mode`: None
|
| 385 |
+
- `dispatch_batches`: None
|
| 386 |
+
- `split_batches`: None
|
| 387 |
+
- `include_tokens_per_second`: False
|
| 388 |
+
- `include_num_input_tokens_seen`: False
|
| 389 |
+
- `neftune_noise_alpha`: None
|
| 390 |
+
- `optim_target_modules`: None
|
| 391 |
+
- `batch_eval_metrics`: False
|
| 392 |
+
- `eval_on_start`: False
|
| 393 |
+
- `use_liger_kernel`: False
|
| 394 |
+
- `eval_use_gather_object`: False
|
| 395 |
+
- `batch_sampler`: batch_sampler
|
| 396 |
+
- `multi_dataset_batch_sampler`: round_robin
|
| 397 |
+
|
| 398 |
+
</details>
|
| 399 |
+
|
| 400 |
+
### Training Logs
|
| 401 |
+
| Epoch | Step | Training Loss |
|
| 402 |
+
|:------:|:----:|:-------------:|
|
| 403 |
+
| 3.1646 | 500 | 0.493 |
|
| 404 |
+
|
| 405 |
+
|
| 406 |
+
### Framework Versions
|
| 407 |
+
- Python: 3.11.11
|
| 408 |
+
- Sentence Transformers: 3.1.1
|
| 409 |
+
- Transformers: 4.45.2
|
| 410 |
+
- PyTorch: 2.5.1+cu121
|
| 411 |
+
- Accelerate: 1.2.1
|
| 412 |
+
- Datasets: 3.2.0
|
| 413 |
+
- Tokenizers: 0.20.3
|
| 414 |
+
|
| 415 |
+
## Citation
|
| 416 |
+
|
| 417 |
+
### BibTeX
|
| 418 |
+
|
| 419 |
+
#### Sentence Transformers
|
| 420 |
+
```bibtex
|
| 421 |
+
@inproceedings{reimers-2019-sentence-bert,
|
| 422 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
| 423 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
| 424 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
| 425 |
+
month = "11",
|
| 426 |
+
year = "2019",
|
| 427 |
+
publisher = "Association for Computational Linguistics",
|
| 428 |
+
url = "https://arxiv.org/abs/1908.10084",
|
| 429 |
+
}
|
| 430 |
+
```
|
| 431 |
+
|
| 432 |
+
#### MultipleNegativesRankingLoss
|
| 433 |
+
```bibtex
|
| 434 |
+
@misc{henderson2017efficient,
|
| 435 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
| 436 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
| 437 |
+
year={2017},
|
| 438 |
+
eprint={1705.00652},
|
| 439 |
+
archivePrefix={arXiv},
|
| 440 |
+
primaryClass={cs.CL}
|
| 441 |
+
}
|
| 442 |
+
```
|
| 443 |
+
|
| 444 |
+
<!--
|
| 445 |
+
## Glossary
|
| 446 |
+
|
| 447 |
+
*Clearly define terms in order to be accessible across audiences.*
|
| 448 |
+
-->
|
| 449 |
+
|
| 450 |
+
<!--
|
| 451 |
+
## Model Card Authors
|
| 452 |
+
|
| 453 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
| 454 |
+
-->
|
| 455 |
+
|
| 456 |
+
<!--
|
| 457 |
+
## Model Card Contact
|
| 458 |
+
|
| 459 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
| 460 |
+
-->
|
config.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "sentence-transformers/all-mpnet-base-v2",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"MPNetModel"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"bos_token_id": 0,
|
| 8 |
+
"eos_token_id": 2,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 3072,
|
| 14 |
+
"layer_norm_eps": 1e-05,
|
| 15 |
+
"max_position_embeddings": 514,
|
| 16 |
+
"model_type": "mpnet",
|
| 17 |
+
"num_attention_heads": 12,
|
| 18 |
+
"num_hidden_layers": 12,
|
| 19 |
+
"pad_token_id": 1,
|
| 20 |
+
"relative_attention_num_buckets": 32,
|
| 21 |
+
"torch_dtype": "float32",
|
| 22 |
+
"transformers_version": "4.45.2",
|
| 23 |
+
"vocab_size": 30527
|
| 24 |
+
}
|
config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"__version__": {
|
| 3 |
+
"sentence_transformers": "3.1.1",
|
| 4 |
+
"transformers": "4.45.2",
|
| 5 |
+
"pytorch": "2.5.1+cu121"
|
| 6 |
+
},
|
| 7 |
+
"prompts": {},
|
| 8 |
+
"default_prompt_name": null,
|
| 9 |
+
"similarity_fn_name": null
|
| 10 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:11114953c0df482b9d4fede610b0104d051c6fa258d971726aa0d490504a7430
|
| 3 |
+
size 437967672
|
modules.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
}
|
| 14 |
+
]
|
sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 128,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"cls_token": {
|
| 10 |
+
"content": "<s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"eos_token": {
|
| 17 |
+
"content": "</s>",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"mask_token": {
|
| 24 |
+
"content": "<mask>",
|
| 25 |
+
"lstrip": true,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
},
|
| 30 |
+
"pad_token": {
|
| 31 |
+
"content": "<pad>",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false
|
| 36 |
+
},
|
| 37 |
+
"sep_token": {
|
| 38 |
+
"content": "</s>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false
|
| 43 |
+
},
|
| 44 |
+
"unk_token": {
|
| 45 |
+
"content": "[UNK]",
|
| 46 |
+
"lstrip": false,
|
| 47 |
+
"normalized": false,
|
| 48 |
+
"rstrip": false,
|
| 49 |
+
"single_word": false
|
| 50 |
+
}
|
| 51 |
+
}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<s>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<pad>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "</s>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": true,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"104": {
|
| 36 |
+
"content": "[UNK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
},
|
| 43 |
+
"30526": {
|
| 44 |
+
"content": "<mask>",
|
| 45 |
+
"lstrip": true,
|
| 46 |
+
"normalized": false,
|
| 47 |
+
"rstrip": false,
|
| 48 |
+
"single_word": false,
|
| 49 |
+
"special": true
|
| 50 |
+
}
|
| 51 |
+
},
|
| 52 |
+
"bos_token": "<s>",
|
| 53 |
+
"clean_up_tokenization_spaces": false,
|
| 54 |
+
"cls_token": "<s>",
|
| 55 |
+
"do_lower_case": true,
|
| 56 |
+
"eos_token": "</s>",
|
| 57 |
+
"mask_token": "<mask>",
|
| 58 |
+
"max_length": 128,
|
| 59 |
+
"model_max_length": 128,
|
| 60 |
+
"pad_to_multiple_of": null,
|
| 61 |
+
"pad_token": "<pad>",
|
| 62 |
+
"pad_token_type_id": 0,
|
| 63 |
+
"padding_side": "right",
|
| 64 |
+
"sep_token": "</s>",
|
| 65 |
+
"stride": 0,
|
| 66 |
+
"strip_accents": null,
|
| 67 |
+
"tokenize_chinese_chars": true,
|
| 68 |
+
"tokenizer_class": "MPNetTokenizer",
|
| 69 |
+
"truncation_side": "right",
|
| 70 |
+
"truncation_strategy": "longest_first",
|
| 71 |
+
"unk_token": "[UNK]"
|
| 72 |
+
}
|
vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|