Training in progress, step 43, checkpoint
Browse files
- .gitattributes +1 -0
- last-checkpoint/1_Pooling/config.json +10 -0
- last-checkpoint/README.md +564 -0
- last-checkpoint/added_tokens.json +28 -0
- last-checkpoint/chat_template.jinja +85 -0
- last-checkpoint/config.json +60 -0
- last-checkpoint/config_sentence_transformers.json +14 -0
- last-checkpoint/merges.txt +0 -0
- last-checkpoint/model.safetensors +3 -0
- last-checkpoint/modules.json +20 -0
- last-checkpoint/optimizer.pt +3 -0
- last-checkpoint/rng_state.pth +3 -0
- last-checkpoint/scheduler.pt +3 -0
- last-checkpoint/sentence_bert_config.json +4 -0
- last-checkpoint/special_tokens_map.json +31 -0
- last-checkpoint/tokenizer.json +3 -0
- last-checkpoint/tokenizer_config.json +239 -0
- last-checkpoint/trainer_state.json +33 -0
- last-checkpoint/training_args.bin +3 -0
- last-checkpoint/vocab.json +0 -0
.gitattributes
CHANGED
|
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
last-checkpoint/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
last-checkpoint/1_Pooling/config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 1024,
|
| 3 |
+
"pooling_mode_cls_token": false,
|
| 4 |
+
"pooling_mode_mean_tokens": false,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
| 7 |
+
"pooling_mode_weightedmean_tokens": false,
|
| 8 |
+
"pooling_mode_lasttoken": true,
|
| 9 |
+
"include_prompt": true
|
| 10 |
+
}
|
last-checkpoint/README.md
ADDED
|
@@ -0,0 +1,564 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- sentence-transformers
|
| 4 |
+
- sentence-similarity
|
| 5 |
+
- feature-extraction
|
| 6 |
+
- dense
|
| 7 |
+
- generated_from_trainer
|
| 8 |
+
- dataset_size:21955
|
| 9 |
+
- loss:CachedMultipleNegativesRankingLoss
|
| 10 |
+
base_model: Qwen/Qwen3-Embedding-0.6B
|
| 11 |
+
widget:
|
| 12 |
+
- source_sentence: 'Title: Gold extatic Musk EDT 90ml'
|
| 13 |
+
sentences:
|
| 14 |
+
- 'Description: [''When you’re looking for soothing relief, reach for Aloe MSM Gel.
|
| 15 |
+
MSM stands for Methyl Sulfonyl Methane, an organic sulfur found in almost all
|
| 16 |
+
living organisms. In fact, sulfur is the third most abundant substance in our
|
| 17 |
+
body. The other main ingredient in Aloe MSM Gel is pure, stabilized Aloe Vera.
|
| 18 |
+
Aloe MSM Gel combines these two powerful ingredients with herbal extracts and
|
| 19 |
+
other select ingredients for soothing relief anytime. Try Aloe MSM Gel today!
|
| 20 |
+
DIRECTIONS: Apply liberally and massage into skin areas where needed. Should eye
|
| 21 |
+
contact occur, flush with water for several minutes. Repeat application as needed.'']
|
| 22 |
+
|
| 23 |
+
Clear, non-staining formula
|
| 24 |
+
|
| 25 |
+
For soothing relief anytime
|
| 26 |
+
|
| 27 |
+
Contains pure, stabilized Aloe Vera'
|
| 28 |
+
- 'Description: [''Edt spray 3 oz design house: balmain'']
|
| 29 |
+
|
| 30 |
+
Extatic Balmain Gold Musk By Balmain Edt Spray 3 Oz'
|
| 31 |
+
- 'Description: [''Euphoria is a heavenly and intoxicating perfume for the woman
|
| 32 |
+
who enjoys attention. Its original blend of sparkly and sweet ingredients is simply
|
| 33 |
+
irresistible. Persimmon and pomegranate open the scent with an uncommon kick.
|
| 34 |
+
The two fruits combine for a rich aroma of musky, sweet wine deepened by green
|
| 35 |
+
accords. Black orchid and lotus blossom provide clean, watery flower notes just
|
| 36 |
+
exotic enough to suggest the marvels of a faraway land. Violet, cream, amber,
|
| 37 |
+
and wood notes support the perfume with a milky, savory aroma. Made for the uncommon
|
| 38 |
+
woman, this perfume longs to be noticed.'']'
|
| 39 |
+
- source_sentence: 'Title: Nail Clippers for Thick Nails - Heavy Duty Stainless Steel
|
| 40 |
+
Fingernail Toenail Clipper for Tough Nails,Wide Jaw Extra Large Nail Cutter for
|
| 41 |
+
Thick Toenails for Seniors Men & Women,Curved Blades'
|
| 42 |
+
sentences:
|
| 43 |
+
- 'Description: [''Launched by the design house of Chanel in 2002, CHANEL CHANCE
|
| 44 |
+
by Chanel is classified as a flowery fragrance. This feminine scent posesses a
|
| 45 |
+
blend of: a refined blend of jasmine and citrus. It is recommended for daytime
|
| 46 |
+
wear.'']'
|
| 47 |
+
- 'Description: [''Medicated Treatment Conditioner'', ''🥇 Intense multi-functional
|
| 48 |
+
formula.🥈 Designed to penetrate the hair shaft to stop drying and add moisture.
|
| 49 |
+
🥉 Also helps in the aid of minor scalp irritations and fungus.'']
|
| 50 |
+
|
| 51 |
+
✅ 👩🏾 👩🏽🦰👩🏼 Works on all hair types
|
| 52 |
+
|
| 53 |
+
✅ Formulated with Aloe
|
| 54 |
+
|
| 55 |
+
✅ Works best when used with Kiti Kiti Medicated Treatment Shampoo
|
| 56 |
+
|
| 57 |
+
✅ Help remove dry, flaking skin from scalp'
|
| 58 |
+
- 'Description: [''Are you still spending thousands of dollars a year for your thick
|
| 59 |
+
nail problems? Are you still complaining about traditional nail clippers for men
|
| 60 |
+
that are easily damaged?'', ''We recommend you to try the new professional nail
|
| 61 |
+
clippers of the DEJLIG brand.'', ''Toenail clippers for thick nails adopt a unique
|
| 62 |
+
lever design, and the blade of nail clippers for thick toenails can be opened
|
| 63 |
+
up to 16mm. Fingernail clippers for men can trim nails of any thickness and easily
|
| 64 |
+
cut our thick nails. Clipper pro nail cutters are very durable and sharp, saying
|
| 65 |
+
goodbye to the annoying thick nail problem.'', ''DEJLIG is an American brand dedicated
|
| 66 |
+
to producing high-quality large nail clippers for thick toenails.'', "Our heavy
|
| 67 |
+
duty nail clippers for thick nails have an elegant ergonomic design and have the
|
| 68 |
+
most durable and sharpest blades. Professional nail clippers for seniors made
|
| 69 |
+
of surgical grade stainless steel have outstanding performance and are today''s
|
| 70 |
+
masterpieces. She is one of the best nail clippers set for men on the market today. It''s
|
| 71 |
+
time to say goodbye to cheap and low-quality toenail clippers for thick toenails.",
|
| 72 |
+
''Try our wide jaw toenail clippers now! When you trim your nails, they will provide
|
| 73 |
+
you with the smoothest and most enjoyable experience.'', ''【Gift Box Includes】'',
|
| 74 |
+
''1 x Big Toenail Clippers 1 x Nail File1×Instructions'', ''【After-sales Service:
|
| 75 |
+
Lifetime Service】'', ''All heavy duty toenail clippers must pass strict quality
|
| 76 |
+
inspections before they can be sold. If you have any questions about our stainless
|
| 77 |
+
steel nail clippers, please go to the following path: "My Order"-"Contact Seller"
|
| 78 |
+
to contact us.'']
|
| 79 |
+
|
| 80 |
+
【Professional Nail Clippers Set】- Toe nail clippers for thick toenails are designed
|
| 81 |
+
for various nail types and are suitable for professional occasions such as nail
|
| 82 |
+
salons, including men, women and seniors. This is also a great nail clipper kit
|
| 83 |
+
(including a nail clipper and a nail file), which can provide a relaxing and enjoyable
|
| 84 |
+
experience for heavy duty toenail clippers, nail cutters for seniors and travel
|
| 85 |
+
nail clippers. Large Toenail Clippers for Thick Toenails Are the Best Choice for
|
| 86 |
+
Gifts.
|
| 87 |
+
|
| 88 |
+
【Sharp & Durable】- Toenail clippers for thick nails for seniors are made of high-quality
|
| 89 |
+
surgical grade stainless steel for rust and durability .The new sharp curved blade
|
| 90 |
+
adapts to the curvature of the nail and trims sharp pieces of corners precisely.
|
| 91 |
+
Nail clippers for thick toenails use a unique lever, which is very durable, ergonomic
|
| 92 |
+
and comfortable to use. Nail Clippers for Women Are Professional Cutting Tools
|
| 93 |
+
Manufactured According to The Highest Standards.
|
| 94 |
+
|
| 95 |
+
【Ultra Wide Jaw Opening】- Professional toenail clippers use a unique lever design,
|
| 96 |
+
allowing the wide jaw blade to open up to 16mm, suitable for trimming nails of
|
| 97 |
+
any thickness. Ultra wide jaw opening and non-slip handle work perfectly together,
|
| 98 |
+
and nail clippers for men can reduce the pressure required to cut thick nails
|
| 99 |
+
or tough toenails. Fingernail clippers for women help us cut nails easily and
|
| 100 |
+
comfortably, so Large Nail Clippers for Thick Toenails Will Be Our Best Tool for
|
| 101 |
+
Trimming Nails.
|
| 102 |
+
|
| 103 |
+
【Designed for Thick Nails】- Nail clippers for thick nails can easily trim nails
|
| 104 |
+
of various thicknesses, saving us strength and time. Sturdy and sharp curved blade
|
| 105 |
+
with double curved edges allows toenail clippers for thick toenails to trim nails
|
| 106 |
+
precisely without pain. Heavy duty nail clippers for thick nails effectively alleviate
|
| 107 |
+
tough nail problems caused by fungus, diabetes, and aging. Fingernail Clippers
|
| 108 |
+
for Men Are A Good Helper for Every Thick Nail Friend.
|
| 109 |
+
|
| 110 |
+
【Lifetime Warranty】- We are very confident in the quality of our nail trimmer
|
| 111 |
+
for women and offer a leading lifetime replacement warranty to every customer
|
| 112 |
+
who buys thick toenail clippers. If you have any questions about our toenail clippers
|
| 113 |
+
for thick nails, please feel free to email us. Our US professional team will give
|
| 114 |
+
you the most satisfactory answer within 24 hours. Clipper Pro Nail Cutter Provide
|
| 115 |
+
Reliable Quality, So You Can Buy Stainless Steel Nail Clippers with More Confidence.'
|
| 116 |
+
- source_sentence: 'Title: Nail Art Brushes,Acrylic Nail Brush,Nail Art Brush,Acrylic
|
| 117 |
+
Brush Embossed Sable Nail Tools for Nail Salon Home Use,Nail Tools (#20)'
|
| 118 |
+
sentences:
|
| 119 |
+
- 'Description: [''Product Details: Weight: about 0.63-0.98 oz Material: alloy +
|
| 120 |
+
plastic + Kolinsky hair Color: Black Size: #8,10,12,14,16,18,20 Length: about
|
| 121 |
+
17.2-18.5 cm Nail brushes soft, flexible bristles give the artist great control
|
| 122 |
+
over the product. Long-lasting and strong, it will not split or deform, the alloy
|
| 123 |
+
handle is easy to handle, has a very smooth surface, more durable than any type
|
| 124 |
+
of brush, both beautiful and practical. Nail acrylic brush is the perfect nail
|
| 125 |
+
art tool for professional salons and home DIY nail art, the more times you use
|
| 126 |
+
it, the smoother the result. Warm Tips: Please clean our nail brushes with a brush
|
| 127 |
+
before use. After the manicure, please clean the nail brush with Brush Liquid
|
| 128 |
+
Monomer. Wipe off as much water as possible with a paper towel and re-store. Proper
|
| 129 |
+
care and cleaning will extend the life of your nail brushes. Proper use and proper
|
| 130 |
+
care will ensure a longer life for your nail art brushes. Always keep air circulating
|
| 131 |
+
and in an upright position. The acrylic nail brushes need to breathe to prevent
|
| 132 |
+
bad stuff growth. Also, keep pinceles para acrilico out of direct sunlight. With
|
| 133 |
+
proper care, your acrylic brush will have a longer life.'']'
|
| 134 |
+
- 'Description: [''For carefree hair with time to spare start your day with Aussie
|
| 135 |
+
Total Miracle Collection 7N1 Shampoo. Packed with 7 benefits in 1 bottle it starts
|
| 136 |
+
by cleansing your tresses to reveal brilliant natural shine. The moisture-enriched
|
| 137 |
+
serum infusion protects your hair from dryness split ends and breakage. On top
|
| 138 |
+
of that this miraculous shampoo makes your hair more manageable by detangling
|
| 139 |
+
your strands. The result? Your hair is less prone to damage and breakage. Follow
|
| 140 |
+
with Aussie Total Miracle Collection 7N1 Conditioner and emerge from the shower
|
| 141 |
+
with a head full of silky smooth hair. Because around here we make Aussome hair
|
| 142 |
+
easy!'']'
|
| 143 |
+
- 'Description: [''EVA Hard Protective Travel Case Carrying Pouch Cover Bag for
|
| 144 |
+
John Frieda Salon Shape 1.5 Inch Hot Air Brush By Hermitshell'']
|
| 145 |
+
|
| 146 |
+
Hermitshell Hard Travel Storage Carrying Case Bag
|
| 147 |
+
|
| 148 |
+
Protect your favorite device from bumps dents and scratches
|
| 149 |
+
|
| 150 |
+
Made to fit John Frieda Salon Shape 1.5 Inch Hot Air Brush
|
| 151 |
+
|
| 152 |
+
Material:EVA ,Color: Black
|
| 153 |
+
|
| 154 |
+
For sale is case only (device and accessories are sold separately)'
|
| 155 |
+
- source_sentence: 'Title: Stand Electric Toothbrush Heads Case Holder for Braun Oral
|
| 156 |
+
B'
|
| 157 |
+
sentences:
|
| 158 |
+
- 'Description: [''3 bottles of ROSE WATER & IVY Shea & Vitamin E SHOWER GELS.'']'
|
| 159 |
+
- 'Description: [''Package Content: 1 x Toothbrush Stand (The toothbrush, brush
|
| 160 |
+
heads, and charger are not included in the package.)'']
|
| 161 |
+
|
| 162 |
+
A perfect solution to organize your toothbrush, brush heads and charger in one
|
| 163 |
+
stand.Hold up to 4 brush heads,2 Oral B electric toothbrush.and 1 charger, (Charging
|
| 164 |
+
port size is only suitable for original oral b charger),(The toothbrush, brush
|
| 165 |
+
heads, and charger are not included in the package)
|
| 166 |
+
|
| 167 |
+
It comes with a lid that can keep your brush heads safe and clean.the holder is
|
| 168 |
+
made of ABS,100% Environmental protection material,safe and durable
|
| 169 |
+
|
| 170 |
+
This fit right around the toothbrush charger perfectly. the cover keeps it extra
|
| 171 |
+
clean and Convenient storage
|
| 172 |
+
|
| 173 |
+
Size: 19.5 * 9.5 * 3.5cm, IRLIC Stand Electric Toothbrush Heads Case Holder for
|
| 174 |
+
Braun Oral B
|
| 175 |
+
|
| 176 |
+
Compatible with Braun Oral-B rechargeable toothbrush, Oral B Stages Power Kids-950,
|
| 177 |
+
Oral B Stages Power Kids, Oral B Vitality Sensitive Clean, Oral B Pro 600, Oral
|
| 178 |
+
B Pro 650, Oral B Pro 700 3d white, Oral B Pro 1000, Oral B Pro 2000, Oral B Pro
|
| 179 |
+
3000, Oral B Pro 4000, Oral B Pro 5000, Oral B Pro 6000, Oral B Pro 7000, Oral
|
| 180 |
+
B Pro 8000, Oral B Genius 9000, Oral B Genius 10000n'
|
| 181 |
+
- 'Description: []
|
| 182 |
+
|
| 183 |
+
❤️[Short Hairstyles]- Natural Grey Wigs for White Women, 12 inch Length Bob Wigs
|
| 184 |
+
for Women, Heat Resistant Fiber Synthetic Hair Replacement Wigs Colored Blond
|
| 185 |
+
Side Parting Bangs Trendy Haircuts Wig with Free Wig Cap x 1
|
| 186 |
+
|
| 187 |
+
❤️[Hair Material]- Heat resistant hair tinsel that holds up to styling tools providing
|
| 188 |
+
a similar styling versatility as with human hair. The wig can also be customized
|
| 189 |
+
to suit your own individual look by cutting them or using a curling iron/hair
|
| 190 |
+
straightener. (Note: optimal temperature is 250-275 degrees, but going above 350
|
| 191 |
+
degrees is not recommended.)
|
| 192 |
+
|
| 193 |
+
❤️[Occasions]- Fashionable and stylish short bob wig look natural, real like human
|
| 194 |
+
hair wig, very pretty, and feminine, soft touch. You can wear it for parties,
|
| 195 |
+
Halloween, cosplay, daily use, gift sending to friends and certain themed performances,
|
| 196 |
+
fashion and attractive, adding more charm and fun.
|
| 197 |
+
|
| 198 |
+
❤️[Adjustable Size]- Cap size 20-22.5 inches,There are two adjustment straps inside
|
| 199 |
+
the wig, which can be intertwined to a fixed position to suit different head sizes.
|
| 200 |
+
|
| 201 |
+
❤️[Worry-free After-sales]- We Are Committed To Providing Customers with Quality
|
| 202 |
+
Products and Attentive Services, Free Returns If You Don''t Like It or a Quality
|
| 203 |
+
Problem.'
|
| 204 |
+
- source_sentence: 'Title: Sinful Colors Finger Nail Polish Color Lacquer Set 16-Piece
|
| 205 |
+
Collection'
|
| 206 |
+
sentences:
|
| 207 |
+
- 'Description: []
|
| 208 |
+
|
| 209 |
+
Lot of 16 Random Sinful Colors Finger Nail Polish Color Lacquer All Different
|
| 210 |
+
Colors No Repeats
|
| 211 |
+
|
| 212 |
+
Images for reference only
|
| 213 |
+
|
| 214 |
+
Actual color/texture may vary from the image shown
|
| 215 |
+
|
| 216 |
+
Randomly pre-packed 16 nail polishes'
|
| 217 |
+
- 'Description: ["Adorox Red Horn Devil Woman''s Wig Demon Angel Halloween Costume
|
| 218 |
+
Prop Measures about: 27 Inches Length. Perfect for a Woman''s Devil Halloween
|
| 219 |
+
Costume. Sized for adults and teens. One Size fits most"]
|
| 220 |
+
|
| 221 |
+
Long fiery Red hair with Horns
|
| 222 |
+
|
| 223 |
+
Measures about: 27 Inches Length
|
| 224 |
+
|
| 225 |
+
100% Polyester
|
| 226 |
+
|
| 227 |
+
Perfect for a Woman''s Devil Halloween Costume
|
| 228 |
+
|
| 229 |
+
Sized for adults and teens.'
|
| 230 |
+
- 'Description: [''Ovvio Oils All Natural Smooth Strength Plus Lip Balm For Chapped
|
| 231 |
+
Lips 0.5 oz (14.2 g)'']'
|
| 232 |
+
pipeline_tag: sentence-similarity
|
| 233 |
+
library_name: sentence-transformers
|
| 234 |
+
---
|
| 235 |
+
|
| 236 |
+
# SentenceTransformer based on Qwen/Qwen3-Embedding-0.6B
|
| 237 |
+
|
| 238 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Qwen/Qwen3-Embedding-0.6B](https://huggingface.co/Qwen/Qwen3-Embedding-0.6B) on the amazon-reviews-2023 dataset. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
| 239 |
+
|
| 240 |
+
## Model Details
|
| 241 |
+
|
| 242 |
+
### Model Description
|
| 243 |
+
- **Model Type:** Sentence Transformer
|
| 244 |
+
- **Base model:** [Qwen/Qwen3-Embedding-0.6B](https://huggingface.co/Qwen/Qwen3-Embedding-0.6B) <!-- at revision c54f2e6e80b2d7b7de06f51cec4959f6b3e03418 -->
|
| 245 |
+
- **Maximum Sequence Length:** 512 tokens
|
| 246 |
+
- **Output Dimensionality:** 1024 dimensions
|
| 247 |
+
- **Similarity Function:** Cosine Similarity
|
| 248 |
+
- **Training Dataset:**
|
| 249 |
+
- amazon-reviews-2023
|
| 250 |
+
<!-- - **Language:** Unknown -->
|
| 251 |
+
<!-- - **License:** Unknown -->
|
| 252 |
+
|
| 253 |
+
### Model Sources
|
| 254 |
+
|
| 255 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
| 256 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
| 257 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
| 258 |
+
|
| 259 |
+
### Full Model Architecture
|
| 260 |
+
|
| 261 |
+
```
|
| 262 |
+
SentenceTransformer(
|
| 263 |
+
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'Qwen3Model'})
|
| 264 |
+
(1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': True, 'include_prompt': True})
|
| 265 |
+
(2): Normalize()
|
| 266 |
+
)
|
| 267 |
+
```
|
| 268 |
+
|
| 269 |
+
## Usage
|
| 270 |
+
|
| 271 |
+
### Direct Usage (Sentence Transformers)
|
| 272 |
+
|
| 273 |
+
First install the Sentence Transformers library:
|
| 274 |
+
|
| 275 |
+
```bash
|
| 276 |
+
pip install -U sentence-transformers
|
| 277 |
+
```
|
| 278 |
+
|
| 279 |
+
Then you can load this model and run inference.
|
| 280 |
+
```python
|
| 281 |
+
from sentence_transformers import SentenceTransformer
|
| 282 |
+
|
| 283 |
+
# Download from the 🤗 Hub
|
| 284 |
+
model = SentenceTransformer("guyhadad01/EncodeRec_600M_Beauty")
|
| 285 |
+
# Run inference
|
| 286 |
+
queries = [
|
| 287 |
+
"Title: Sinful Colors Finger Nail Polish Color Lacquer Set 16-Piece Collection",
|
| 288 |
+
]
|
| 289 |
+
documents = [
|
| 290 |
+
'Description: []\nLot of 16 Random Sinful Colors Finger Nail Polish Color Lacquer All Different Colors No Repeats\nImages for reference only\nActual color/texture may vary from the image shown\nRandomly pre-packed 16 nail polishes',
|
| 291 |
+
"Description: ['Ovvio Oils All Natural Smooth Strength Plus Lip Balm For Chapped Lips 0.5 oz (14.2 g)']",
|
| 292 |
+
'Description: ["Adorox Red Horn Devil Woman\'s Wig Demon Angel Halloween Costume Prop Measures about: 27 Inches Length. Perfect for a Woman\'s Devil Halloween Costume. Sized for adults and teens. One Size fits most"]\nLong fiery Red hair with Horns\nMeasures about: 27 Inches Length\n100% Polyester\nPerfect for a Woman\'s Devil Halloween Costume\nSized for adults and teens.',
|
| 293 |
+
]
|
| 294 |
+
query_embeddings = model.encode_query(queries)
|
| 295 |
+
document_embeddings = model.encode_document(documents)
|
| 296 |
+
print(query_embeddings.shape, document_embeddings.shape)
|
| 297 |
+
# (1, 1024) (3, 1024)
|
| 298 |
+
|
| 299 |
+
# Get the similarity scores for the embeddings
|
| 300 |
+
similarities = model.similarity(query_embeddings, document_embeddings)
|
| 301 |
+
print(similarities)
|
| 302 |
+
# tensor([[ 0.7898, -0.0228, -0.0027]])
|
| 303 |
+
```
|
| 304 |
+
|
| 305 |
+
<!--
|
| 306 |
+
### Direct Usage (Transformers)
|
| 307 |
+
|
| 308 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
| 309 |
+
|
| 310 |
+
</details>
|
| 311 |
+
-->
|
| 312 |
+
|
| 313 |
+
<!--
|
| 314 |
+
### Downstream Usage (Sentence Transformers)
|
| 315 |
+
|
| 316 |
+
You can finetune this model on your own dataset.
|
| 317 |
+
|
| 318 |
+
<details><summary>Click to expand</summary>
|
| 319 |
+
|
| 320 |
+
</details>
|
| 321 |
+
-->
|
| 322 |
+
|
| 323 |
+
<!--
|
| 324 |
+
### Out-of-Scope Use
|
| 325 |
+
|
| 326 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 327 |
+
-->
|
| 328 |
+
|
| 329 |
+
<!--
|
| 330 |
+
## Bias, Risks and Limitations
|
| 331 |
+
|
| 332 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
| 333 |
+
-->
|
| 334 |
+
|
| 335 |
+
<!--
|
| 336 |
+
### Recommendations
|
| 337 |
+
|
| 338 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
| 339 |
+
-->
|
| 340 |
+
|
| 341 |
+
## Training Details
|
| 342 |
+
|
| 343 |
+
### Training Dataset
|
| 344 |
+
|
| 345 |
+
#### amazon-reviews-2023
|
| 346 |
+
|
| 347 |
+
* Dataset: amazon-reviews-2023
|
| 348 |
+
* Size: 21,955 training samples
|
| 349 |
+
* Columns: <code>title</code> and <code>description</code>
|
| 350 |
+
* Approximate statistics based on the first 1000 samples:
|
| 351 |
+
| | title | description |
|
| 352 |
+
|:--------|:-----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
| 353 |
+
| type | string | string |
|
| 354 |
+
| details | <ul><li>min: 6 tokens</li><li>mean: 29.97 tokens</li><li>max: 114 tokens</li></ul> | <ul><li>min: 15 tokens</li><li>mean: 197.11 tokens</li><li>max: 512 tokens</li></ul> |
|
| 355 |
+
* Samples:
|
| 356 |
+
| title | description |
|
| 357 |
+
|:--------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| 358 |
+
| <code>Title: Precision Plunger Bars for Cartridge Grips – 93mm – Bag of 10 Plungers</code> | <code>Description: ['The Precision Plunger Bars are designed to work seamlessly with the\xa0Precision Disposable 1. 25" Contoured Soft Cartridge Grips\xa0and the\xa0Precision Disposable 1" Textured Soft Cartridge Grips\xa0to drive cartridge needles with vice style or standard tattoo machine setups. These plunger bars are manufactured from 304 Stainless Steel and feature a brass tip. The plungers are sold in a bag of ten in your choice of 88mm, 93mm, or 98mm length.']<br>Material: 304 Stainless Steel; Brass tip<br>Lengths Available: 88mm, 93mm, 98mm<br>Accepts cartridge needles with vice style tattoo machines<br>Works perfectly with Precision Disposable Soft Cartridge Grips<br>Price per one bag of 10 plungers</code> |
|
| 359 |
+
| <code>Title: Lurrose 100Pcs Full Cover Fake Toenails Artificial Transparent Nail Tips Nail Art for DIY</code> | <code>Description: ['Description', 'The false toenails are durable with perfect length. You have the option to wear them long or clip them short, easy to trim and file them to in any length and shape you like. Plus, ABS is kind of green enviromental material, and makes the nails durable, breathable, light even no pressure on your own toenails. Fit well to your natural toenails. Non toxic, no smell, no harm to your health.', 'Feature', '- Color: As Shown.- Material: ABS.- Size: 14.3 x 7.2 x 1cm.', 'Package Including', '100 x Pieces fake toenails']<br>The false toenails are durable with perfect length. You have the option to wear them long or clip them short, easy to trim and file them to in any length and shape you like.<br>ABS is kind of green enviromental material, and makes the nails durable, breathable, light even no pressure on your own nails.<br>Fit well to your natural toenails. Non toxic, no smell, no harm to your health.<br>Wonderful as gift for girlfriend, family and friends.<br>The easiest and mo...</code> |
|
| 360 |
+
| <code>Title: Gold extatic Musk EDT 90ml</code> | <code>Description: ['Edt spray 3 oz design house: balmain']<br>Extatic Balmain Gold Musk By Balmain Edt Spray 3 Oz</code> |
|
| 361 |
+
* Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
|
| 362 |
+
```json
|
| 363 |
+
{
|
| 364 |
+
"scale": 20.0,
|
| 365 |
+
"similarity_fct": "cos_sim",
|
| 366 |
+
"mini_batch_size": 8,
|
| 367 |
+
"gather_across_devices": false
|
| 368 |
+
}
|
| 369 |
+
```
|
| 370 |
+
|
| 371 |
+
### Training Hyperparameters
|
| 372 |
+
#### Non-Default Hyperparameters
|
| 373 |
+
|
| 374 |
+
- `per_device_train_batch_size`: 512
|
| 375 |
+
- `num_train_epochs`: 1
|
| 376 |
+
- `warmup_ratio`: 0.1
|
| 377 |
+
- `bf16`: True
|
| 378 |
+
- `push_to_hub`: True
|
| 379 |
+
- `hub_model_id`: guyhadad01/EncodeRec_600M_Beauty
|
| 380 |
+
- `hub_strategy`: checkpoint
|
| 381 |
+
- `prompts`: Instruct: Given a web search query, retrieve relevant passages that answer the query
|
| 382 |
+
Query:
|
| 383 |
+
|
| 384 |
+
#### All Hyperparameters
|
| 385 |
+
<details><summary>Click to expand</summary>
|
| 386 |
+
|
| 387 |
+
- `overwrite_output_dir`: False
|
| 388 |
+
- `do_predict`: False
|
| 389 |
+
- `eval_strategy`: no
|
| 390 |
+
- `prediction_loss_only`: True
|
| 391 |
+
- `per_device_train_batch_size`: 512
|
| 392 |
+
- `per_device_eval_batch_size`: 8
|
| 393 |
+
- `per_gpu_train_batch_size`: None
|
| 394 |
+
- `per_gpu_eval_batch_size`: None
|
| 395 |
+
- `gradient_accumulation_steps`: 1
|
| 396 |
+
- `eval_accumulation_steps`: None
|
| 397 |
+
- `torch_empty_cache_steps`: None
|
| 398 |
+
- `learning_rate`: 5e-05
|
| 399 |
+
- `weight_decay`: 0.0
|
| 400 |
+
- `adam_beta1`: 0.9
|
| 401 |
+
- `adam_beta2`: 0.999
|
| 402 |
+
- `adam_epsilon`: 1e-08
|
| 403 |
+
- `max_grad_norm`: 1.0
|
| 404 |
+
- `num_train_epochs`: 1
|
| 405 |
+
- `max_steps`: -1
|
| 406 |
+
- `lr_scheduler_type`: linear
|
| 407 |
+
- `lr_scheduler_kwargs`: {}
|
| 408 |
+
- `warmup_ratio`: 0.1
|
| 409 |
+
- `warmup_steps`: 0
|
| 410 |
+
- `log_level`: passive
|
| 411 |
+
- `log_level_replica`: warning
|
| 412 |
+
- `log_on_each_node`: True
|
| 413 |
+
- `logging_nan_inf_filter`: True
|
| 414 |
+
- `save_safetensors`: True
|
| 415 |
+
- `save_on_each_node`: False
|
| 416 |
+
- `save_only_model`: False
|
| 417 |
+
- `restore_callback_states_from_checkpoint`: False
|
| 418 |
+
- `no_cuda`: False
|
| 419 |
+
- `use_cpu`: False
|
| 420 |
+
- `use_mps_device`: False
|
| 421 |
+
- `seed`: 42
|
| 422 |
+
- `data_seed`: None
|
| 423 |
+
- `jit_mode_eval`: False
|
| 424 |
+
- `bf16`: True
|
| 425 |
+
- `fp16`: False
|
| 426 |
+
- `fp16_opt_level`: O1
|
| 427 |
+
- `half_precision_backend`: auto
|
| 428 |
+
- `bf16_full_eval`: False
|
| 429 |
+
- `fp16_full_eval`: False
|
| 430 |
+
- `tf32`: None
|
| 431 |
+
- `local_rank`: 0
|
| 432 |
+
- `ddp_backend`: None
|
| 433 |
+
- `tpu_num_cores`: None
|
| 434 |
+
- `tpu_metrics_debug`: False
|
| 435 |
+
- `debug`: []
|
| 436 |
+
- `dataloader_drop_last`: False
|
| 437 |
+
- `dataloader_num_workers`: 0
|
| 438 |
+
- `dataloader_prefetch_factor`: None
|
| 439 |
+
- `past_index`: -1
|
| 440 |
+
- `disable_tqdm`: False
|
| 441 |
+
- `remove_unused_columns`: True
|
| 442 |
+
- `label_names`: None
|
| 443 |
+
- `load_best_model_at_end`: False
|
| 444 |
+
- `ignore_data_skip`: False
|
| 445 |
+
- `fsdp`: []
|
| 446 |
+
- `fsdp_min_num_params`: 0
|
| 447 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 448 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
| 449 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
| 450 |
+
- `parallelism_config`: None
|
| 451 |
+
- `deepspeed`: None
|
| 452 |
+
- `label_smoothing_factor`: 0.0
|
| 453 |
+
- `optim`: adamw_torch
|
| 454 |
+
- `optim_args`: None
|
| 455 |
+
- `adafactor`: False
|
| 456 |
+
- `group_by_length`: False
|
| 457 |
+
- `length_column_name`: length
|
| 458 |
+
- `project`: huggingface
|
| 459 |
+
- `trackio_space_id`: trackio
|
| 460 |
+
- `ddp_find_unused_parameters`: None
|
| 461 |
+
- `ddp_bucket_cap_mb`: None
|
| 462 |
+
- `ddp_broadcast_buffers`: False
|
| 463 |
+
- `dataloader_pin_memory`: True
|
| 464 |
+
- `dataloader_persistent_workers`: False
|
| 465 |
+
- `skip_memory_metrics`: True
|
| 466 |
+
- `use_legacy_prediction_loop`: False
|
| 467 |
+
- `push_to_hub`: True
|
| 468 |
+
- `resume_from_checkpoint`: None
|
| 469 |
+
- `hub_model_id`: guyhadad01/EncodeRec_600M_Beauty
|
| 470 |
+
- `hub_strategy`: checkpoint
|
| 471 |
+
- `hub_private_repo`: None
|
| 472 |
+
- `hub_always_push`: False
|
| 473 |
+
- `hub_revision`: None
|
| 474 |
+
- `gradient_checkpointing`: False
|
| 475 |
+
- `gradient_checkpointing_kwargs`: None
|
| 476 |
+
- `include_inputs_for_metrics`: False
|
| 477 |
+
- `include_for_metrics`: []
|
| 478 |
+
- `eval_do_concat_batches`: True
|
| 479 |
+
- `fp16_backend`: auto
|
| 480 |
+
- `push_to_hub_model_id`: None
|
| 481 |
+
- `push_to_hub_organization`: None
|
| 482 |
+
- `mp_parameters`:
|
| 483 |
+
- `auto_find_batch_size`: False
|
| 484 |
+
- `full_determinism`: False
|
| 485 |
+
- `torchdynamo`: None
|
| 486 |
+
- `ray_scope`: last
|
| 487 |
+
- `ddp_timeout`: 1800
|
| 488 |
+
- `torch_compile`: False
|
| 489 |
+
- `torch_compile_backend`: None
|
| 490 |
+
- `torch_compile_mode`: None
|
| 491 |
+
- `include_tokens_per_second`: False
|
| 492 |
+
- `include_num_input_tokens_seen`: no
|
| 493 |
+
- `neftune_noise_alpha`: None
|
| 494 |
+
- `optim_target_modules`: None
|
| 495 |
+
- `batch_eval_metrics`: False
|
| 496 |
+
- `eval_on_start`: False
|
| 497 |
+
- `use_liger_kernel`: False
|
| 498 |
+
- `liger_kernel_config`: None
|
| 499 |
+
- `eval_use_gather_object`: False
|
| 500 |
+
- `average_tokens_across_devices`: True
|
| 501 |
+
- `prompts`: Instruct: Given a web search query, retrieve relevant passages that answer the query
|
| 502 |
+
Query:
|
| 503 |
+
- `batch_sampler`: batch_sampler
|
| 504 |
+
- `multi_dataset_batch_sampler`: proportional
|
| 505 |
+
- `router_mapping`: {}
|
| 506 |
+
- `learning_rate_mapping`: {}
|
| 507 |
+
|
| 508 |
+
</details>
|
| 509 |
+
|
| 510 |
+
### Framework Versions
|
| 511 |
+
- Python: 3.12.11
|
| 512 |
+
- Sentence Transformers: 5.1.0
|
| 513 |
+
- Transformers: 4.57.0
|
| 514 |
+
- PyTorch: 2.7.1+cu126
|
| 515 |
+
- Accelerate: 1.10.0
|
| 516 |
+
- Datasets: 3.6.0
|
| 517 |
+
- Tokenizers: 0.22.1
|
| 518 |
+
|
| 519 |
+
## Citation
|
| 520 |
+
|
| 521 |
+
### BibTeX
|
| 522 |
+
|
| 523 |
+
#### Sentence Transformers
|
| 524 |
+
```bibtex
|
| 525 |
+
@inproceedings{reimers-2019-sentence-bert,
|
| 526 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
| 527 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
| 528 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
| 529 |
+
month = "11",
|
| 530 |
+
year = "2019",
|
| 531 |
+
publisher = "Association for Computational Linguistics",
|
| 532 |
+
url = "https://arxiv.org/abs/1908.10084",
|
| 533 |
+
}
|
| 534 |
+
```
|
| 535 |
+
|
| 536 |
+
#### CachedMultipleNegativesRankingLoss
|
| 537 |
+
```bibtex
|
| 538 |
+
@misc{gao2021scaling,
|
| 539 |
+
title={Scaling Deep Contrastive Learning Batch Size under Memory Limited Setup},
|
| 540 |
+
author={Luyu Gao and Yunyi Zhang and Jiawei Han and Jamie Callan},
|
| 541 |
+
year={2021},
|
| 542 |
+
eprint={2101.06983},
|
| 543 |
+
archivePrefix={arXiv},
|
| 544 |
+
primaryClass={cs.LG}
|
| 545 |
+
}
|
| 546 |
+
```
|
| 547 |
+
|
| 548 |
+
<!--
|
| 549 |
+
## Glossary
|
| 550 |
+
|
| 551 |
+
*Clearly define terms in order to be accessible across audiences.*
|
| 552 |
+
-->
|
| 553 |
+
|
| 554 |
+
<!--
|
| 555 |
+
## Model Card Authors
|
| 556 |
+
|
| 557 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
| 558 |
+
-->
|
| 559 |
+
|
| 560 |
+
<!--
|
| 561 |
+
## Model Card Contact
|
| 562 |
+
|
| 563 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
| 564 |
+
-->
|
last-checkpoint/added_tokens.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</think>": 151668,
|
| 3 |
+
"</tool_call>": 151658,
|
| 4 |
+
"</tool_response>": 151666,
|
| 5 |
+
"<think>": 151667,
|
| 6 |
+
"<tool_call>": 151657,
|
| 7 |
+
"<tool_response>": 151665,
|
| 8 |
+
"<|box_end|>": 151649,
|
| 9 |
+
"<|box_start|>": 151648,
|
| 10 |
+
"<|endoftext|>": 151643,
|
| 11 |
+
"<|file_sep|>": 151664,
|
| 12 |
+
"<|fim_middle|>": 151660,
|
| 13 |
+
"<|fim_pad|>": 151662,
|
| 14 |
+
"<|fim_prefix|>": 151659,
|
| 15 |
+
"<|fim_suffix|>": 151661,
|
| 16 |
+
"<|im_end|>": 151645,
|
| 17 |
+
"<|im_start|>": 151644,
|
| 18 |
+
"<|image_pad|>": 151655,
|
| 19 |
+
"<|object_ref_end|>": 151647,
|
| 20 |
+
"<|object_ref_start|>": 151646,
|
| 21 |
+
"<|quad_end|>": 151651,
|
| 22 |
+
"<|quad_start|>": 151650,
|
| 23 |
+
"<|repo_name|>": 151663,
|
| 24 |
+
"<|video_pad|>": 151656,
|
| 25 |
+
"<|vision_end|>": 151653,
|
| 26 |
+
"<|vision_pad|>": 151654,
|
| 27 |
+
"<|vision_start|>": 151652
|
| 28 |
+
}
|
last-checkpoint/chat_template.jinja
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{%- if tools %}
|
| 2 |
+
{{- '<|im_start|>system\n' }}
|
| 3 |
+
{%- if messages[0].role == 'system' %}
|
| 4 |
+
{{- messages[0].content + '\n\n' }}
|
| 5 |
+
{%- endif %}
|
| 6 |
+
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
| 7 |
+
{%- for tool in tools %}
|
| 8 |
+
{{- "\n" }}
|
| 9 |
+
{{- tool | tojson }}
|
| 10 |
+
{%- endfor %}
|
| 11 |
+
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
| 12 |
+
{%- else %}
|
| 13 |
+
{%- if messages[0].role == 'system' %}
|
| 14 |
+
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
| 15 |
+
{%- endif %}
|
| 16 |
+
{%- endif %}
|
| 17 |
+
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
| 18 |
+
{%- for message in messages[::-1] %}
|
| 19 |
+
{%- set index = (messages|length - 1) - loop.index0 %}
|
| 20 |
+
{%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
| 21 |
+
{%- set ns.multi_step_tool = false %}
|
| 22 |
+
{%- set ns.last_query_index = index %}
|
| 23 |
+
{%- endif %}
|
| 24 |
+
{%- endfor %}
|
| 25 |
+
{%- for message in messages %}
|
| 26 |
+
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
| 27 |
+
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
|
| 28 |
+
{%- elif message.role == "assistant" %}
|
| 29 |
+
{%- set content = message.content %}
|
| 30 |
+
{%- set reasoning_content = '' %}
|
| 31 |
+
{%- if message.reasoning_content is defined and message.reasoning_content is not none %}
|
| 32 |
+
{%- set reasoning_content = message.reasoning_content %}
|
| 33 |
+
{%- else %}
|
| 34 |
+
{%- if '</think>' in message.content %}
|
| 35 |
+
{%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
|
| 36 |
+
{%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
| 37 |
+
{%- endif %}
|
| 38 |
+
{%- endif %}
|
| 39 |
+
{%- if loop.index0 > ns.last_query_index %}
|
| 40 |
+
{%- if loop.last or (not loop.last and reasoning_content) %}
|
| 41 |
+
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
| 42 |
+
{%- else %}
|
| 43 |
+
{{- '<|im_start|>' + message.role + '\n' + content }}
|
| 44 |
+
{%- endif %}
|
| 45 |
+
{%- else %}
|
| 46 |
+
{{- '<|im_start|>' + message.role + '\n' + content }}
|
| 47 |
+
{%- endif %}
|
| 48 |
+
{%- if message.tool_calls %}
|
| 49 |
+
{%- for tool_call in message.tool_calls %}
|
| 50 |
+
{%- if (loop.first and content) or (not loop.first) %}
|
| 51 |
+
{{- '\n' }}
|
| 52 |
+
{%- endif %}
|
| 53 |
+
{%- if tool_call.function %}
|
| 54 |
+
{%- set tool_call = tool_call.function %}
|
| 55 |
+
{%- endif %}
|
| 56 |
+
{{- '<tool_call>\n{"name": "' }}
|
| 57 |
+
{{- tool_call.name }}
|
| 58 |
+
{{- '", "arguments": ' }}
|
| 59 |
+
{%- if tool_call.arguments is string %}
|
| 60 |
+
{{- tool_call.arguments }}
|
| 61 |
+
{%- else %}
|
| 62 |
+
{{- tool_call.arguments | tojson }}
|
| 63 |
+
{%- endif %}
|
| 64 |
+
{{- '}\n</tool_call>' }}
|
| 65 |
+
{%- endfor %}
|
| 66 |
+
{%- endif %}
|
| 67 |
+
{{- '<|im_end|>\n' }}
|
| 68 |
+
{%- elif message.role == "tool" %}
|
| 69 |
+
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
| 70 |
+
{{- '<|im_start|>user' }}
|
| 71 |
+
{%- endif %}
|
| 72 |
+
{{- '\n<tool_response>\n' }}
|
| 73 |
+
{{- message.content }}
|
| 74 |
+
{{- '\n</tool_response>' }}
|
| 75 |
+
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
| 76 |
+
{{- '<|im_end|>\n' }}
|
| 77 |
+
{%- endif %}
|
| 78 |
+
{%- endif %}
|
| 79 |
+
{%- endfor %}
|
| 80 |
+
{%- if add_generation_prompt %}
|
| 81 |
+
{{- '<|im_start|>assistant\n' }}
|
| 82 |
+
{%- if enable_thinking is defined and enable_thinking is false %}
|
| 83 |
+
{{- '<think>\n\n</think>\n\n' }}
|
| 84 |
+
{%- endif %}
|
| 85 |
+
{%- endif %}
|
last-checkpoint/config.json
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Qwen3Model"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 151643,
|
| 8 |
+
"dtype": "float32",
|
| 9 |
+
"eos_token_id": 151643,
|
| 10 |
+
"head_dim": 128,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 1024,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 3072,
|
| 15 |
+
"layer_types": [
|
| 16 |
+
"full_attention",
|
| 17 |
+
"full_attention",
|
| 18 |
+
"full_attention",
|
| 19 |
+
"full_attention",
|
| 20 |
+
"full_attention",
|
| 21 |
+
"full_attention",
|
| 22 |
+
"full_attention",
|
| 23 |
+
"full_attention",
|
| 24 |
+
"full_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"full_attention",
|
| 27 |
+
"full_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"full_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention"
|
| 44 |
+
],
|
| 45 |
+
"max_position_embeddings": 32768,
|
| 46 |
+
"max_window_layers": 28,
|
| 47 |
+
"model_type": "qwen3",
|
| 48 |
+
"num_attention_heads": 16,
|
| 49 |
+
"num_hidden_layers": 28,
|
| 50 |
+
"num_key_value_heads": 8,
|
| 51 |
+
"rms_norm_eps": 1e-06,
|
| 52 |
+
"rope_scaling": null,
|
| 53 |
+
"rope_theta": 1000000,
|
| 54 |
+
"sliding_window": null,
|
| 55 |
+
"tie_word_embeddings": true,
|
| 56 |
+
"transformers_version": "4.57.0",
|
| 57 |
+
"use_cache": true,
|
| 58 |
+
"use_sliding_window": false,
|
| 59 |
+
"vocab_size": 151669
|
| 60 |
+
}
|
last-checkpoint/config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"prompts": {
|
| 3 |
+
"query": "Instruct: Given a web search query, retrieve relevant passages that answer the query\nQuery:",
|
| 4 |
+
"document": ""
|
| 5 |
+
},
|
| 6 |
+
"default_prompt_name": null,
|
| 7 |
+
"similarity_fn_name": "cosine",
|
| 8 |
+
"model_type": "SentenceTransformer",
|
| 9 |
+
"__version__": {
|
| 10 |
+
"sentence_transformers": "5.1.0",
|
| 11 |
+
"transformers": "4.57.0",
|
| 12 |
+
"pytorch": "2.7.1+cu126"
|
| 13 |
+
}
|
| 14 |
+
}
|
last-checkpoint/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
last-checkpoint/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:634d9cd269fe0d583a546cd77180b2ac581442abc3157067fdc0383e98b38aee
|
| 3 |
+
size 2383139480
|
last-checkpoint/modules.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"idx": 2,
|
| 16 |
+
"name": "2",
|
| 17 |
+
"path": "2_Normalize",
|
| 18 |
+
"type": "sentence_transformers.models.Normalize"
|
| 19 |
+
}
|
| 20 |
+
]
|
last-checkpoint/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df87da3cebba22cacef885c6056555a0042bb358208399b555c8c2f46954598a
|
| 3 |
+
size 4766477779
|
last-checkpoint/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:866c0d16e0ba412369309f8c0cbcb2a24695a6a2921557f033b691f2cb5b2dd8
|
| 3 |
+
size 14645
|
last-checkpoint/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc97c427c4a1a55e7821fe5b53aa4d6490c1ab3a4c5ef79e3eadb4afd22c88ce
|
| 3 |
+
size 1465
|
last-checkpoint/sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 512,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
last-checkpoint/special_tokens_map.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|im_start|>",
|
| 4 |
+
"<|im_end|>",
|
| 5 |
+
"<|object_ref_start|>",
|
| 6 |
+
"<|object_ref_end|>",
|
| 7 |
+
"<|box_start|>",
|
| 8 |
+
"<|box_end|>",
|
| 9 |
+
"<|quad_start|>",
|
| 10 |
+
"<|quad_end|>",
|
| 11 |
+
"<|vision_start|>",
|
| 12 |
+
"<|vision_end|>",
|
| 13 |
+
"<|vision_pad|>",
|
| 14 |
+
"<|image_pad|>",
|
| 15 |
+
"<|video_pad|>"
|
| 16 |
+
],
|
| 17 |
+
"eos_token": {
|
| 18 |
+
"content": "<|im_end|>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
},
|
| 24 |
+
"pad_token": {
|
| 25 |
+
"content": "<|endoftext|>",
|
| 26 |
+
"lstrip": false,
|
| 27 |
+
"normalized": false,
|
| 28 |
+
"rstrip": false,
|
| 29 |
+
"single_word": false
|
| 30 |
+
}
|
| 31 |
+
}
|
last-checkpoint/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c87c38db060bafb0122019c0c749ec1eb1ae510dae43c93f0042ec51099942e8
|
| 3 |
+
size 11423971
|
last-checkpoint/tokenizer_config.json
ADDED
|
@@ -0,0 +1,239 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
+
"add_prefix_space": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"151643": {
|
| 6 |
+
"content": "<|endoftext|>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"151644": {
|
| 14 |
+
"content": "<|im_start|>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"151645": {
|
| 22 |
+
"content": "<|im_end|>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
},
|
| 29 |
+
"151646": {
|
| 30 |
+
"content": "<|object_ref_start|>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false,
|
| 35 |
+
"special": true
|
| 36 |
+
},
|
| 37 |
+
"151647": {
|
| 38 |
+
"content": "<|object_ref_end|>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false,
|
| 43 |
+
"special": true
|
| 44 |
+
},
|
| 45 |
+
"151648": {
|
| 46 |
+
"content": "<|box_start|>",
|
| 47 |
+
"lstrip": false,
|
| 48 |
+
"normalized": false,
|
| 49 |
+
"rstrip": false,
|
| 50 |
+
"single_word": false,
|
| 51 |
+
"special": true
|
| 52 |
+
},
|
| 53 |
+
"151649": {
|
| 54 |
+
"content": "<|box_end|>",
|
| 55 |
+
"lstrip": false,
|
| 56 |
+
"normalized": false,
|
| 57 |
+
"rstrip": false,
|
| 58 |
+
"single_word": false,
|
| 59 |
+
"special": true
|
| 60 |
+
},
|
| 61 |
+
"151650": {
|
| 62 |
+
"content": "<|quad_start|>",
|
| 63 |
+
"lstrip": false,
|
| 64 |
+
"normalized": false,
|
| 65 |
+
"rstrip": false,
|
| 66 |
+
"single_word": false,
|
| 67 |
+
"special": true
|
| 68 |
+
},
|
| 69 |
+
"151651": {
|
| 70 |
+
"content": "<|quad_end|>",
|
| 71 |
+
"lstrip": false,
|
| 72 |
+
"normalized": false,
|
| 73 |
+
"rstrip": false,
|
| 74 |
+
"single_word": false,
|
| 75 |
+
"special": true
|
| 76 |
+
},
|
| 77 |
+
"151652": {
|
| 78 |
+
"content": "<|vision_start|>",
|
| 79 |
+
"lstrip": false,
|
| 80 |
+
"normalized": false,
|
| 81 |
+
"rstrip": false,
|
| 82 |
+
"single_word": false,
|
| 83 |
+
"special": true
|
| 84 |
+
},
|
| 85 |
+
"151653": {
|
| 86 |
+
"content": "<|vision_end|>",
|
| 87 |
+
"lstrip": false,
|
| 88 |
+
"normalized": false,
|
| 89 |
+
"rstrip": false,
|
| 90 |
+
"single_word": false,
|
| 91 |
+
"special": true
|
| 92 |
+
},
|
| 93 |
+
"151654": {
|
| 94 |
+
"content": "<|vision_pad|>",
|
| 95 |
+
"lstrip": false,
|
| 96 |
+
"normalized": false,
|
| 97 |
+
"rstrip": false,
|
| 98 |
+
"single_word": false,
|
| 99 |
+
"special": true
|
| 100 |
+
},
|
| 101 |
+
"151655": {
|
| 102 |
+
"content": "<|image_pad|>",
|
| 103 |
+
"lstrip": false,
|
| 104 |
+
"normalized": false,
|
| 105 |
+
"rstrip": false,
|
| 106 |
+
"single_word": false,
|
| 107 |
+
"special": true
|
| 108 |
+
},
|
| 109 |
+
"151656": {
|
| 110 |
+
"content": "<|video_pad|>",
|
| 111 |
+
"lstrip": false,
|
| 112 |
+
"normalized": false,
|
| 113 |
+
"rstrip": false,
|
| 114 |
+
"single_word": false,
|
| 115 |
+
"special": true
|
| 116 |
+
},
|
| 117 |
+
"151657": {
|
| 118 |
+
"content": "<tool_call>",
|
| 119 |
+
"lstrip": false,
|
| 120 |
+
"normalized": false,
|
| 121 |
+
"rstrip": false,
|
| 122 |
+
"single_word": false,
|
| 123 |
+
"special": false
|
| 124 |
+
},
|
| 125 |
+
"151658": {
|
| 126 |
+
"content": "</tool_call>",
|
| 127 |
+
"lstrip": false,
|
| 128 |
+
"normalized": false,
|
| 129 |
+
"rstrip": false,
|
| 130 |
+
"single_word": false,
|
| 131 |
+
"special": false
|
| 132 |
+
},
|
| 133 |
+
"151659": {
|
| 134 |
+
"content": "<|fim_prefix|>",
|
| 135 |
+
"lstrip": false,
|
| 136 |
+
"normalized": false,
|
| 137 |
+
"rstrip": false,
|
| 138 |
+
"single_word": false,
|
| 139 |
+
"special": false
|
| 140 |
+
},
|
| 141 |
+
"151660": {
|
| 142 |
+
"content": "<|fim_middle|>",
|
| 143 |
+
"lstrip": false,
|
| 144 |
+
"normalized": false,
|
| 145 |
+
"rstrip": false,
|
| 146 |
+
"single_word": false,
|
| 147 |
+
"special": false
|
| 148 |
+
},
|
| 149 |
+
"151661": {
|
| 150 |
+
"content": "<|fim_suffix|>",
|
| 151 |
+
"lstrip": false,
|
| 152 |
+
"normalized": false,
|
| 153 |
+
"rstrip": false,
|
| 154 |
+
"single_word": false,
|
| 155 |
+
"special": false
|
| 156 |
+
},
|
| 157 |
+
"151662": {
|
| 158 |
+
"content": "<|fim_pad|>",
|
| 159 |
+
"lstrip": false,
|
| 160 |
+
"normalized": false,
|
| 161 |
+
"rstrip": false,
|
| 162 |
+
"single_word": false,
|
| 163 |
+
"special": false
|
| 164 |
+
},
|
| 165 |
+
"151663": {
|
| 166 |
+
"content": "<|repo_name|>",
|
| 167 |
+
"lstrip": false,
|
| 168 |
+
"normalized": false,
|
| 169 |
+
"rstrip": false,
|
| 170 |
+
"single_word": false,
|
| 171 |
+
"special": false
|
| 172 |
+
},
|
| 173 |
+
"151664": {
|
| 174 |
+
"content": "<|file_sep|>",
|
| 175 |
+
"lstrip": false,
|
| 176 |
+
"normalized": false,
|
| 177 |
+
"rstrip": false,
|
| 178 |
+
"single_word": false,
|
| 179 |
+
"special": false
|
| 180 |
+
},
|
| 181 |
+
"151665": {
|
| 182 |
+
"content": "<tool_response>",
|
| 183 |
+
"lstrip": false,
|
| 184 |
+
"normalized": false,
|
| 185 |
+
"rstrip": false,
|
| 186 |
+
"single_word": false,
|
| 187 |
+
"special": false
|
| 188 |
+
},
|
| 189 |
+
"151666": {
|
| 190 |
+
"content": "</tool_response>",
|
| 191 |
+
"lstrip": false,
|
| 192 |
+
"normalized": false,
|
| 193 |
+
"rstrip": false,
|
| 194 |
+
"single_word": false,
|
| 195 |
+
"special": false
|
| 196 |
+
},
|
| 197 |
+
"151667": {
|
| 198 |
+
"content": "<think>",
|
| 199 |
+
"lstrip": false,
|
| 200 |
+
"normalized": false,
|
| 201 |
+
"rstrip": false,
|
| 202 |
+
"single_word": false,
|
| 203 |
+
"special": false
|
| 204 |
+
},
|
| 205 |
+
"151668": {
|
| 206 |
+
"content": "</think>",
|
| 207 |
+
"lstrip": false,
|
| 208 |
+
"normalized": false,
|
| 209 |
+
"rstrip": false,
|
| 210 |
+
"single_word": false,
|
| 211 |
+
"special": false
|
| 212 |
+
}
|
| 213 |
+
},
|
| 214 |
+
"additional_special_tokens": [
|
| 215 |
+
"<|im_start|>",
|
| 216 |
+
"<|im_end|>",
|
| 217 |
+
"<|object_ref_start|>",
|
| 218 |
+
"<|object_ref_end|>",
|
| 219 |
+
"<|box_start|>",
|
| 220 |
+
"<|box_end|>",
|
| 221 |
+
"<|quad_start|>",
|
| 222 |
+
"<|quad_end|>",
|
| 223 |
+
"<|vision_start|>",
|
| 224 |
+
"<|vision_end|>",
|
| 225 |
+
"<|vision_pad|>",
|
| 226 |
+
"<|image_pad|>",
|
| 227 |
+
"<|video_pad|>"
|
| 228 |
+
],
|
| 229 |
+
"bos_token": null,
|
| 230 |
+
"clean_up_tokenization_spaces": false,
|
| 231 |
+
"eos_token": "<|im_end|>",
|
| 232 |
+
"errors": "replace",
|
| 233 |
+
"extra_special_tokens": {},
|
| 234 |
+
"model_max_length": 131072,
|
| 235 |
+
"pad_token": "<|endoftext|>",
|
| 236 |
+
"split_special_tokens": false,
|
| 237 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 238 |
+
"unk_token": null
|
| 239 |
+
}
|
last-checkpoint/trainer_state.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": null,
|
| 3 |
+
"best_metric": null,
|
| 4 |
+
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 43,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [],
|
| 12 |
+
"logging_steps": 50,
|
| 13 |
+
"max_steps": 43,
|
| 14 |
+
"num_input_tokens_seen": 0,
|
| 15 |
+
"num_train_epochs": 1,
|
| 16 |
+
"save_steps": 200,
|
| 17 |
+
"stateful_callbacks": {
|
| 18 |
+
"TrainerControl": {
|
| 19 |
+
"args": {
|
| 20 |
+
"should_epoch_stop": false,
|
| 21 |
+
"should_evaluate": false,
|
| 22 |
+
"should_log": false,
|
| 23 |
+
"should_save": true,
|
| 24 |
+
"should_training_stop": true
|
| 25 |
+
},
|
| 26 |
+
"attributes": {}
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"total_flos": 0.0,
|
| 30 |
+
"train_batch_size": 512,
|
| 31 |
+
"trial_name": null,
|
| 32 |
+
"trial_params": null
|
| 33 |
+
}
|
last-checkpoint/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b399764abfbe075f6d000718b12ff8e0f9afe70f5d9ff48947a3a3a2d8fa976
|
| 3 |
+
size 6289
|
last-checkpoint/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|