Training in progress, step 200, checkpoint
Browse files- .gitattributes +1 -0
- last-checkpoint/1_Pooling/config.json +10 -0
- last-checkpoint/2_Dense/config.json +6 -0
- last-checkpoint/2_Dense/model.safetensors +3 -0
- last-checkpoint/3_Dense/config.json +6 -0
- last-checkpoint/3_Dense/model.safetensors +3 -0
- last-checkpoint/README.md +695 -0
- last-checkpoint/added_tokens.json +28 -0
- last-checkpoint/chat_template.jinja +85 -0
- last-checkpoint/config.json +60 -0
- last-checkpoint/config_sentence_transformers.json +14 -0
- last-checkpoint/merges.txt +0 -0
- last-checkpoint/model.safetensors +3 -0
- last-checkpoint/modules.json +20 -0
- last-checkpoint/optimizer.pt +3 -0
- last-checkpoint/rng_state.pth +3 -0
- last-checkpoint/scheduler.pt +3 -0
- last-checkpoint/sentence_bert_config.json +4 -0
- last-checkpoint/special_tokens_map.json +31 -0
- last-checkpoint/tokenizer.json +3 -0
- last-checkpoint/tokenizer.model +3 -0
- last-checkpoint/tokenizer_config.json +239 -0
- last-checkpoint/trainer_state.json +62 -0
- last-checkpoint/training_args.bin +3 -0
- last-checkpoint/vocab.json +0 -0
.gitattributes
CHANGED
|
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
last-checkpoint/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
last-checkpoint/1_Pooling/config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 1024,
|
| 3 |
+
"pooling_mode_cls_token": false,
|
| 4 |
+
"pooling_mode_mean_tokens": false,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
| 7 |
+
"pooling_mode_weightedmean_tokens": false,
|
| 8 |
+
"pooling_mode_lasttoken": true,
|
| 9 |
+
"include_prompt": true
|
| 10 |
+
}
|
last-checkpoint/2_Dense/config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"in_features": 768,
|
| 3 |
+
"out_features": 3072,
|
| 4 |
+
"bias": false,
|
| 5 |
+
"activation_function": "torch.nn.modules.linear.Identity"
|
| 6 |
+
}
|
last-checkpoint/2_Dense/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fed83dfd00c1a0263f07eea8794b9265914ae7b3dc5c76729cf3807e2861adc3
|
| 3 |
+
size 9437272
|
last-checkpoint/3_Dense/config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"in_features": 3072,
|
| 3 |
+
"out_features": 768,
|
| 4 |
+
"bias": false,
|
| 5 |
+
"activation_function": "torch.nn.modules.linear.Identity"
|
| 6 |
+
}
|
last-checkpoint/3_Dense/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87b5e471a0697253d32e596ab7ab53200a19437e9d28de12f4dc211852102b58
|
| 3 |
+
size 9437272
|
last-checkpoint/README.md
ADDED
|
@@ -0,0 +1,695 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- sentence-transformers
|
| 4 |
+
- sentence-similarity
|
| 5 |
+
- feature-extraction
|
| 6 |
+
- dense
|
| 7 |
+
- generated_from_trainer
|
| 8 |
+
- dataset_size:552482
|
| 9 |
+
- loss:CachedMultipleNegativesRankingLoss
|
| 10 |
+
base_model: Qwen/Qwen3-Embedding-0.6B
|
| 11 |
+
widget:
|
| 12 |
+
- source_sentence: "title: \nCreatto Flashy Fish Silly Swimmers LightUp 3D Puzzle\
|
| 13 |
+
\ Kit Includes Creatto Puzzle Pieces to Make Illuminated Craft Creations Sting\
|
| 14 |
+
\ Ray Turtle Crab Fish DIY Activity LED Lights"
|
| 15 |
+
sentences:
|
| 16 |
+
- 'description
|
| 17 |
+
|
| 18 |
+
With 43 Creatto pieces and a string of 20 LED lights you can build a cute cartoony
|
| 19 |
+
fish a crab a stingray and a turtle Batterypowered lights require 2 AA batteries
|
| 20 |
+
Suitable for ages 8 and upMAKE Each Creatto kit consists of just two primary building
|
| 21 |
+
components that easily piece together into countless different 3D creations
|
| 22 |
+
|
| 23 |
+
PLAY Once your creation is complete enjoy it as you play It becomes a prop for
|
| 24 |
+
carrying out infinite adventures and storylines
|
| 25 |
+
|
| 26 |
+
DISPLAY Your Creatto doubles as a piece of artwork Display it in your room and
|
| 27 |
+
let it showcase your creativity and individuality The string of LED lights makes
|
| 28 |
+
for a particularly striking sight
|
| 29 |
+
|
| 30 |
+
Building instructions are language independent just follow the visual steps to
|
| 31 |
+
learn how the Creatto system worksand how easy it is to use
|
| 32 |
+
|
| 33 |
+
Stepbystep building videos for the models included in the kit a cute cartoony
|
| 34 |
+
fish a crab a stingray and a turtle are available online'
|
| 35 |
+
- 'description
|
| 36 |
+
|
| 37 |
+
Bring home your favorite characters from Middleearth The Lord of the Rings Frodo
|
| 38 |
+
Baggins Pop Vinyl Figure steps right off the silver screen of Peter Jacksons The
|
| 39 |
+
Lord of the Rings trilogy and into that familiar Funko format The Lord of the
|
| 40 |
+
Rings Frodo Baggins Pop Vinyl Figure measures approximately 3 34inches tall and
|
| 41 |
+
comes packaged in a window display box CHASE VERSIONFrom The Lord of the Rings
|
| 42 |
+
Gandalf as a stylized POP vinyl from Funko'
|
| 43 |
+
- 'description
|
| 44 |
+
|
| 45 |
+
Specification
|
| 46 |
+
|
| 47 |
+
Condition 100 Brand New
|
| 48 |
+
|
| 49 |
+
Item Type RC Accessory
|
| 50 |
+
|
| 51 |
+
Material Metal Magnet
|
| 52 |
+
|
| 53 |
+
Color Black
|
| 54 |
+
|
| 55 |
+
Length Approx 120mm 472inch
|
| 56 |
+
|
| 57 |
+
Diameter Approx 66mm 260inch Fins Not Included
|
| 58 |
+
|
| 59 |
+
Shaft Diameter Approx 8mm 031inch
|
| 60 |
+
|
| 61 |
+
Length of Extension Shaft Approx 30mm 118inch
|
| 62 |
+
|
| 63 |
+
Weight Approx 18112g
|
| 64 |
+
|
| 65 |
+
Power 10000W
|
| 66 |
+
|
| 67 |
+
imum LiPo 814S
|
| 68 |
+
|
| 69 |
+
imum Current 200A
|
| 70 |
+
|
| 71 |
+
Pole 4
|
| 72 |
+
|
| 73 |
+
IO 45A
|
| 74 |
+
|
| 75 |
+
Resistance 00095
|
| 76 |
+
|
| 77 |
+
KVRPMVolt 620
|
| 78 |
+
|
| 79 |
+
RPM 35000
|
| 80 |
+
|
| 81 |
+
Package List
|
| 82 |
+
|
| 83 |
+
1 70120 620KV Brushless MotorLevel 4Brushless motor 4pole 12slot height design
|
| 84 |
+
Design for maximum energy conversion
|
| 85 |
+
|
| 86 |
+
UniversalUniversal RC car motor suitable for 15 scale brushless remote control
|
| 87 |
+
car also suitable for electric skateboard
|
| 88 |
+
|
| 89 |
+
ReliableThe Sensorless brushless motor precisely balances the rotor to ensure
|
| 90 |
+
reliability and maximum speed
|
| 91 |
+
|
| 92 |
+
High efficiency RC car engine starter kit highpurity copper winding maximizes
|
| 93 |
+
efficiency
|
| 94 |
+
|
| 95 |
+
StableRC brushless motor accessoryABEC5 bearing stability'
|
| 96 |
+
- source_sentence: "title: \nLEGO Friends Heartlake Surf Shop 41315 Building Kit 186\
|
| 97 |
+
\ Pieces Discontinued by Manufacturer"
|
| 98 |
+
sentences:
|
| 99 |
+
- 'description
|
| 100 |
+
|
| 101 |
+
Take one of the most popular games with you wherever you go The Coop Hydro Lacrosse
|
| 102 |
+
game set is great for playing lacrosse at the pool beach or in the backyard The
|
| 103 |
+
soft foam handles of this fun water toy make it easy for all skill levels to catch
|
| 104 |
+
scoop cradle and throw the ball Coop Hydro Lacrosse is an excellent choice for
|
| 105 |
+
encouraging young players who are not quite ready for a larger lacrosse stick
|
| 106 |
+
Features quality construction for longlasting play and the colorful ontrend graphics
|
| 107 |
+
add to the fun Two lacrosse sticks and one ball in each set Each set sold individuallyFUN
|
| 108 |
+
OUTDOOR GAME Lacrosse game set is perfect for pool beach or backyard play
|
| 109 |
+
|
| 110 |
+
EASY TO PLAY catch scoop throw and cradle with ease Great for young lacrosse players
|
| 111 |
+
and the whole family
|
| 112 |
+
|
| 113 |
+
DURABLE Plastic frame maintains shape for long time use The lacrosse stick is
|
| 114 |
+
strong with a foam handle for comfort
|
| 115 |
+
|
| 116 |
+
ALL PIECES INCLUDED Each set includes 2 lacrosse sticks and 1 soft ball'
|
| 117 |
+
- 'description
|
| 118 |
+
|
| 119 |
+
ACHIEVE TRUE RANDOM
|
| 120 |
+
|
| 121 |
+
What makes a single die Casino Quality Casino dice must be manufactured with superlative
|
| 122 |
+
care to exacting standards in order to meet stringent casino specifications Every
|
| 123 |
+
die must be truly random which requires it to have razorsharp edges and flush
|
| 124 |
+
pips so each face has equal weight A typical casino die is a perfect 19mm cube
|
| 125 |
+
with an error margin of up to 3mm The error margin of our Grade AAA dice 05mm
|
| 126 |
+
Thats thinner than 13 the thickness of an eyelash These dice look casino authentic
|
| 127 |
+
are manufactured using the authentic process and with an authentic quality indistinguishable
|
| 128 |
+
from the very same dice used on casino floors every day They make the perfect
|
| 129 |
+
gift for any gaming enthusiast that wants to get as close to the Vegas tables
|
| 130 |
+
as possible
|
| 131 |
+
|
| 132 |
+
Why Youll Love It
|
| 133 |
+
|
| 134 |
+
Our dice began as high quality raw materials cut into cubes with razorsharp edges
|
| 135 |
+
Each of the 21 pips are drilled to identical depths and to ensure that all 6 sides
|
| 136 |
+
maintain equal weight the holes are then filled with a special white epoxy with
|
| 137 |
+
the precise weight and density of the material that was removed during the drilling
|
| 138 |
+
process Pairs of opposite sides are then abraded and cut in sequence to ensure
|
| 139 |
+
each side remains the same size The dice are then given a glossy polished finish
|
| 140 |
+
before being professionally serialized to prevent tampering and preserve authenticityMAKE
|
| 141 |
+
THE GRADE Our Fine Precision Dice meet or exceed all exacting casino dice specifications
|
| 142 |
+
for the most professional gaming experience
|
| 143 |
+
|
| 144 |
+
TRUE PRECISION Dice are cut with an exacting error margin of 05mm which is about
|
| 145 |
+
13 the thickness of an eyelash
|
| 146 |
+
|
| 147 |
+
RAZORS EDGE Real Casino Dice are known for having sharp razor edges and flush
|
| 148 |
+
pips drilled to identical depths This ensures truly random rolls
|
| 149 |
+
|
| 150 |
+
TAMPER PROOF All sets of Brybelly Casino Dice are professionally serialized as
|
| 151 |
+
a set of 5 to prevent tampering
|
| 152 |
+
|
| 153 |
+
GOOD AS GOLD These sets make the perfect gift for any gaming enthusiast Each set
|
| 154 |
+
even comes wrapped in giftable golden foil'
|
| 155 |
+
- 'description
|
| 156 |
+
|
| 157 |
+
Your explorer can celebrate their summer excitement with all of the accessories
|
| 158 |
+
in the LEGO Friends Heartlake Surf Shop set Kids can catch a wave and create stories
|
| 159 |
+
about the adventures they have on their boards Builders will love to create summer
|
| 160 |
+
memories with this set Multiple models and plenty of extras makes this set a great
|
| 161 |
+
value and combine it with LEGO Friends Sunshine Catamaran and Andreas Speedboat
|
| 162 |
+
Transporter to continue the summer funFeatures a surf shop with opening door beach
|
| 163 |
+
area with sun lounger jetty with buoys surfboard stand buildable kayak and a rock
|
| 164 |
+
for the seal
|
| 165 |
+
|
| 166 |
+
Includes a Mia minidoll figure in a wetsuit plus a Velvet the seal figure
|
| 167 |
+
|
| 168 |
+
Accessory elements include a paddle 2 surfboards waterproof camera sunglasses
|
| 169 |
+
binoculars water bottles postcard and a cash register
|
| 170 |
+
|
| 171 |
+
Surf shop measures over 4in 11cm high 5in 14cm wide and 6in 17cm deep
|
| 172 |
+
|
| 173 |
+
Kayak measures over 4in 12cm long and under 1in 1cm high and 1in 2cm wide'
|
| 174 |
+
- source_sentence: "title: \nTerra by Battat Horses Set Detailed Miniature Horse\
|
| 175 |
+
\ Toys and Farm Animals with Quarter Horse for Kids 3 6 pc"
|
| 176 |
+
sentences:
|
| 177 |
+
- 'description
|
| 178 |
+
|
| 179 |
+
Princess Bride is the beloved fantasy adventure that celebrates true love and
|
| 180 |
+
follows the adventures of one farmhandturnedpirate as he fights for his true love
|
| 181 |
+
Featuring a memorable cast including Billy Crystal and Andre the Giant If youre
|
| 182 |
+
a fan of Princess Bride you need these cool diecut vinyl stickers All our products
|
| 183 |
+
are officially licensed and 100 authenticExclusive custom collectible and 100
|
| 184 |
+
official vinyl diecut stickers
|
| 185 |
+
|
| 186 |
+
Each one of our designs are officially licensed and 100 authentic
|
| 187 |
+
|
| 188 |
+
Perfect to personalize Laptops MacBooks Surfaces iPads Lockers Bedroom and much
|
| 189 |
+
more Great for DIY decoration a gift for your kids and friends
|
| 190 |
+
|
| 191 |
+
Simply get the stickers clean the surface sticker on and enjoy 30 Day 100 Satisfaction
|
| 192 |
+
is guarantee
|
| 193 |
+
|
| 194 |
+
Sticker Sheet Size 4 x 7'
|
| 195 |
+
- 'description
|
| 196 |
+
|
| 197 |
+
Peppa Pig Tin Lunch Box With Puzzle Set 5 Pc Bundle with Peppa Lunch Box 24Piece
|
| 198 |
+
Puzzle for Kids Stickers Flash Cards and More Peppa Pig Activity Set This Peppa
|
| 199 |
+
Pig lunch and puzzle set comes with 1 Peppa Pig tin lunch box measuring 775 x
|
| 200 |
+
3 x 62 with 1 24piece Peppa Pig jigsaw puzzle for kids and toddlers inside The
|
| 201 |
+
Peppa Pig puzzle measures 15 x 125 inches when complete and features your favorite
|
| 202 |
+
characters from Peppa Pig Ideal for kids ages 35 and 48 boys and girls alike This
|
| 203 |
+
Peppa Pig activity set is great at home or around town Use the Peppa Pig tin lunch
|
| 204 |
+
box for school lunch or keep it as your puzzle storage case Officially licensed
|
| 205 |
+
Peppa Pig puzzles and games The Peppa Pig tin lunch box and puzzle activity also
|
| 206 |
+
comes with 3 packs of Peppa Pig flash cards Peppa Pig stickers and a Barn Bots
|
| 207 |
+
door hangerThis Peppa Pig lunch and puzzle set comes with 1 Peppa Pig tin lunch
|
| 208 |
+
box measuring 775 x 3 x 62 with 1 24piece Peppa Pig jigsaw puzzle for kids and
|
| 209 |
+
toddlers inside
|
| 210 |
+
|
| 211 |
+
The Peppa Pig puzzle measures 15 x 125 inches when complete and features your
|
| 212 |
+
favorite characters from Peppa Pig Ideal for kids ages 35 and 48 boys and girls
|
| 213 |
+
alike
|
| 214 |
+
|
| 215 |
+
This Peppa Pig activity set is great at home or around town Use the Peppa Pig
|
| 216 |
+
tin lunch box for school lunch or keep it as your puzzle storage case
|
| 217 |
+
|
| 218 |
+
Officially licensed Peppa Pig puzzles and games The Peppa Pig tin lunch box and
|
| 219 |
+
puzzle activity also comes with 3 packs of Peppa Pig flash cards and 1 Peppa Pig
|
| 220 |
+
play pack containing stickers coloring pages and coloring utensils
|
| 221 |
+
|
| 222 |
+
Skill level Kids Children Girls Boys Toddlers'
|
| 223 |
+
- 'description
|
| 224 |
+
|
| 225 |
+
Theres no horsing around with this set of 6 classic Horse toys for toddlers from
|
| 226 |
+
Terra by Battat Little ones and collectors alike will love the adorable and realistic
|
| 227 |
+
miniature animals Each plastic horse has individual characteristics and details
|
| 228 |
+
from the speedy quarter Horse to the graceful pizzas and the hardworking Shire
|
| 229 |
+
The carefullycrafted horses are made from safe and durable plastic that looks
|
| 230 |
+
great in any animal figure collection Your pony toy will be proud to share the
|
| 231 |
+
toy horse stable with these gorgeous Fillies Saddle up this collection of farm
|
| 232 |
+
animal toy figures from Terra by Battat6 detailed horse toy figurines with natural
|
| 233 |
+
poses that stand on their own hooves
|
| 234 |
+
|
| 235 |
+
Beautifully crafted lifelike toys with realistic details
|
| 236 |
+
|
| 237 |
+
Includes 1 quarter Horse 1 Friesian 1 falabella 1 lipizzan 1 spotted saddle and
|
| 238 |
+
1 Shire horse toy
|
| 239 |
+
|
| 240 |
+
Educational toys provide information about horses and encourage compassion for
|
| 241 |
+
animals
|
| 242 |
+
|
| 243 |
+
Imaginative play reduces screen time while improving social skills
|
| 244 |
+
|
| 245 |
+
Size horse toys approximately 4 05 0 inches long
|
| 246 |
+
|
| 247 |
+
Recommended for children ages 3 and up
|
| 248 |
+
|
| 249 |
+
This set of small animals is made of worryfree materials We make toys that are
|
| 250 |
+
safe and sturdy at terra
|
| 251 |
+
|
| 252 |
+
Discover the entire Terra by Battat family of toy animal figures'
|
| 253 |
+
- source_sentence: "title: \nDisguise Costumes Sequin Eye Mask Bright Red"
|
| 254 |
+
sentences:
|
| 255 |
+
- "description\nMagic the Gathering is a collectible card game created by Richard\
|
| 256 |
+
\ Garfield In Magic you play the role of a planeswalker who fights other planeswalkers\
|
| 257 |
+
\ for glory knowledge and conquest Your deck of cards represents all the weapons\
|
| 258 |
+
\ in your arsenal It contains the spells you know and the creatures you can summon\
|
| 259 |
+
\ to fight for you\nCard Name\n Grand Arbiter Augustin IV\nCost\n 2WU\nColor\n\
|
| 260 |
+
\ MultiColor\nCard Type\n Legendary Creature Human Advisor\nPowerToughness\n\
|
| 261 |
+
\ 23\nCard Text\n White spells you play cost 1 less to play Blue spells you play\
|
| 262 |
+
\ cost 1 less to play Spells your opponents play cost 1 more to playA single individual\
|
| 263 |
+
\ card from the Magic the Gathering MTG trading and collectible card game TCGCCG\n\
|
| 264 |
+
This is of Rare rarity\nFrom the Dissension set"
|
| 265 |
+
- 'description
|
| 266 |
+
|
| 267 |
+
Disguise Costumes has been making fun interactive costumes for children and adults
|
| 268 |
+
for decades Trusted in the industry for its creativity ingenuity and amazing price
|
| 269 |
+
for the value Disguise wont let you down when it comes to fun dress up timeQuality
|
| 270 |
+
materials used to make Disguise products
|
| 271 |
+
|
| 272 |
+
Fun Colorful Inventive designs to put you in the world of role play
|
| 273 |
+
|
| 274 |
+
Whether its Halloween birthday parties or even a fun filled night disguise is
|
| 275 |
+
good for everything'
|
| 276 |
+
- 'description
|
| 277 |
+
|
| 278 |
+
A colourful coffee maker set from New Classic Toys Who wants a coffee Everything
|
| 279 |
+
you need for a good coffee This coffee machine set includes two coffee pods to
|
| 280 |
+
insert in the removable filter a milk carton two brightly painted coffee cups
|
| 281 |
+
with saucers This article is part of the New Classic Toys Bon Appetit productline
|
| 282 |
+
Cutting baking cooking and ready for dinner Our little pots pans cuttings sets
|
| 283 |
+
and kitchens are absolute must haves for little master chefs100 wood
|
| 284 |
+
|
| 285 |
+
Safety we want to challenge excite and enjoy children with high quality toys All
|
| 286 |
+
products comply with the strictest safety requirements conform the standards Our
|
| 287 |
+
products last for generations And we are very proud of this
|
| 288 |
+
|
| 289 |
+
Fun this beautiful toy is colourful and has many fun options to play with The
|
| 290 |
+
color is very bright Children would spend lot of time playing and parents finally
|
| 291 |
+
could get some rest
|
| 292 |
+
|
| 293 |
+
Learning Playing is not just fun it is also very important While playing kids
|
| 294 |
+
get familiar with the world Building stacking making music or imitating playing
|
| 295 |
+
contributes to the development of insight talent and motor skills
|
| 296 |
+
|
| 297 |
+
Age Designed for anyone aged 3 and up this imagination toy can be enjoyed by every
|
| 298 |
+
child in the family
|
| 299 |
+
|
| 300 |
+
Bon Appetit This toy set is part of the Bon Appetit collection of New Classic
|
| 301 |
+
Toys Cutting baking cooking and ready for dinner Our little pots pans cuttings
|
| 302 |
+
sets and kitchens are absolute must haves for little master chefs'
|
| 303 |
+
- source_sentence: "title: \nDH top bottom main gear for 910108 or 905308 or 905008"
|
| 304 |
+
sentences:
|
| 305 |
+
- 'description
|
| 306 |
+
|
| 307 |
+
Lord of the Fries is the followup to Give Me the Brain another game in the Fast
|
| 308 |
+
Food Restaurant of the Damned The mechanic is simple combine the ingredients in
|
| 309 |
+
your hand to build combo meals with dishes like the Cowabunga the Meat Munch and
|
| 310 |
+
of course the Lord of the Fries This is the fourth edition of Lord of the Fries
|
| 311 |
+
It was introduced in 1998 and has been updated and improved in every edition The
|
| 312 |
+
game returns to Cheapass Games after a brief vacation at Steve Jackson Games This
|
| 313 |
+
time the game is designed to expand The core box contains the original restaurant
|
| 314 |
+
menu 55 cards plus a completely new Coffee Shop menu with allnew art and ingredients
|
| 315 |
+
also 55 cards Each deck can support up to 6 players and there is a third menu
|
| 316 |
+
using ingredients from both decks which supports up to 8 players Also arriving
|
| 317 |
+
on the scene will be four standalone restaurant decks Mexican Chinese Italian
|
| 318 |
+
and Irish Each deck plays alone for up to 6 players fits into the core game box
|
| 319 |
+
and can combine with the core deck for up to 8 playersLightweight funny card game
|
| 320 |
+
make combo meals from random ingredients
|
| 321 |
+
|
| 322 |
+
Zombies All new card art by Brian Snoddy
|
| 323 |
+
|
| 324 |
+
Includes a complete Coffee Shop expansion
|
| 325 |
+
|
| 326 |
+
Designed by awardwinning game designer James Ernest
|
| 327 |
+
|
| 328 |
+
All new card art by Brian Snoddy'
|
| 329 |
+
- 'description
|
| 330 |
+
|
| 331 |
+
From the Manufacturer
|
| 332 |
+
|
| 333 |
+
Round 2s Polar Lights brings back another popular movie car the Time Machine made
|
| 334 |
+
famous in Back to the Future The kit features every detail to be expected in a
|
| 335 |
+
Polar Lights kit from the flux capacitor to its gullwing doors A lightning rod
|
| 336 |
+
hook power assembly is also included as an option The car body features a new
|
| 337 |
+
authentic looking brushed metal look that captures the brushed aluminum of the
|
| 338 |
+
real car The tires roll and the snap assembly means the kit can be assembled in
|
| 339 |
+
no time with great results With this model kit youre sure to hit 88 miles an hour
|
| 340 |
+
and be outta timeOfficially licensed from the classic film
|
| 341 |
+
|
| 342 |
+
Features every detail including the Flux Capacitor
|
| 343 |
+
|
| 344 |
+
Authentic brushedmetal look
|
| 345 |
+
|
| 346 |
+
Features rolling tires
|
| 347 |
+
|
| 348 |
+
Snap assembly'
|
| 349 |
+
- 'description
|
| 350 |
+
|
| 351 |
+
top bottom main gear for 910108 or 905308 or 9050081 set top bottom main geartop
|
| 352 |
+
bottom main gear for 910108 or 905308 or 905008'
|
| 353 |
+
datasets:
|
| 354 |
+
- guyhadad01/Amazon_2023_items_processed_filtered
|
| 355 |
+
pipeline_tag: sentence-similarity
|
| 356 |
+
library_name: sentence-transformers
|
| 357 |
+
---
|
| 358 |
+
|
| 359 |
+
# SentenceTransformer based on Qwen/Qwen3-Embedding-0.6B
|
| 360 |
+
|
| 361 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Qwen/Qwen3-Embedding-0.6B](https://huggingface.co/Qwen/Qwen3-Embedding-0.6B) on the [amazon_2023_items_processed_filtered](https://huggingface.co/datasets/guyhadad01/Amazon_2023_items_processed_filtered) dataset. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
| 362 |
+
|
| 363 |
+
## Model Details
|
| 364 |
+
|
| 365 |
+
### Model Description
|
| 366 |
+
- **Model Type:** Sentence Transformer
|
| 367 |
+
- **Base model:** [Qwen/Qwen3-Embedding-0.6B](https://huggingface.co/Qwen/Qwen3-Embedding-0.6B) <!-- at revision c54f2e6e80b2d7b7de06f51cec4959f6b3e03418 -->
|
| 368 |
+
- **Maximum Sequence Length:** 512 tokens
|
| 369 |
+
- **Output Dimensionality:** 1024 dimensions
|
| 370 |
+
- **Similarity Function:** Cosine Similarity
|
| 371 |
+
- **Training Dataset:**
|
| 372 |
+
- [amazon_2023_items_processed_filtered](https://huggingface.co/datasets/guyhadad01/Amazon_2023_items_processed_filtered)
|
| 373 |
+
<!-- - **Language:** Unknown -->
|
| 374 |
+
<!-- - **License:** Unknown -->
|
| 375 |
+
|
| 376 |
+
### Model Sources
|
| 377 |
+
|
| 378 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
| 379 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
| 380 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
| 381 |
+
|
| 382 |
+
### Full Model Architecture
|
| 383 |
+
|
| 384 |
+
```
|
| 385 |
+
SentenceTransformer(
|
| 386 |
+
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'Qwen3Model'})
|
| 387 |
+
(1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': True, 'include_prompt': True})
|
| 388 |
+
(2): Normalize()
|
| 389 |
+
)
|
| 390 |
+
```
|
| 391 |
+
|
| 392 |
+
## Usage
|
| 393 |
+
|
| 394 |
+
### Direct Usage (Sentence Transformers)
|
| 395 |
+
|
| 396 |
+
First install the Sentence Transformers library:
|
| 397 |
+
|
| 398 |
+
```bash
|
| 399 |
+
pip install -U sentence-transformers
|
| 400 |
+
```
|
| 401 |
+
|
| 402 |
+
Then you can load this model and run inference.
|
| 403 |
+
```python
|
| 404 |
+
from sentence_transformers import SentenceTransformer
|
| 405 |
+
|
| 406 |
+
# Download from the 🤗 Hub
|
| 407 |
+
model = SentenceTransformer("guyhadad01/EncodeRec_600M_Toys")
|
| 408 |
+
# Run inference
|
| 409 |
+
queries = [
|
| 410 |
+
"title: \nDH top bottom main gear for 910108 or 905308 or 905008",
|
| 411 |
+
]
|
| 412 |
+
documents = [
|
| 413 |
+
'description\ntop bottom main gear for 910108 or 905308 or 9050081 set top bottom main geartop bottom main gear for 910108 or 905308 or 905008',
|
| 414 |
+
'description\nFrom the Manufacturer\nRound 2s Polar Lights brings back another popular movie car the Time Machine made famous in Back to the Future The kit features every detail to be expected in a Polar Lights kit from the flux capacitor to its gullwing doors A lightning rod hook power assembly is also included as an option The car body features a new authentic looking brushed metal look that captures the brushed aluminum of the real car The tires roll and the snap assembly means the kit can be assembled in no time with great results With this model kit youre sure to hit 88 miles an hour and be outta timeOfficially licensed from the classic film\nFeatures every detail including the Flux Capacitor\nAuthentic brushedmetal look\nFeatures rolling tires\nSnap assembly',
|
| 415 |
+
'description\nLord of the Fries is the followup to Give Me the Brain another game in the Fast Food Restaurant of the Damned The mechanic is simple combine the ingredients in your hand to build combo meals with dishes like the Cowabunga the Meat Munch and of course the Lord of the Fries This is the fourth edition of Lord of the Fries It was introduced in 1998 and has been updated and improved in every edition The game returns to Cheapass Games after a brief vacation at Steve Jackson Games This time the game is designed to expand The core box contains the original restaurant menu 55 cards plus a completely new Coffee Shop menu with allnew art and ingredients also 55 cards Each deck can support up to 6 players and there is a third menu using ingredients from both decks which supports up to 8 players Also arriving on the scene will be four standalone restaurant decks Mexican Chinese Italian and Irish Each deck plays alone for up to 6 players fits into the core game box and can combine with the core deck for up to 8 playersLightweight funny card game make combo meals from random ingredients\nZombies All new card art by Brian Snoddy\nIncludes a complete Coffee Shop expansion\nDesigned by awardwinning game designer James Ernest\nAll new card art by Brian Snoddy',
|
| 416 |
+
]
|
| 417 |
+
query_embeddings = model.encode_query(queries)
|
| 418 |
+
document_embeddings = model.encode_document(documents)
|
| 419 |
+
print(query_embeddings.shape, document_embeddings.shape)
|
| 420 |
+
# (1, 1024) (3, 1024)
|
| 421 |
+
|
| 422 |
+
# Get the similarity scores for the embeddings
|
| 423 |
+
similarities = model.similarity(query_embeddings, document_embeddings)
|
| 424 |
+
print(similarities)
|
| 425 |
+
# tensor([[ 0.8868, 0.0210, -0.0103]])
|
| 426 |
+
```
|
| 427 |
+
|
| 428 |
+
<!--
|
| 429 |
+
### Direct Usage (Transformers)
|
| 430 |
+
|
| 431 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
| 432 |
+
|
| 433 |
+
</details>
|
| 434 |
+
-->
|
| 435 |
+
|
| 436 |
+
<!--
|
| 437 |
+
### Downstream Usage (Sentence Transformers)
|
| 438 |
+
|
| 439 |
+
You can finetune this model on your own dataset.
|
| 440 |
+
|
| 441 |
+
<details><summary>Click to expand</summary>
|
| 442 |
+
|
| 443 |
+
</details>
|
| 444 |
+
-->
|
| 445 |
+
|
| 446 |
+
<!--
|
| 447 |
+
### Out-of-Scope Use
|
| 448 |
+
|
| 449 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 450 |
+
-->
|
| 451 |
+
|
| 452 |
+
<!--
|
| 453 |
+
## Bias, Risks and Limitations
|
| 454 |
+
|
| 455 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
| 456 |
+
-->
|
| 457 |
+
|
| 458 |
+
<!--
|
| 459 |
+
### Recommendations
|
| 460 |
+
|
| 461 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
| 462 |
+
-->
|
| 463 |
+
|
| 464 |
+
## Training Details
|
| 465 |
+
|
| 466 |
+
### Training Dataset
|
| 467 |
+
|
| 468 |
+
#### amazon_2023_items_processed_filtered
|
| 469 |
+
|
| 470 |
+
* Dataset: [amazon_2023_items_processed_filtered](https://huggingface.co/datasets/guyhadad01/Amazon_2023_items_processed_filtered) at [6b58dd1](https://huggingface.co/datasets/guyhadad01/Amazon_2023_items_processed_filtered/tree/6b58dd18854109aac31652e941c667725f6352f0)
|
| 471 |
+
* Size: 552,482 training samples
|
| 472 |
+
* Columns: <code>title</code> and <code>description</code>
|
| 473 |
+
* Approximate statistics based on the first 1000 samples:
|
| 474 |
+
| | title | description |
|
| 475 |
+
|:--------|:----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
| 476 |
+
| type | string | string |
|
| 477 |
+
| details | <ul><li>min: 7 tokens</li><li>mean: 25.42 tokens</li><li>max: 81 tokens</li></ul> | <ul><li>min: 16 tokens</li><li>mean: 209.44 tokens</li><li>max: 512 tokens</li></ul> |
|
| 478 |
+
* Samples:
|
| 479 |
+
| title | description |
|
| 480 |
+
|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| 481 |
+
| <code>title: <br>Gothic Mothman Plushie Doll with Bright Red Eyes Medieval Style Attractive Evil Moth Stuffed Plush Doll Toy Magical Home Decor for Bedroom for Kids Boyfriend Classmate A</code> | <code>description<br> Description Mothmans bright red eyes could stare you down in an instant and his huge dark figure cast fear and doubt in so many residents He is perfect to join you on your vast adventures This Mothman plushie is looking for a loving and magical home right now A great gift for your cryptid lover friend or that friend who likes a dark in their home decor It would look great resting on your bed sitting on your dresser or on the shelf with your other plushies Material Plush Size Approx Height 8 inches x Width 9 inches Package included 1x Gothic Mothman Plushie Mothmans bright red eyes could stare you down in an instant and his huge dark figure cast fear and doubt in so many residents He is perfect to join you on your vast adventures<br> This Mothman plushie is looking for a loving and magical home right now A great gift for your cryptid lover friend or that friend who likes a dark in their home decor<br> It would look great resting on your bed sitting on your dresser or on the shel...</code> |
|
| 482 |
+
| <code>title: <br>Traxxas Stampede 4X4 110 Scale 4wd Monster Truck Red</code> | <code>description<br>Stampede 4X4 is built Traxxas Tough to withstand all the 4WD mayhem you can dish out Its high ground clearance and longarm suspension make Stampede 4X4 feel nearly indestructible Chrome AllStar 28 wheels give Stampede 4X4 aggressive style and soft Chevron Maxx tires deliver allterrain capability Its all waterproof so the fun keeps going through wet conditions No other truck offers Stampede 4X4s style capability and durability Stampede 4X4 is perfect for almost any age and comes fully assembled painted and ReadytoDrive Stampede 4x4 features a shaftdriven 4WD system and modular design of the Slash 4X4 Stampedes highly efficient drivetrain offers optimum performance and easy maintenance A single center driveshaft connects the front and rear drive assemblies for maximum power transfer The modular design allows the entire front or rear section to be removed with just a few screws for easy service The extra ground clearance increases the offroad versatility allowing you to tackle...</code> |
|
| 483 |
+
| <code>title: <br>Hot Wheels Monster Truck 124 Scale 2022 Bone Shaker It All Vehicle with Giant Wheels for Kids Age 3 to 8 Years Old Great Gift Toy Trucks Large Scale</code> | <code>description<br>The Hot Wheels Monster Trucks 124 scale diecast trucks are THE baddest trucks ever built for competition and ultimate dominance The outrageous vehicles are ready to square off in headtohead battles with an oversized body and GIANT wheels Every toy truck package includes fun stats for the monster truck inside including name truck type strength unique crash attack and motorvation for great storytelling fun Each sold separately subject to availability Colors and decorations may varyDesigned in 124 scale with durable diecast metal bodies Hot Wheels Monster Trucks are ready to for outrageous action<br>Giant wheels and rad details amp up the collecting and crashing smashing and stunt play<br>The vehicle package features cool stats on back giving details about strength crash attack motorvation and more<br>Rev up imagination and storytelling with unique toy Monster Trucks that deliver mindblowing features fans will want them all<br>Makes a great gift for kids and Monster Trucks fans ages 3 ye...</code> |
|
| 484 |
+
* Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
|
| 485 |
+
```json
|
| 486 |
+
{
|
| 487 |
+
"scale": 20.0,
|
| 488 |
+
"similarity_fct": "cos_sim",
|
| 489 |
+
"mini_batch_size": 16,
|
| 490 |
+
"gather_across_devices": false
|
| 491 |
+
}
|
| 492 |
+
```
|
| 493 |
+
|
| 494 |
+
### Training Hyperparameters
|
| 495 |
+
#### Non-Default Hyperparameters
|
| 496 |
+
|
| 497 |
+
- `per_device_train_batch_size`: 512
|
| 498 |
+
- `num_train_epochs`: 1
|
| 499 |
+
- `warmup_ratio`: 0.1
|
| 500 |
+
- `bf16`: True
|
| 501 |
+
- `push_to_hub`: True
|
| 502 |
+
- `hub_model_id`: guyhadad01/EncodeRec_600M_Toys
|
| 503 |
+
- `hub_strategy`: checkpoint
|
| 504 |
+
- `prompts`: Instruct: Given a web search query, retrieve relevant passages that answer the query
|
| 505 |
+
Query:
|
| 506 |
+
|
| 507 |
+
#### All Hyperparameters
|
| 508 |
+
<details><summary>Click to expand</summary>
|
| 509 |
+
|
| 510 |
+
- `overwrite_output_dir`: False
|
| 511 |
+
- `do_predict`: False
|
| 512 |
+
- `eval_strategy`: no
|
| 513 |
+
- `prediction_loss_only`: True
|
| 514 |
+
- `per_device_train_batch_size`: 512
|
| 515 |
+
- `per_device_eval_batch_size`: 8
|
| 516 |
+
- `per_gpu_train_batch_size`: None
|
| 517 |
+
- `per_gpu_eval_batch_size`: None
|
| 518 |
+
- `gradient_accumulation_steps`: 1
|
| 519 |
+
- `eval_accumulation_steps`: None
|
| 520 |
+
- `torch_empty_cache_steps`: None
|
| 521 |
+
- `learning_rate`: 5e-05
|
| 522 |
+
- `weight_decay`: 0.0
|
| 523 |
+
- `adam_beta1`: 0.9
|
| 524 |
+
- `adam_beta2`: 0.999
|
| 525 |
+
- `adam_epsilon`: 1e-08
|
| 526 |
+
- `max_grad_norm`: 1.0
|
| 527 |
+
- `num_train_epochs`: 1
|
| 528 |
+
- `max_steps`: -1
|
| 529 |
+
- `lr_scheduler_type`: linear
|
| 530 |
+
- `lr_scheduler_kwargs`: {}
|
| 531 |
+
- `warmup_ratio`: 0.1
|
| 532 |
+
- `warmup_steps`: 0
|
| 533 |
+
- `log_level`: passive
|
| 534 |
+
- `log_level_replica`: warning
|
| 535 |
+
- `log_on_each_node`: True
|
| 536 |
+
- `logging_nan_inf_filter`: True
|
| 537 |
+
- `save_safetensors`: True
|
| 538 |
+
- `save_on_each_node`: False
|
| 539 |
+
- `save_only_model`: False
|
| 540 |
+
- `restore_callback_states_from_checkpoint`: False
|
| 541 |
+
- `no_cuda`: False
|
| 542 |
+
- `use_cpu`: False
|
| 543 |
+
- `use_mps_device`: False
|
| 544 |
+
- `seed`: 42
|
| 545 |
+
- `data_seed`: None
|
| 546 |
+
- `jit_mode_eval`: False
|
| 547 |
+
- `use_ipex`: False
|
| 548 |
+
- `bf16`: True
|
| 549 |
+
- `fp16`: False
|
| 550 |
+
- `fp16_opt_level`: O1
|
| 551 |
+
- `half_precision_backend`: auto
|
| 552 |
+
- `bf16_full_eval`: False
|
| 553 |
+
- `fp16_full_eval`: False
|
| 554 |
+
- `tf32`: None
|
| 555 |
+
- `local_rank`: 0
|
| 556 |
+
- `ddp_backend`: None
|
| 557 |
+
- `tpu_num_cores`: None
|
| 558 |
+
- `tpu_metrics_debug`: False
|
| 559 |
+
- `debug`: []
|
| 560 |
+
- `dataloader_drop_last`: False
|
| 561 |
+
- `dataloader_num_workers`: 0
|
| 562 |
+
- `dataloader_prefetch_factor`: None
|
| 563 |
+
- `past_index`: -1
|
| 564 |
+
- `disable_tqdm`: False
|
| 565 |
+
- `remove_unused_columns`: True
|
| 566 |
+
- `label_names`: None
|
| 567 |
+
- `load_best_model_at_end`: False
|
| 568 |
+
- `ignore_data_skip`: False
|
| 569 |
+
- `fsdp`: []
|
| 570 |
+
- `fsdp_min_num_params`: 0
|
| 571 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 572 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
| 573 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
| 574 |
+
- `parallelism_config`: None
|
| 575 |
+
- `deepspeed`: None
|
| 576 |
+
- `label_smoothing_factor`: 0.0
|
| 577 |
+
- `optim`: adamw_torch
|
| 578 |
+
- `optim_args`: None
|
| 579 |
+
- `adafactor`: False
|
| 580 |
+
- `group_by_length`: False
|
| 581 |
+
- `length_column_name`: length
|
| 582 |
+
- `ddp_find_unused_parameters`: None
|
| 583 |
+
- `ddp_bucket_cap_mb`: None
|
| 584 |
+
- `ddp_broadcast_buffers`: False
|
| 585 |
+
- `dataloader_pin_memory`: True
|
| 586 |
+
- `dataloader_persistent_workers`: False
|
| 587 |
+
- `skip_memory_metrics`: True
|
| 588 |
+
- `use_legacy_prediction_loop`: False
|
| 589 |
+
- `push_to_hub`: True
|
| 590 |
+
- `resume_from_checkpoint`: None
|
| 591 |
+
- `hub_model_id`: guyhadad01/EncodeRec_600M_Toys
|
| 592 |
+
- `hub_strategy`: checkpoint
|
| 593 |
+
- `hub_private_repo`: None
|
| 594 |
+
- `hub_always_push`: False
|
| 595 |
+
- `hub_revision`: None
|
| 596 |
+
- `gradient_checkpointing`: False
|
| 597 |
+
- `gradient_checkpointing_kwargs`: None
|
| 598 |
+
- `include_inputs_for_metrics`: False
|
| 599 |
+
- `include_for_metrics`: []
|
| 600 |
+
- `eval_do_concat_batches`: True
|
| 601 |
+
- `fp16_backend`: auto
|
| 602 |
+
- `push_to_hub_model_id`: None
|
| 603 |
+
- `push_to_hub_organization`: None
|
| 604 |
+
- `mp_parameters`:
|
| 605 |
+
- `auto_find_batch_size`: False
|
| 606 |
+
- `full_determinism`: False
|
| 607 |
+
- `torchdynamo`: None
|
| 608 |
+
- `ray_scope`: last
|
| 609 |
+
- `ddp_timeout`: 1800
|
| 610 |
+
- `torch_compile`: False
|
| 611 |
+
- `torch_compile_backend`: None
|
| 612 |
+
- `torch_compile_mode`: None
|
| 613 |
+
- `include_tokens_per_second`: False
|
| 614 |
+
- `include_num_input_tokens_seen`: False
|
| 615 |
+
- `neftune_noise_alpha`: None
|
| 616 |
+
- `optim_target_modules`: None
|
| 617 |
+
- `batch_eval_metrics`: False
|
| 618 |
+
- `eval_on_start`: False
|
| 619 |
+
- `use_liger_kernel`: False
|
| 620 |
+
- `liger_kernel_config`: None
|
| 621 |
+
- `eval_use_gather_object`: False
|
| 622 |
+
- `average_tokens_across_devices`: False
|
| 623 |
+
- `prompts`: Instruct: Given a web search query, retrieve relevant passages that answer the query
|
| 624 |
+
Query:
|
| 625 |
+
- `batch_sampler`: batch_sampler
|
| 626 |
+
- `multi_dataset_batch_sampler`: proportional
|
| 627 |
+
- `router_mapping`: {}
|
| 628 |
+
- `learning_rate_mapping`: {}
|
| 629 |
+
|
| 630 |
+
</details>
|
| 631 |
+
|
| 632 |
+
### Training Logs
|
| 633 |
+
| Epoch | Step | Training Loss |
|
| 634 |
+
|:------:|:----:|:-------------:|
|
| 635 |
+
| 0.0463 | 50 | 0.302 |
|
| 636 |
+
| 0.0926 | 100 | 0.1737 |
|
| 637 |
+
| 0.1389 | 150 | 0.1749 |
|
| 638 |
+
| 0.1852 | 200 | 0.1664 |
|
| 639 |
+
|
| 640 |
+
|
| 641 |
+
### Framework Versions
|
| 642 |
+
- Python: 3.10.18
|
| 643 |
+
- Sentence Transformers: 5.1.1
|
| 644 |
+
- Transformers: 4.56.2
|
| 645 |
+
- PyTorch: 2.6.0+cu126
|
| 646 |
+
- Accelerate: 1.10.1
|
| 647 |
+
- Datasets: 4.1.1
|
| 648 |
+
- Tokenizers: 0.22.1
|
| 649 |
+
|
| 650 |
+
## Citation
|
| 651 |
+
|
| 652 |
+
### BibTeX
|
| 653 |
+
|
| 654 |
+
#### Sentence Transformers
|
| 655 |
+
```bibtex
|
| 656 |
+
@inproceedings{reimers-2019-sentence-bert,
|
| 657 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
| 658 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
| 659 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
| 660 |
+
month = "11",
|
| 661 |
+
year = "2019",
|
| 662 |
+
publisher = "Association for Computational Linguistics",
|
| 663 |
+
url = "https://arxiv.org/abs/1908.10084",
|
| 664 |
+
}
|
| 665 |
+
```
|
| 666 |
+
|
| 667 |
+
#### CachedMultipleNegativesRankingLoss
|
| 668 |
+
```bibtex
|
| 669 |
+
@misc{gao2021scaling,
|
| 670 |
+
title={Scaling Deep Contrastive Learning Batch Size under Memory Limited Setup},
|
| 671 |
+
author={Luyu Gao and Yunyi Zhang and Jiawei Han and Jamie Callan},
|
| 672 |
+
year={2021},
|
| 673 |
+
eprint={2101.06983},
|
| 674 |
+
archivePrefix={arXiv},
|
| 675 |
+
primaryClass={cs.LG}
|
| 676 |
+
}
|
| 677 |
+
```
|
| 678 |
+
|
| 679 |
+
<!--
|
| 680 |
+
## Glossary
|
| 681 |
+
|
| 682 |
+
*Clearly define terms in order to be accessible across audiences.*
|
| 683 |
+
-->
|
| 684 |
+
|
| 685 |
+
<!--
|
| 686 |
+
## Model Card Authors
|
| 687 |
+
|
| 688 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
| 689 |
+
-->
|
| 690 |
+
|
| 691 |
+
<!--
|
| 692 |
+
## Model Card Contact
|
| 693 |
+
|
| 694 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
| 695 |
+
-->
|
last-checkpoint/added_tokens.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</think>": 151668,
|
| 3 |
+
"</tool_call>": 151658,
|
| 4 |
+
"</tool_response>": 151666,
|
| 5 |
+
"<think>": 151667,
|
| 6 |
+
"<tool_call>": 151657,
|
| 7 |
+
"<tool_response>": 151665,
|
| 8 |
+
"<|box_end|>": 151649,
|
| 9 |
+
"<|box_start|>": 151648,
|
| 10 |
+
"<|endoftext|>": 151643,
|
| 11 |
+
"<|file_sep|>": 151664,
|
| 12 |
+
"<|fim_middle|>": 151660,
|
| 13 |
+
"<|fim_pad|>": 151662,
|
| 14 |
+
"<|fim_prefix|>": 151659,
|
| 15 |
+
"<|fim_suffix|>": 151661,
|
| 16 |
+
"<|im_end|>": 151645,
|
| 17 |
+
"<|im_start|>": 151644,
|
| 18 |
+
"<|image_pad|>": 151655,
|
| 19 |
+
"<|object_ref_end|>": 151647,
|
| 20 |
+
"<|object_ref_start|>": 151646,
|
| 21 |
+
"<|quad_end|>": 151651,
|
| 22 |
+
"<|quad_start|>": 151650,
|
| 23 |
+
"<|repo_name|>": 151663,
|
| 24 |
+
"<|video_pad|>": 151656,
|
| 25 |
+
"<|vision_end|>": 151653,
|
| 26 |
+
"<|vision_pad|>": 151654,
|
| 27 |
+
"<|vision_start|>": 151652
|
| 28 |
+
}
|
last-checkpoint/chat_template.jinja
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{%- if tools %}
|
| 2 |
+
{{- '<|im_start|>system\n' }}
|
| 3 |
+
{%- if messages[0].role == 'system' %}
|
| 4 |
+
{{- messages[0].content + '\n\n' }}
|
| 5 |
+
{%- endif %}
|
| 6 |
+
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
| 7 |
+
{%- for tool in tools %}
|
| 8 |
+
{{- "\n" }}
|
| 9 |
+
{{- tool | tojson }}
|
| 10 |
+
{%- endfor %}
|
| 11 |
+
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
| 12 |
+
{%- else %}
|
| 13 |
+
{%- if messages[0].role == 'system' %}
|
| 14 |
+
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
| 15 |
+
{%- endif %}
|
| 16 |
+
{%- endif %}
|
| 17 |
+
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
| 18 |
+
{%- for message in messages[::-1] %}
|
| 19 |
+
{%- set index = (messages|length - 1) - loop.index0 %}
|
| 20 |
+
{%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
| 21 |
+
{%- set ns.multi_step_tool = false %}
|
| 22 |
+
{%- set ns.last_query_index = index %}
|
| 23 |
+
{%- endif %}
|
| 24 |
+
{%- endfor %}
|
| 25 |
+
{%- for message in messages %}
|
| 26 |
+
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
| 27 |
+
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
|
| 28 |
+
{%- elif message.role == "assistant" %}
|
| 29 |
+
{%- set content = message.content %}
|
| 30 |
+
{%- set reasoning_content = '' %}
|
| 31 |
+
{%- if message.reasoning_content is defined and message.reasoning_content is not none %}
|
| 32 |
+
{%- set reasoning_content = message.reasoning_content %}
|
| 33 |
+
{%- else %}
|
| 34 |
+
{%- if '</think>' in message.content %}
|
| 35 |
+
{%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
|
| 36 |
+
{%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
| 37 |
+
{%- endif %}
|
| 38 |
+
{%- endif %}
|
| 39 |
+
{%- if loop.index0 > ns.last_query_index %}
|
| 40 |
+
{%- if loop.last or (not loop.last and reasoning_content) %}
|
| 41 |
+
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
| 42 |
+
{%- else %}
|
| 43 |
+
{{- '<|im_start|>' + message.role + '\n' + content }}
|
| 44 |
+
{%- endif %}
|
| 45 |
+
{%- else %}
|
| 46 |
+
{{- '<|im_start|>' + message.role + '\n' + content }}
|
| 47 |
+
{%- endif %}
|
| 48 |
+
{%- if message.tool_calls %}
|
| 49 |
+
{%- for tool_call in message.tool_calls %}
|
| 50 |
+
{%- if (loop.first and content) or (not loop.first) %}
|
| 51 |
+
{{- '\n' }}
|
| 52 |
+
{%- endif %}
|
| 53 |
+
{%- if tool_call.function %}
|
| 54 |
+
{%- set tool_call = tool_call.function %}
|
| 55 |
+
{%- endif %}
|
| 56 |
+
{{- '<tool_call>\n{"name": "' }}
|
| 57 |
+
{{- tool_call.name }}
|
| 58 |
+
{{- '", "arguments": ' }}
|
| 59 |
+
{%- if tool_call.arguments is string %}
|
| 60 |
+
{{- tool_call.arguments }}
|
| 61 |
+
{%- else %}
|
| 62 |
+
{{- tool_call.arguments | tojson }}
|
| 63 |
+
{%- endif %}
|
| 64 |
+
{{- '}\n</tool_call>' }}
|
| 65 |
+
{%- endfor %}
|
| 66 |
+
{%- endif %}
|
| 67 |
+
{{- '<|im_end|>\n' }}
|
| 68 |
+
{%- elif message.role == "tool" %}
|
| 69 |
+
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
| 70 |
+
{{- '<|im_start|>user' }}
|
| 71 |
+
{%- endif %}
|
| 72 |
+
{{- '\n<tool_response>\n' }}
|
| 73 |
+
{{- message.content }}
|
| 74 |
+
{{- '\n</tool_response>' }}
|
| 75 |
+
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
| 76 |
+
{{- '<|im_end|>\n' }}
|
| 77 |
+
{%- endif %}
|
| 78 |
+
{%- endif %}
|
| 79 |
+
{%- endfor %}
|
| 80 |
+
{%- if add_generation_prompt %}
|
| 81 |
+
{{- '<|im_start|>assistant\n' }}
|
| 82 |
+
{%- if enable_thinking is defined and enable_thinking is false %}
|
| 83 |
+
{{- '<think>\n\n</think>\n\n' }}
|
| 84 |
+
{%- endif %}
|
| 85 |
+
{%- endif %}
|
last-checkpoint/config.json
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Qwen3Model"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 151643,
|
| 8 |
+
"dtype": "float32",
|
| 9 |
+
"eos_token_id": 151643,
|
| 10 |
+
"head_dim": 128,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 1024,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 3072,
|
| 15 |
+
"layer_types": [
|
| 16 |
+
"full_attention",
|
| 17 |
+
"full_attention",
|
| 18 |
+
"full_attention",
|
| 19 |
+
"full_attention",
|
| 20 |
+
"full_attention",
|
| 21 |
+
"full_attention",
|
| 22 |
+
"full_attention",
|
| 23 |
+
"full_attention",
|
| 24 |
+
"full_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"full_attention",
|
| 27 |
+
"full_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"full_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention"
|
| 44 |
+
],
|
| 45 |
+
"max_position_embeddings": 32768,
|
| 46 |
+
"max_window_layers": 28,
|
| 47 |
+
"model_type": "qwen3",
|
| 48 |
+
"num_attention_heads": 16,
|
| 49 |
+
"num_hidden_layers": 28,
|
| 50 |
+
"num_key_value_heads": 8,
|
| 51 |
+
"rms_norm_eps": 1e-06,
|
| 52 |
+
"rope_scaling": null,
|
| 53 |
+
"rope_theta": 1000000,
|
| 54 |
+
"sliding_window": null,
|
| 55 |
+
"tie_word_embeddings": true,
|
| 56 |
+
"transformers_version": "4.56.2",
|
| 57 |
+
"use_cache": true,
|
| 58 |
+
"use_sliding_window": false,
|
| 59 |
+
"vocab_size": 151669
|
| 60 |
+
}
|
last-checkpoint/config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"prompts": {
|
| 3 |
+
"query": "Instruct: Given a web search query, retrieve relevant passages that answer the query\nQuery:",
|
| 4 |
+
"document": ""
|
| 5 |
+
},
|
| 6 |
+
"default_prompt_name": null,
|
| 7 |
+
"similarity_fn_name": "cosine",
|
| 8 |
+
"model_type": "SentenceTransformer",
|
| 9 |
+
"__version__": {
|
| 10 |
+
"sentence_transformers": "5.1.1",
|
| 11 |
+
"transformers": "4.56.2",
|
| 12 |
+
"pytorch": "2.6.0+cu126"
|
| 13 |
+
}
|
| 14 |
+
}
|
last-checkpoint/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
last-checkpoint/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f05b2ecf6c21e4e22e8d86f7d6cdfd66d3c24140fb5da41e4e6fddf88f53062
|
| 3 |
+
size 2383139480
|
last-checkpoint/modules.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"idx": 2,
|
| 16 |
+
"name": "2",
|
| 17 |
+
"path": "2_Normalize",
|
| 18 |
+
"type": "sentence_transformers.models.Normalize"
|
| 19 |
+
}
|
| 20 |
+
]
|
last-checkpoint/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:975fcf4229f439cb652012750fa7242ec08caec72e4b15e47bb0421c42c2d65a
|
| 3 |
+
size 4766477350
|
last-checkpoint/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0a93a6278551923693ac4f988f65ba37a3d27ccb15787cd8a7ac1081397feb4
|
| 3 |
+
size 14244
|
last-checkpoint/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ce8ab349effd1adbb6d4237116520cacfb57476eecc98810f044e89e9743cd6
|
| 3 |
+
size 1064
|
last-checkpoint/sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 512,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
last-checkpoint/special_tokens_map.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|im_start|>",
|
| 4 |
+
"<|im_end|>",
|
| 5 |
+
"<|object_ref_start|>",
|
| 6 |
+
"<|object_ref_end|>",
|
| 7 |
+
"<|box_start|>",
|
| 8 |
+
"<|box_end|>",
|
| 9 |
+
"<|quad_start|>",
|
| 10 |
+
"<|quad_end|>",
|
| 11 |
+
"<|vision_start|>",
|
| 12 |
+
"<|vision_end|>",
|
| 13 |
+
"<|vision_pad|>",
|
| 14 |
+
"<|image_pad|>",
|
| 15 |
+
"<|video_pad|>"
|
| 16 |
+
],
|
| 17 |
+
"eos_token": {
|
| 18 |
+
"content": "<|im_end|>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
},
|
| 24 |
+
"pad_token": {
|
| 25 |
+
"content": "<|endoftext|>",
|
| 26 |
+
"lstrip": false,
|
| 27 |
+
"normalized": false,
|
| 28 |
+
"rstrip": false,
|
| 29 |
+
"single_word": false
|
| 30 |
+
}
|
| 31 |
+
}
|
last-checkpoint/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c87c38db060bafb0122019c0c749ec1eb1ae510dae43c93f0042ec51099942e8
|
| 3 |
+
size 11423971
|
last-checkpoint/tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
|
| 3 |
+
size 4689074
|
last-checkpoint/tokenizer_config.json
ADDED
|
@@ -0,0 +1,239 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
+
"add_prefix_space": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"151643": {
|
| 6 |
+
"content": "<|endoftext|>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"151644": {
|
| 14 |
+
"content": "<|im_start|>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"151645": {
|
| 22 |
+
"content": "<|im_end|>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
},
|
| 29 |
+
"151646": {
|
| 30 |
+
"content": "<|object_ref_start|>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false,
|
| 35 |
+
"special": true
|
| 36 |
+
},
|
| 37 |
+
"151647": {
|
| 38 |
+
"content": "<|object_ref_end|>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false,
|
| 43 |
+
"special": true
|
| 44 |
+
},
|
| 45 |
+
"151648": {
|
| 46 |
+
"content": "<|box_start|>",
|
| 47 |
+
"lstrip": false,
|
| 48 |
+
"normalized": false,
|
| 49 |
+
"rstrip": false,
|
| 50 |
+
"single_word": false,
|
| 51 |
+
"special": true
|
| 52 |
+
},
|
| 53 |
+
"151649": {
|
| 54 |
+
"content": "<|box_end|>",
|
| 55 |
+
"lstrip": false,
|
| 56 |
+
"normalized": false,
|
| 57 |
+
"rstrip": false,
|
| 58 |
+
"single_word": false,
|
| 59 |
+
"special": true
|
| 60 |
+
},
|
| 61 |
+
"151650": {
|
| 62 |
+
"content": "<|quad_start|>",
|
| 63 |
+
"lstrip": false,
|
| 64 |
+
"normalized": false,
|
| 65 |
+
"rstrip": false,
|
| 66 |
+
"single_word": false,
|
| 67 |
+
"special": true
|
| 68 |
+
},
|
| 69 |
+
"151651": {
|
| 70 |
+
"content": "<|quad_end|>",
|
| 71 |
+
"lstrip": false,
|
| 72 |
+
"normalized": false,
|
| 73 |
+
"rstrip": false,
|
| 74 |
+
"single_word": false,
|
| 75 |
+
"special": true
|
| 76 |
+
},
|
| 77 |
+
"151652": {
|
| 78 |
+
"content": "<|vision_start|>",
|
| 79 |
+
"lstrip": false,
|
| 80 |
+
"normalized": false,
|
| 81 |
+
"rstrip": false,
|
| 82 |
+
"single_word": false,
|
| 83 |
+
"special": true
|
| 84 |
+
},
|
| 85 |
+
"151653": {
|
| 86 |
+
"content": "<|vision_end|>",
|
| 87 |
+
"lstrip": false,
|
| 88 |
+
"normalized": false,
|
| 89 |
+
"rstrip": false,
|
| 90 |
+
"single_word": false,
|
| 91 |
+
"special": true
|
| 92 |
+
},
|
| 93 |
+
"151654": {
|
| 94 |
+
"content": "<|vision_pad|>",
|
| 95 |
+
"lstrip": false,
|
| 96 |
+
"normalized": false,
|
| 97 |
+
"rstrip": false,
|
| 98 |
+
"single_word": false,
|
| 99 |
+
"special": true
|
| 100 |
+
},
|
| 101 |
+
"151655": {
|
| 102 |
+
"content": "<|image_pad|>",
|
| 103 |
+
"lstrip": false,
|
| 104 |
+
"normalized": false,
|
| 105 |
+
"rstrip": false,
|
| 106 |
+
"single_word": false,
|
| 107 |
+
"special": true
|
| 108 |
+
},
|
| 109 |
+
"151656": {
|
| 110 |
+
"content": "<|video_pad|>",
|
| 111 |
+
"lstrip": false,
|
| 112 |
+
"normalized": false,
|
| 113 |
+
"rstrip": false,
|
| 114 |
+
"single_word": false,
|
| 115 |
+
"special": true
|
| 116 |
+
},
|
| 117 |
+
"151657": {
|
| 118 |
+
"content": "<tool_call>",
|
| 119 |
+
"lstrip": false,
|
| 120 |
+
"normalized": false,
|
| 121 |
+
"rstrip": false,
|
| 122 |
+
"single_word": false,
|
| 123 |
+
"special": false
|
| 124 |
+
},
|
| 125 |
+
"151658": {
|
| 126 |
+
"content": "</tool_call>",
|
| 127 |
+
"lstrip": false,
|
| 128 |
+
"normalized": false,
|
| 129 |
+
"rstrip": false,
|
| 130 |
+
"single_word": false,
|
| 131 |
+
"special": false
|
| 132 |
+
},
|
| 133 |
+
"151659": {
|
| 134 |
+
"content": "<|fim_prefix|>",
|
| 135 |
+
"lstrip": false,
|
| 136 |
+
"normalized": false,
|
| 137 |
+
"rstrip": false,
|
| 138 |
+
"single_word": false,
|
| 139 |
+
"special": false
|
| 140 |
+
},
|
| 141 |
+
"151660": {
|
| 142 |
+
"content": "<|fim_middle|>",
|
| 143 |
+
"lstrip": false,
|
| 144 |
+
"normalized": false,
|
| 145 |
+
"rstrip": false,
|
| 146 |
+
"single_word": false,
|
| 147 |
+
"special": false
|
| 148 |
+
},
|
| 149 |
+
"151661": {
|
| 150 |
+
"content": "<|fim_suffix|>",
|
| 151 |
+
"lstrip": false,
|
| 152 |
+
"normalized": false,
|
| 153 |
+
"rstrip": false,
|
| 154 |
+
"single_word": false,
|
| 155 |
+
"special": false
|
| 156 |
+
},
|
| 157 |
+
"151662": {
|
| 158 |
+
"content": "<|fim_pad|>",
|
| 159 |
+
"lstrip": false,
|
| 160 |
+
"normalized": false,
|
| 161 |
+
"rstrip": false,
|
| 162 |
+
"single_word": false,
|
| 163 |
+
"special": false
|
| 164 |
+
},
|
| 165 |
+
"151663": {
|
| 166 |
+
"content": "<|repo_name|>",
|
| 167 |
+
"lstrip": false,
|
| 168 |
+
"normalized": false,
|
| 169 |
+
"rstrip": false,
|
| 170 |
+
"single_word": false,
|
| 171 |
+
"special": false
|
| 172 |
+
},
|
| 173 |
+
"151664": {
|
| 174 |
+
"content": "<|file_sep|>",
|
| 175 |
+
"lstrip": false,
|
| 176 |
+
"normalized": false,
|
| 177 |
+
"rstrip": false,
|
| 178 |
+
"single_word": false,
|
| 179 |
+
"special": false
|
| 180 |
+
},
|
| 181 |
+
"151665": {
|
| 182 |
+
"content": "<tool_response>",
|
| 183 |
+
"lstrip": false,
|
| 184 |
+
"normalized": false,
|
| 185 |
+
"rstrip": false,
|
| 186 |
+
"single_word": false,
|
| 187 |
+
"special": false
|
| 188 |
+
},
|
| 189 |
+
"151666": {
|
| 190 |
+
"content": "</tool_response>",
|
| 191 |
+
"lstrip": false,
|
| 192 |
+
"normalized": false,
|
| 193 |
+
"rstrip": false,
|
| 194 |
+
"single_word": false,
|
| 195 |
+
"special": false
|
| 196 |
+
},
|
| 197 |
+
"151667": {
|
| 198 |
+
"content": "<think>",
|
| 199 |
+
"lstrip": false,
|
| 200 |
+
"normalized": false,
|
| 201 |
+
"rstrip": false,
|
| 202 |
+
"single_word": false,
|
| 203 |
+
"special": false
|
| 204 |
+
},
|
| 205 |
+
"151668": {
|
| 206 |
+
"content": "</think>",
|
| 207 |
+
"lstrip": false,
|
| 208 |
+
"normalized": false,
|
| 209 |
+
"rstrip": false,
|
| 210 |
+
"single_word": false,
|
| 211 |
+
"special": false
|
| 212 |
+
}
|
| 213 |
+
},
|
| 214 |
+
"additional_special_tokens": [
|
| 215 |
+
"<|im_start|>",
|
| 216 |
+
"<|im_end|>",
|
| 217 |
+
"<|object_ref_start|>",
|
| 218 |
+
"<|object_ref_end|>",
|
| 219 |
+
"<|box_start|>",
|
| 220 |
+
"<|box_end|>",
|
| 221 |
+
"<|quad_start|>",
|
| 222 |
+
"<|quad_end|>",
|
| 223 |
+
"<|vision_start|>",
|
| 224 |
+
"<|vision_end|>",
|
| 225 |
+
"<|vision_pad|>",
|
| 226 |
+
"<|image_pad|>",
|
| 227 |
+
"<|video_pad|>"
|
| 228 |
+
],
|
| 229 |
+
"bos_token": null,
|
| 230 |
+
"clean_up_tokenization_spaces": false,
|
| 231 |
+
"eos_token": "<|im_end|>",
|
| 232 |
+
"errors": "replace",
|
| 233 |
+
"extra_special_tokens": {},
|
| 234 |
+
"model_max_length": 131072,
|
| 235 |
+
"pad_token": "<|endoftext|>",
|
| 236 |
+
"split_special_tokens": false,
|
| 237 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 238 |
+
"unk_token": null
|
| 239 |
+
}
|
last-checkpoint/trainer_state.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": null,
|
| 3 |
+
"best_metric": null,
|
| 4 |
+
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.18518518518518517,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 200,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.046296296296296294,
|
| 14 |
+
"grad_norm": 3.5503571033477783,
|
| 15 |
+
"learning_rate": 2.2685185185185187e-05,
|
| 16 |
+
"loss": 0.302,
|
| 17 |
+
"step": 50
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.09259259259259259,
|
| 21 |
+
"grad_norm": 2.7323362827301025,
|
| 22 |
+
"learning_rate": 4.5833333333333334e-05,
|
| 23 |
+
"loss": 0.1737,
|
| 24 |
+
"step": 100
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 0.1388888888888889,
|
| 28 |
+
"grad_norm": 2.2092525959014893,
|
| 29 |
+
"learning_rate": 4.7890946502057616e-05,
|
| 30 |
+
"loss": 0.1749,
|
| 31 |
+
"step": 150
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 0.18518518518518517,
|
| 35 |
+
"grad_norm": 1.7028791904449463,
|
| 36 |
+
"learning_rate": 4.531893004115226e-05,
|
| 37 |
+
"loss": 0.1664,
|
| 38 |
+
"step": 200
|
| 39 |
+
}
|
| 40 |
+
],
|
| 41 |
+
"logging_steps": 50,
|
| 42 |
+
"max_steps": 1080,
|
| 43 |
+
"num_input_tokens_seen": 0,
|
| 44 |
+
"num_train_epochs": 1,
|
| 45 |
+
"save_steps": 200,
|
| 46 |
+
"stateful_callbacks": {
|
| 47 |
+
"TrainerControl": {
|
| 48 |
+
"args": {
|
| 49 |
+
"should_epoch_stop": false,
|
| 50 |
+
"should_evaluate": false,
|
| 51 |
+
"should_log": false,
|
| 52 |
+
"should_save": true,
|
| 53 |
+
"should_training_stop": false
|
| 54 |
+
},
|
| 55 |
+
"attributes": {}
|
| 56 |
+
}
|
| 57 |
+
},
|
| 58 |
+
"total_flos": 0.0,
|
| 59 |
+
"train_batch_size": 512,
|
| 60 |
+
"trial_name": null,
|
| 61 |
+
"trial_params": null
|
| 62 |
+
}
|
last-checkpoint/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e01e8f26e5c2d4f6bffbdf651718d0e87d7dcd6ace3b4fa03602996e17a6a82e
|
| 3 |
+
size 5816
|
last-checkpoint/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|