reasonwang committed on
Commit
f22f1b3
·
verified ·
1 Parent(s): 4d32a54

Add new SentenceTransformer model

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 2048,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": true,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,1061 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - sentence-transformers
4
+ - sentence-similarity
5
+ - feature-extraction
6
+ - dense
7
+ - generated_from_trainer
8
+ - loss:CachedMultipleNegativesRankingLoss
9
+ pipeline_tag: sentence-similarity
10
+ library_name: sentence-transformers
11
+ metrics:
12
+ - cosine_accuracy@1
13
+ - cosine_accuracy@3
14
+ - cosine_accuracy@5
15
+ - cosine_accuracy@10
16
+ - cosine_precision@1
17
+ - cosine_precision@3
18
+ - cosine_precision@5
19
+ - cosine_precision@10
20
+ - cosine_recall@1
21
+ - cosine_recall@3
22
+ - cosine_recall@5
23
+ - cosine_recall@10
24
+ - cosine_ndcg@10
25
+ - cosine_ndcg@100
26
+ - cosine_mrr@10
27
+ - cosine_mrr@100
28
+ - cosine_map@100
29
+ model-index:
30
+ - name: SentenceTransformer
31
+ results:
32
+ - task:
33
+ type: information-retrieval
34
+ name: Information Retrieval
35
+ dataset:
36
+ name: validation retrieval
37
+ type: validation_retrieval
38
+ metrics:
39
+ - type: cosine_accuracy@1
40
+ value: 0.57
41
+ name: Cosine Accuracy@1
42
+ - type: cosine_accuracy@3
43
+ value: 0.82
44
+ name: Cosine Accuracy@3
45
+ - type: cosine_accuracy@5
46
+ value: 0.89
47
+ name: Cosine Accuracy@5
48
+ - type: cosine_accuracy@10
49
+ value: 0.94
50
+ name: Cosine Accuracy@10
51
+ - type: cosine_precision@1
52
+ value: 0.57
53
+ name: Cosine Precision@1
54
+ - type: cosine_precision@3
55
+ value: 0.4766666666666666
56
+ name: Cosine Precision@3
57
+ - type: cosine_precision@5
58
+ value: 0.446
59
+ name: Cosine Precision@5
60
+ - type: cosine_precision@10
61
+ value: 0.3270000000000001
62
+ name: Cosine Precision@10
63
+ - type: cosine_recall@1
64
+ value: 0.11448060544870549
65
+ name: Cosine Recall@1
66
+ - type: cosine_recall@3
67
+ value: 0.23773109238082593
68
+ name: Cosine Recall@3
69
+ - type: cosine_recall@5
70
+ value: 0.33273253175934026
71
+ name: Cosine Recall@5
72
+ - type: cosine_recall@10
73
+ value: 0.40875330857853476
74
+ name: Cosine Recall@10
75
+ - type: cosine_ndcg@10
76
+ value: 0.5080415101347919
77
+ name: Cosine Ndcg@10
78
+ - type: cosine_ndcg@100
79
+ value: 0.5643031400861662
80
+ name: Cosine Ndcg@100
81
+ - type: cosine_mrr@10
82
+ value: 0.7045
83
+ name: Cosine Mrr@10
84
+ - type: cosine_mrr@100
85
+ value: 0.7065935127674259
86
+ name: Cosine Mrr@100
87
+ - type: cosine_map@100
88
+ value: 0.3786876646034055
89
+ name: Cosine Map@100
90
+ ---
91
+
92
+ # SentenceTransformer
93
+
94
+ This is a [sentence-transformers](https://www.SBERT.net) model trained on the generator dataset. It maps sentences and paragraphs to a 4096-dimensional dense vector space (the 2048-dimensional mean-pooled and last-token embeddings, concatenated) and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
95
+
96
+ ## Model Details
97
+
98
+ ### Model Description
99
+ - **Model Type:** Sentence Transformer
100
+ <!-- - **Base model:** [Unknown](https://huggingface.co/unknown) -->
101
+ - **Maximum Sequence Length:** 32768 tokens
102
+ - **Output Dimensionality:** 4096 dimensions
103
+ - **Similarity Function:** Cosine Similarity
104
+ - **Training Dataset:**
105
+ - generator
106
+ <!-- - **Language:** Unknown -->
107
+ <!-- - **License:** Unknown -->
108
+
109
+ ### Model Sources
110
+
111
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
112
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/huggingface/sentence-transformers)
113
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
114
+
115
+ ### Full Model Architecture
116
+
117
+ ```
118
+ SentenceTransformer(
119
+ (0): Transformer({'max_seq_length': 32768, 'do_lower_case': False, 'architecture': 'Qwen3Model'})
120
+ (1): Pooling({'word_embedding_dimension': 2048, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': True, 'include_prompt': True})
121
+ )
122
+ ```
123
+
124
+ ## Usage
125
+
126
+ ### Direct Usage (Sentence Transformers)
127
+
128
+ First install the Sentence Transformers library:
129
+
130
+ ```bash
131
+ pip install -U sentence-transformers
132
+ ```
133
+
134
+ Then you can load this model and run inference.
135
+ ```python
136
+ from sentence_transformers import SentenceTransformer
137
+
138
+ # Download from the 🤗 Hub
139
+ model = SentenceTransformer("reasonwang/embedding-qwen3-1.7b-embedding_ctxt_unicode_shuf")
140
+ # Run inference
141
+ sentences = [
142
+ 'The weather is lovely today.',
143
+ "It's so sunny outside!",
144
+ 'He drove to the stadium.',
145
+ ]
146
+ embeddings = model.encode(sentences)
147
+ print(embeddings.shape)
148
+ # (3, 4096)
149
+
150
+ # Get the similarity scores for the embeddings
151
+ similarities = model.similarity(embeddings, embeddings)
152
+ print(similarities)
153
+ # tensor([[1.0000, 0.8640, 0.8773],
154
+ # [0.8640, 1.0000, 0.7820],
155
+ # [0.8773, 0.7820, 1.0000]])
156
+ ```
157
+
158
+ <!--
159
+ ### Direct Usage (Transformers)
160
+
161
+ <details><summary>Click to see the direct usage in Transformers</summary>
162
+
163
+ </details>
164
+ -->
165
+
166
+ <!--
167
+ ### Downstream Usage (Sentence Transformers)
168
+
169
+ You can finetune this model on your own dataset.
170
+
171
+ <details><summary>Click to expand</summary>
172
+
173
+ </details>
174
+ -->
175
+
176
+ <!--
177
+ ### Out-of-Scope Use
178
+
179
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
180
+ -->
181
+
182
+ ## Evaluation
183
+
184
+ ### Metrics
185
+
186
+ #### Information Retrieval
187
+
188
+ * Dataset: `validation_retrieval`
189
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
190
+
191
+ | Metric | Value |
192
+ |:--------------------|:-----------|
193
+ | cosine_accuracy@1 | 0.57 |
194
+ | cosine_accuracy@3 | 0.82 |
195
+ | cosine_accuracy@5 | 0.89 |
196
+ | cosine_accuracy@10 | 0.94 |
197
+ | cosine_precision@1 | 0.57 |
198
+ | cosine_precision@3 | 0.4767 |
199
+ | cosine_precision@5 | 0.446 |
200
+ | cosine_precision@10 | 0.327 |
201
+ | cosine_recall@1 | 0.1145 |
202
+ | cosine_recall@3 | 0.2377 |
203
+ | cosine_recall@5 | 0.3327 |
204
+ | cosine_recall@10 | 0.4088 |
205
+ | cosine_ndcg@10 | 0.508 |
206
+ | **cosine_ndcg@100** | **0.5643** |
207
+ | cosine_mrr@10 | 0.7045 |
208
+ | cosine_mrr@100 | 0.7066 |
209
+ | cosine_map@100 | 0.3787 |
210
+
211
+ <!--
212
+ ## Bias, Risks and Limitations
213
+
214
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
215
+ -->
216
+
217
+ <!--
218
+ ### Recommendations
219
+
220
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
221
+ -->
222
+
223
+ ## Training Details
224
+
225
+ ### Training Dataset
226
+
227
+ #### generator
228
+
229
+ * Dataset: generator
230
+ * Columns: <code>sentence1</code> and <code>sentence2</code>
231
+ * Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
232
+ ```json
233
+ {
234
+ "scale": 20.0,
235
+ "similarity_fct": "cos_sim",
236
+ "mini_batch_size": 4,
237
+ "gather_across_devices": false
238
+ }
239
+ ```
240
+
241
+ ### Training Hyperparameters
242
+ #### Non-Default Hyperparameters
243
+
244
+ - `eval_strategy`: steps
245
+ - `per_device_train_batch_size`: 256
246
+ - `learning_rate`: 2e-05
247
+ - `max_steps`: 100000
248
+ - `log_level`: info
249
+ - `bf16`: True
250
+ - `dataloader_num_workers`: 1
251
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': False, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
252
+
253
+ #### All Hyperparameters
254
+ <details><summary>Click to expand</summary>
255
+
256
+ - `overwrite_output_dir`: False
257
+ - `do_predict`: False
258
+ - `eval_strategy`: steps
259
+ - `prediction_loss_only`: True
260
+ - `per_device_train_batch_size`: 256
261
+ - `per_device_eval_batch_size`: 8
262
+ - `per_gpu_train_batch_size`: None
263
+ - `per_gpu_eval_batch_size`: None
264
+ - `gradient_accumulation_steps`: 1
265
+ - `eval_accumulation_steps`: None
266
+ - `torch_empty_cache_steps`: None
267
+ - `learning_rate`: 2e-05
268
+ - `weight_decay`: 0.0
269
+ - `adam_beta1`: 0.9
270
+ - `adam_beta2`: 0.999
271
+ - `adam_epsilon`: 1e-08
272
+ - `max_grad_norm`: 1.0
273
+ - `num_train_epochs`: 3.0
274
+ - `max_steps`: 100000
275
+ - `lr_scheduler_type`: linear
276
+ - `lr_scheduler_kwargs`: {}
277
+ - `warmup_ratio`: 0.0
278
+ - `warmup_steps`: 0
279
+ - `log_level`: info
280
+ - `log_level_replica`: warning
281
+ - `log_on_each_node`: True
282
+ - `logging_nan_inf_filter`: True
283
+ - `save_safetensors`: True
284
+ - `save_on_each_node`: False
285
+ - `save_only_model`: False
286
+ - `restore_callback_states_from_checkpoint`: False
287
+ - `no_cuda`: False
288
+ - `use_cpu`: False
289
+ - `use_mps_device`: False
290
+ - `seed`: 42
291
+ - `data_seed`: None
292
+ - `jit_mode_eval`: False
293
+ - `bf16`: True
294
+ - `fp16`: False
295
+ - `fp16_opt_level`: O1
296
+ - `half_precision_backend`: auto
297
+ - `bf16_full_eval`: False
298
+ - `fp16_full_eval`: False
299
+ - `tf32`: None
300
+ - `local_rank`: 0
301
+ - `ddp_backend`: None
302
+ - `tpu_num_cores`: None
303
+ - `tpu_metrics_debug`: False
304
+ - `debug`: []
305
+ - `dataloader_drop_last`: True
306
+ - `dataloader_num_workers`: 1
307
+ - `dataloader_prefetch_factor`: None
308
+ - `past_index`: -1
309
+ - `disable_tqdm`: False
310
+ - `remove_unused_columns`: True
311
+ - `label_names`: None
312
+ - `load_best_model_at_end`: False
313
+ - `ignore_data_skip`: False
314
+ - `fsdp`: []
315
+ - `fsdp_min_num_params`: 0
316
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
317
+ - `fsdp_transformer_layer_cls_to_wrap`: None
318
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': False, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
319
+ - `parallelism_config`: None
320
+ - `deepspeed`: None
321
+ - `label_smoothing_factor`: 0.0
322
+ - `optim`: adamw_torch_fused
323
+ - `optim_args`: None
324
+ - `adafactor`: False
325
+ - `group_by_length`: False
326
+ - `length_column_name`: length
327
+ - `project`: huggingface
328
+ - `trackio_space_id`: trackio
329
+ - `ddp_find_unused_parameters`: None
330
+ - `ddp_bucket_cap_mb`: None
331
+ - `ddp_broadcast_buffers`: False
332
+ - `dataloader_pin_memory`: True
333
+ - `dataloader_persistent_workers`: False
334
+ - `skip_memory_metrics`: True
335
+ - `use_legacy_prediction_loop`: False
336
+ - `push_to_hub`: False
337
+ - `resume_from_checkpoint`: None
338
+ - `hub_model_id`: None
339
+ - `hub_strategy`: every_save
340
+ - `hub_private_repo`: None
341
+ - `hub_always_push`: False
342
+ - `hub_revision`: None
343
+ - `gradient_checkpointing`: False
344
+ - `gradient_checkpointing_kwargs`: None
345
+ - `include_inputs_for_metrics`: False
346
+ - `include_for_metrics`: []
347
+ - `eval_do_concat_batches`: True
348
+ - `fp16_backend`: auto
349
+ - `push_to_hub_model_id`: None
350
+ - `push_to_hub_organization`: None
351
+ - `mp_parameters`:
352
+ - `auto_find_batch_size`: False
353
+ - `full_determinism`: False
354
+ - `torchdynamo`: None
355
+ - `ray_scope`: last
356
+ - `ddp_timeout`: 1800
357
+ - `torch_compile`: False
358
+ - `torch_compile_backend`: None
359
+ - `torch_compile_mode`: None
360
+ - `include_tokens_per_second`: False
361
+ - `include_num_input_tokens_seen`: no
362
+ - `neftune_noise_alpha`: None
363
+ - `optim_target_modules`: None
364
+ - `batch_eval_metrics`: False
365
+ - `eval_on_start`: False
366
+ - `use_liger_kernel`: False
367
+ - `liger_kernel_config`: None
368
+ - `eval_use_gather_object`: False
369
+ - `average_tokens_across_devices`: True
370
+ - `prompts`: None
371
+ - `batch_sampler`: batch_sampler
372
+ - `multi_dataset_batch_sampler`: proportional
373
+ - `router_mapping`: {}
374
+ - `learning_rate_mapping`: {}
375
+
376
+ </details>
377
+
378
+ ### Training Logs
379
+ <details><summary>Click to expand</summary>
380
+
381
+ | Epoch | Step | Training Loss | validation_retrieval_cosine_ndcg@100 |
382
+ |:------:|:----:|:-------------:|:------------------------------------:|
383
+ | 1e-05 | 1 | 5.3032 | - |
384
+ | 0.0001 | 10 | 3.7433 | - |
385
+ | 0.0002 | 20 | 2.7632 | - |
386
+ | 0.0003 | 30 | 2.4609 | - |
387
+ | 0.0004 | 40 | 2.303 | - |
388
+ | 0.0005 | 50 | 2.2197 | - |
389
+ | 0.0006 | 60 | 2.1855 | - |
390
+ | 0.0007 | 70 | 2.1517 | - |
391
+ | 0.0008 | 80 | 2.1111 | - |
392
+ | 0.0009 | 90 | 2.0876 | - |
393
+ | 0.001 | 100 | 2.0613 | 0.4992 |
394
+ | 0.0011 | 110 | 2.0348 | - |
395
+ | 0.0012 | 120 | 2.0209 | - |
396
+ | 0.0013 | 130 | 2.0293 | - |
397
+ | 0.0014 | 140 | 2.0281 | - |
398
+ | 0.0015 | 150 | 2.0019 | - |
399
+ | 0.0016 | 160 | 1.9692 | - |
400
+ | 0.0017 | 170 | 1.9849 | - |
401
+ | 0.0018 | 180 | 1.9494 | - |
402
+ | 0.0019 | 190 | 1.9458 | - |
403
+ | 0.002 | 200 | 1.9264 | 0.5104 |
404
+ | 0.0021 | 210 | 1.9644 | - |
405
+ | 0.0022 | 220 | 1.9315 | - |
406
+ | 0.0023 | 230 | 1.9324 | - |
407
+ | 0.0024 | 240 | 1.8965 | - |
408
+ | 0.0025 | 250 | 1.9287 | - |
409
+ | 0.0026 | 260 | 1.9246 | - |
410
+ | 0.0027 | 270 | 1.9087 | - |
411
+ | 0.0028 | 280 | 1.9052 | - |
412
+ | 0.0029 | 290 | 1.8969 | - |
413
+ | 0.003 | 300 | 1.8971 | 0.5164 |
414
+ | 0.0031 | 310 | 1.8896 | - |
415
+ | 0.0032 | 320 | 1.8897 | - |
416
+ | 0.0033 | 330 | 1.8646 | - |
417
+ | 0.0034 | 340 | 1.8825 | - |
418
+ | 0.0035 | 350 | 1.8599 | - |
419
+ | 0.0036 | 360 | 1.8583 | - |
420
+ | 0.0037 | 370 | 1.8649 | - |
421
+ | 0.0038 | 380 | 1.8647 | - |
422
+ | 0.0039 | 390 | 1.8759 | - |
423
+ | 0.004 | 400 | 1.8197 | 0.5253 |
424
+ | 0.0041 | 410 | 1.846 | - |
425
+ | 0.0042 | 420 | 1.841 | - |
426
+ | 0.0043 | 430 | 1.8319 | - |
427
+ | 0.0044 | 440 | 1.835 | - |
428
+ | 0.0045 | 450 | 1.807 | - |
429
+ | 0.0046 | 460 | 1.8406 | - |
430
+ | 0.0047 | 470 | 1.8344 | - |
431
+ | 0.0048 | 480 | 1.8003 | - |
432
+ | 0.0049 | 490 | 1.8155 | - |
433
+ | 0.005 | 500 | 1.8242 | 0.5266 |
434
+ | 0.0051 | 510 | 1.8014 | - |
435
+ | 0.0052 | 520 | 1.8026 | - |
436
+ | 0.0053 | 530 | 1.8042 | - |
437
+ | 0.0054 | 540 | 1.8372 | - |
438
+ | 0.0055 | 550 | 1.8054 | - |
439
+ | 0.0056 | 560 | 1.8093 | - |
440
+ | 0.0057 | 570 | 1.7814 | - |
441
+ | 0.0058 | 580 | 1.7875 | - |
442
+ | 0.0059 | 590 | 1.7844 | - |
443
+ | 0.006 | 600 | 1.7789 | 0.5330 |
444
+ | 0.0061 | 610 | 1.7947 | - |
445
+ | 0.0062 | 620 | 1.8084 | - |
446
+ | 0.0063 | 630 | 1.7806 | - |
447
+ | 0.0064 | 640 | 1.772 | - |
448
+ | 0.0065 | 650 | 1.7948 | - |
449
+ | 0.0066 | 660 | 1.7648 | - |
450
+ | 0.0067 | 670 | 1.7801 | - |
451
+ | 0.0068 | 680 | 1.7801 | - |
452
+ | 0.0069 | 690 | 1.7696 | - |
453
+ | 0.007 | 700 | 1.7848 | 0.5457 |
454
+ | 0.0071 | 710 | 1.774 | - |
455
+ | 0.0072 | 720 | 1.7619 | - |
456
+ | 0.0073 | 730 | 1.7422 | - |
457
+ | 0.0074 | 740 | 1.7594 | - |
458
+ | 0.0075 | 750 | 1.7225 | - |
459
+ | 0.0076 | 760 | 1.7601 | - |
460
+ | 0.0077 | 770 | 1.7432 | - |
461
+ | 0.0078 | 780 | 1.7627 | - |
462
+ | 0.0079 | 790 | 1.749 | - |
463
+ | 0.008 | 800 | 1.7361 | 0.5455 |
464
+ | 0.0081 | 810 | 1.7275 | - |
465
+ | 0.0082 | 820 | 1.7391 | - |
466
+ | 0.0083 | 830 | 1.7403 | - |
467
+ | 0.0084 | 840 | 1.736 | - |
468
+ | 0.0085 | 850 | 1.7297 | - |
469
+ | 0.0086 | 860 | 1.7376 | - |
470
+ | 0.0087 | 870 | 1.7242 | - |
471
+ | 0.0088 | 880 | 1.7231 | - |
472
+ | 0.0089 | 890 | 1.729 | - |
473
+ | 0.009 | 900 | 1.7515 | 0.5473 |
474
+ | 0.0091 | 910 | 1.7269 | - |
475
+ | 0.0092 | 920 | 1.6863 | - |
476
+ | 0.0093 | 930 | 1.7164 | - |
477
+ | 0.0094 | 940 | 1.7347 | - |
478
+ | 0.0095 | 950 | 1.7439 | - |
479
+ | 0.0096 | 960 | 1.7102 | - |
480
+ | 0.0097 | 970 | 1.7129 | - |
481
+ | 0.0098 | 980 | 1.7185 | - |
482
+ | 0.0099 | 990 | 1.7131 | - |
483
+ | 0.01 | 1000 | 1.7309 | 0.5527 |
484
+ | 0.0101 | 1010 | 1.7055 | - |
485
+ | 0.0102 | 1020 | 1.7106 | - |
486
+ | 0.0103 | 1030 | 1.7089 | - |
487
+ | 0.0104 | 1040 | 1.7058 | - |
488
+ | 0.0105 | 1050 | 1.6984 | - |
489
+ | 0.0106 | 1060 | 1.69 | - |
490
+ | 0.0107 | 1070 | 1.7189 | - |
491
+ | 0.0108 | 1080 | 1.7147 | - |
492
+ | 0.0109 | 1090 | 1.7237 | - |
493
+ | 0.011 | 1100 | 1.6781 | 0.5567 |
494
+ | 0.0111 | 1110 | 1.6788 | - |
495
+ | 0.0112 | 1120 | 1.6928 | - |
496
+ | 0.0113 | 1130 | 1.7146 | - |
497
+ | 0.0114 | 1140 | 1.6983 | - |
498
+ | 0.0115 | 1150 | 1.7014 | - |
499
+ | 0.0116 | 1160 | 1.6888 | - |
500
+ | 0.0117 | 1170 | 1.6668 | - |
501
+ | 0.0118 | 1180 | 1.6785 | - |
502
+ | 0.0119 | 1190 | 1.6853 | - |
503
+ | 0.012 | 1200 | 1.7077 | 0.5459 |
504
+ | 0.0121 | 1210 | 1.676 | - |
505
+ | 0.0122 | 1220 | 1.6749 | - |
506
+ | 0.0123 | 1230 | 1.6815 | - |
507
+ | 0.0124 | 1240 | 1.6823 | - |
508
+ | 0.0125 | 1250 | 1.6751 | - |
509
+ | 0.0126 | 1260 | 1.6942 | - |
510
+ | 0.0127 | 1270 | 1.6597 | - |
511
+ | 0.0128 | 1280 | 1.6685 | - |
512
+ | 0.0129 | 1290 | 1.6873 | - |
513
+ | 0.013 | 1300 | 1.6779 | 0.5526 |
514
+ | 0.0131 | 1310 | 1.6676 | - |
515
+ | 0.0132 | 1320 | 1.6721 | - |
516
+ | 0.0133 | 1330 | 1.6713 | - |
517
+ | 0.0134 | 1340 | 1.6618 | - |
518
+ | 0.0135 | 1350 | 1.6387 | - |
519
+ | 0.0136 | 1360 | 1.6951 | - |
520
+ | 0.0137 | 1370 | 1.6669 | - |
521
+ | 0.0138 | 1380 | 1.6477 | - |
522
+ | 0.0139 | 1390 | 1.6856 | - |
523
+ | 0.014 | 1400 | 1.6687 | 0.5528 |
524
+ | 0.0141 | 1410 | 1.6578 | - |
525
+ | 0.0142 | 1420 | 1.6588 | - |
526
+ | 0.0143 | 1430 | 1.6552 | - |
527
+ | 0.0144 | 1440 | 1.6643 | - |
528
+ | 0.0145 | 1450 | 1.6543 | - |
529
+ | 0.0146 | 1460 | 1.6851 | - |
530
+ | 0.0147 | 1470 | 1.6547 | - |
531
+ | 0.0148 | 1480 | 1.6744 | - |
532
+ | 0.0149 | 1490 | 1.6694 | - |
533
+ | 0.015 | 1500 | 1.6795 | 0.5537 |
534
+ | 0.0151 | 1510 | 1.656 | - |
535
+ | 0.0152 | 1520 | 1.6425 | - |
536
+ | 0.0153 | 1530 | 1.6545 | - |
537
+ | 0.0154 | 1540 | 1.614 | - |
538
+ | 0.0155 | 1550 | 1.6554 | - |
539
+ | 0.0156 | 1560 | 1.6542 | - |
540
+ | 0.0157 | 1570 | 1.6676 | - |
541
+ | 0.0158 | 1580 | 1.6615 | - |
542
+ | 0.0159 | 1590 | 1.6374 | - |
543
+ | 0.016 | 1600 | 1.6451 | 0.5613 |
544
+ | 0.0161 | 1610 | 1.6258 | - |
545
+ | 0.0162 | 1620 | 1.6504 | - |
546
+ | 0.0163 | 1630 | 1.6254 | - |
547
+ | 0.0164 | 1640 | 1.6257 | - |
548
+ | 0.0165 | 1650 | 1.6392 | - |
549
+ | 0.0166 | 1660 | 1.6365 | - |
550
+ | 0.0167 | 1670 | 1.6407 | - |
551
+ | 0.0168 | 1680 | 1.6313 | - |
552
+ | 0.0169 | 1690 | 1.6458 | - |
553
+ | 0.017 | 1700 | 1.6405 | 0.5526 |
554
+ | 0.0171 | 1710 | 1.6431 | - |
555
+ | 0.0172 | 1720 | 1.6262 | - |
556
+ | 0.0173 | 1730 | 1.6434 | - |
557
+ | 0.0174 | 1740 | 1.6404 | - |
558
+ | 0.0175 | 1750 | 1.6418 | - |
559
+ | 0.0176 | 1760 | 1.6176 | - |
560
+ | 0.0177 | 1770 | 1.6282 | - |
561
+ | 0.0178 | 1780 | 1.6228 | - |
562
+ | 0.0179 | 1790 | 1.656 | - |
563
+ | 0.018 | 1800 | 1.6392 | 0.5499 |
564
+ | 0.0181 | 1810 | 1.6307 | - |
565
+ | 0.0182 | 1820 | 1.6147 | - |
566
+ | 0.0183 | 1830 | 1.6225 | - |
567
+ | 0.0184 | 1840 | 1.6387 | - |
568
+ | 0.0185 | 1850 | 1.6173 | - |
569
+ | 0.0186 | 1860 | 1.6535 | - |
570
+ | 0.0187 | 1870 | 1.6339 | - |
571
+ | 0.0188 | 1880 | 1.6215 | - |
572
+ | 0.0189 | 1890 | 1.6048 | - |
573
+ | 0.019 | 1900 | 1.6278 | 0.5527 |
574
+ | 0.0191 | 1910 | 1.6359 | - |
575
+ | 0.0192 | 1920 | 1.6142 | - |
576
+ | 0.0193 | 1930 | 1.6354 | - |
577
+ | 0.0194 | 1940 | 1.6341 | - |
578
+ | 0.0195 | 1950 | 1.6352 | - |
579
+ | 0.0196 | 1960 | 1.6223 | - |
580
+ | 0.0197 | 1970 | 1.6208 | - |
581
+ | 0.0198 | 1980 | 1.6151 | - |
582
+ | 0.0199 | 1990 | 1.5815 | - |
583
+ | 0.02 | 2000 | 1.6159 | 0.5573 |
584
+ | 0.0201 | 2010 | 1.6229 | - |
585
+ | 0.0202 | 2020 | 1.6156 | - |
586
+ | 0.0203 | 2030 | 1.6051 | - |
587
+ | 0.0204 | 2040 | 1.6411 | - |
588
+ | 0.0205 | 2050 | 1.6339 | - |
589
+ | 0.0206 | 2060 | 1.6241 | - |
590
+ | 0.0207 | 2070 | 1.6014 | - |
591
+ | 0.0208 | 2080 | 1.5942 | - |
592
+ | 0.0209 | 2090 | 1.611 | - |
593
+ | 0.021 | 2100 | 1.6065 | 0.5563 |
594
+ | 0.0211 | 2110 | 1.6208 | - |
595
+ | 0.0212 | 2120 | 1.6239 | - |
596
+ | 0.0213 | 2130 | 1.6066 | - |
597
+ | 0.0214 | 2140 | 1.5936 | - |
598
+ | 0.0215 | 2150 | 1.6008 | - |
599
+ | 0.0216 | 2160 | 1.6239 | - |
600
+ | 0.0217 | 2170 | 1.6116 | - |
601
+ | 0.0218 | 2180 | 1.6128 | - |
602
+ | 0.0219 | 2190 | 1.5819 | - |
603
+ | 0.022 | 2200 | 1.5915 | 0.5547 |
604
+ | 0.0221 | 2210 | 1.6164 | - |
605
+ | 0.0222 | 2220 | 1.6141 | - |
606
+ | 0.0223 | 2230 | 1.6296 | - |
607
+ | 0.0224 | 2240 | 1.6026 | - |
608
+ | 0.0225 | 2250 | 1.5958 | - |
609
+ | 0.0226 | 2260 | 1.6009 | - |
610
+ | 0.0227 | 2270 | 1.6336 | - |
611
+ | 0.0228 | 2280 | 1.6231 | - |
612
+ | 0.0229 | 2290 | 1.6163 | - |
613
+ | 0.023 | 2300 | 1.5811 | 0.5626 |
614
+ | 0.0231 | 2310 | 1.5951 | - |
615
+ | 0.0232 | 2320 | 1.5989 | - |
616
+ | 0.0233 | 2330 | 1.6056 | - |
617
+ | 0.0234 | 2340 | 1.5808 | - |
618
+ | 0.0235 | 2350 | 1.5741 | - |
619
+ | 0.0236 | 2360 | 1.5928 | - |
620
+ | 0.0237 | 2370 | 1.5921 | - |
621
+ | 0.0238 | 2380 | 1.6032 | - |
622
+ | 0.0239 | 2390 | 1.5779 | - |
623
+ | 0.024 | 2400 | 1.609 | 0.5637 |
624
+ | 0.0241 | 2410 | 1.5771 | - |
625
+ | 0.0242 | 2420 | 1.5902 | - |
626
+ | 0.0243 | 2430 | 1.5971 | - |
627
+ | 0.0244 | 2440 | 1.5969 | - |
628
+ | 0.0245 | 2450 | 1.6058 | - |
629
+ | 0.0246 | 2460 | 1.6161 | - |
630
+ | 0.0247 | 2470 | 1.5709 | - |
631
+ | 0.0248 | 2480 | 1.5814 | - |
632
+ | 0.0249 | 2490 | 1.5866 | - |
633
+ | 0.025 | 2500 | 1.5692 | 0.5642 |
634
+ | 0.0251 | 2510 | 1.584 | - |
635
+ | 0.0252 | 2520 | 1.5899 | - |
636
+ | 0.0253 | 2530 | 1.614 | - |
637
+ | 0.0254 | 2540 | 1.5966 | - |
638
+ | 0.0255 | 2550 | 1.5838 | - |
639
+ | 0.0256 | 2560 | 1.5969 | - |
640
+ | 0.0257 | 2570 | 1.5789 | - |
641
+ | 0.0258 | 2580 | 1.5938 | - |
642
+ | 0.0259 | 2590 | 1.5836 | - |
643
+ | 0.026 | 2600 | 1.579 | 0.5640 |
644
+ | 0.0261 | 2610 | 1.5978 | - |
645
+ | 0.0262 | 2620 | 1.5783 | - |
646
+ | 0.0263 | 2630 | 1.5842 | - |
647
+ | 0.0264 | 2640 | 1.6001 | - |
648
+ | 0.0265 | 2650 | 1.5798 | - |
649
+ | 0.0266 | 2660 | 1.6003 | - |
650
+ | 0.0267 | 2670 | 1.5868 | - |
651
+ | 0.0268 | 2680 | 1.603 | - |
652
+ | 0.0269 | 2690 | 1.5789 | - |
653
+ | 0.027 | 2700 | 1.5724 | 0.5674 |
654
+ | 0.0271 | 2710 | 1.5718 | - |
655
+ | 0.0272 | 2720 | 1.5771 | - |
656
+ | 0.0273 | 2730 | 1.5954 | - |
657
+ | 0.0274 | 2740 | 1.5687 | - |
658
+ | 0.0275 | 2750 | 1.5897 | - |
659
+ | 0.0276 | 2760 | 1.5533 | - |
660
+ | 0.0277 | 2770 | 1.5799 | - |
661
+ | 0.0278 | 2780 | 1.5741 | - |
662
+ | 0.0279 | 2790 | 1.6096 | - |
663
+ | 0.028 | 2800 | 1.5863 | 0.5568 |
664
+ | 0.0281 | 2810 | 1.6004 | - |
665
+ | 0.0282 | 2820 | 1.569 | - |
666
+ | 0.0283 | 2830 | 1.5757 | - |
667
+ | 0.0284 | 2840 | 1.5597 | - |
668
+ | 0.0285 | 2850 | 1.5935 | - |
669
+ | 0.0286 | 2860 | 1.5673 | - |
670
+ | 0.0287 | 2870 | 1.5725 | - |
671
+ | 0.0288 | 2880 | 1.5899 | - |
672
+ | 0.0289 | 2890 | 1.5683 | - |
673
+ | 0.029 | 2900 | 1.5519 | 0.5702 |
674
+ | 0.0291 | 2910 | 1.559 | - |
675
+ | 0.0292 | 2920 | 1.5692 | - |
676
+ | 0.0293 | 2930 | 1.5792 | - |
677
+ | 0.0294 | 2940 | 1.5704 | - |
678
+ | 0.0295 | 2950 | 1.5717 | - |
679
+ | 0.0296 | 2960 | 1.5535 | - |
680
+ | 0.0297 | 2970 | 1.553 | - |
681
+ | 0.0298 | 2980 | 1.5629 | - |
682
+ | 0.0299 | 2990 | 1.5636 | - |
683
+ | 0.03 | 3000 | 1.5715 | 0.5681 |
684
+ | 0.0301 | 3010 | 1.5538 | - |
685
+ | 0.0302 | 3020 | 1.5803 | - |
686
+ | 0.0303 | 3030 | 1.5535 | - |
687
+ | 0.0304 | 3040 | 1.5674 | - |
688
+ | 0.0305 | 3050 | 1.5465 | - |
689
+ | 0.0306 | 3060 | 1.5682 | - |
690
+ | 0.0307 | 3070 | 1.5855 | - |
691
+ | 0.0308 | 3080 | 1.559 | - |
692
+ | 0.0309 | 3090 | 1.559 | - |
693
+ | 0.031 | 3100 | 1.5773 | 0.5707 |
694
+ | 0.0311 | 3110 | 1.5693 | - |
695
+ | 0.0312 | 3120 | 1.5643 | - |
696
+ | 0.0313 | 3130 | 1.5586 | - |
697
+ | 0.0314 | 3140 | 1.5453 | - |
698
+ | 0.0315 | 3150 | 1.5799 | - |
699
+ | 0.0316 | 3160 | 1.5532 | - |
700
+ | 0.0317 | 3170 | 1.5459 | - |
701
+ | 0.0318 | 3180 | 1.5541 | - |
702
+ | 0.0319 | 3190 | 1.5789 | - |
703
+ | 0.032 | 3200 | 1.5331 | 0.5595 |
704
+ | 0.0321 | 3210 | 1.5521 | - |
705
+ | 0.0322 | 3220 | 1.5553 | - |
706
+ | 0.0323 | 3230 | 1.5675 | - |
707
+ | 0.0324 | 3240 | 1.551 | - |
708
+ | 0.0325 | 3250 | 1.5753 | - |
709
+ | 0.0326 | 3260 | 1.5625 | - |
710
+ | 0.0327 | 3270 | 1.5782 | - |
711
+ | 0.0328 | 3280 | 1.5588 | - |
712
+ | 0.0329 | 3290 | 1.5795 | - |
713
+ | 0.033 | 3300 | 1.5529 | 0.5654 |
714
+ | 0.0331 | 3310 | 1.5581 | - |
715
+ | 0.0332 | 3320 | 1.5828 | - |
716
+ | 0.0333 | 3330 | 1.5628 | - |
717
+ | 0.0334 | 3340 | 1.5614 | - |
718
+ | 0.0335 | 3350 | 1.5645 | - |
719
+ | 0.0336 | 3360 | 1.5405 | - |
720
+ | 0.0337 | 3370 | 1.5743 | - |
721
+ | 0.0338 | 3380 | 1.5393 | - |
722
+ | 0.0339 | 3390 | 1.5547 | - |
723
+ | 0.034 | 3400 | 1.5403 | 0.5616 |
724
+ | 0.0341 | 3410 | 1.5627 | - |
725
+ | 0.0342 | 3420 | 1.5638 | - |
726
+ | 0.0343 | 3430 | 1.5664 | - |
727
+ | 0.0344 | 3440 | 1.5345 | - |
728
+ | 0.0345 | 3450 | 1.5546 | - |
729
+ | 0.0346 | 3460 | 1.5581 | - |
730
+ | 0.0347 | 3470 | 1.5614 | - |
731
+ | 0.0348 | 3480 | 1.558 | - |
732
+ | 0.0349 | 3490 | 1.5451 | - |
733
+ | 0.035 | 3500 | 1.5491 | 0.5581 |
734
+ | 0.0351 | 3510 | 1.5357 | - |
735
+ | 0.0352 | 3520 | 1.5578 | - |
736
+ | 0.0353 | 3530 | 1.5433 | - |
737
+ | 0.0354 | 3540 | 1.5343 | - |
738
+ | 0.0355 | 3550 | 1.5558 | - |
739
+ | 0.0356 | 3560 | 1.5711 | - |
740
+ | 0.0357 | 3570 | 1.5458 | - |
741
+ | 0.0358 | 3580 | 1.5356 | - |
742
+ | 0.0359 | 3590 | 1.559 | - |
743
+ | 0.036 | 3600 | 1.5338 | 0.5598 |
744
+ | 0.0361 | 3610 | 1.5532 | - |
745
+ | 0.0362 | 3620 | 1.5346 | - |
746
+ | 0.0363 | 3630 | 1.5558 | - |
747
+ | 0.0364 | 3640 | 1.539 | - |
748
+ | 0.0365 | 3650 | 1.538 | - |
749
+ | 0.0366 | 3660 | 1.5638 | - |
750
+ | 0.0367 | 3670 | 1.5666 | - |
751
+ | 0.0368 | 3680 | 1.5662 | - |
752
+ | 0.0369 | 3690 | 1.5432 | - |
753
+ | 0.037 | 3700 | 1.5345 | 0.5680 |
754
+ | 0.0371 | 3710 | 1.5524 | - |
755
+ | 0.0372 | 3720 | 1.5617 | - |
756
+ | 0.0373 | 3730 | 1.5261 | - |
757
+ | 0.0374 | 3740 | 1.5502 | - |
758
+ | 0.0375 | 3750 | 1.5452 | - |
759
+ | 0.0376 | 3760 | 1.5566 | - |
760
+ | 0.0377 | 3770 | 1.5457 | - |
761
+ | 0.0378 | 3780 | 1.5307 | - |
762
+ | 0.0379 | 3790 | 1.5331 | - |
763
+ | 0.038 | 3800 | 1.5294 | 0.5578 |
764
+ | 0.0381 | 3810 | 1.5389 | - |
765
+ | 0.0382 | 3820 | 1.5379 | - |
766
+ | 0.0383 | 3830 | 1.5578 | - |
767
+ | 0.0384 | 3840 | 1.5259 | - |
768
+ | 0.0385 | 3850 | 1.5308 | - |
769
+ | 0.0386 | 3860 | 1.5461 | - |
770
+ | 0.0387 | 3870 | 1.5197 | - |
771
+ | 0.0388 | 3880 | 1.5332 | - |
772
+ | 0.0389 | 3890 | 1.5642 | - |
773
+ | 0.039 | 3900 | 1.5256 | 0.5625 |
774
+ | 0.0391 | 3910 | 1.5608 | - |
775
+ | 0.0392 | 3920 | 1.5567 | - |
776
+ | 0.0393 | 3930 | 1.5278 | - |
777
+ | 0.0394 | 3940 | 1.5404 | - |
778
+ | 0.0395 | 3950 | 1.5367 | - |
779
+ | 0.0396 | 3960 | 1.5186 | - |
780
+ | 0.0397 | 3970 | 1.5437 | - |
781
+ | 0.0398 | 3980 | 1.5459 | - |
782
+ | 0.0399 | 3990 | 1.5536 | - |
783
+ | 0.04 | 4000 | 1.548 | 0.5642 |
784
+ | 0.0401 | 4010 | 1.5407 | - |
785
+ | 0.0402 | 4020 | 1.5235 | - |
786
+ | 0.0403 | 4030 | 1.526 | - |
787
+ | 0.0404 | 4040 | 1.5184 | - |
788
+ | 0.0405 | 4050 | 1.5232 | - |
789
+ | 0.0406 | 4060 | 1.5215 | - |
790
+ | 0.0407 | 4070 | 1.5202 | - |
791
+ | 0.0408 | 4080 | 1.5325 | - |
792
+ | 0.0409 | 4090 | 1.5317 | - |
793
+ | 0.041 | 4100 | 1.5326 | 0.5689 |
794
+ | 0.0411 | 4110 | 1.5083 | - |
795
+ | 0.0412 | 4120 | 1.5158 | - |
796
+ | 0.0413 | 4130 | 1.5321 | - |
797
+ | 0.0414 | 4140 | 1.5383 | - |
798
+ | 0.0415 | 4150 | 1.5432 | - |
799
+ | 0.0416 | 4160 | 1.503 | - |
800
+ | 0.0417 | 4170 | 1.5374 | - |
801
+ | 0.0418 | 4180 | 1.5166 | - |
802
+ | 0.0419 | 4190 | 1.5462 | - |
803
+ | 0.042 | 4200 | 1.5175 | 0.5650 |
804
+ | 0.0421 | 4210 | 1.5348 | - |
805
+ | 0.0422 | 4220 | 1.5613 | - |
806
+ | 0.0423 | 4230 | 1.521 | - |
807
+ | 0.0424 | 4240 | 1.5377 | - |
808
+ | 0.0425 | 4250 | 1.5163 | - |
809
+ | 0.0426 | 4260 | 1.5354 | - |
810
+ | 0.0427 | 4270 | 1.5181 | - |
811
+ | 0.0428 | 4280 | 1.5381 | - |
812
+ | 0.0429 | 4290 | 1.5311 | - |
813
+ | 0.043 | 4300 | 1.5074 | 0.5688 |
814
+ | 0.0431 | 4310 | 1.5162 | - |
815
+ | 0.0432 | 4320 | 1.5051 | - |
816
+ | 0.0433 | 4330 | 1.5171 | - |
817
+ | 0.0434 | 4340 | 1.5283 | - |
818
+ | 0.0435 | 4350 | 1.5171 | - |
819
+ | 0.0436 | 4360 | 1.5377 | - |
820
+ | 0.0437 | 4370 | 1.5197 | - |
821
+ | 0.0438 | 4380 | 1.513 | - |
822
+ | 0.0439 | 4390 | 1.5418 | - |
823
+ | 0.044 | 4400 | 1.5135 | 0.5644 |
824
+ | 0.0441 | 4410 | 1.522 | - |
825
+ | 0.0442 | 4420 | 1.5286 | - |
826
+ | 0.0443 | 4430 | 1.5328 | - |
827
+ | 0.0444 | 4440 | 1.5354 | - |
828
+ | 0.0445 | 4450 | 1.5252 | - |
829
+ | 0.0446 | 4460 | 1.5127 | - |
830
+ | 0.0447 | 4470 | 1.5116 | - |
831
+ | 0.0448 | 4480 | 1.5237 | - |
832
+ | 0.0449 | 4490 | 1.5265 | - |
833
+ | 0.045 | 4500 | 1.5298 | 0.5649 |
834
+ | 0.0451 | 4510 | 1.5349 | - |
835
+ | 0.0452 | 4520 | 1.4997 | - |
836
+ | 0.0453 | 4530 | 1.4947 | - |
837
+ | 0.0454 | 4540 | 1.5186 | - |
838
+ | 0.0455 | 4550 | 1.487 | - |
839
+ | 0.0456 | 4560 | 1.5088 | - |
840
+ | 0.0457 | 4570 | 1.5422 | - |
841
+ | 0.0458 | 4580 | 1.4962 | - |
842
+ | 0.0459 | 4590 | 1.5193 | - |
843
+ | 0.046 | 4600 | 1.5306 | 0.5608 |
844
+ | 0.0461 | 4610 | 1.536 | - |
845
+ | 0.0462 | 4620 | 1.5334 | - |
846
+ | 0.0463 | 4630 | 1.5598 | - |
847
+ | 0.0464 | 4640 | 1.5223 | - |
848
+ | 0.0465 | 4650 | 1.5223 | - |
849
+ | 0.0466 | 4660 | 1.5277 | - |
850
+ | 0.0467 | 4670 | 1.5381 | - |
851
+ | 0.0468 | 4680 | 1.5416 | - |
852
+ | 0.0469 | 4690 | 1.5056 | - |
853
+ | 0.047 | 4700 | 1.5077 | 0.5655 |
854
+ | 0.0471 | 4710 | 1.5045 | - |
855
+ | 0.0472 | 4720 | 1.5135 | - |
856
+ | 0.0473 | 4730 | 1.5284 | - |
857
+ | 0.0474 | 4740 | 1.5331 | - |
858
+ | 0.0475 | 4750 | 1.5194 | - |
859
+ | 0.0476 | 4760 | 1.5286 | - |
860
+ | 0.0477 | 4770 | 1.536 | - |
861
+ | 0.0478 | 4780 | 1.4984 | - |
862
+ | 0.0479 | 4790 | 1.5086 | - |
863
+ | 0.048 | 4800 | 1.5137 | 0.5703 |
864
+ | 0.0481 | 4810 | 1.5421 | - |
865
+ | 0.0482 | 4820 | 1.5131 | - |
866
+ | 0.0483 | 4830 | 1.5084 | - |
867
+ | 0.0484 | 4840 | 1.5006 | - |
868
+ | 0.0485 | 4850 | 1.5141 | - |
869
+ | 0.0486 | 4860 | 1.503 | - |
870
+ | 0.0487 | 4870 | 1.511 | - |
871
+ | 0.0488 | 4880 | 1.5175 | - |
872
+ | 0.0489 | 4890 | 1.5088 | - |
873
+ | 0.049 | 4900 | 1.5019 | 0.5711 |
874
+ | 0.0491 | 4910 | 1.5359 | - |
875
+ | 0.0492 | 4920 | 1.5218 | - |
876
+ | 0.0493 | 4930 | 1.5043 | - |
877
+ | 0.0494 | 4940 | 1.5059 | - |
878
+ | 0.0495 | 4950 | 1.4943 | - |
879
+ | 0.0496 | 4960 | 1.5269 | - |
880
+ | 0.0497 | 4970 | 1.517 | - |
881
+ | 0.0498 | 4980 | 1.5135 | - |
882
+ | 0.0499 | 4990 | 1.5204 | - |
883
+ | 0.05 | 5000 | 1.4983 | 0.5700 |
884
+ | 0.0501 | 5010 | 1.5271 | - |
885
+ | 0.0502 | 5020 | 1.4929 | - |
886
+ | 0.0503 | 5030 | 1.4947 | - |
887
+ | 0.0504 | 5040 | 1.4883 | - |
888
+ | 0.0505 | 5050 | 1.523 | - |
889
+ | 0.0506 | 5060 | 1.5092 | - |
890
+ | 0.0507 | 5070 | 1.5262 | - |
891
+ | 0.0508 | 5080 | 1.4859 | - |
892
+ | 0.0509 | 5090 | 1.5059 | - |
893
+ | 0.051 | 5100 | 1.5293 | 0.5677 |
894
+ | 0.0511 | 5110 | 1.4962 | - |
895
+ | 0.0512 | 5120 | 1.5192 | - |
896
+ | 0.0513 | 5130 | 1.5115 | - |
897
+ | 0.0514 | 5140 | 1.5152 | - |
898
+ | 0.0515 | 5150 | 1.4948 | - |
899
+ | 0.0516 | 5160 | 1.5376 | - |
900
+ | 0.0517 | 5170 | 1.5015 | - |
901
+ | 0.0518 | 5180 | 1.5119 | - |
902
+ | 0.0519 | 5190 | 1.4926 | - |
903
+ | 0.052 | 5200 | 1.5235 | 0.5663 |
904
+ | 0.0521 | 5210 | 1.5158 | - |
905
+ | 0.0522 | 5220 | 1.5072 | - |
906
+ | 0.0523 | 5230 | 1.5264 | - |
907
+ | 0.0524 | 5240 | 1.5026 | - |
908
+ | 0.0525 | 5250 | 1.5042 | - |
909
+ | 0.0526 | 5260 | 1.5096 | - |
910
+ | 0.0527 | 5270 | 1.5022 | - |
911
+ | 0.0528 | 5280 | 1.5038 | - |
912
+ | 0.0529 | 5290 | 1.4903 | - |
913
+ | 0.053 | 5300 | 1.5284 | 0.5684 |
914
+ | 0.0531 | 5310 | 1.5009 | - |
915
+ | 0.0532 | 5320 | 1.505 | - |
916
+ | 0.0533 | 5330 | 1.5288 | - |
917
+ | 0.0534 | 5340 | 1.501 | - |
918
+ | 0.0535 | 5350 | 1.5143 | - |
919
+ | 0.0536 | 5360 | 1.5071 | - |
920
+ | 0.0537 | 5370 | 1.4976 | - |
921
+ | 0.0538 | 5380 | 1.5092 | - |
922
+ | 0.0539 | 5390 | 1.5082 | - |
923
+ | 0.054 | 5400 | 1.5056 | 0.5716 |
924
+ | 0.0541 | 5410 | 1.4934 | - |
925
+ | 0.0542 | 5420 | 1.5159 | - |
926
+ | 0.0543 | 5430 | 1.5059 | - |
927
+ | 0.0544 | 5440 | 1.4937 | - |
928
+ | 0.0545 | 5450 | 1.5223 | - |
929
+ | 0.0546 | 5460 | 1.4989 | - |
930
+ | 0.0547 | 5470 | 1.5149 | - |
931
+ | 0.0548 | 5480 | 1.5003 | - |
932
+ | 0.0549 | 5490 | 1.521 | - |
933
+ | 0.055 | 5500 | 1.4959 | 0.5779 |
934
+ | 0.0551 | 5510 | 1.5074 | - |
935
+ | 0.0552 | 5520 | 1.5071 | - |
936
+ | 0.0553 | 5530 | 1.5173 | - |
937
+ | 0.0554 | 5540 | 1.5111 | - |
938
+ | 0.0555 | 5550 | 1.5017 | - |
939
+ | 0.0556 | 5560 | 1.5296 | - |
940
+ | 0.0557 | 5570 | 1.5147 | - |
941
+ | 0.0558 | 5580 | 1.524 | - |
942
+ | 0.0559 | 5590 | 1.4936 | - |
943
+ | 0.056 | 5600 | 1.5111 | 0.5684 |
944
+ | 0.0561 | 5610 | 1.5147 | - |
945
+ | 0.0562 | 5620 | 1.5002 | - |
946
+ | 0.0563 | 5630 | 1.5048 | - |
947
+ | 0.0564 | 5640 | 1.5093 | - |
948
+ | 0.0565 | 5650 | 1.5093 | - |
949
+ | 0.0566 | 5660 | 1.4795 | - |
950
+ | 0.0567 | 5670 | 1.5149 | - |
951
+ | 0.0568 | 5680 | 1.4881 | - |
952
+ | 0.0569 | 5690 | 1.4986 | - |
953
+ | 0.057 | 5700 | 1.4929 | 0.5692 |
954
+ | 0.0571 | 5710 | 1.5186 | - |
955
+ | 0.0572 | 5720 | 1.4938 | - |
956
+ | 0.0573 | 5730 | 1.4943 | - |
957
+ | 0.0574 | 5740 | 1.4926 | - |
958
+ | 0.0575 | 5750 | 1.4672 | - |
959
+ | 0.0576 | 5760 | 1.5036 | - |
960
+ | 0.0577 | 5770 | 1.511 | - |
961
+ | 0.0578 | 5780 | 1.4892 | - |
962
+ | 0.0579 | 5790 | 1.4983 | - |
963
+ | 0.058 | 5800 | 1.4914 | 0.5704 |
964
+ | 0.0581 | 5810 | 1.4883 | - |
965
+ | 0.0582 | 5820 | 1.5052 | - |
966
+ | 0.0583 | 5830 | 1.5066 | - |
967
+ | 0.0584 | 5840 | 1.4904 | - |
968
+ | 0.0585 | 5850 | 1.5114 | - |
969
+ | 0.0586 | 5860 | 1.4984 | - |
970
+ | 0.0587 | 5870 | 1.4827 | - |
971
+ | 0.0588 | 5880 | 1.4676 | - |
972
+ | 0.0589 | 5890 | 1.514 | - |
973
+ | 0.059 | 5900 | 1.509 | 0.5688 |
974
+ | 0.0591 | 5910 | 1.5094 | - |
975
+ | 0.0592 | 5920 | 1.4902 | - |
976
+ | 0.0593 | 5930 | 1.4849 | - |
977
+ | 0.0594 | 5940 | 1.5159 | - |
978
+ | 0.0595 | 5950 | 1.5012 | - |
979
+ | 0.0596 | 5960 | 1.5068 | - |
980
+ | 0.0597 | 5970 | 1.5054 | - |
981
+ | 0.0598 | 5980 | 1.4722 | - |
982
+ | 0.0599 | 5990 | 1.4975 | - |
983
+ | 0.06 | 6000 | 1.4843 | 0.5623 |
984
+ | 0.0601 | 6010 | 1.4726 | - |
985
+ | 0.0602 | 6020 | 1.517 | - |
986
+ | 0.0603 | 6030 | 1.4957 | - |
987
+ | 0.0604 | 6040 | 1.508 | - |
988
+ | 0.0605 | 6050 | 1.5113 | - |
989
+ | 0.0606 | 6060 | 1.4903 | - |
990
+ | 0.0607 | 6070 | 1.4761 | - |
991
+ | 0.0608 | 6080 | 1.5226 | - |
992
+ | 0.0609 | 6090 | 1.5228 | - |
993
+ | 0.061 | 6100 | 1.4836 | 0.5643 |
994
+ | 0.0611 | 6110 | 1.4926 | - |
995
+ | 0.0612 | 6120 | 1.4968 | - |
996
+ | 0.0613 | 6130 | 1.4954 | - |
997
+ | 0.0614 | 6140 | 1.5209 | - |
998
+ | 0.0615 | 6150 | 1.4857 | - |
999
+ | 0.0616 | 6160 | 1.4881 | - |
1000
+ | 0.0617 | 6170 | 1.504 | - |
1001
+ | 0.0618 | 6180 | 1.464 | - |
1002
+ | 0.0619 | 6190 | 1.5003 | - |
1003
+ | 0.062 | 6200 | 1.4858 | 0.5643 |
1004
+
1005
+ </details>
1006
+
1007
+ ### Framework Versions
1008
+ - Python: 3.12.12
1009
+ - Sentence Transformers: 5.2.0
1010
+ - Transformers: 4.57.3
1011
+ - PyTorch: 2.9.0+cu128
1012
+ - Accelerate: 1.12.0
1013
+ - Datasets: 4.4.2
1014
+ - Tokenizers: 0.22.1
1015
+
1016
+ ## Citation
1017
+
1018
+ ### BibTeX
1019
+
1020
+ #### Sentence Transformers
1021
+ ```bibtex
1022
+ @inproceedings{reimers-2019-sentence-bert,
1023
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
1024
+ author = "Reimers, Nils and Gurevych, Iryna",
1025
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
1026
+ month = "11",
1027
+ year = "2019",
1028
+ publisher = "Association for Computational Linguistics",
1029
+ url = "https://arxiv.org/abs/1908.10084",
1030
+ }
1031
+ ```
1032
+
1033
+ #### CachedMultipleNegativesRankingLoss
1034
+ ```bibtex
1035
+ @misc{gao2021scaling,
1036
+ title={Scaling Deep Contrastive Learning Batch Size under Memory Limited Setup},
1037
+ author={Luyu Gao and Yunyi Zhang and Jiawei Han and Jamie Callan},
1038
+ year={2021},
1039
+ eprint={2101.06983},
1040
+ archivePrefix={arXiv},
1041
+ primaryClass={cs.LG}
1042
+ }
1043
+ ```
1044
+
1045
+ <!--
1046
+ ## Glossary
1047
+
1048
+ *Clearly define terms in order to be accessible across audiences.*
1049
+ -->
1050
+
1051
+ <!--
1052
+ ## Model Card Authors
1053
+
1054
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
1055
+ -->
1056
+
1057
+ <!--
1058
+ ## Model Card Contact
1059
+
1060
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
1061
+ -->
added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
chat_template.jinja ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- endif %}
6
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
+ {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
+ {%- endif %}
16
+ {%- endif %}
17
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
+ {%- for message in messages[::-1] %}
19
+ {%- set index = (messages|length - 1) - loop.index0 %}
20
+ {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
+ {%- set ns.multi_step_tool = false %}
22
+ {%- set ns.last_query_index = index %}
23
+ {%- endif %}
24
+ {%- endfor %}
25
+ {%- for message in messages %}
26
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
27
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
28
+ {%- elif message.role == "assistant" %}
29
+ {%- set content = message.content %}
30
+ {%- set reasoning_content = '' %}
31
+ {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
32
+ {%- set reasoning_content = message.reasoning_content %}
33
+ {%- else %}
34
+ {%- if '</think>' in message.content %}
35
+ {%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
36
+ {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
37
+ {%- endif %}
38
+ {%- endif %}
39
+ {%- if loop.index0 > ns.last_query_index %}
40
+ {%- if loop.last or (not loop.last and reasoning_content) %}
41
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
42
+ {%- else %}
43
+ {{- '<|im_start|>' + message.role + '\n' + content }}
44
+ {%- endif %}
45
+ {%- else %}
46
+ {{- '<|im_start|>' + message.role + '\n' + content }}
47
+ {%- endif %}
48
+ {%- if message.tool_calls %}
49
+ {%- for tool_call in message.tool_calls %}
50
+ {%- if (loop.first and content) or (not loop.first) %}
51
+ {{- '\n' }}
52
+ {%- endif %}
53
+ {%- if tool_call.function %}
54
+ {%- set tool_call = tool_call.function %}
55
+ {%- endif %}
56
+ {{- '<tool_call>\n{"name": "' }}
57
+ {{- tool_call.name }}
58
+ {{- '", "arguments": ' }}
59
+ {%- if tool_call.arguments is string %}
60
+ {{- tool_call.arguments }}
61
+ {%- else %}
62
+ {{- tool_call.arguments | tojson }}
63
+ {%- endif %}
64
+ {{- '}\n</tool_call>' }}
65
+ {%- endfor %}
66
+ {%- endif %}
67
+ {{- '<|im_end|>\n' }}
68
+ {%- elif message.role == "tool" %}
69
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
70
+ {{- '<|im_start|>user' }}
71
+ {%- endif %}
72
+ {{- '\n<tool_response>\n' }}
73
+ {{- message.content }}
74
+ {{- '\n</tool_response>' }}
75
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
76
+ {{- '<|im_end|>\n' }}
77
+ {%- endif %}
78
+ {%- endif %}
79
+ {%- endfor %}
80
+ {%- if add_generation_prompt %}
81
+ {{- '<|im_start|>assistant\n' }}
82
+ {%- if enable_thinking is defined and enable_thinking is false %}
83
+ {{- '<think>\n\n</think>\n\n' }}
84
+ {%- endif %}
85
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3Model"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "dtype": "float32",
9
+ "eos_token_id": 151643,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 6144,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention"
44
+ ],
45
+ "max_position_embeddings": 32768,
46
+ "max_window_layers": 28,
47
+ "model_type": "qwen3",
48
+ "num_attention_heads": 16,
49
+ "num_hidden_layers": 28,
50
+ "num_key_value_heads": 8,
51
+ "rms_norm_eps": 1e-06,
52
+ "rope_scaling": null,
53
+ "rope_theta": 1000000,
54
+ "sliding_window": null,
55
+ "tie_word_embeddings": true,
56
+ "transformers_version": "4.57.3",
57
+ "use_cache": true,
58
+ "use_sliding_window": false,
59
+ "vocab_size": 151936
60
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "SentenceTransformer",
3
+ "__version__": {
4
+ "sentence_transformers": "5.2.0",
5
+ "transformers": "4.57.3",
6
+ "pytorch": "2.7.1+cu126"
7
+ },
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae915626728724fb591d033d35e06362b12cfd17f82a7f25877d881fbf7705e9
3
+ size 4969538328
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfa07060398b46d8a12e5036eadae4fcdd95dfe72dbec0546b2805ba2a25b45b
3
+ size 1912795064
model.safetensors.index.json ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_parameters": 1720574976,
4
+ "total_size": 6882299904
5
+ },
6
+ "weight_map": {
7
+ "embed_tokens.weight": "model-00001-of-00002.safetensors",
8
+ "layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
9
+ "layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
10
+ "layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
11
+ "layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
12
+ "layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
13
+ "layers.0.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
14
+ "layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
15
+ "layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
16
+ "layers.0.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
17
+ "layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
18
+ "layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
19
+ "layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
20
+ "layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
21
+ "layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
22
+ "layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
23
+ "layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
24
+ "layers.1.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
25
+ "layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
26
+ "layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
27
+ "layers.1.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
28
+ "layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
29
+ "layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
30
+ "layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
31
+ "layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
32
+ "layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
33
+ "layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
34
+ "layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
35
+ "layers.10.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
36
+ "layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
37
+ "layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
38
+ "layers.10.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
39
+ "layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
40
+ "layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
41
+ "layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
42
+ "layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
43
+ "layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
44
+ "layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
45
+ "layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
46
+ "layers.11.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
47
+ "layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
48
+ "layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
49
+ "layers.11.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
50
+ "layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
51
+ "layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
52
+ "layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
53
+ "layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
54
+ "layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
55
+ "layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
56
+ "layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
57
+ "layers.12.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
58
+ "layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
59
+ "layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
60
+ "layers.12.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
61
+ "layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
62
+ "layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
63
+ "layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
64
+ "layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
65
+ "layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
66
+ "layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
67
+ "layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
68
+ "layers.13.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
69
+ "layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
70
+ "layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
71
+ "layers.13.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
72
+ "layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
73
+ "layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
74
+ "layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
75
+ "layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
76
+ "layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
77
+ "layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
78
+ "layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
79
+ "layers.14.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
80
+ "layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
81
+ "layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
82
+ "layers.14.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
83
+ "layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
84
+ "layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
85
+ "layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
86
+ "layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
87
+ "layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
88
+ "layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
89
+ "layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
90
+ "layers.15.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
91
+ "layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
92
+ "layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
93
+ "layers.15.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
94
+ "layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
95
+ "layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
96
+ "layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
97
+ "layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
98
+ "layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
99
+ "layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
100
+ "layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
101
+ "layers.16.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
102
+ "layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
103
+ "layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
104
+ "layers.16.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
105
+ "layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
106
+ "layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
107
+ "layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
108
+ "layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
109
+ "layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
110
+ "layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
111
+ "layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
112
+ "layers.17.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
113
+ "layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
114
+ "layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
115
+ "layers.17.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
116
+ "layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
117
+ "layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
118
+ "layers.18.input_layernorm.weight": "model-00002-of-00002.safetensors",
119
+ "layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
120
+ "layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
121
+ "layers.18.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
122
+ "layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
123
+ "layers.18.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
124
+ "layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
125
+ "layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
126
+ "layers.18.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
127
+ "layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
128
+ "layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
129
+ "layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
130
+ "layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
131
+ "layers.19.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
132
+ "layers.19.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
133
+ "layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
134
+ "layers.19.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
135
+ "layers.19.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
136
+ "layers.19.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
137
+ "layers.19.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
138
+ "layers.19.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
139
+ "layers.19.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
140
+ "layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
141
+ "layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
142
+ "layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
143
+ "layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
144
+ "layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
145
+ "layers.2.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
146
+ "layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
147
+ "layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
148
+ "layers.2.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
149
+ "layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
150
+ "layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
151
+ "layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
152
+ "layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
153
+ "layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
154
+ "layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
155
+ "layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
156
+ "layers.20.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
157
+ "layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
158
+ "layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
159
+ "layers.20.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
160
+ "layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
161
+ "layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
162
+ "layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
163
+ "layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
164
+ "layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
165
+ "layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
166
+ "layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
167
+ "layers.21.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
168
+ "layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
169
+ "layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
170
+ "layers.21.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
171
+ "layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
172
+ "layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
173
+ "layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
174
+ "layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
175
+ "layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
176
+ "layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
177
+ "layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
178
+ "layers.22.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
179
+ "layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
180
+ "layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
181
+ "layers.22.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
182
+ "layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
183
+ "layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
184
+ "layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
185
+ "layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
186
+ "layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
187
+ "layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
188
+ "layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
189
+ "layers.23.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
190
+ "layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
191
+ "layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
192
+ "layers.23.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
193
+ "layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
194
+ "layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
195
+ "layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
196
+ "layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
197
+ "layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
198
+ "layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
199
+ "layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
200
+ "layers.24.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
201
+ "layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
202
+ "layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
203
+ "layers.24.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
204
+ "layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
205
+ "layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
206
+ "layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
207
+ "layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
208
+ "layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
209
+ "layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
210
+ "layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
211
+ "layers.25.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
212
+ "layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
213
+ "layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
214
+ "layers.25.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
215
+ "layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
216
+ "layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
217
+ "layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
218
+ "layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
219
+ "layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
220
+ "layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
221
+ "layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
222
+ "layers.26.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
223
+ "layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
224
+ "layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
225
+ "layers.26.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
226
+ "layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
227
+ "layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
228
+ "layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
229
+ "layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
230
+ "layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
231
+ "layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
232
+ "layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
233
+ "layers.27.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
234
+ "layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
235
+ "layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
236
+ "layers.27.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
237
+ "layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
238
+ "layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
239
+ "layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
240
+ "layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
241
+ "layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
242
+ "layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
243
+ "layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
244
+ "layers.3.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
245
+ "layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
246
+ "layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
247
+ "layers.3.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
248
+ "layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
249
+ "layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
250
+ "layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
251
+ "layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
252
+ "layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
253
+ "layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
254
+ "layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
255
+ "layers.4.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
256
+ "layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
257
+ "layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
258
+ "layers.4.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
259
+ "layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
260
+ "layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
261
+ "layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
262
+ "layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
263
+ "layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
264
+ "layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
265
+ "layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
266
+ "layers.5.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
267
+ "layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
268
+ "layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
269
+ "layers.5.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
270
+ "layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
271
+ "layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
272
+ "layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
273
+ "layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
274
+ "layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
275
+ "layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
276
+ "layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
277
+ "layers.6.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
278
+ "layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
279
+ "layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
280
+ "layers.6.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
281
+ "layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
282
+ "layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
283
+ "layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
284
+ "layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
285
+ "layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
286
+ "layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
287
+ "layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
288
+ "layers.7.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
289
+ "layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
290
+ "layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
291
+ "layers.7.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
292
+ "layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
293
+ "layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
294
+ "layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
295
+ "layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
296
+ "layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
297
+ "layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
298
+ "layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
299
+ "layers.8.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
300
+ "layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
301
+ "layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
302
+ "layers.8.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
303
+ "layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
304
+ "layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
305
+ "layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
306
+ "layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
307
+ "layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
308
+ "layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
309
+ "layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
310
+ "layers.9.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
311
+ "layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
312
+ "layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
313
+ "layers.9.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
314
+ "layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
315
+ "layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
316
+ "norm.weight": "model-00002-of-00002.safetensors"
317
+ }
318
+ }
modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 32768,
3
+ "do_lower_case": false
4
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aed98cff73772352c260a48579f3b212cb2b06ce0553446799ebf0b5465b0673
3
+ size 11422922
tokenizer_config.json ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "clean_up_tokenization_spaces": false,
231
+ "eos_token": "<|endoftext|>",
232
+ "errors": "replace",
233
+ "extra_special_tokens": {},
234
+ "max_length": 32768,
235
+ "model_max_length": 32768,
236
+ "pad_to_multiple_of": null,
237
+ "pad_token": "<|endoftext|>",
238
+ "pad_token_type_id": 0,
239
+ "padding_side": "right",
240
+ "split_special_tokens": false,
241
+ "stride": 0,
242
+ "tokenizer_class": "Qwen2Tokenizer",
243
+ "truncation_side": "right",
244
+ "truncation_strategy": "longest_first",
245
+ "unk_token": null
246
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff