Rnfudge committed · verified
Commit ce2afea · 1 Parent(s): 72edaa7

Upload folder using huggingface_hub
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "word_embedding_dimension": 2560,
+   "pooling_mode_cls_token": false,
+   "pooling_mode_mean_tokens": false,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false,
+   "pooling_mode_weightedmean_tokens": false,
+   "pooling_mode_lasttoken": true,
+   "include_prompt": true
+ }
README.md ADDED
@@ -0,0 +1,843 @@
+ ---
+ tags:
+ - unsloth
+ - sentence-transformers
+ - sentence-similarity
+ - feature-extraction
+ - dense
+ - generated_from_trainer
+ - dataset_size:223748
+ - loss:MultipleNegativesRankingLoss
+ widget:
+ - source_sentence: What is the significance of the IPv6 multicast address ff02::1?
+   sentences:
+   - Felt board for classroom activities
+   - In the provided network output, the frequent appearance of `ff020000000000000000000000000001`
+     across various interfaces like `lo`, `eth0`, and `eth1` indicates that these interfaces
+     are correctly configured for basic IPv6 operations. Every active IPv6 interface
+     on a segment must listen for messages sent to `ff02::1` to participate in essential
+     link-local protocols, making its presence a standard and expected entry.
+   - Not all customizations are supported across all snapd image types or models. For
+     example, certain customizations might be unsupported for UC20+ or classic models,
+     leading to errors. Additionally, if a gadget snap itself defines `defaults` in
+     its `meta/gadget.yaml`, these can be overridden or complemented by the `Customizations`
+     provided during the `SetupSeed` call, affecting system services like SSH.
+ - source_sentence: vein
+   sentences:
+   - blood vessel
+   - 'The `hkdf.Key` function requires several inputs: the underlying hash function
+     for HMAC (e.g., `sha256.New`), the master `secret` material, an optional `salt`
+     value, context-specific `info`, and the desired `keyLen` for the output derived
+     key. These parameters collectively guide the key derivation process.'
+   - egg-laying
+ - source_sentence: How are special file types determined in file status?
+   sentences:
+   - Integrated into the *ensure loop*, the `TaskRunner`'s `Ensure` method is invoked
+     periodically to manage task execution. It's responsible for spawning goroutines
+     to concurrently execute task handlers, whether for their primary 'do' logic or
+     their 'undo' logic in case of failures. High-level system parts can also trigger
+     its execution proactively using `State.EnsureBefore`.
+   - File type identification within the `fileStat` population involves a critical
+     step where the `fs.sys.Mode` value is masked with `syscall.S_IFMT`. This operation
+     allows the function to discern whether the file is a block device (`S_IFBLK`),
+     a character device (`S_IFCHR`), a named pipe (`S_IFIFO`), a socket (`S_IFSOCK`),
+     or a regular file (`S_IFREG`), applying the appropriate `FileMode` flags.
+   - Volatility acceptance
+ - source_sentence: mitre
+   sentences:
+   - ocean liner
+   - It becomes necessary because, during the initial `mmap` of an output buffer, no
+     code signature typically exists. After the signature is finally created, the kernel's
+     cached view might not reflect this change. Therefore, `purgeSignatureCache` explicitly
+     clears this cache to prevent problems related to stale signature information.
+   - Clerical cap
+ - source_sentence: craniofacial
+   sentences:
+   - head and face structure
+   - Planned destruction of structures using explosives or machinery
+   - Anchor-positive pairs are fundamental to contrastive learning, serving to define
+     what the model should consider as semantically similar data points, guiding it
+     to learn meaningful representations.
+ pipeline_tag: sentence-similarity
+ library_name: sentence-transformers
+ ---
+ 
+ # SentenceTransformer
+ 
+ This model was finetuned with [Unsloth](https://github.com/unslothai/unsloth).
+ 
+ [<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
+ 
+ This is a [sentence-transformers](https://www.SBERT.net) model. It maps sentences & paragraphs to a 2560-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+ 
+ ## Model Details
+ 
+ ### Model Description
+ - **Model Type:** Sentence Transformer
+ <!-- - **Base model:** [Unknown](https://huggingface.co/unknown) -->
+ - **Maximum Sequence Length:** 8192 tokens
+ - **Output Dimensionality:** 2560 dimensions
+ - **Similarity Function:** Cosine Similarity
+ <!-- - **Training Dataset:** Unknown -->
+ <!-- - **Language:** Unknown -->
+ <!-- - **License:** Unknown -->
+ 
+ ### Model Sources
+ 
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/huggingface/sentence-transformers)
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+ 
+ ### Full Model Architecture
+ 
+ ```
+ SentenceTransformer(
+   (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False, 'architecture': 'PeftModelForFeatureExtraction'})
+   (1): Pooling({'word_embedding_dimension': 2560, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': True, 'include_prompt': True})
+   (2): Normalize()
+ )
+ ```
+ 
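+ The `Pooling` module takes the hidden state of each sequence's final non-padding token (`pooling_mode_lasttoken: True`), and the `Normalize` module then L2-normalizes it, which is why cosine similarity reduces to a dot product. As a rough illustration only (not the library's internal code), last-token pooling amounts to:
+ 
+ ```python
+ import torch
+ import torch.nn.functional as F
+ 
+ def last_token_pool(hidden_states: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
+     """(batch, seq, dim) hidden states -> (batch, dim) embeddings, assuming right padding."""
+     last_idx = attention_mask.sum(dim=1) - 1                        # index of last real token per row
+     batch_idx = torch.arange(hidden_states.size(0), device=hidden_states.device)
+     emb = hidden_states[batch_idx, last_idx]                        # gather the last-token states
+     return F.normalize(emb, p=2, dim=1)                             # mirrors the Normalize() module
+ ```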
+ ## Usage
+ 
+ ### Direct Usage (Sentence Transformers)
+ 
+ First install the Sentence Transformers library:
+ 
+ ```bash
+ pip install -U sentence-transformers
+ ```
+ 
+ Then you can load this model and run inference.
+ ```python
+ from sentence_transformers import SentenceTransformer
+ 
+ # Download from the 🤗 Hub
+ model = SentenceTransformer("sentence_transformers_model_id")
+ # Run inference
+ sentences = [
+     'craniofacial',
+     'head and face structure',
+     'Anchor-positive pairs are fundamental to contrastive learning, serving to define what the model should consider as semantically similar data points, guiding it to learn meaningful representations.',
+ ]
+ embeddings = model.encode(sentences)
+ print(embeddings.shape)
+ # [3, 2560]
+ 
+ # Get the similarity scores for the embeddings
+ similarities = model.similarity(embeddings, embeddings)
+ print(similarities)
+ # tensor([[1.0000, 0.7268, 0.0036],
+ #         [0.7268, 1.0000, 0.0179],
+ #         [0.0036, 0.0179, 1.0000]])
+ ```
+ 
+ <!--
+ ### Direct Usage (Transformers)
+ 
+ <details><summary>Click to see the direct usage in Transformers</summary>
+ 
+ </details>
+ -->
+ 
+ <!--
+ ### Downstream Usage (Sentence Transformers)
+ 
+ You can finetune this model on your own dataset.
+ 
+ <details><summary>Click to expand</summary>
+ 
+ </details>
+ -->
+ 
+ <!--
+ ### Out-of-Scope Use
+ 
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
+ -->
+ 
+ <!--
+ ## Bias, Risks and Limitations
+ 
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+ -->
+ 
+ <!--
+ ### Recommendations
+ 
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+ -->
+ 
+ ## Training Details
+ 
+ ### Training Dataset
+ 
+ #### Unnamed Dataset
+ 
+ * Size: 223,748 training samples
+ * Columns: <code>anchor</code> and <code>positive</code>
+ * Approximate statistics based on the first 1000 samples:
+   |         | anchor                                                                           | positive                                                                           |
+   |:--------|:---------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
+   | type    | string                                                                           | string                                                                             |
+   | details | <ul><li>min: 2 tokens</li><li>mean: 8.95 tokens</li><li>max: 33 tokens</li></ul> | <ul><li>min: 2 tokens</li><li>mean: 38.48 tokens</li><li>max: 124 tokens</li></ul> |
+ * Samples:
+   | anchor | positive |
+   |:-------|:---------|
+   | <code>groupthink</code> | <code>Psychological tendency for group conformity</code> |
+   | <code>customs and border protection</code> | <code>DHS component enforcing trade and immigration laws</code> |
+   | <code>What is the meaning and purpose of the `//go:noescape` directive in Go functions?</code> | <code>The `//go:noescape` comment is a hint to the Go compiler. It asserts that none of the pointer parameters of the decorated function will escape the function's stack frame. This is primarily used for performance tuning in low-level code, ensuring that objects pointed to by function arguments are not allocated on the heap, thus avoiding garbage collection cycles.</code> |
+ * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
+   ```json
+   {
+       "scale": 20.0,
+       "similarity_fct": "cos_sim",
+       "gather_across_devices": false,
+       "directions": [
+           "query_to_doc"
+       ],
+       "partition_mode": "joint",
+       "hardness_mode": null,
+       "hardness_strength": 0.0
+   }
+   ```
+ 
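+ For orientation, this is roughly how anchor-positive training with `MultipleNegativesRankingLoss` is wired up in Sentence Transformers; a minimal sketch with placeholder data (the base model id comes from this repository's `config.json`; nothing else here is the original training script):
+ 
+ ```python
+ from datasets import Dataset
+ from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer
+ from sentence_transformers.losses import MultipleNegativesRankingLoss
+ 
+ model = SentenceTransformer("unsloth/Qwen3-Embedding-4B")
+ train_dataset = Dataset.from_dict({
+     "anchor": ["groupthink", "vein"],
+     "positive": ["Psychological tendency for group conformity", "blood vessel"],
+ })
+ # Every other positive in the batch serves as an in-batch negative for each anchor.
+ loss = MultipleNegativesRankingLoss(model, scale=20.0)
+ trainer = SentenceTransformerTrainer(model=model, train_dataset=train_dataset, loss=loss)
+ trainer.train()
+ ```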
+ ### Training Hyperparameters
+ 
+ #### Non-Default Hyperparameters
+ 
+ - `per_device_train_batch_size`: 64
+ - `gradient_accumulation_steps`: 8
+ - `learning_rate`: 3e-05
+ - `num_train_epochs`: 1
+ - `lr_scheduler_type`: constant_with_warmup
+ - `warmup_ratio`: 0.03
+ - `bf16`: True
+ - `batch_sampler`: no_duplicates
+ 
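+ These map one-to-one onto `SentenceTransformerTrainingArguments`; a hedged sketch reproducing just the non-default values above (`output_dir` is a placeholder):
+ 
+ ```python
+ from sentence_transformers import SentenceTransformerTrainingArguments
+ from sentence_transformers.training_args import BatchSamplers
+ 
+ args = SentenceTransformerTrainingArguments(
+     output_dir="outputs",                       # placeholder path
+     per_device_train_batch_size=64,
+     gradient_accumulation_steps=8,              # effective batch size 512 per optimizer step
+     learning_rate=3e-5,
+     num_train_epochs=1,
+     lr_scheduler_type="constant_with_warmup",
+     warmup_ratio=0.03,
+     bf16=True,
+     batch_sampler=BatchSamplers.NO_DUPLICATES,  # keeps duplicate texts out of a batch, protecting in-batch negatives
+ )
+ ```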
+ #### All Hyperparameters
+ 
+ <details><summary>Click to expand</summary>
+ 
+ - `overwrite_output_dir`: False
+ - `do_predict`: False
+ - `eval_strategy`: no
+ - `prediction_loss_only`: True
+ - `per_device_train_batch_size`: 64
+ - `per_device_eval_batch_size`: 8
+ - `per_gpu_train_batch_size`: None
+ - `per_gpu_eval_batch_size`: None
+ - `gradient_accumulation_steps`: 8
+ - `eval_accumulation_steps`: None
+ - `torch_empty_cache_steps`: None
+ - `learning_rate`: 3e-05
+ - `weight_decay`: 0.0
+ - `adam_beta1`: 0.9
+ - `adam_beta2`: 0.999
+ - `adam_epsilon`: 1e-08
+ - `max_grad_norm`: 1.0
+ - `num_train_epochs`: 1
+ - `max_steps`: -1
+ - `lr_scheduler_type`: constant_with_warmup
+ - `lr_scheduler_kwargs`: {}
+ - `warmup_ratio`: 0.03
+ - `warmup_steps`: 0
+ - `log_level`: passive
+ - `log_level_replica`: warning
+ - `log_on_each_node`: True
+ - `logging_nan_inf_filter`: True
+ - `save_safetensors`: True
+ - `save_on_each_node`: False
+ - `save_only_model`: False
+ - `restore_callback_states_from_checkpoint`: False
+ - `no_cuda`: False
+ - `use_cpu`: False
+ - `use_mps_device`: False
+ - `seed`: 42
+ - `data_seed`: None
+ - `jit_mode_eval`: False
+ - `use_ipex`: False
+ - `bf16`: True
+ - `fp16`: False
+ - `fp16_opt_level`: O1
+ - `half_precision_backend`: auto
+ - `bf16_full_eval`: False
+ - `fp16_full_eval`: False
+ - `tf32`: None
+ - `local_rank`: 0
+ - `ddp_backend`: None
+ - `tpu_num_cores`: None
+ - `tpu_metrics_debug`: False
+ - `debug`: []
+ - `dataloader_drop_last`: False
+ - `dataloader_num_workers`: 0
+ - `dataloader_prefetch_factor`: None
+ - `past_index`: -1
+ - `disable_tqdm`: False
+ - `remove_unused_columns`: True
+ - `label_names`: None
+ - `load_best_model_at_end`: False
+ - `ignore_data_skip`: False
+ - `fsdp`: []
+ - `fsdp_min_num_params`: 0
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+ - `fsdp_transformer_layer_cls_to_wrap`: None
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+ - `parallelism_config`: None
+ - `deepspeed`: None
+ - `label_smoothing_factor`: 0.0
+ - `optim`: adamw_torch_fused
+ - `optim_args`: None
+ - `adafactor`: False
+ - `group_by_length`: False
+ - `length_column_name`: length
+ - `ddp_find_unused_parameters`: None
+ - `ddp_bucket_cap_mb`: None
+ - `ddp_broadcast_buffers`: False
+ - `dataloader_pin_memory`: True
+ - `dataloader_persistent_workers`: False
+ - `skip_memory_metrics`: True
+ - `use_legacy_prediction_loop`: False
+ - `push_to_hub`: False
+ - `resume_from_checkpoint`: None
+ - `hub_model_id`: None
+ - `hub_strategy`: every_save
+ - `hub_private_repo`: None
+ - `hub_always_push`: False
+ - `hub_revision`: None
+ - `gradient_checkpointing`: False
+ - `gradient_checkpointing_kwargs`: None
+ - `include_inputs_for_metrics`: False
+ - `include_for_metrics`: []
+ - `eval_do_concat_batches`: True
+ - `fp16_backend`: auto
+ - `push_to_hub_model_id`: None
+ - `push_to_hub_organization`: None
+ - `mp_parameters`: 
+ - `auto_find_batch_size`: False
+ - `full_determinism`: False
+ - `torchdynamo`: None
+ - `ray_scope`: last
+ - `ddp_timeout`: 1800
+ - `torch_compile`: False
+ - `torch_compile_backend`: None
+ - `torch_compile_mode`: None
+ - `include_tokens_per_second`: False
+ - `include_num_input_tokens_seen`: False
+ - `neftune_noise_alpha`: None
+ - `optim_target_modules`: None
+ - `batch_eval_metrics`: False
+ - `eval_on_start`: False
+ - `use_liger_kernel`: False
+ - `liger_kernel_config`: None
+ - `eval_use_gather_object`: False
+ - `average_tokens_across_devices`: False
+ - `prompts`: None
+ - `batch_sampler`: no_duplicates
+ - `multi_dataset_batch_sampler`: proportional
+ - `router_mapping`: {}
+ - `learning_rate_mapping`: {}
+ 
+ </details>
+ 
+ ### Training Logs
+ <details><summary>Click to expand</summary>
+ 
+ | Epoch | Step | Training Loss |
+ |:------:|:----:|:-------------:|
+ | 0.0023 | 1 | 0.5184 |
+ | 0.0046 | 2 | 0.5683 |
+ | 0.0069 | 3 | 0.5821 |
+ | 0.0092 | 4 | 0.4948 |
+ | 0.0114 | 5 | 0.4001 |
+ | 0.0137 | 6 | 0.3097 |
+ | 0.0160 | 7 | 0.257 |
+ | 0.0183 | 8 | 0.2752 |
+ | 0.0206 | 9 | 0.2311 |
+ | 0.0229 | 10 | 0.1433 |
+ | 0.0252 | 11 | 0.2507 |
+ | 0.0275 | 12 | 0.1944 |
+ | 0.0297 | 13 | 0.2052 |
+ | 0.0320 | 14 | 0.1044 |
+ | 0.0343 | 15 | 0.2027 |
+ | 0.0366 | 16 | 0.1969 |
+ | 0.0389 | 17 | 0.1833 |
+ | 0.0412 | 18 | 0.1641 |
+ | 0.0435 | 19 | 0.1629 |
+ | 0.0458 | 20 | 0.1702 |
+ | 0.0480 | 21 | 0.1855 |
+ | 0.0503 | 22 | 0.1697 |
+ | 0.0526 | 23 | 0.116 |
+ | 0.0549 | 24 | 0.1373 |
+ | 0.0572 | 25 | 0.1323 |
+ | 0.0595 | 26 | 0.1349 |
+ | 0.0618 | 27 | 0.1199 |
+ | 0.0641 | 28 | 0.1353 |
+ | 0.0663 | 29 | 0.143 |
+ | 0.0686 | 30 | 0.1305 |
+ | 0.0709 | 31 | 0.1088 |
+ | 0.0732 | 32 | 0.0908 |
+ | 0.0755 | 33 | 0.1502 |
+ | 0.0778 | 34 | 0.1139 |
+ | 0.0801 | 35 | 0.1311 |
+ | 0.0824 | 36 | 0.1291 |
+ | 0.0846 | 37 | 0.0977 |
+ | 0.0869 | 38 | 0.0962 |
+ | 0.0892 | 39 | 0.1166 |
+ | 0.0915 | 40 | 0.0965 |
+ | 0.0938 | 41 | 0.1242 |
+ | 0.0961 | 42 | 0.0705 |
+ | 0.0984 | 43 | 0.0813 |
+ | 0.1007 | 44 | 0.1545 |
+ | 0.1029 | 45 | 0.0868 |
+ | 0.1052 | 46 | 0.0987 |
+ | 0.1075 | 47 | 0.0938 |
+ | 0.1098 | 48 | 0.1086 |
+ | 0.1121 | 49 | 0.0982 |
+ | 0.1144 | 50 | 0.0817 |
+ | 0.1167 | 51 | 0.0527 |
+ | 0.1190 | 52 | 0.0986 |
+ | 0.1212 | 53 | 0.098 |
+ | 0.1235 | 54 | 0.1074 |
+ | 0.1258 | 55 | 0.1396 |
+ | 0.1281 | 56 | 0.1101 |
+ | 0.1304 | 57 | 0.0829 |
+ | 0.1327 | 58 | 0.1261 |
+ | 0.1350 | 59 | 0.048 |
+ | 0.1373 | 60 | 0.1215 |
+ | 0.1395 | 61 | 0.0981 |
+ | 0.1418 | 62 | 0.0739 |
+ | 0.1441 | 63 | 0.0525 |
+ | 0.1464 | 64 | 0.0757 |
+ | 0.1487 | 65 | 0.0543 |
+ | 0.1510 | 66 | 0.0878 |
+ | 0.1533 | 67 | 0.0791 |
+ | 0.1556 | 68 | 0.0816 |
+ | 0.1578 | 69 | 0.0999 |
+ | 0.1601 | 70 | 0.086 |
+ | 0.1624 | 71 | 0.0775 |
+ | 0.1647 | 72 | 0.1048 |
+ | 0.1670 | 73 | 0.0552 |
+ | 0.1693 | 74 | 0.0619 |
+ | 0.1716 | 75 | 0.0667 |
+ | 0.1739 | 76 | 0.0787 |
+ | 0.1762 | 77 | 0.1022 |
+ | 0.1784 | 78 | 0.0937 |
+ | 0.1807 | 79 | 0.0751 |
+ | 0.1830 | 80 | 0.0642 |
+ | 0.1853 | 81 | 0.0508 |
+ | 0.1876 | 82 | 0.1169 |
+ | 0.1899 | 83 | 0.09 |
+ | 0.1922 | 84 | 0.0725 |
+ | 0.1945 | 85 | 0.0476 |
+ | 0.1967 | 86 | 0.0737 |
+ | 0.1990 | 87 | 0.0968 |
+ | 0.2013 | 88 | 0.0988 |
+ | 0.2036 | 89 | 0.0575 |
+ | 0.2059 | 90 | 0.0629 |
+ | 0.2082 | 91 | 0.0627 |
+ | 0.2105 | 92 | 0.0565 |
+ | 0.2128 | 93 | 0.0696 |
+ | 0.2150 | 94 | 0.0413 |
+ | 0.2173 | 95 | 0.0625 |
+ | 0.2196 | 96 | 0.0593 |
+ | 0.2219 | 97 | 0.0511 |
+ | 0.2242 | 98 | 0.1168 |
+ | 0.2265 | 99 | 0.0601 |
+ | 0.2288 | 100 | 0.0919 |
+ | 0.2311 | 101 | 0.0471 |
+ | 0.2333 | 102 | 0.0701 |
+ | 0.2356 | 103 | 0.1032 |
+ | 0.2379 | 104 | 0.0823 |
+ | 0.2402 | 105 | 0.0825 |
+ | 0.2425 | 106 | 0.0626 |
+ | 0.2448 | 107 | 0.0821 |
+ | 0.2471 | 108 | 0.0532 |
+ | 0.2494 | 109 | 0.1171 |
+ | 0.2516 | 110 | 0.0814 |
+ | 0.2539 | 111 | 0.1167 |
+ | 0.2562 | 112 | 0.0918 |
+ | 0.2585 | 113 | 0.0704 |
+ | 0.2608 | 114 | 0.0726 |
+ | 0.2631 | 115 | 0.0522 |
+ | 0.2654 | 116 | 0.0628 |
+ | 0.2677 | 117 | 0.0716 |
+ | 0.2699 | 118 | 0.0676 |
+ | 0.2722 | 119 | 0.0616 |
+ | 0.2745 | 120 | 0.0505 |
+ | 0.2768 | 121 | 0.0653 |
+ | 0.2791 | 122 | 0.051 |
+ | 0.2814 | 123 | 0.0888 |
+ | 0.2837 | 124 | 0.1061 |
+ | 0.2860 | 125 | 0.104 |
+ | 0.2882 | 126 | 0.095 |
+ | 0.2905 | 127 | 0.0715 |
+ | 0.2928 | 128 | 0.0766 |
+ | 0.2951 | 129 | 0.076 |
+ | 0.2974 | 130 | 0.1154 |
+ | 0.2997 | 131 | 0.0463 |
+ | 0.3020 | 132 | 0.0596 |
+ | 0.3043 | 133 | 0.0705 |
+ | 0.3065 | 134 | 0.0654 |
+ | 0.3088 | 135 | 0.0802 |
+ | 0.3111 | 136 | 0.0882 |
+ | 0.3134 | 137 | 0.0872 |
+ | 0.3157 | 138 | 0.0853 |
+ | 0.3180 | 139 | 0.0661 |
+ | 0.3203 | 140 | 0.0633 |
+ | 0.3226 | 141 | 0.0784 |
+ | 0.3248 | 142 | 0.0832 |
+ | 0.3271 | 143 | 0.0799 |
+ | 0.3294 | 144 | 0.0954 |
+ | 0.3317 | 145 | 0.0744 |
+ | 0.3340 | 146 | 0.0559 |
+ | 0.3363 | 147 | 0.0892 |
+ | 0.3386 | 148 | 0.0424 |
+ | 0.3409 | 149 | 0.0742 |
+ | 0.3432 | 150 | 0.1025 |
+ | 0.3454 | 151 | 0.0814 |
+ | 0.3477 | 152 | 0.051 |
+ | 0.3500 | 153 | 0.1313 |
+ | 0.3523 | 154 | 0.0645 |
+ | 0.3546 | 155 | 0.1006 |
+ | 0.3569 | 156 | 0.0524 |
+ | 0.3592 | 157 | 0.0635 |
+ | 0.3615 | 158 | 0.0467 |
+ | 0.3637 | 159 | 0.0741 |
+ | 0.3660 | 160 | 0.0593 |
+ | 0.3683 | 161 | 0.0698 |
+ | 0.3706 | 162 | 0.0835 |
+ | 0.3729 | 163 | 0.0715 |
+ | 0.3752 | 164 | 0.0628 |
+ | 0.3775 | 165 | 0.0772 |
+ | 0.3798 | 166 | 0.1167 |
+ | 0.3820 | 167 | 0.0981 |
+ | 0.3843 | 168 | 0.0595 |
+ | 0.3866 | 169 | 0.041 |
+ | 0.3889 | 170 | 0.0728 |
+ | 0.3912 | 171 | 0.0937 |
+ | 0.3935 | 172 | 0.0757 |
+ | 0.3958 | 173 | 0.0603 |
+ | 0.3981 | 174 | 0.0542 |
+ | 0.4003 | 175 | 0.0701 |
+ | 0.4026 | 176 | 0.0372 |
+ | 0.4049 | 177 | 0.125 |
+ | 0.4072 | 178 | 0.0545 |
+ | 0.4095 | 179 | 0.0476 |
+ | 0.4118 | 180 | 0.0516 |
+ | 0.4141 | 181 | 0.1243 |
+ | 0.4164 | 182 | 0.0599 |
+ | 0.4186 | 183 | 0.1026 |
+ | 0.4209 | 184 | 0.077 |
+ | 0.4232 | 185 | 0.0732 |
+ | 0.4255 | 186 | 0.0798 |
+ | 0.4278 | 187 | 0.0538 |
+ | 0.4301 | 188 | 0.0679 |
+ | 0.4324 | 189 | 0.0759 |
+ | 0.4347 | 190 | 0.0761 |
+ | 0.4369 | 191 | 0.0557 |
+ | 0.4392 | 192 | 0.0534 |
+ | 0.4415 | 193 | 0.0747 |
+ | 0.4438 | 194 | 0.0672 |
+ | 0.4461 | 195 | 0.0376 |
+ | 0.4484 | 196 | 0.0466 |
+ | 0.4507 | 197 | 0.0783 |
+ | 0.4530 | 198 | 0.0864 |
+ | 0.4552 | 199 | 0.0423 |
+ | 0.4575 | 200 | 0.0708 |
+ | 0.4598 | 201 | 0.0429 |
+ | 0.4621 | 202 | 0.0718 |
+ | 0.4644 | 203 | 0.0802 |
+ | 0.4667 | 204 | 0.073 |
+ | 0.4690 | 205 | 0.0628 |
+ | 0.4713 | 206 | 0.055 |
+ | 0.4735 | 207 | 0.0468 |
+ | 0.4758 | 208 | 0.0536 |
+ | 0.4781 | 209 | 0.0429 |
+ | 0.4804 | 210 | 0.0388 |
+ | 0.4827 | 211 | 0.0962 |
+ | 0.4850 | 212 | 0.0475 |
+ | 0.4873 | 213 | 0.0589 |
+ | 0.4896 | 214 | 0.0606 |
+ | 0.4919 | 215 | 0.0512 |
+ | 0.4941 | 216 | 0.0836 |
+ | 0.4964 | 217 | 0.0659 |
+ | 0.4987 | 218 | 0.0924 |
+ | 0.5010 | 219 | 0.0711 |
+ | 0.5033 | 220 | 0.0676 |
+ | 0.5056 | 221 | 0.0393 |
+ | 0.5079 | 222 | 0.0668 |
+ | 0.5102 | 223 | 0.0511 |
+ | 0.5124 | 224 | 0.0575 |
+ | 0.5147 | 225 | 0.0594 |
+ | 0.5170 | 226 | 0.126 |
+ | 0.5193 | 227 | 0.0787 |
+ | 0.5216 | 228 | 0.0509 |
+ | 0.5239 | 229 | 0.0684 |
+ | 0.5262 | 230 | 0.0792 |
+ | 0.5285 | 231 | 0.0501 |
+ | 0.5307 | 232 | 0.0988 |
+ | 0.5330 | 233 | 0.0414 |
+ | 0.5353 | 234 | 0.0596 |
+ | 0.5376 | 235 | 0.0607 |
+ | 0.5399 | 236 | 0.0556 |
+ | 0.5422 | 237 | 0.0578 |
+ | 0.5445 | 238 | 0.0238 |
+ | 0.5468 | 239 | 0.0509 |
+ | 0.5490 | 240 | 0.0431 |
+ | 0.5513 | 241 | 0.0377 |
+ | 0.5536 | 242 | 0.0814 |
+ | 0.5559 | 243 | 0.0779 |
+ | 0.5582 | 244 | 0.0574 |
+ | 0.5605 | 245 | 0.0681 |
+ | 0.5628 | 246 | 0.0513 |
+ | 0.5651 | 247 | 0.0573 |
+ | 0.5673 | 248 | 0.0758 |
+ | 0.5696 | 249 | 0.0442 |
+ | 0.5719 | 250 | 0.0458 |
+ | 0.5742 | 251 | 0.0853 |
+ | 0.5765 | 252 | 0.0825 |
+ | 0.5788 | 253 | 0.065 |
+ | 0.5811 | 254 | 0.0429 |
+ | 0.5834 | 255 | 0.0438 |
+ | 0.5856 | 256 | 0.1028 |
+ | 0.5879 | 257 | 0.04 |
+ | 0.5902 | 258 | 0.0406 |
+ | 0.5925 | 259 | 0.0465 |
+ | 0.5948 | 260 | 0.068 |
+ | 0.5971 | 261 | 0.0532 |
+ | 0.5994 | 262 | 0.0503 |
+ | 0.6017 | 263 | 0.0421 |
+ | 0.6039 | 264 | 0.0663 |
+ | 0.6062 | 265 | 0.0621 |
+ | 0.6085 | 266 | 0.0845 |
+ | 0.6108 | 267 | 0.049 |
+ | 0.6131 | 268 | 0.0503 |
+ | 0.6154 | 269 | 0.0392 |
+ | 0.6177 | 270 | 0.0505 |
+ | 0.6200 | 271 | 0.0594 |
+ | 0.6222 | 272 | 0.0573 |
+ | 0.6245 | 273 | 0.0383 |
+ | 0.6268 | 274 | 0.0568 |
+ | 0.6291 | 275 | 0.0386 |
+ | 0.6314 | 276 | 0.0573 |
+ | 0.6337 | 277 | 0.0397 |
+ | 0.6360 | 278 | 0.0459 |
+ | 0.6383 | 279 | 0.0624 |
+ | 0.6405 | 280 | 0.0706 |
+ | 0.6428 | 281 | 0.0743 |
+ | 0.6451 | 282 | 0.0405 |
+ | 0.6474 | 283 | 0.0761 |
+ | 0.6497 | 284 | 0.0583 |
+ | 0.6520 | 285 | 0.0444 |
+ | 0.6543 | 286 | 0.0305 |
+ | 0.6566 | 287 | 0.0716 |
+ | 0.6589 | 288 | 0.041 |
+ | 0.6611 | 289 | 0.043 |
+ | 0.6634 | 290 | 0.0574 |
+ | 0.6657 | 291 | 0.0479 |
+ | 0.6680 | 292 | 0.062 |
+ | 0.6703 | 293 | 0.0441 |
+ | 0.6726 | 294 | 0.0657 |
+ | 0.6749 | 295 | 0.0515 |
+ | 0.6772 | 296 | 0.0718 |
+ | 0.6794 | 297 | 0.0839 |
+ | 0.6817 | 298 | 0.0751 |
+ | 0.6840 | 299 | 0.073 |
+ | 0.6863 | 300 | 0.0656 |
+ | 0.6886 | 301 | 0.0717 |
+ | 0.6909 | 302 | 0.0457 |
+ | 0.6932 | 303 | 0.0761 |
+ | 0.6955 | 304 | 0.0557 |
+ | 0.6977 | 305 | 0.0646 |
+ | 0.7000 | 306 | 0.0688 |
+ | 0.7023 | 307 | 0.0396 |
+ | 0.7046 | 308 | 0.0444 |
+ | 0.7069 | 309 | 0.0627 |
+ | 0.7092 | 310 | 0.0594 |
+ | 0.7115 | 311 | 0.0496 |
+ | 0.7138 | 312 | 0.0406 |
+ | 0.7160 | 313 | 0.0513 |
+ | 0.7183 | 314 | 0.0483 |
+ | 0.7206 | 315 | 0.0527 |
+ | 0.7229 | 316 | 0.0646 |
+ | 0.7252 | 317 | 0.0351 |
+ | 0.7275 | 318 | 0.0432 |
+ | 0.7298 | 319 | 0.06 |
+ | 0.7321 | 320 | 0.0487 |
+ | 0.7343 | 321 | 0.0398 |
+ | 0.7366 | 322 | 0.0279 |
+ | 0.7389 | 323 | 0.0594 |
+ | 0.7412 | 324 | 0.0808 |
+ | 0.7435 | 325 | 0.0461 |
+ | 0.7458 | 326 | 0.0452 |
+ | 0.7481 | 327 | 0.0887 |
+ | 0.7504 | 328 | 0.057 |
+ | 0.7526 | 329 | 0.082 |
+ | 0.7549 | 330 | 0.0693 |
+ | 0.7572 | 331 | 0.0245 |
+ | 0.7595 | 332 | 0.0476 |
+ | 0.7618 | 333 | 0.051 |
+ | 0.7641 | 334 | 0.0539 |
+ | 0.7664 | 335 | 0.0325 |
+ | 0.7687 | 336 | 0.0431 |
+ | 0.7709 | 337 | 0.0534 |
+ | 0.7732 | 338 | 0.0346 |
+ | 0.7755 | 339 | 0.0577 |
+ | 0.7778 | 340 | 0.086 |
+ | 0.7801 | 341 | 0.0705 |
+ | 0.7824 | 342 | 0.0412 |
+ | 0.7847 | 343 | 0.0426 |
+ | 0.7870 | 344 | 0.0829 |
+ | 0.7892 | 345 | 0.0767 |
+ | 0.7915 | 346 | 0.0702 |
+ | 0.7938 | 347 | 0.0662 |
+ | 0.7961 | 348 | 0.0436 |
+ | 0.7984 | 349 | 0.0292 |
+ | 0.8007 | 350 | 0.0586 |
+ | 0.8030 | 351 | 0.0416 |
+ | 0.8053 | 352 | 0.0874 |
+ | 0.8075 | 353 | 0.0378 |
+ | 0.8098 | 354 | 0.036 |
+ | 0.8121 | 355 | 0.0426 |
+ | 0.8144 | 356 | 0.0375 |
+ | 0.8167 | 357 | 0.0296 |
+ | 0.8190 | 358 | 0.0535 |
+ | 0.8213 | 359 | 0.0654 |
+ | 0.8236 | 360 | 0.0756 |
+ | 0.8259 | 361 | 0.0591 |
+ | 0.8281 | 362 | 0.0603 |
+ | 0.8304 | 363 | 0.0664 |
+ | 0.8327 | 364 | 0.0403 |
+ | 0.8350 | 365 | 0.0418 |
+ | 0.8373 | 366 | 0.047 |
+ | 0.8396 | 367 | 0.077 |
+ | 0.8419 | 368 | 0.0597 |
+ | 0.8442 | 369 | 0.0683 |
+ | 0.8464 | 370 | 0.0557 |
+ | 0.8487 | 371 | 0.0487 |
+ | 0.8510 | 372 | 0.0499 |
+ | 0.8533 | 373 | 0.0328 |
+ | 0.8556 | 374 | 0.0211 |
+ | 0.8579 | 375 | 0.0411 |
+ | 0.8602 | 376 | 0.0648 |
+ | 0.8625 | 377 | 0.0583 |
+ | 0.8647 | 378 | 0.0483 |
+ | 0.8670 | 379 | 0.0362 |
+ | 0.8693 | 380 | 0.0616 |
+ | 0.8716 | 381 | 0.0634 |
+ | 0.8739 | 382 | 0.0542 |
+ | 0.8762 | 383 | 0.053 |
+ | 0.8785 | 384 | 0.0436 |
+ | 0.8808 | 385 | 0.0426 |
+ | 0.8830 | 386 | 0.0503 |
+ | 0.8853 | 387 | 0.0522 |
+ | 0.8876 | 388 | 0.083 |
+ | 0.8899 | 389 | 0.0317 |
+ | 0.8922 | 390 | 0.0571 |
+ | 0.8945 | 391 | 0.0464 |
+ | 0.8968 | 392 | 0.0179 |
+ | 0.8991 | 393 | 0.0389 |
+ | 0.9013 | 394 | 0.0317 |
+ | 0.9036 | 395 | 0.0605 |
+ | 0.9059 | 396 | 0.0389 |
+ | 0.9082 | 397 | 0.0407 |
+ | 0.9105 | 398 | 0.0478 |
+ | 0.9128 | 399 | 0.0304 |
+ | 0.9151 | 400 | 0.0572 |
+ | 0.9174 | 401 | 0.037 |
+ | 0.9196 | 402 | 0.062 |
+ | 0.9219 | 403 | 0.0539 |
+ | 0.9242 | 404 | 0.039 |
+ | 0.9265 | 405 | 0.0265 |
+ | 0.9288 | 406 | 0.0398 |
+ | 0.9311 | 407 | 0.0369 |
+ | 0.9334 | 408 | 0.053 |
+ | 0.9357 | 409 | 0.0503 |
+ | 0.9379 | 410 | 0.0535 |
+ | 0.9402 | 411 | 0.0645 |
+ | 0.9425 | 412 | 0.0328 |
+ | 0.9448 | 413 | 0.0438 |
+ | 0.9471 | 414 | 0.0435 |
+ | 0.9494 | 415 | 0.1018 |
+ | 0.9517 | 416 | 0.0403 |
+ | 0.9540 | 417 | 0.0577 |
+ | 0.9562 | 418 | 0.0234 |
+ | 0.9585 | 419 | 0.041 |
+ | 0.9608 | 420 | 0.0226 |
+ | 0.9631 | 421 | 0.0497 |
+ | 0.9654 | 422 | 0.0493 |
+ | 0.9677 | 423 | 0.0223 |
+ | 0.9700 | 424 | 0.0192 |
+ | 0.9723 | 425 | 0.0322 |
+ | 0.9745 | 426 | 0.0483 |
+ | 0.9768 | 427 | 0.041 |
+ | 0.9791 | 428 | 0.0628 |
+ | 0.9814 | 429 | 0.0861 |
+ | 0.9837 | 430 | 0.0645 |
+ | 0.9860 | 431 | 0.0386 |
+ | 0.9883 | 432 | 0.0378 |
+ | 0.9906 | 433 | 0.0613 |
+ | 0.9929 | 434 | 0.067 |
+ | 0.9951 | 435 | 0.049 |
+ | 0.9974 | 436 | 0.0644 |
+ | 0.9997 | 437 | 0.02 |
+ | 1.0 | 438 | 0.0001 |
+ 
+ </details>
+ 
+ ### Framework Versions
+ - Python: 3.12.3
+ - Sentence Transformers: 5.3.0
+ - Transformers: 4.56.2
+ - PyTorch: 2.10.0+cu128
+ - Accelerate: 1.13.0
+ - Datasets: 4.3.0
+ - Tokenizers: 0.22.2
+ 
+ ## Citation
+ 
+ ### BibTeX
+ 
+ #### Sentence Transformers
+ ```bibtex
+ @inproceedings{reimers-2019-sentence-bert,
+     title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+     author = "Reimers, Nils and Gurevych, Iryna",
+     booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+     month = "11",
+     year = "2019",
+     publisher = "Association for Computational Linguistics",
+     url = "https://arxiv.org/abs/1908.10084",
+ }
+ ```
+ 
+ #### MultipleNegativesRankingLoss
+ ```bibtex
+ @misc{oord2019representationlearningcontrastivepredictive,
+     title={Representation Learning with Contrastive Predictive Coding},
+     author={Aaron van den Oord and Yazhe Li and Oriol Vinyals},
+     year={2019},
+     eprint={1807.03748},
+     archivePrefix={arXiv},
+     primaryClass={cs.LG},
+     url={https://arxiv.org/abs/1807.03748},
+ }
+ ```
+ 
+ <!--
+ ## Glossary
+ 
+ *Clearly define terms in order to be accessible across audiences.*
+ -->
+ 
+ <!--
+ ## Model Card Authors
+ 
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+ -->
+ 
+ <!--
+ ## Model Card Contact
+ 
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+ -->
added_tokens.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "</tool_call>": 151658,
+   "<tool_call>": 151657,
+   "<|box_end|>": 151649,
+   "<|box_start|>": 151648,
+   "<|endoftext|>": 151643,
+   "<|file_sep|>": 151664,
+   "<|fim_middle|>": 151660,
+   "<|fim_pad|>": 151662,
+   "<|fim_prefix|>": 151659,
+   "<|fim_suffix|>": 151661,
+   "<|im_end|>": 151645,
+   "<|im_start|>": 151644,
+   "<|image_pad|>": 151655,
+   "<|object_ref_end|>": 151647,
+   "<|object_ref_start|>": 151646,
+   "<|quad_end|>": 151651,
+   "<|quad_start|>": 151650,
+   "<|repo_name|>": 151663,
+   "<|video_pad|>": 151656,
+   "<|vision_end|>": 151653,
+   "<|vision_pad|>": 151654,
+   "<|vision_start|>": 151652
+ }
chat_template.jinja ADDED
@@ -0,0 +1,54 @@
+ {%- if tools %}
+     {{- '<|im_start|>system\n' }}
+     {%- if messages[0]['role'] == 'system' %}
+         {{- messages[0]['content'] }}
+     {%- else %}
+         {{- 'You are a helpful assistant.' }}
+     {%- endif %}
+     {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+     {%- for tool in tools %}
+         {{- "\n" }}
+         {{- tool | tojson }}
+     {%- endfor %}
+     {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+ {%- else %}
+     {%- if messages[0]['role'] == 'system' %}
+         {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
+     {%- else %}
+         {{- '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}
+     {%- endif %}
+ {%- endif %}
+ {%- for message in messages %}
+     {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
+         {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
+     {%- elif message.role == "assistant" %}
+         {{- '<|im_start|>' + message.role }}
+         {%- if message.content %}
+             {{- '\n' + message.content }}
+         {%- endif %}
+         {%- for tool_call in message.tool_calls %}
+             {%- if tool_call.function is defined %}
+                 {%- set tool_call = tool_call.function %}
+             {%- endif %}
+             {{- '\n<tool_call>\n{"name": "' }}
+             {{- tool_call.name }}
+             {{- '", "arguments": ' }}
+             {{- tool_call.arguments | tojson }}
+             {{- '}\n</tool_call>' }}
+         {%- endfor %}
+         {{- '<|im_end|>\n' }}
+     {%- elif message.role == "tool" %}
+         {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
+             {{- '<|im_start|>user' }}
+         {%- endif %}
+         {{- '\n<tool_response>\n' }}
+         {{- message.content }}
+         {{- '\n</tool_response>' }}
+         {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+             {{- '<|im_end|>\n' }}
+         {%- endif %}
+     {%- endif %}
+ {%- endfor %}
+ {%- if add_generation_prompt %}
+     {{- '<|im_start|>assistant\n' }}
+ {%- endif %}
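
Note: this is the stock Qwen ChatML template shipped alongside the tokenizer; embedding inference does not use it, but it can be exercised through the tokenizer as usual. A minimal sketch, assuming the repository is available locally or on the Hub (the id below is a placeholder):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("path-or-repo-id")  # placeholder id
text = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello"}],
    tokenize=False,
    add_generation_prompt=True,  # appends '<|im_start|>assistant\n'
)
print(text)  # ChatML-formatted string with the default system prompt
```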
config.json ADDED
@@ -0,0 +1,71 @@
+ {
+   "architectures": [
+     "Qwen3ForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 151643,
+   "torch_dtype": "bfloat16",
+   "eos_token_id": 151645,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 2560,
+   "initializer_range": 0.02,
+   "intermediate_size": 9728,
+   "layer_types": [
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention"
+   ],
+   "max_position_embeddings": 40960,
+   "max_window_layers": 36,
+   "model_name": "unsloth/Qwen3-Embedding-4B",
+   "model_type": "qwen3",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 36,
+   "num_key_value_heads": 8,
+   "pad_token_id": 151643,
+   "rms_norm_eps": 1e-06,
+   "rope_scaling": null,
+   "rope_theta": 1000000,
+   "sliding_window": null,
+   "tie_word_embeddings": true,
+   "tokenizer_class": "Qwen2TokenizerFast",
+   "unsloth_version": "2026.3.8",
+   "use_cache": true,
+   "use_sliding_window": false,
+   "vocab_size": 151665
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
+ {
+   "model_type": "SentenceTransformer",
+   "__version__": {
+     "sentence_transformers": "5.3.0",
+     "transformers": "4.56.2",
+     "pytorch": "2.10.0+cu128"
+   },
+   "prompts": {
+     "query": "",
+     "document": ""
+   },
+   "default_prompt_name": null,
+   "similarity_fn_name": "cosine"
+ }
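
Note: both the `query` and `document` prompts are empty strings, so encoding with a prompt name prepends nothing and queries and documents are embedded identically. A small illustration (the model id is a placeholder):

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("path-or-repo-id")  # placeholder id
q = model.encode(["vein"], prompt_name="query")            # empty prompt -> no prefix added
d = model.encode(["blood vessel"], prompt_name="document") # likewise
print(model.similarity(q, d))                              # cosine, per similarity_fn_name
```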
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:592e692c4f1ff6b613f02ea0a77535be028a142e149547834eb8a87c2ddb762d
+ size 4965826464
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:10bc48f56d7dd975b3feefc30c9e457e56a30405c1ce01d152b1744905d89069
+ size 3077765624
model.safetensors.index.json ADDED
@@ -0,0 +1,405 @@
+ {
+   "metadata": {
+     "total_size": 8043548672
+   },
+   "weight_map": {
+     "embed_tokens.weight": "model-00001-of-00002.safetensors",
+     "layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.0.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.0.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.1.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.1.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.10.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.10.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.11.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.11.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.12.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.12.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.13.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.13.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.14.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.14.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.15.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.15.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.16.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.16.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.17.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.17.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.18.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.18.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.19.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.19.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.2.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.2.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "layers.20.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.20.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "layers.21.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+     "layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.21.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+     "layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "layers.22.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+     "layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.22.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+     "layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "layers.23.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+     "layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.23.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+     "layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "layers.24.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+     "layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.24.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+     "layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "layers.25.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+     "layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.25.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+     "layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "layers.26.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+     "layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.26.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+     "layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "layers.27.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+     "layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.27.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+     "layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "layers.28.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+     "layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.28.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+     "layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "layers.29.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+     "layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.29.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+     "layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "layers.3.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+     "layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
267
+ "layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
268
+ "layers.3.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
269
+ "layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
270
+ "layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
271
+ "layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
272
+ "layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
273
+ "layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
274
+ "layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
275
+ "layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
276
+ "layers.30.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
277
+ "layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
278
+ "layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
279
+ "layers.30.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
280
+ "layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
281
+ "layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
282
+ "layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
283
+ "layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
284
+ "layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
285
+ "layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
286
+ "layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
287
+ "layers.31.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
288
+ "layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
289
+ "layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
290
+ "layers.31.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
291
+ "layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
292
+ "layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
293
+ "layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors",
294
+ "layers.32.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
295
+ "layers.32.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
296
+ "layers.32.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
297
+ "layers.32.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
298
+ "layers.32.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
299
+ "layers.32.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
300
+ "layers.32.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
301
+ "layers.32.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
302
+ "layers.32.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
303
+ "layers.32.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
304
+ "layers.33.input_layernorm.weight": "model-00002-of-00002.safetensors",
305
+ "layers.33.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
306
+ "layers.33.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
307
+ "layers.33.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
308
+ "layers.33.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
309
+ "layers.33.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
310
+ "layers.33.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
311
+ "layers.33.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
312
+ "layers.33.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
313
+ "layers.33.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
314
+ "layers.33.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
315
+ "layers.34.input_layernorm.weight": "model-00002-of-00002.safetensors",
316
+ "layers.34.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
317
+ "layers.34.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
318
+ "layers.34.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
319
+ "layers.34.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
320
+ "layers.34.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
321
+ "layers.34.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
322
+ "layers.34.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
323
+ "layers.34.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
324
+ "layers.34.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
325
+ "layers.34.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
326
+ "layers.35.input_layernorm.weight": "model-00002-of-00002.safetensors",
327
+ "layers.35.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
328
+ "layers.35.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
329
+ "layers.35.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
330
+ "layers.35.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
331
+ "layers.35.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
332
+ "layers.35.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
333
+ "layers.35.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
334
+ "layers.35.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
335
+ "layers.35.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
336
+ "layers.35.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
337
+ "layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
338
+ "layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
339
+ "layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
340
+ "layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
341
+ "layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
342
+ "layers.4.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
343
+ "layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
344
+ "layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
345
+ "layers.4.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
346
+ "layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
347
+ "layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
348
+ "layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
349
+ "layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
350
+ "layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
351
+ "layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
352
+ "layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
353
+ "layers.5.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
354
+ "layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
355
+ "layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
356
+ "layers.5.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
357
+ "layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
358
+ "layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
359
+ "layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
360
+ "layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
361
+ "layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
362
+ "layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
363
+ "layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
364
+ "layers.6.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
365
+ "layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
366
+ "layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
367
+ "layers.6.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
368
+ "layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
369
+ "layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
370
+ "layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
371
+ "layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
372
+ "layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
373
+ "layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
374
+ "layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
375
+ "layers.7.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
376
+ "layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
377
+ "layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
378
+ "layers.7.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
379
+ "layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
380
+ "layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
381
+ "layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
382
+ "layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
383
+ "layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
384
+ "layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
385
+ "layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
386
+ "layers.8.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
387
+ "layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
388
+ "layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
389
+ "layers.8.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
390
+ "layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
391
+ "layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
392
+ "layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
393
+ "layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
394
+ "layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
395
+ "layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
396
+ "layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
397
+ "layers.9.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
398
+ "layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
399
+ "layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
400
+ "layers.9.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
401
+ "layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
402
+ "layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
403
+ "norm.weight": "model-00002-of-00002.safetensors"
404
+ }
405
+ }
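The `weight_map` above is the routing table for sharded checkpoints: each tensor name points at the shard file that stores it, so a loader only has to open the shards it actually needs. A minimal sketch of consuming such an index directly, assuming the two shard files and `model.safetensors.index.json` sit in the current directory (`transformers` and `sentence-transformers` normally do this resolution automatically):

```python
import json
from safetensors import safe_open

# Read the index that maps every tensor name to its shard file.
with open("model.safetensors.index.json") as f:
    index = json.load(f)
weight_map = index["weight_map"]

# Load one tensor by opening only the shard that contains it.
name = "layers.23.self_attn.q_proj.weight"  # any key from weight_map
with safe_open(weight_map[name], framework="pt") as shard:
    tensor = shard.get_tensor(name)
print(name, tuple(tensor.shape))
```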
modules.json ADDED
@@ -0,0 +1,20 @@
+ [
+ {
+ "idx": 0,
+ "name": "0",
+ "path": "",
+ "type": "sentence_transformers.models.Transformer"
+ },
+ {
+ "idx": 1,
+ "name": "1",
+ "path": "1_Pooling",
+ "type": "sentence_transformers.models.Pooling"
+ },
+ {
+ "idx": 2,
+ "name": "2",
+ "path": "2_Normalize",
+ "type": "sentence_transformers.models.Normalize"
+ }
+ ]
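`modules.json` is what lets `SentenceTransformer` reassemble the encoder: module 0 is the transformer backbone at the repo root (`path: ""`), module 1 applies the pooling configured in `1_Pooling/config.json`, and module 2 L2-normalizes the result. A rough hand-built equivalent, assuming a hypothetical local checkpoint directory `./checkpoint`:

```python
from sentence_transformers import SentenceTransformer, models

# Module 0: transformer backbone (modules.json path "" = repo root).
transformer = models.Transformer("./checkpoint", max_seq_length=8192)

# Module 1: last-token pooling, mirroring 1_Pooling/config.json.
pooling = models.Pooling(
    transformer.get_word_embedding_dimension(),
    pooling_mode="lasttoken",
)

# Module 2: L2-normalize so cosine similarity reduces to a dot product.
normalize = models.Normalize()

model = SentenceTransformer(modules=[transformer, pooling, normalize])
embeddings = model.encode(["example sentence"])
```

In practice, passing the repo id or local path straight to `SentenceTransformer(...)` reads `modules.json` and builds this pipeline for you.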
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+ "max_seq_length": 8192,
+ "do_lower_case": false
+ }
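This caps encoder input at 8192 tokens (longer texts are truncated, not split) and disables lowercasing. The limit is also exposed at runtime; a small sketch, assuming the same hypothetical `./checkpoint` path:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("./checkpoint")  # hypothetical local path
print(model.max_seq_length)  # 8192, from sentence_bert_config.json

# Can be lowered to trade context length for speed and memory.
model.max_seq_length = 4096
```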
special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
+ {
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|object_ref_start|>",
+ "<|object_ref_end|>",
+ "<|box_start|>",
+ "<|box_end|>",
+ "<|quad_start|>",
+ "<|quad_end|>",
+ "<|vision_start|>",
+ "<|vision_end|>",
+ "<|vision_pad|>",
+ "<|image_pad|>",
+ "<|video_pad|>"
+ ],
+ "eos_token": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
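The map designates `<|im_end|>` (the ChatML turn terminator) as EOS and reuses `<|endoftext|>` for padding, while the vision/grounding markers are registered as additional special tokens so they survive tokenization intact. A quick check, again assuming a hypothetical local checkpoint path:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./checkpoint")  # hypothetical path
print(tok.eos_token, tok.eos_token_id)  # <|im_end|>
print(tok.pad_token, tok.pad_token_id)  # <|endoftext|>
print(tok.additional_special_tokens)
```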
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:17af9bd30dbbda177eda5d8835f90e4277910bedd0011f50077acee58008d28a
+ size 11423213
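These three lines are a standard Git LFS pointer (spec version, SHA-256 object id, byte size), not the tokenizer itself; the real ~11 MB `tokenizer.json` lives in LFS storage. Downloads through `huggingface_hub` resolve the pointer transparently; a minimal sketch with a placeholder repo id:

```python
from huggingface_hub import hf_hub_download

# Placeholder repo id; hf_hub_download follows the LFS pointer and
# returns a local path to the real ~11 MB tokenizer.json.
path = hf_hub_download(repo_id="<user>/<model>", filename="tokenizer.json")
print(path)
```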
tokenizer_config.json ADDED
@@ -0,0 +1,209 @@
+ {
+ "add_bos_token": false,
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "151643": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151644": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151645": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151646": {
+ "content": "<|object_ref_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151647": {
+ "content": "<|object_ref_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151648": {
+ "content": "<|box_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151649": {
+ "content": "<|box_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151650": {
+ "content": "<|quad_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151651": {
+ "content": "<|quad_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151652": {
+ "content": "<|vision_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151653": {
+ "content": "<|vision_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151654": {
+ "content": "<|vision_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151655": {
+ "content": "<|image_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151656": {
+ "content": "<|video_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151657": {
+ "content": "<tool_call>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151658": {
+ "content": "</tool_call>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151659": {
+ "content": "<|fim_prefix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151660": {
+ "content": "<|fim_middle|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151661": {
+ "content": "<|fim_suffix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151662": {
+ "content": "<|fim_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151663": {
+ "content": "<|repo_name|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151664": {
+ "content": "<|file_sep|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ }
+ },
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|object_ref_start|>",
+ "<|object_ref_end|>",
+ "<|box_start|>",
+ "<|box_end|>",
+ "<|quad_start|>",
+ "<|quad_end|>",
+ "<|vision_start|>",
+ "<|vision_end|>",
+ "<|vision_pad|>",
+ "<|image_pad|>",
+ "<|video_pad|>"
+ ],
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "extra_special_tokens": {},
+ "model_max_length": 131072,
+ "pad_token": "<|endoftext|>",
+ "padding_side": "left",
+ "split_special_tokens": false,
+ "tokenizer_class": "Qwen2Tokenizer",
+ "unk_token": null,
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n"
+ }
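Notable settings: `tokenizer_class` is `Qwen2Tokenizer`, `padding_side` is `left` (which keeps the final content token in a fixed position, as last-token pooling requires), and the embedded `chat_template` renders ChatML with optional tool-calling. A sketch of rendering the template, assuming a hypothetical local checkpoint path:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./checkpoint")  # hypothetical path
messages = [{"role": "user", "content": "Hello!"}]
text = tok.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(text)
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# Hello!<|im_end|>
# <|im_start|>assistant
```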
vocab.json ADDED
The diff for this file is too large to render. See raw diff