Upload 6 files

Files changed (6) hide show

README.md ADDED Viewed

+---
+base_model:
+- Qwen/Qwen2.5-1.5B-Instruct
+- anirvankrishna/model_sft_lora_fused
+library_name: transformers
+tags:
+- mergekit
+- merge
+---
+# temp_dare_merge
+This is a merge of pre-trained language models created using [mergekit](https://github.com/arcee-ai/mergekit).
+## Merge Details
+### Merge Method
+This model was merged with the [DARE TIES](https://arxiv.org/abs/2311.03099) merge method, using [Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) as the base model.
+### Models Merged
+The following models were included in the merge:
+* [anirvankrishna/model_sft_lora_fused](https://huggingface.co/anirvankrishna/model_sft_lora_fused)
+### Configuration
+The following YAML configuration was used to produce this model:
+```yaml
+base_model: Qwen/Qwen2.5-1.5B-Instruct
+chat_template: chatml
+dtype: bfloat16
+merge_method: dare_ties
+modules:
+  default:
+    slices:
+    - sources:
+      - layer_range: [0, 28]
+        model: Qwen/Qwen2.5-1.5B-Instruct
+      - layer_range: [0, 28]
+        model: anirvankrishna/model_sft_lora_fused
+        parameters:
+          density: 0.30000000000000004
+          weight: 1.0
+```

chat_template.jinja ADDED Viewed


1	+ {% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}
2	+ {% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}

config.json ADDED Viewed

+{
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "dtype": "bfloat16",
+  "eos_token_id": 151645,
+  "hidden_act": "silu",
+  "hidden_size": 1536,
+  "initializer_range": 0.02,
+  "intermediate_size": 8960,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 21,
+  "model_type": "qwen2",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 2,
+  "pad_token_id": null,
+  "rms_norm_eps": 1e-06,
+  "rope_parameters": {
+    "rope_theta": 1000000.0,
+    "rope_type": "default"
+  },
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "transformers_version": "5.0.0",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}

mergekit_config.yml ADDED Viewed

+base_model: Qwen/Qwen2.5-1.5B-Instruct
+chat_template: chatml
+dtype: bfloat16
+merge_method: dare_ties
+modules:
+  default:
+    slices:
+    - sources:
+      - layer_range: [0, 28]
+        model: Qwen/Qwen2.5-1.5B-Instruct
+      - layer_range: [0, 28]
+        model: anirvankrishna/model_sft_lora_fused
+        parameters:
+          density: 0.30000000000000004
+          weight: 1.0

model.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:4d9e8b8b40f7ca9d3797040e13f6d7bd2a9ccdc0f578f635bc2180a5bc160d29
+size 3087467144

tokenizer_config.json ADDED Viewed

+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "is_local": false,
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}