lainlives committed on
Commit
30dc752
·
verified ·
1 Parent(s): 1335b9d

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model:
3
+ - unsloth/codegemma-7b-it
4
+ language:
5
+ - en
6
+ library_name: transformers
7
+ license: apache-2.0
8
+ tags:
9
+ - bnb-my-repo
10
+ - unsloth
11
+ - transformers
12
+ - gemma
13
+ - bnb
14
+ ---
15
+ # unsloth/codegemma-7b-it (Quantized)
16
+
17
+ ## Description
18
+ This model is a 4-bit (NF4) bitsandbytes-quantized version of the original model [`unsloth/codegemma-7b-it`](https://huggingface.co/unsloth/codegemma-7b-it).
19
+
20
+ ## Quantization Details
21
+ - **Quantization Type**: int4
22
+ - **bnb_4bit_quant_type**: nf4
23
+ - **bnb_4bit_use_double_quant**: True
24
+ - **bnb_4bit_compute_dtype**: bfloat16
25
+ - **bnb_4bit_quant_storage**: uint8
26
+
27
+
28
+
29
+ # 📄 Original Model Information
30
+
31
+
32
+
33
+ # Finetune Mistral, Gemma, Llama 2-5x faster with 70% less memory via Unsloth!
34
+
35
+ We have a Google Colab Tesla T4 notebook for CodeGemma 7b here: https://colab.research.google.com/drive/19lwcRk_ZQ_ZtX-qzFP3qZBBHZNcMD1hh?usp=sharing
36
+
37
+ [<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/Discord%20button.png" width="200"/>](https://discord.gg/u54VK8m8tk)
38
+ [<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/buy%20me%20a%20coffee%20button.png" width="200"/>](https://ko-fi.com/unsloth)
39
+ [<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
40
+
41
+ ## ✨ Finetune for Free
42
+
43
+ All notebooks are **beginner friendly**! Add your dataset, click "Run All", and you'll get a 2x faster finetuned model which can be exported to GGUF or vLLM, or uploaded to Hugging Face.
44
+
45
+ | Unsloth supports | Free Notebooks | Performance | Memory use |
46
+ |-----------------|--------------------------------------------------------------------------------------------------------------------------|-------------|----------|
47
+ | **Gemma 7b** | [▶️ Start on Colab](https://colab.research.google.com/drive/10NbwlsRChbma1v55m8LAPYG15uQv6HLo?usp=sharing) | 2.4x faster | 58% less |
48
+ | **Mistral 7b** | [▶️ Start on Colab](https://colab.research.google.com/drive/1Dyauq4kTZoLewQ1cApceUQVNcnnNTzg_?usp=sharing) | 2.2x faster | 62% less |
49
+ | **Llama-2 7b** | [▶️ Start on Colab](https://colab.research.google.com/drive/1lBzz5KeZJKXjvivbYvmGarix9Ao6Wxe5?usp=sharing) | 2.2x faster | 43% less |
50
+ | **TinyLlama** | [▶️ Start on Colab](https://colab.research.google.com/drive/1AZghoNBQaMDgWJpi4RbffGM1h6raLUj9?usp=sharing) | 3.9x faster | 74% less |
51
+ | **CodeLlama 34b** A100 | [▶️ Start on Colab](https://colab.research.google.com/drive/1y7A0AxE3y8gdj4AVkl2aZX47Xu3P1wJT?usp=sharing) | 1.9x faster | 27% less |
52
+ | **Mistral 7b** 1xT4 | [▶️ Start on Kaggle](https://www.kaggle.com/code/danielhanchen/kaggle-mistral-7b-unsloth-notebook) | 5x faster\* | 62% less |
53
+ | **DPO - Zephyr** | [▶️ Start on Colab](https://colab.research.google.com/drive/15vttTpzzVXv_tJwEk-hIcQ0S9FcEWvwP?usp=sharing) | 1.9x faster | 19% less |
54
+
55
+ - This [conversational notebook](https://colab.research.google.com/drive/1Aau3lgPzeZKQ-98h69CCu1UJcvIBLmy2?usp=sharing) is useful for ShareGPT ChatML / Vicuna templates.
56
+ - This [text completion notebook](https://colab.research.google.com/drive/1ef-tab5bhkvWmBOObepl1WgJvfvSzn5Q?usp=sharing) is for raw text. This [DPO notebook](https://colab.research.google.com/drive/15vttTpzzVXv_tJwEk-hIcQ0S9FcEWvwP?usp=sharing) replicates Zephyr.
57
+ - \* Kaggle provides 2x T4 GPUs, but we use only 1. Due to overhead, running on 1x T4 is 5x faster.
chat_template.jinja ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '
2
+ ' + message['content'] | trim + '<end_of_turn>
3
+ ' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model
4
+ '}}{% endif %}
config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "GemmaModel"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 2,
8
+ "dtype": "bfloat16",
9
+ "eos_token_id": 1,
10
+ "head_dim": 256,
11
+ "hidden_act": "gelu_pytorch_tanh",
12
+ "hidden_activation": "gelu_pytorch_tanh",
13
+ "hidden_size": 3072,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 24576,
16
+ "max_position_embeddings": 8192,
17
+ "model_type": "gemma",
18
+ "num_attention_heads": 16,
19
+ "num_hidden_layers": 28,
20
+ "num_key_value_heads": 16,
21
+ "pad_token_id": 0,
22
+ "quantization_config": {
23
+ "_load_in_4bit": true,
24
+ "_load_in_8bit": false,
25
+ "bnb_4bit_compute_dtype": "bfloat16",
26
+ "bnb_4bit_quant_storage": "uint8",
27
+ "bnb_4bit_quant_type": "nf4",
28
+ "bnb_4bit_use_double_quant": true,
29
+ "llm_int8_enable_fp32_cpu_offload": false,
30
+ "llm_int8_has_fp16_weight": false,
31
+ "llm_int8_skip_modules": null,
32
+ "llm_int8_threshold": 6.0,
33
+ "load_in_4bit": true,
34
+ "load_in_8bit": false,
35
+ "quant_method": "bitsandbytes"
36
+ },
37
+ "rms_norm_eps": 1e-06,
38
+ "rope_parameters": {
39
+ "rope_theta": 10000.0,
40
+ "rope_type": "default"
41
+ },
42
+ "tie_word_embeddings": true,
43
+ "transformers_version": "5.3.0.dev0",
44
+ "unsloth_version": "2024.9",
45
+ "use_bidirectional_attention": null,
46
+ "use_cache": true,
47
+ "vocab_size": 256000
48
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc47c52667ff3390e0310e71cc0950f587816279c7d10279127595bd50bf7980
3
+ size 5572141163
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83d1a3e50bab7af7cffdab543ccf7ae59e629c8481a5f58dbcd4073c7d65d74b
3
+ size 34362914
tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<bos>",
4
+ "clean_up_tokenization_spaces": false,
5
+ "eos_token": "<eos>",
6
+ "is_local": false,
7
+ "mask_token": "<mask>",
8
+ "model_max_length": 1000000000000000019884624838656,
9
+ "pad_token": "<pad>",
10
+ "padding_side": "left",
11
+ "sp_model_kwargs": {},
12
+ "spaces_between_special_tokens": false,
13
+ "tokenizer_class": "GemmaTokenizer",
14
+ "unk_token": "<unk>",
15
+ "use_default_system_prompt": false
16
+ }