patf82 committed
Commit 7a7e168 · verified · 1 Parent(s): b76c028

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,118 @@
+ ---
+ language: en
+ tags:
+ - exllamav3
+ - quantized
+ - 5-bit
+ - reasoning
+ - coding
+ - qwen2
+ library_name: exllamav3
+ base_model: andrewzh/Absolute_Zero_Reasoner-Coder-14b
+ base_model_relation: quantized
+ ---
+
+ # Absolute_Zero_Reasoner-Coder-14b-5.0bpw-exl3
+
+ This is a 5-bit quantized version of [andrewzh/Absolute_Zero_Reasoner-Coder-14b](https://huggingface.co/andrewzh/Absolute_Zero_Reasoner-Coder-14b) using [ExLlamaV3](https://github.com/turboderp-org/exllamav3) v0.0.2.
+
+ ## Model Description
+
+ This model is a quantized version of Absolute_Zero_Reasoner-Coder-14b, which is built on the Qwen2.5-Coder-14B architecture and is designed for reasoning and coding tasks. For more details about the original model, please refer to the paper: [https://huggingface.co/papers/2505.03335](https://huggingface.co/papers/2505.03335).
+
+ The quantization reduces model size and memory requirements while aiming to preserve as much of the original performance as possible.
+
+ ## Quantization Methodology
+
+ The model was quantized using ExLlamaV3 v0.0.2 with the following parameters:
+
+ - **Quantization Method**: exl3 (ExLlamaV3)
+ - **Bits**: 5.0 (5-bit weight quantization)
+ - **Head Bits**: 6 (6-bit precision for the output/lm_head layer)
+ - **Calibration**:
+   - Rows: 100
+   - Columns: 2048
+ - **Out Scales**: auto
+
+ Rather than simple linear (round-to-nearest) quantization, the EXL3 format uses a calibration-based scheme (here, 100 rows of 2048 tokens each), which preserves more of the original model quality at low bit depths. The same parameters are recorded in the `quantization_config` block of this repository's `config.json`, as shown in the sketch below.
+
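+ A minimal sketch for reading those settings back from the shipped `config.json` (the directory path is a placeholder for wherever the model is downloaded):
+
+ ```python
+ import json
+ from pathlib import Path
+
+ # Placeholder path; point this at the downloaded model directory.
+ model_dir = Path("path/to/Absolute_Zero_Reasoner-Coder-14b-5.0bpw-exl3")
+
+ with open(model_dir / "config.json") as f:
+     config = json.load(f)
+
+ # The exl3 settings live under "quantization_config" in this repository's config.json.
+ qcfg = config["quantization_config"]
+ print(qcfg["quant_method"], qcfg["bits"], qcfg["head_bits"])  # exl3 5.0 6
+ print(qcfg["calibration"])                                    # {'rows': 100, 'cols': 2048}
+ ```
+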
+ ## Model Architecture
+
+ The model is based on the Qwen2 architecture with the following specifications (a rough size estimate derived from these numbers is sketched after the list):
+
+ - **Hidden Size**: 5120
+ - **Intermediate Size**: 13824
+ - **Number of Attention Heads**: 40
+ - **Number of Key-Value Heads**: 8
+ - **Number of Hidden Layers**: 48
+ - **Maximum Sequence Length**: 32768
+ - **Vocabulary Size**: 152064
+
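+ As a back-of-the-envelope check, these specifications imply roughly 14.8B parameters, and at about 5 bits per weight the expected on-disk size comes out close to the ~10.4 GB of the two safetensors shards in this repository. The sketch below assumes (these are assumptions, not statements from the quantizer) that the embedding table stays in 16-bit and the output head uses the 6-bit head setting:
+
+ ```python
+ # Rough parameter count and on-disk size estimate from the architecture specs above.
+ hidden, inter, layers, vocab = 5120, 13824, 48, 152064
+ heads, kv_heads = 40, 8
+ head_dim = hidden // heads  # 128
+
+ embed = vocab * hidden                          # input embedding table
+ lm_head = vocab * hidden                        # untied output head
+ attn = (2 * hidden * (heads * head_dim)         # q and o projections
+         + 2 * hidden * (kv_heads * head_dim))   # k and v projections (biases ignored)
+ mlp = 3 * hidden * inter                        # gate, up and down projections
+ per_layer = attn + mlp
+
+ params = embed + lm_head + layers * per_layer
+ print(f"~{params / 1e9:.1f}B parameters")       # ~14.8B
+
+ # Assumption: ~5 bpw transformer weights, 6 bpw output head, 16-bit embeddings.
+ size_gb = (layers * per_layer * 5 + lm_head * 6 + embed * 16) / 8 / 1e9
+ print(f"~{size_gb:.1f} GB expected on disk")    # ~10.4 GB, matching the two shards
+ ```
+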
+ ## How to Use
+
+ To use this quantized model you need the ExLlamaV3 library, installable via pip (or from source, following the instructions in the ExLlamaV3 repository):
+
+ ```bash
+ pip install exllamav3
+ ```
+
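+ The quantized weights can be fetched with `huggingface_hub` (the same library used to upload this folder). The repository id below is a placeholder; substitute this repository's actual id:
+
+ ```python
+ from huggingface_hub import snapshot_download
+
+ # Placeholder repo id; replace with this repository's actual id.
+ model_dir = snapshot_download(
+     repo_id="patf82/Absolute_Zero_Reasoner-Coder-14b-5.0bpw-exl3",
+     local_dir="Absolute_Zero_Reasoner-Coder-14b-5.0bpw-exl3",
+ )
+ print(model_dir)
+ ```
+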
+ Here's a basic example of loading the model and generating text. The class names follow the ExLlamaV3 quickstart; check the examples in the ExLlamaV3 repository for the exact API of the version you install:
60
+
61
+ ```python
62
+ from exllamav3 import ExLlamaV3, ExLlamaV3Config
63
+ from exllamav3.tokenizer import ExLlamaV3Tokenizer
64
+
65
+ # Set up model path
66
+ model_path = "path/to/Absolute_Zero_Reasoner-Coder-14b-5.0bpw-exl3"
67
+
68
+ # Load config and model
69
+ config = ExLlamaV3Config()
70
+ config.model_dir = model_path
71
+ config.prepare()
72
+
73
+ model = ExLlamaV3(config)
74
+ model.load()
75
+
76
+ # Load tokenizer
77
+ tokenizer = ExLlamaV3Tokenizer(config)
78
+
79
+ # Generate text
80
+ prompt = "Write a function to calculate the Fibonacci sequence in Python:"
81
+ input_ids = tokenizer.encode(prompt)
82
+ output = model.generate(
83
+ input_ids=input_ids,
84
+ max_new_tokens=200,
85
+ temperature=0.6,
86
+ top_p=0.9
87
+ )
88
+
89
+ print(tokenizer.decode(output))
90
+ ```
91
+
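+ Note that the chat template bundled in this repository's `tokenizer_config.json` simply joins message contents with newlines and does not insert `<|im_start|>`/`<|im_end|>` role tags, so prompts are effectively plain text. The tokenizer itself is a standard Qwen2 tokenizer and can be inspected with `transformers` even though the quantized weights cannot (the snippet below assumes `transformers` is installed and uses a placeholder path):
+
+ ```python
+ from transformers import AutoTokenizer
+
+ # Placeholder path; point this at the downloaded model directory.
+ tok = AutoTokenizer.from_pretrained("path/to/Absolute_Zero_Reasoner-Coder-14b-5.0bpw-exl3")
+
+ messages = [{"role": "user", "content": "Write a function to calculate the Fibonacci sequence in Python:"}]
+ # With this template, the rendered prompt is just the message contents joined by newlines.
+ print(tok.apply_chat_template(messages, tokenize=False))
+ ```
+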
+ ## Limitations
+
+ This quantized model has the following limitations:
+
+ 1. **Reduced Precision**: The 5-bit quantization may lead to some degradation in performance compared to the original model, particularly for complex reasoning tasks.
+
+ 2. **ExLlamaV3 Dependency**: This model can only be used with the ExLlamaV3 library and is not compatible with standard Hugging Face Transformers without conversion.
+
+ 3. **Inherited Limitations**: All limitations of the original model apply to this quantized version as well.
+
+ ## Citation
+
+ If you use this model in your research, please cite the original paper:
+
+ ```bibtex
+ @misc{zhao2025absolutezero,
+   title         = {Absolute Zero: Reinforced Self-play Reasoning with Zero Data},
+   author        = {Zhao, Andrew and others},
+   year          = {2025},
+   eprint        = {2505.03335},
+   archivePrefix = {arXiv},
+   url           = {https://huggingface.co/papers/2505.03335}
+ }
+ ```
+
+ ## Acknowledgements
+
+ - Original model: [andrewzh/Absolute_Zero_Reasoner-Coder-14b](https://huggingface.co/andrewzh/Absolute_Zero_Reasoner-Coder-14b)
+ - Quantization library: [ExLlamaV3](https://github.com/turboderp-org/exllamav3)
added_tokens.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "</tool_call>": 151658,
+   "<tool_call>": 151657,
+   "<|box_end|>": 151649,
+   "<|box_start|>": 151648,
+   "<|endoftext|>": 151643,
+   "<|file_sep|>": 151664,
+   "<|fim_middle|>": 151660,
+   "<|fim_pad|>": 151662,
+   "<|fim_prefix|>": 151659,
+   "<|fim_suffix|>": 151661,
+   "<|im_end|>": 151645,
+   "<|im_start|>": 151644,
+   "<|image_pad|>": 151655,
+   "<|object_ref_end|>": 151647,
+   "<|object_ref_start|>": 151646,
+   "<|quad_end|>": 151651,
+   "<|quad_start|>": 151650,
+   "<|repo_name|>": 151663,
+   "<|video_pad|>": 151656,
+   "<|vision_end|>": 151653,
+   "<|vision_pad|>": 151654,
+   "<|vision_start|>": 151652
+ }
config.json ADDED
@@ -0,0 +1,40 @@
+ {
+   "_name_or_path": "/home/fit/huangg/WORK/zqc/reason_rl/checkpoints/code_io/code_io/code_io_full_v0_coder14b/test_answer/Qwen2.5-Coder-14B/answer_conditional/global_step_300/actor/huggingface",
+   "architectures": [
+     "Qwen2ForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "eos_token_id": 151643,
+   "hidden_act": "silu",
+   "hidden_size": 5120,
+   "initializer_range": 0.02,
+   "intermediate_size": 13824,
+   "max_position_embeddings": 32768,
+   "max_window_layers": 48,
+   "model_type": "qwen2",
+   "num_attention_heads": 40,
+   "num_hidden_layers": 48,
+   "num_key_value_heads": 8,
+   "pad_token_id": 151643,
+   "rms_norm_eps": 1e-06,
+   "rope_scaling": null,
+   "rope_theta": 1000000.0,
+   "sliding_window": null,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float32",
+   "transformers_version": "4.47.1",
+   "use_cache": true,
+   "use_sliding_window": false,
+   "vocab_size": 152064,
+   "quantization_config": {
+     "quant_method": "exl3",
+     "version": "0.0.2",
+     "bits": 5.0,
+     "head_bits": 6,
+     "calibration": {
+       "rows": 100,
+       "cols": 2048
+     },
+     "out_scales": "auto"
+   }
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "eos_token_id": 151643,
+   "pad_token_id": 151643,
+   "transformers_version": "4.47.1"
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:56f0e5b4eb105989d0ae5d116ddcabd48a807e84d67fc9a05c515d7687850193
+ size 8447091368
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1abb6a023687b0dd1166d319f45c9050169fcbb538c2af555603b00b299a5dca
+ size 1962240872
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
quantization_config.json ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "additional_special_tokens": [
+     "<|im_start|>",
+     "<|im_end|>",
+     "<|object_ref_start|>",
+     "<|object_ref_end|>",
+     "<|box_start|>",
+     "<|box_end|>",
+     "<|quad_start|>",
+     "<|quad_end|>",
+     "<|vision_start|>",
+     "<|vision_end|>",
+     "<|vision_pad|>",
+     "<|image_pad|>",
+     "<|video_pad|>"
+   ],
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
+ size 11421896
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"add_bos_token": false, "add_prefix_space": false, "added_tokens_decoder": {"151643": {"content": "<|endoftext|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "151644": {"content": "<|im_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "151645": {"content": "<|im_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "151646": {"content": "<|object_ref_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "151647": {"content": "<|object_ref_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "151648": {"content": "<|box_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "151649": {"content": "<|box_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "151650": {"content": "<|quad_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "151651": {"content": "<|quad_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "151652": {"content": "<|vision_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "151653": {"content": "<|vision_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "151654": {"content": "<|vision_pad|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "151655": {"content": "<|image_pad|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "151656": {"content": "<|video_pad|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "151657": {"content": "<tool_call>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false}, "151658": {"content": "</tool_call>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false}, "151659": {"content": "<|fim_prefix|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false}, "151660": {"content": "<|fim_middle|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false}, "151661": {"content": "<|fim_suffix|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false}, "151662": {"content": "<|fim_pad|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false}, "151663": {"content": "<|repo_name|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false}, "151664": {"content": "<|file_sep|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false}}, "additional_special_tokens": ["<|im_start|>", "<|im_end|>", "<|object_ref_start|>", "<|object_ref_end|>", "<|box_start|>", "<|box_end|>", "<|quad_start|>", "<|quad_end|>", "<|vision_start|>", "<|vision_end|>", "<|vision_pad|>", "<|image_pad|>", "<|video_pad|>"], "bos_token": null, "chat_template": "{%- for message in messages -%}{{- '\n' if not loop.first -}}{{- message['content'] -}}{%- endfor -%}", "clean_up_tokenization_spaces": false, "eos_token": "<|endoftext|>", "errors": "replace", 
"extra_special_tokens": {}, "model_max_length": 32768, "pad_token": "<|endoftext|>", "split_special_tokens": false, "tokenizer_class": "Qwen2Tokenizer", "unk_token": null}
vocab.json ADDED
The diff for this file is too large to render. See raw diff