tacodevs committed on
Commit
a70ca0d
·
verified ·
1 Parent(s): f74ada5

Initial upload: FP8 dynamic quantization of Behemoth-X-R1-123B

Browse files
README.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ license_name: mistral-research-license
4
+ license_link: https://mistral.ai/licenses/MRL-0.1.md
5
+ base_model: tacodevs/Behemoth-X-R1-123B
6
+ base_model_relation: quantized
7
+ tags:
8
+ - mistral
9
+ - fp8
10
+ - w8a8
11
+ - compressed-tensors
12
+ - quantized
13
+ - thinking
14
+ - roleplay
15
+ - creative-writing
16
+ language:
17
+ - en
18
+ pipeline_tag: text-generation
19
+ ---
20
+
21
+ <div align="center">
22
+ <img src="https://huggingface.co/tacodevs/Behemoth-X-R1-123B/resolve/main/assets/hero.png" alt="Behemoth-X-R1-123B" style="width:100%; max-width:960px; border-radius:16px; box-shadow:0 0 60px rgba(236,72,153,0.35), 0 0 100px rgba(139,92,246,0.25);"/>
23
+ </div>
24
+
25
+ <div align="center" style="margin-top:24px;">
26
+
27
+ <h1 style="font-size:3em; font-weight:900; background:linear-gradient(90deg,#ec4899 0%,#a855f7 50%,#06b6d4 100%); -webkit-background-clip:text; -webkit-text-fill-color:transparent; background-clip:text; margin:0;">Behemoth-X-R1-123B · FP8</h1>
28
+
29
+ <p style="font-size:1.2em; color:#a855f7; font-style:italic;">Single-GPU beast mode.</p>
30
+
31
+ <p>
32
+ <img src="https://img.shields.io/badge/quant-FP8_Dynamic-8B5CF6?style=for-the-badge" alt="quant"/>
33
+ <img src="https://img.shields.io/badge/VRAM-~130GB-EC4899?style=for-the-badge" alt="vram"/>
34
+ <img src="https://img.shields.io/badge/runs_on-1x_H200-06B6D4?style=for-the-badge" alt="gpu"/>
35
+ </p>
36
+
37
+ </div>
38
+
39
+ ## About
40
+
41
+ FP8 dynamic quantization of [`tacodevs/Behemoth-X-R1-123B`](https://huggingface.co/tacodevs/Behemoth-X-R1-123B). Near-lossless quality, half the weight bytes, fits on a single H200.
42
+
43
+ - **Method:** W8A8 dynamic quantization via [llm-compressor](https://github.com/vllm-project/llm-compressor)
44
+ - **Format:** `compressed-tensors`
45
+ - **Size:** ~115 GB
46
+ - **Calibration:** None needed (dynamic scheme)
47
+
48
+ ## Usage with vLLM
49
+
50
+ ```bash
51
+ python -m vllm.entrypoints.openai.api_server \
52
+ --model tacodevs/Behemoth-X-R1-123B-FP8 \
53
+ --max-model-len 16384 \
54
+ --gpu-memory-utilization 0.95 \
55
+ --trust-remote-code
56
+ ```
57
+
58
+ Fits on **1× H200 141GB** with up to ~30k context window — the example above uses a conservative `--max-model-len 16384`; raise it as VRAM allows.
59
+
60
+ ## See the main model card
61
+
62
+ Full documentation, prompt format, prefill examples, credits, and everything else is on the source repo:
63
+
64
+ ### 👉 [tacodevs/Behemoth-X-R1-123B](https://huggingface.co/tacodevs/Behemoth-X-R1-123B)
65
+
66
+ ## License
67
+
68
+ Inherited from base: **[Mistral Research License](https://mistral.ai/licenses/MRL-0.1.md)** — non-commercial use only.
chat_template.jinja ADDED
@@ -0,0 +1 @@
 
 
1
+ {{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + '[/INST]' }}{% elif message['role'] == 'system' %}{{ '[SYSTEM_PROMPT] ' + message['content'] + '[/SYSTEM_PROMPT]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + eos_token }}{% else %}{{ raise_exception('Only user, system and assistant roles are supported!') }}{% endif %}{% endfor %}
config.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MistralForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 1,
7
+ "dtype": "bfloat16",
8
+ "eos_token_id": 2,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 12288,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 28672,
14
+ "max_position_embeddings": 131072,
15
+ "model_type": "mistral",
16
+ "num_attention_heads": 96,
17
+ "num_hidden_layers": 88,
18
+ "num_key_value_heads": 8,
19
+ "quantization_config": {
20
+ "config_groups": {
21
+ "group_0": {
22
+ "format": "float-quantized",
23
+ "input_activations": {
24
+ "actorder": null,
25
+ "block_structure": null,
26
+ "dynamic": true,
27
+ "group_size": null,
28
+ "num_bits": 8,
29
+ "observer": null,
30
+ "observer_kwargs": {},
31
+ "scale_dtype": null,
32
+ "strategy": "token",
33
+ "symmetric": true,
34
+ "type": "float",
35
+ "zp_dtype": null
36
+ },
37
+ "output_activations": null,
38
+ "targets": [
39
+ "Linear"
40
+ ],
41
+ "weights": {
42
+ "actorder": null,
43
+ "block_structure": null,
44
+ "dynamic": false,
45
+ "group_size": null,
46
+ "num_bits": 8,
47
+ "observer": "memoryless_minmax",
48
+ "observer_kwargs": {},
49
+ "scale_dtype": null,
50
+ "strategy": "channel",
51
+ "symmetric": true,
52
+ "type": "float",
53
+ "zp_dtype": null
54
+ }
55
+ }
56
+ },
57
+ "format": "float-quantized",
58
+ "global_compression_ratio": null,
59
+ "ignore": [
60
+ "lm_head"
61
+ ],
62
+ "kv_cache_scheme": null,
63
+ "quant_method": "compressed-tensors",
64
+ "quantization_status": "compressed",
65
+ "sparsity_config": {},
66
+ "transform_config": {},
67
+ "version": "0.14.0.1"
68
+ },
69
+ "rms_norm_eps": 1e-05,
70
+ "rope_theta": 1000000.0,
71
+ "sliding_window": null,
72
+ "tie_word_embeddings": false,
73
+ "transformers_version": "4.57.6",
74
+ "use_cache": true,
75
+ "vocab_size": 32768
76
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.57.6"
6
+ }
model-00001-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efb74e9841acfd6dfa40df465c2eda9dafef9264452f9a3d6f6e76c5c5769d89
3
+ size 4958398128
model-00002-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b13127acfafaa4d7a09b68c1cced6351c9b959de9047b779c1cee5357eaf077
3
+ size 4832680488
model-00003-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1f6e6092fe279168537976050970b450de7829e338418f70d309434c431d541
3
+ size 4857866304
model-00004-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:512871a736c969f0617b17edf00777a6def462817c9fbb885889834dc5171bcc
3
+ size 4832680552
model-00005-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d0ae510f7740646a80a199779ae3edacfe9cb44bcb6460537dc28f3745e7c16
3
+ size 4857866352
model-00006-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35503883727c6ad094f179422deaff88223eb16d8794308243d95f071eba1955
3
+ size 4832680552
model-00007-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0470a73a624b43ce0493b5225a7f6f6dd1392ff3c3b72c3a47fe3dd1bab992d9
3
+ size 4857866352
model-00008-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11efbb99f403f6e4a2609d5ba1eaff7502a0cd07c202cea5d56cce46e1ddcc50
3
+ size 4832680552
model-00009-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2eaeb07360cb340a23602bda6ae7f75d2335d9fc85abe0d8a14ef5ce79f3eda7
3
+ size 4857866352
model-00010-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4981e6912bd0ab826dd37cc3a7c8b33142e1551e8053a354054acc4ffcea56fc
3
+ size 4832680552
model-00011-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39f433c0f7eec9dd432dfada040c3e1343b72c4cc146418a32988971ef62d874
3
+ size 4857866352
model-00012-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdec8bc16948827f3fc714579a432075741aaae269dce4fe91aa5eb465baa080
3
+ size 4832680552
model-00013-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c74581ed1d11b47962ebdeb65a0b8558077681a98a37a9bc5541f91026453503
3
+ size 4857866352
model-00014-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c634e6c7452cc5be9f31770a22fcde3ae1428cfc5cbf1915bd45980ee903a8c
3
+ size 4832680552
model-00015-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7580792ca2eb2f535217f8b5b37dc0263bb55bdc10edf4c036f35899bc1bc9a
3
+ size 4857866352
model-00016-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:447596bada0662a5adcba445015df866b8b37f242ba7446299ed07baed3bbbf7
3
+ size 4832680552
model-00017-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdbda607587a45ba8285c78d21ccde129108a8b5da56f8093d43d32246d909e7
3
+ size 4857866352
model-00018-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c13cda5c464cd5454887ccb52b2a0f7ecb7ef26d43ac0e17b880734e95bad1a7
3
+ size 4832680552
model-00019-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93f78a12bce76ec9e4d6ed879e77bfce4f8ad894c009776ea60397cfa0c41e07
3
+ size 4857866352
model-00020-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27caf7c0ecdc5f7815359ff9bce3289c0e34df72056eec8cd12b0ee5eefa3e11
3
+ size 4832680552
model-00021-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e7e79cb9d5ac66606833dc3c1de82bb05b6f620e1a777bfe2b85d9ad0ee6976
3
+ size 4857866352
model-00022-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22cd92b88545f9dbda06876f1acd65044ba866ee6b9a7a0b21dd1513e0fdcecf
3
+ size 4832680552
model-00023-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bc6815475a3f53c242a80d6c686190845fc5c3238c16fb15a561f9ed2fc57c0
3
+ size 4857866352
model-00024-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04dba7fd22a277041b189c49a297a3f7c0118ece04acea4ef47c73ac28fff953
3
+ size 4832680552
model-00025-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0104b0cf3298c5c2ff2dedfbea5213ae0e37c907385ce0dea8b05d019b7de092
3
+ size 4857866352
model-00026-of-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:802c6f2de89d64753b4db7835631921745f8a21d154394226a40475c8a097b21
3
+ size 2189695048
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
recipe.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ default_stage:
2
+ default_modifiers:
3
+ QuantizationModifier:
4
+ targets: [Linear]
5
+ ignore: [lm_head]
6
+ scheme: FP8_DYNAMIC
7
+ bypass_divisibility_checks: false
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b968b8dc352f42192367337c78ccc61e1eaddc6d641a579372d4f20694beb7a
3
+ size 587562
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff