Files changed (2) hide show
  1. LICENSE +21 -0
  2. README.md +7 -4
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Zhipu AI
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -11,9 +11,12 @@ base_model:
11
  - **Output:** Text
12
  - **Supported Hardware Microarchitecture:** AMD MI300/MI350/MI355 (emulation)
13
  - **ROCm:** 7.2.2
 
 
14
  - **Operating System(s):** Linux
15
  - **Inference Engine:** [vLLM](https://docs.vllm.ai/en/latest/)
16
  - **Model Optimizer:** [AMD-Quark](https://quark.docs.amd.com/latest/index.html) (V0.12)
 
17
  - **Weight quantization:** MOE-only, NVFP4, Static
18
  - **Activation quantization:** MOE-only, NVFP4, Dynamic
19
  - **Calibration Dataset:** [Pile](https://huggingface.co/datasets/mit-han-lab/pile-val-backup)
@@ -28,8 +31,8 @@ The model was quantized from [zai-org/GLM-5](https://huggingface.co/zai-org/GLM-
28
  sudo sysctl -w vm.max_map_count=4194304
29
  cd Quark/examples/torch/language_modeling/llm_ptq/
30
  export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
31
- export MODEL_DIR=/zai-org/GLM-5
32
- export output_dir=/amd/GLM-5-NVFP4
33
  exclude_layers="*self_attn* *mlp.gate *lm_head *mlp.gate_proj *mlp.up_proj *mlp.down_proj"
34
  python3 quantize_quark.py --model_dir $MODEL_DIR \
35
  --quant_scheme nvfp4 \
@@ -93,7 +96,7 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
93
  export PYTORCH_ALLOC_CONF=expandable_segments:True
94
  lm_eval \
95
  --model vllm \
96
- --model_args pretrained=/amd/GLM-5-NVFP4,tensor_parallel_size=8,max_model_len=4096,gpu_memory_utilization=0.90,enforce_eager=True,max_gen_toks=2048,kv_cache_dtype=bfloat16,trust_remote_code=True \
97
  --tasks gsm8k \
98
  --num_fewshot 5 \
99
  --batch_size auto
@@ -103,4 +106,4 @@ lm_eval \
103
 
104
 
105
  # License
106
- Modifications Copyright(c) 2026 Advanced Micro Devices, Inc. All rights reserved.
 
11
  - **Output:** Text
12
  - **Supported Hardware Microarchitecture:** AMD MI300/MI350/MI355 (emulation)
13
  - **ROCm:** 7.2.2
14
+ - **PyTorch:** 2.10.0
15
+ - **Transformers:** 5.2.0
16
  - **Operating System(s):** Linux
17
  - **Inference Engine:** [vLLM](https://docs.vllm.ai/en/latest/)
18
  - **Model Optimizer:** [AMD-Quark](https://quark.docs.amd.com/latest/index.html) (V0.12)
19
+ - **Quantized layers:** `experts` and `shared_experts`
20
  - **Weight quantization:** MOE-only, NVFP4, Static
21
  - **Activation quantization:** MOE-only, NVFP4, Dynamic
22
  - **Calibration Dataset:** [Pile](https://huggingface.co/datasets/mit-han-lab/pile-val-backup)
 
31
  sudo sysctl -w vm.max_map_count=4194304
32
  cd Quark/examples/torch/language_modeling/llm_ptq/
33
  export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
34
+ export MODEL_DIR=zai-org/GLM-5
35
+ export output_dir=amd/GLM-5-NVFP4
36
  exclude_layers="*self_attn* *mlp.gate *lm_head *mlp.gate_proj *mlp.up_proj *mlp.down_proj"
37
  python3 quantize_quark.py --model_dir $MODEL_DIR \
38
  --quant_scheme nvfp4 \
 
96
  export PYTORCH_ALLOC_CONF=expandable_segments:True
97
  lm_eval \
98
  --model vllm \
99
+ --model_args pretrained=amd/GLM-5-NVFP4,tensor_parallel_size=8,max_model_len=4096,gpu_memory_utilization=0.90,enforce_eager=True,max_gen_toks=2048,kv_cache_dtype=bfloat16,trust_remote_code=True \
100
  --tasks gsm8k \
101
  --num_fewshot 5 \
102
  --batch_size auto
 
106
 
107
 
108
  # License
109
+ Modifications Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.