Update README.md
Browse files
README.md
CHANGED
|
@@ -122,7 +122,7 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
| 122 |
"daslab-testing/CloverLM",
|
| 123 |
trust_remote_code=True,
|
| 124 |
dtype="bfloat16",
|
| 125 |
-
quartet_2_impl="
|
| 126 |
).to("cuda") # for GPU usage or "cpu" for CPU usage
|
| 127 |
|
| 128 |
tokenizer = AutoTokenizer.from_pretrained(
|
|
@@ -164,7 +164,7 @@ Attention backend options: `pytorch` (default), `flash2`, `flash3`, `flash4`.
|
|
| 164 |
- PyTorch 2.10+ with CUDA 13.0
|
| 165 |
- `transformers ≥ 5.3.0`
|
| 166 |
- `tokenmonster ≥ 1.1.12`
|
| 167 |
-
- [Quartet II kernels](https://github.com/IST-DASLab/Quartet-II)
|
| 168 |
|
| 169 |
## Architecture Details
|
| 170 |
|
|
@@ -190,8 +190,8 @@ The model uses 264 weight tensors totaling ~4.14 B parameters.
|
|
| 190 |
@article{cloverlm2026,
|
| 191 |
title = {Speedrunning GPT3: Pretraining an OPT-175B-Quality Model Cheaply
|
| 192 |
by Leveraging Native NVFP4},
|
| 193 |
-
author = {Erik Schultheis and
|
| 194 |
-
|
| 195 |
year = {2026},
|
| 196 |
}
|
| 197 |
```
|
|
|
|
| 122 |
"daslab-testing/CloverLM",
|
| 123 |
trust_remote_code=True,
|
| 124 |
dtype="bfloat16",
|
| 125 |
+
quartet_2_impl="pseudoquant",  # use "pseudoquant" on non-Blackwell GPUs, or "quartet2" for the native NVFP4 kernel
|
| 126 |
).to("cuda") # for GPU usage or "cpu" for CPU usage
|
| 127 |
|
| 128 |
tokenizer = AutoTokenizer.from_pretrained(
|
|
|
|
| 164 |
- PyTorch 2.10+ with CUDA 13.0
|
| 165 |
- `transformers ≥ 5.3.0`
|
| 166 |
- `tokenmonster ≥ 1.1.12`
|
| 167 |
+
- [Quartet II kernels](https://github.com/IST-DASLab/Quartet-II)
|
| 168 |
|
| 169 |
## Architecture Details
|
| 170 |
|
|
|
|
| 190 |
@article{cloverlm2026,
|
| 191 |
title = {Speedrunning GPT3: Pretraining an OPT-175B-Quality Model Cheaply
|
| 192 |
by Leveraging Native NVFP4},
|
| 193 |
+
author = {Erik Schultheis and Georgios Vlassis and Matin Ansaripour and
|
| 194 |
+
Andrei Panferov and Dan Alistarh},
|
| 195 |
year = {2026},
|
| 196 |
}
|
| 197 |
```
|