mansaripo committed on
Commit
afd6f57
·
verified ·
1 Parent(s): ac62373

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +6 -5
README.md CHANGED
@@ -122,7 +122,7 @@ model = AutoModelForCausalLM.from_pretrained(
122
  "daslab-testing/CloverLM",
123
  trust_remote_code=True,
124
  dtype="bfloat16",
125
- quartet_2_impl="quartet2", # native NVFP4 kernel or "pseudoquant" on non-Blackwell GPUs
126
  ).to("cuda") # for GPU usage or "cpu" for CPU usage
127
 
128
  tokenizer = AutoTokenizer.from_pretrained(
@@ -134,6 +134,7 @@ input_ids = tokenizer("The capital of France is", return_tensors="pt").input_ids
134
  output = model.generate(input_ids.to(model.device), max_new_tokens=32)
135
  print(tokenizer.decode(output[0]))
136
  ```
 
137
 
138
  ### Running Evaluations
139
 
@@ -164,7 +165,7 @@ Attention backend options: `pytorch` (default), `flash2`, `flash3`, `flash4`.
164
  - PyTorch 2.10+ with CUDA 13.0
165
  - `transformers ≥ 5.3.0`
166
  - `tokenmonster ≥ 1.1.12`
167
- - [Quartet II kernels](https://github.com/IST-DASLab/Quartet-II) (for native FP4; `pseudoquant` mode works without them)
168
 
169
  ## Architecture Details
170
 
@@ -190,8 +191,8 @@ The model uses 264 weight tensors totaling ~4.14 B parameters.
190
  @article{cloverlm2026,
191
  title = {Speedrunning GPT3: Pretraining an OPT-175B-Quality Model Cheaply
192
  by Leveraging Native NVFP4},
193
- author = {Erik Schultheis and Matin Ansaripour and Andrei Panferov and
194
- Georgios Vlassis and Dan Alistarh},
195
  year = {2026},
196
  }
197
- ```
 
122
  "daslab-testing/CloverLM",
123
  trust_remote_code=True,
124
  dtype="bfloat16",
125
+ quartet_2_impl="pseudoquant", # on non-Blackwell GPUs or "quartet2" for native NVFP4 kernel
126
  ).to("cuda") # for GPU usage or "cpu" for CPU usage
127
 
128
  tokenizer = AutoTokenizer.from_pretrained(
 
134
  output = model.generate(input_ids.to(model.device), max_new_tokens=32)
135
  print(tokenizer.decode(output[0]))
136
  ```
137
+ Note that `quartet_2_impl="quartet2"` only supports inputs with `(micro_batch_size * seq_length) % 128 == 0`.
138
 
139
  ### Running Evaluations
140
 
 
165
  - PyTorch 2.10+ with CUDA 13.0
166
  - `transformers ≥ 5.3.0`
167
  - `tokenmonster ≥ 1.1.12`
168
+ - [Quartet II kernels](https://github.com/IST-DASLab/Quartet-II)
169
 
170
  ## Architecture Details
171
 
 
191
  @article{cloverlm2026,
192
  title = {Speedrunning GPT3: Pretraining an OPT-175B-Quality Model Cheaply
193
  by Leveraging Native NVFP4},
194
+ author = {Erik Schultheis and Georgios Vlassis and Matin Ansaripour and
195
+ Andrei Panferov and Dan Alistarh},
196
  year = {2026},
197
  }
198
+ ```