psynote123 committed on
Commit
512099f
·
verified ·
1 Parent(s): e894e37

Upload README.md with huggingface_hub

Browse files
Files changed (1) hide show
  1. README.md +2 -2
README.md CHANGED
@@ -57,7 +57,7 @@ model = AutoModelForCausalLM.from_pretrained(
57
  token=hf_token,
58
  torch_dtype=torch.bfloat16,
59
  attn_implementation="sdpa",
60
- mode='s'
61
  ).to(device)
62
  model.generation_config.pad_token_id = tokenizer.eos_token_id
63
 
@@ -153,7 +153,7 @@ __100 input/300 output; tok/s:__
153
  | GPU/Model | S | M | L | XL | Original | W8A8, int8 |
154
  |-----------|-----|---|---|----|----------|------------|
155
  | H100 | 194 | 191 | 161 | 131 | 58 | 198 |
156
- | L40S | -1 | -1 | -1 | -1 | -1 | -1 |
157
 
158
 
159
 
 
57
  token=hf_token,
58
  torch_dtype=torch.bfloat16,
59
  attn_implementation="sdpa",
60
+ mode='S'
61
  ).to(device)
62
  model.generation_config.pad_token_id = tokenizer.eos_token_id
63
 
 
153
  | GPU/Model | S | M | L | XL | Original | W8A8, int8 |
154
  |-----------|-----|---|---|----|----------|------------|
155
  | H100 | 194 | 191 | 161 | 131 | 58 | 198 |
156
+ | L40S | 72 | 70 | 56 | 44 | 40 | 74 |
157
 
158
 
159