Upload README.md with huggingface_hub
Browse files
README.md
CHANGED
|
@@ -57,7 +57,7 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
| 57 |
token=hf_token,
|
| 58 |
torch_dtype=torch.bfloat16,
|
| 59 |
attn_implementation="sdpa",
|
| 60 |
-
mode='
|
| 61 |
).to(device)
|
| 62 |
model.generation_config.pad_token_id = tokenizer.eos_token_id
|
| 63 |
|
|
@@ -153,7 +153,7 @@ __100 input/300 output; tok/s:__
|
|
| 153 |
| GPU/Model | S | M | L | XL | Original | W8A8, int8 |
|
| 154 |
|-----------|-----|---|---|----|----------|------------|
|
| 155 |
| H100 | 194 | 191 | 161 | 131 | 58 | 198 |
|
| 156 |
-
| L40S |
|
| 157 |
|
| 158 |
|
| 159 |
|
|
|
|
| 57 |
token=hf_token,
|
| 58 |
torch_dtype=torch.bfloat16,
|
| 59 |
attn_implementation="sdpa",
|
| 60 |
+
mode='S'
|
| 61 |
).to(device)
|
| 62 |
model.generation_config.pad_token_id = tokenizer.eos_token_id
|
| 63 |
|
|
|
|
| 153 |
| GPU/Model | S | M | L | XL | Original | W8A8, int8 |
|
| 154 |
|-----------|-----|---|---|----|----------|------------|
|
| 155 |
| H100 | 194 | 191 | 161 | 131 | 58 | 198 |
|
| 156 |
+
| L40S | 72 | 70 | 56 | 44 | 40 | 74 |
|
| 157 |
|
| 158 |
|
| 159 |
|