Commit ·
5db64b3
1
Parent(s): d6abd97
Patch fix.
Browse files
- config.json +0 -1
- configuration_ministu.py +1 -1
config.json
CHANGED
|
@@ -26,7 +26,6 @@
|
|
| 26 |
"fsdp": true,
|
| 27 |
"ddp": false,
|
| 28 |
"mixed_precision": true,
|
| 29 |
-
"torch_dtype": "bfloat16",
|
| 30 |
"use_cpu_offload": false,
|
| 31 |
"sharding_strategy": "full_shard",
|
| 32 |
"state_dict_type": "full",
|
|
|
|
| 26 |
"fsdp": true,
|
| 27 |
"ddp": false,
|
| 28 |
"mixed_precision": true,
|
|
|
|
| 29 |
"use_cpu_offload": false,
|
| 30 |
"sharding_strategy": "full_shard",
|
| 31 |
"state_dict_type": "full",
|
configuration_ministu.py
CHANGED
|
@@ -22,7 +22,7 @@ class MiniSTUConfig(PretrainedConfig):
|
|
| 22 |
use_approx: bool = True,
|
| 23 |
use_attn: bool = True,
|
| 24 |
softcap: float = 50.0,
|
| 25 |
-
torch_dtype
|
| 26 |
device: str = None,
|
| 27 |
**kwargs,
|
| 28 |
):
|
|
|
|
| 22 |
use_approx: bool = True,
|
| 23 |
use_attn: bool = True,
|
| 24 |
softcap: float = 50.0,
|
| 25 |
+
torch_dtype = torch.bfloat16,
|
| 26 |
device: str = None,
|
| 27 |
**kwargs,
|
| 28 |
):
|