Buckets:

rtrm's picture
|
download
raw
11.1 kB

AdEMAMix

AdEMAMix is a variant of the Adam optimizer.

bitsandbytes also supports paged optimizers which take advantage of CUDAs unified memory to transfer memory from the GPU to the CPU when GPU memory is exhausted.

AdEMAMix[[api-class]][[bitsandbytes.optim.AdEMAMix]]

class bitsandbytes.optim.AdEMAMixbitsandbytes.optim.AdEMAMixhttps://github.com/bitsandbytes-foundation/bitsandbytes/blob/v0.48.2/bitsandbytes/optim/ademamix.py#L107[{"name": "params", "val": ": Iterable"}, {"name": "lr", "val": ": float = 0.001"}, {"name": "betas", "val": ": tuple = (0.9, 0.999, 0.9999)"}, {"name": "alpha", "val": ": float = 5.0"}, {"name": "t_alpha", "val": ": typing.Optional[int] = None"}, {"name": "t_beta3", "val": ": typing.Optional[int] = None"}, {"name": "eps", "val": ": float = 1e-08"}, {"name": "weight_decay", "val": ": float = 0.01"}, {"name": "optim_bits", "val": ": typing.Literal[8, 32] = 32"}, {"name": "min_8bit_size", "val": ": int = 4096"}, {"name": "is_paged", "val": ": bool = False"}]

initbitsandbytes.optim.AdEMAMix.inithttps://github.com/bitsandbytes-foundation/bitsandbytes/blob/v0.48.2/bitsandbytes/optim/ademamix.py#L108[{"name": "params", "val": ": Iterable"}, {"name": "lr", "val": ": float = 0.001"}, {"name": "betas", "val": ": tuple = (0.9, 0.999, 0.9999)"}, {"name": "alpha", "val": ": float = 5.0"}, {"name": "t_alpha", "val": ": typing.Optional[int] = None"}, {"name": "t_beta3", "val": ": typing.Optional[int] = None"}, {"name": "eps", "val": ": float = 1e-08"}, {"name": "weight_decay", "val": ": float = 0.01"}, {"name": "optim_bits", "val": ": typing.Literal[8, 32] = 32"}, {"name": "min_8bit_size", "val": ": int = 4096"}, {"name": "is_paged", "val": ": bool = False"}]

AdEMAMix8bit[[bitsandbytes.optim.AdEMAMix8bit]]

class bitsandbytes.optim.AdEMAMix8bitbitsandbytes.optim.AdEMAMix8bithttps://github.com/bitsandbytes-foundation/bitsandbytes/blob/v0.48.2/bitsandbytes/optim/ademamix.py#L274[{"name": "params", "val": ": Iterable"}, {"name": "lr", "val": ": float = 0.001"}, {"name": "betas", "val": ": tuple = (0.9, 0.999, 0.9999)"}, {"name": "alpha", "val": ": float = 5.0"}, {"name": "t_alpha", "val": ": typing.Optional[int] = None"}, {"name": "t_beta3", "val": ": typing.Optional[int] = None"}, {"name": "eps", "val": ": float = 1e-08"}, {"name": "weight_decay", "val": ": float = 0.01"}, {"name": "min_8bit_size", "val": ": int = 4096"}, {"name": "is_paged", "val": ": bool = False"}]

initbitsandbytes.optim.AdEMAMix8bit.inithttps://github.com/bitsandbytes-foundation/bitsandbytes/blob/v0.48.2/bitsandbytes/optim/ademamix.py#L275[{"name": "params", "val": ": Iterable"}, {"name": "lr", "val": ": float = 0.001"}, {"name": "betas", "val": ": tuple = (0.9, 0.999, 0.9999)"}, {"name": "alpha", "val": ": float = 5.0"}, {"name": "t_alpha", "val": ": typing.Optional[int] = None"}, {"name": "t_beta3", "val": ": typing.Optional[int] = None"}, {"name": "eps", "val": ": float = 1e-08"}, {"name": "weight_decay", "val": ": float = 0.01"}, {"name": "min_8bit_size", "val": ": int = 4096"}, {"name": "is_paged", "val": ": bool = False"}]

AdEMAMix32bit[[bitsandbytes.optim.AdEMAMix32bit]]

class bitsandbytes.optim.AdEMAMix32bitbitsandbytes.optim.AdEMAMix32bithttps://github.com/bitsandbytes-foundation/bitsandbytes/blob/v0.48.2/bitsandbytes/optim/ademamix.py#L359[{"name": "params", "val": ": Iterable"}, {"name": "lr", "val": ": float = 0.001"}, {"name": "betas", "val": ": tuple = (0.9, 0.999, 0.9999)"}, {"name": "alpha", "val": ": float = 5.0"}, {"name": "t_alpha", "val": ": typing.Optional[int] = None"}, {"name": "t_beta3", "val": ": typing.Optional[int] = None"}, {"name": "eps", "val": ": float = 1e-08"}, {"name": "weight_decay", "val": ": float = 0.01"}, {"name": "min_8bit_size", "val": ": int = 4096"}, {"name": "is_paged", "val": ": bool = False"}]

initbitsandbytes.optim.AdEMAMix32bit.inithttps://github.com/bitsandbytes-foundation/bitsandbytes/blob/v0.48.2/bitsandbytes/optim/ademamix.py#L360[{"name": "params", "val": ": Iterable"}, {"name": "lr", "val": ": float = 0.001"}, {"name": "betas", "val": ": tuple = (0.9, 0.999, 0.9999)"}, {"name": "alpha", "val": ": float = 5.0"}, {"name": "t_alpha", "val": ": typing.Optional[int] = None"}, {"name": "t_beta3", "val": ": typing.Optional[int] = None"}, {"name": "eps", "val": ": float = 1e-08"}, {"name": "weight_decay", "val": ": float = 0.01"}, {"name": "min_8bit_size", "val": ": int = 4096"}, {"name": "is_paged", "val": ": bool = False"}]

PagedAdEMAMix[[bitsandbytes.optim.PagedAdEMAMix]]

class bitsandbytes.optim.PagedAdEMAMixbitsandbytes.optim.PagedAdEMAMixhttps://github.com/bitsandbytes-foundation/bitsandbytes/blob/v0.48.2/bitsandbytes/optim/ademamix.py#L330[{"name": "params", "val": ": Iterable"}, {"name": "lr", "val": ": float = 0.001"}, {"name": "betas", "val": ": tuple = (0.9, 0.999, 0.9999)"}, {"name": "alpha", "val": ": float = 5.0"}, {"name": "t_alpha", "val": ": typing.Optional[int] = None"}, {"name": "t_beta3", "val": ": typing.Optional[int] = None"}, {"name": "eps", "val": ": float = 1e-08"}, {"name": "weight_decay", "val": ": float = 0.01"}, {"name": "optim_bits", "val": ": typing.Literal[8, 32] = 32"}, {"name": "min_8bit_size", "val": ": int = 4096"}]

initbitsandbytes.optim.PagedAdEMAMix.inithttps://github.com/bitsandbytes-foundation/bitsandbytes/blob/v0.48.2/bitsandbytes/optim/ademamix.py#L331[{"name": "params", "val": ": Iterable"}, {"name": "lr", "val": ": float = 0.001"}, {"name": "betas", "val": ": tuple = (0.9, 0.999, 0.9999)"}, {"name": "alpha", "val": ": float = 5.0"}, {"name": "t_alpha", "val": ": typing.Optional[int] = None"}, {"name": "t_beta3", "val": ": typing.Optional[int] = None"}, {"name": "eps", "val": ": float = 1e-08"}, {"name": "weight_decay", "val": ": float = 0.01"}, {"name": "optim_bits", "val": ": typing.Literal[8, 32] = 32"}, {"name": "min_8bit_size", "val": ": int = 4096"}]

PagedAdEMAMix8bit[[bitsandbytes.optim.PagedAdEMAMix8bit]]

class bitsandbytes.optim.PagedAdEMAMix8bitbitsandbytes.optim.PagedAdEMAMix8bithttps://github.com/bitsandbytes-foundation/bitsandbytes/blob/v0.48.2/bitsandbytes/optim/ademamix.py#L303[{"name": "params", "val": ": Iterable"}, {"name": "lr", "val": ": float = 0.001"}, {"name": "betas", "val": ": tuple = (0.9, 0.999, 0.9999)"}, {"name": "alpha", "val": ": float = 5.0"}, {"name": "t_alpha", "val": ": typing.Optional[int] = None"}, {"name": "t_beta3", "val": ": typing.Optional[int] = None"}, {"name": "eps", "val": ": float = 1e-08"}, {"name": "weight_decay", "val": ": float = 0.01"}, {"name": "min_8bit_size", "val": ": int = 4096"}]

initbitsandbytes.optim.PagedAdEMAMix8bit.inithttps://github.com/bitsandbytes-foundation/bitsandbytes/blob/v0.48.2/bitsandbytes/optim/ademamix.py#L304[{"name": "params", "val": ": Iterable"}, {"name": "lr", "val": ": float = 0.001"}, {"name": "betas", "val": ": tuple = (0.9, 0.999, 0.9999)"}, {"name": "alpha", "val": ": float = 5.0"}, {"name": "t_alpha", "val": ": typing.Optional[int] = None"}, {"name": "t_beta3", "val": ": typing.Optional[int] = None"}, {"name": "eps", "val": ": float = 1e-08"}, {"name": "weight_decay", "val": ": float = 0.01"}, {"name": "min_8bit_size", "val": ": int = 4096"}]

PagedAdEMAMix32bit[[bitsandbytes.optim.PagedAdEMAMix32bit]]

class bitsandbytes.optim.PagedAdEMAMix32bitbitsandbytes.optim.PagedAdEMAMix32bithttps://github.com/bitsandbytes-foundation/bitsandbytes/blob/v0.48.2/bitsandbytes/optim/ademamix.py#L392[{"name": "params", "val": ": Iterable"}, {"name": "lr", "val": ": float = 0.001"}, {"name": "betas", "val": ": tuple = (0.9, 0.999, 0.9999)"}, {"name": "alpha", "val": ": float = 5.0"}, {"name": "t_alpha", "val": ": typing.Optional[int] = None"}, {"name": "t_beta3", "val": ": typing.Optional[int] = None"}, {"name": "eps", "val": ": float = 1e-08"}, {"name": "weight_decay", "val": ": float = 0.01"}, {"name": "min_8bit_size", "val": ": int = 4096"}]

initbitsandbytes.optim.PagedAdEMAMix32bit.inithttps://github.com/bitsandbytes-foundation/bitsandbytes/blob/v0.48.2/bitsandbytes/optim/ademamix.py#L393[{"name": "params", "val": ": Iterable"}, {"name": "lr", "val": ": float = 0.001"}, {"name": "betas", "val": ": tuple = (0.9, 0.999, 0.9999)"}, {"name": "alpha", "val": ": float = 5.0"}, {"name": "t_alpha", "val": ": typing.Optional[int] = None"}, {"name": "t_beta3", "val": ": typing.Optional[int] = None"}, {"name": "eps", "val": ": float = 1e-08"}, {"name": "weight_decay", "val": ": float = 0.01"}, {"name": "min_8bit_size", "val": ": int = 4096"}]

Xet Storage Details

Size:
11.1 kB
·
Xet hash:
723fe9327b9d9edd06d2dc53a977978f61555bd2b08009d3833546a3173c56db

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.