YAML Metadata Warning: empty or missing YAML metadata in repo card
Check out the documentation for more information.
Llama-3.2-1B-Instruct quantized to FP8 using AutoFP8.
This model has tensor-wise FP8 scale factors.
AutoFP8 is old; I had to apply the following patch to get it to run:
diff --git a/auto_fp8/modeling.py b/auto_fp8/modeling.py
index 04a9e71..9fdacda 100644
--- a/auto_fp8/modeling.py
+++ b/auto_fp8/modeling.py
@@ -58,10 +58,10 @@ class AutoFP8ForCausalLM:
# Parameters related to loading from Hugging Face Hub
cache_dir = model_init_kwargs.pop("cache_dir", None)
force_download = model_init_kwargs.pop("force_download", False)
- resume_download = model_init_kwargs.pop("resume_download", False)
+ #resume_download = model_init_kwargs.pop("resume_download", False)
proxies = model_init_kwargs.pop("proxies", None)
local_files_only = model_init_kwargs.pop("local_files_only", False)
- use_auth_token = model_init_kwargs.pop("use_auth_token", None)
+ #use_auth_token = model_init_kwargs.pop("use_auth_token", None)
revision = model_init_kwargs.pop("revision", None)
subfolder = model_init_kwargs.pop("subfolder", "")
commit_hash = model_init_kwargs.pop("_commit_hash", None)
@@ -70,9 +70,9 @@ class AutoFP8ForCausalLM:
"cache_dir": cache_dir,
"force_download": force_download,
"proxies": proxies,
- "resume_download": resume_download,
+ #"resume_download": resume_download,
"local_files_only": local_files_only,
- "use_auth_token": use_auth_token,
+ #"use_auth_token": use_auth_token,
"revision": revision,
"subfolder": subfolder,
"_commit_hash": commit_hash,
The tokenizer_class field has also been manually updated to use PreTrainedTokenizerFast.
- Downloads last month
- 288
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support