Spaces:
Runtime error
Runtime error
Rúben Almeida
committed on
Commit
·
edebf90
1
Parent(s):
0735f93
Exception handling for non-supported AWQ quantization not in the correct place
Browse files- main.py +5 -3
- tests/.env.example +3 -1
- tests/test_awq.py +46 -0
- tests/test_convertion.py +0 -31
- tests/test_gguf.py +0 -0
- tests/test_gptq.py +0 -0
main.py
CHANGED
|
@@ -65,14 +65,16 @@ def redirect_to_docs():
|
|
| 65 |
### FastAPI Endpoints
|
| 66 |
@app.post("/convert_awq", response_model=None)
|
| 67 |
def convert(request: AWQConvertionRequest)->Union[FileResponse, dict]:
|
| 68 |
-
model = AutoAWQForCausalLM.from_pretrained(request.hf_model_name)
|
| 69 |
-
tokenizer = AutoTokenizer.from_pretrained(request.hf_tokenizer_name or request.hf_model_name, trust_remote_code=True)
|
| 70 |
|
| 71 |
try:
|
| 72 |
-
model.
|
| 73 |
except TypeError as e:
|
| 74 |
raise HTTPException(status_code=400, detail=f"Is this model supported by AWQ Quantization? Check:https://github.com/mit-han-lab/llm-awq?tab=readme-ov-file {e}")
|
| 75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
if request.hf_push_repo:
|
| 77 |
model.save_quantized(request.hf_push_repo)
|
| 78 |
tokenizer.save_pretrained(request.hf_push_repo)
|
|
|
|
| 65 |
### FastAPI Endpoints
|
| 66 |
@app.post("/convert_awq", response_model=None)
|
| 67 |
def convert(request: AWQConvertionRequest)->Union[FileResponse, dict]:
|
|
|
|
|
|
|
| 68 |
|
| 69 |
try:
|
| 70 |
+
model = AutoAWQForCausalLM.from_pretrained(request.hf_model_name)
|
| 71 |
except TypeError as e:
|
| 72 |
raise HTTPException(status_code=400, detail=f"Is this model supported by AWQ Quantization? Check:https://github.com/mit-han-lab/llm-awq?tab=readme-ov-file {e}")
|
| 73 |
|
| 74 |
+
tokenizer = AutoTokenizer.from_pretrained(request.hf_tokenizer_name or request.hf_model_name, trust_remote_code=True)
|
| 75 |
+
|
| 76 |
+
model.quantize(tokenizer, quant_config=request.quantization_config.model_dump())
|
| 77 |
+
|
| 78 |
if request.hf_push_repo:
|
| 79 |
model.save_quantized(request.hf_push_repo)
|
| 80 |
tokenizer.save_pretrained(request.hf_push_repo)
|
tests/.env.example
CHANGED
|
@@ -1,2 +1,4 @@
|
|
| 1 |
ENDPOINT=
|
| 2 |
-
HF_TOKEN=
|
|
|
|
|
|
|
|
|
| 1 |
ENDPOINT=
|
| 2 |
+
HF_TOKEN=
|
| 3 |
+
HF_PUSH_REPO=
|
| 4 |
+
HF_ORGANIZATION=
|
tests/test_awq.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
import requests
|
| 3 |
+
from environs import Env
|
| 4 |
+
from huggingface_hub import login
|
| 5 |
+
|
| 6 |
+
env = Env()
|
| 7 |
+
env.read_env(override=True)
|
| 8 |
+
|
| 9 |
+
def test_incompatible_model():
|
| 10 |
+
with pytest.raises(requests.exceptions.HTTPError):
|
| 11 |
+
response = requests.post(
|
| 12 |
+
f"{env.str('ENDPOINT')}/convert_awq",
|
| 13 |
+
json={
|
| 14 |
+
"hf_model_name": "gpt2",
|
| 15 |
+
"hf_tokenizer_name": "gpt2",
|
| 16 |
+
"hf_push_repo": None,
|
| 17 |
+
}
|
| 18 |
+
)
|
| 19 |
+
assert response.status_code == 400
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def test_convert_download():
|
| 23 |
+
response = requests.post(
|
| 24 |
+
f"{env.str('ENDPOINT')}/convert_awq",
|
| 25 |
+
json={
|
| 26 |
+
"hf_model_name": "Qwen/Qwen2.5-14B-Instruct",
|
| 27 |
+
}
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
response.raise_for_status()
|
| 31 |
+
|
| 32 |
+
assert response.content_type == 'application/zip'
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def test_convert_push():
|
| 36 |
+
model_name = "Qwen/Qwen2.5-14B-Instruct"
|
| 37 |
+
|
| 38 |
+
response = requests.post(
|
| 39 |
+
f"{env.str('ENDPOINT')}/convert_awq",
|
| 40 |
+
json={
|
| 41 |
+
"hf_model_name": "Qwen/Qwen2.5-14B-Instruct",
|
| 42 |
+
"hf_push_repo": env.str("HF_PUSH_REPO") or f"{env.str('HF_ORGANIZATION')}/{model_name.split('/')[-1]}-AWQ",
|
| 43 |
+
}
|
| 44 |
+
)
|
| 45 |
+
|
| 46 |
+
response.raise_for_status()
|
tests/test_convertion.py
DELETED
|
@@ -1,31 +0,0 @@
|
|
| 1 |
-
import pytest
|
| 2 |
-
import requests
|
| 3 |
-
from environs import Env
|
| 4 |
-
from huggingface_hub import login
|
| 5 |
-
|
| 6 |
-
env = Env()
|
| 7 |
-
env.read_env(override=True)
|
| 8 |
-
|
| 9 |
-
@pytest.mark.parametrize("model_name", [
|
| 10 |
-
"gpt2",
|
| 11 |
-
])
|
| 12 |
-
def test_convert_download(model_name):
|
| 13 |
-
if env.str("HF_TOKEN"):
|
| 14 |
-
login(token=env("HF_TOKEN"))
|
| 15 |
-
|
| 16 |
-
response = requests.post(
|
| 17 |
-
env.str("ENDPOINT"),
|
| 18 |
-
json={
|
| 19 |
-
"hf_model_name": model_name,
|
| 20 |
-
"hf_tokenizer_name": model_name,
|
| 21 |
-
"hf_push_repo": None,
|
| 22 |
-
}
|
| 23 |
-
)
|
| 24 |
-
|
| 25 |
-
response.raise_for_status()
|
| 26 |
-
|
| 27 |
-
assert response.content_type == 'application/zip'
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
def test_convert_push():
|
| 31 |
-
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_gguf.py
ADDED
|
File without changes
|
tests/test_gptq.py
ADDED
|
File without changes
|