Spaces:
Running
on
A100
Running
on
A100
MekkCyber
committed on
Commit
·
f71fb6d
1
Parent(s):
e5bb0c6
update repo name
Browse files
app.py
CHANGED
|
@@ -73,9 +73,7 @@ def quantize_model(model_name, quantization_type, group_size=128, auth_token=Non
|
|
| 73 |
def save_model(model, model_name, quantization_type, group_size=128, username=None, auth_token=None, quantized_model_name=None):
|
| 74 |
print("Saving quantized model")
|
| 75 |
with tempfile.TemporaryDirectory() as tmpdirname:
|
| 76 |
-
|
| 77 |
-
with open(os.path.join(tmpdirname, "README.md"), "w") as f:
|
| 78 |
-
f.write(model_card)
|
| 79 |
|
| 80 |
model.save_pretrained(tmpdirname, safe_serialization=False, use_auth_token=auth_token.token)
|
| 81 |
if quantized_model_name :
|
|
@@ -86,6 +84,9 @@ def save_model(model, model_name, quantization_type, group_size=128, username=No
|
|
| 86 |
else :
|
| 87 |
repo_name = f"{username}/{model_name.split('/')[-1]}-torchao-{quantization_type.lower()}"
|
| 88 |
|
|
|
|
|
|
|
|
|
|
| 89 |
# Push to Hub
|
| 90 |
api = HfApi(token=auth_token.token)
|
| 91 |
api.create_repo(repo_name, exist_ok=True)
|
|
@@ -107,8 +108,11 @@ def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToke
|
|
| 107 |
return exists_message
|
| 108 |
if quantization_type == "int4_weight_only" and device == "cpu" :
|
| 109 |
return "int4_weight_only not supported on cpu"
|
| 110 |
-
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
|
| 114 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
|
|
|
| 73 |
def save_model(model, model_name, quantization_type, group_size=128, username=None, auth_token=None, quantized_model_name=None):
|
| 74 |
print("Saving quantized model")
|
| 75 |
with tempfile.TemporaryDirectory() as tmpdirname:
|
| 76 |
+
|
|
|
|
|
|
|
| 77 |
|
| 78 |
model.save_pretrained(tmpdirname, safe_serialization=False, use_auth_token=auth_token.token)
|
| 79 |
if quantized_model_name :
|
|
|
|
| 84 |
else :
|
| 85 |
repo_name = f"{username}/{model_name.split('/')[-1]}-torchao-{quantization_type.lower()}"
|
| 86 |
|
| 87 |
+
model_card = create_model_card(repo_name, quantization_type, group_size)
|
| 88 |
+
with open(os.path.join(tmpdirname, "README.md"), "w") as f:
|
| 89 |
+
f.write(model_card)
|
| 90 |
# Push to Hub
|
| 91 |
api = HfApi(token=auth_token.token)
|
| 92 |
api.create_repo(repo_name, exist_ok=True)
|
|
|
|
| 108 |
return exists_message
|
| 109 |
if quantization_type == "int4_weight_only" and device == "cpu" :
|
| 110 |
return "int4_weight_only not supported on cpu"
|
| 111 |
+
try :
|
| 112 |
+
quantized_model = quantize_model(model_name, quantization_type, group_size, oauth_token, profile.username, device)
|
| 113 |
+
return save_model(quantized_model, model_name, quantization_type, group_size, profile.username, oauth_token, quantized_model_name)
|
| 114 |
+
except Exception as e :
|
| 115 |
+
return e
|
| 116 |
|
| 117 |
|
| 118 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|