Sebastien De Greef
committed on
Commit
·
5159911
1
Parent(s):
c0f4fad
chore: Refactor save_model function to support multiple quantization methods
Browse files
app.py
CHANGED
|
@@ -23,7 +23,7 @@ logger.debug('This is a debug message')
|
|
| 23 |
hf_user = None
|
| 24 |
hfApi = HfApi()
|
| 25 |
try:
|
| 26 |
-
hf_user = hfApi.whoami()
|
| 27 |
except Exception as e:
|
| 28 |
hf_user = "not logged in"
|
| 29 |
|
|
@@ -39,7 +39,17 @@ model_options = [
|
|
| 39 |
"unsloth/Phi-3-medium-4k-instruct",
|
| 40 |
"unsloth/mistral-7b-bnb-4bit",
|
| 41 |
"unsloth/gemma-2-9b-bnb-4bit",
|
|
|
|
| 42 |
"unsloth/gemma-2-27b-bnb-4bit", # Gemma 2x faster!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
]
|
| 44 |
gpu_stats = torch.cuda.get_device_properties(0)
|
| 45 |
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
|
|
@@ -115,21 +125,23 @@ def inference(prompt, input_text):
|
|
| 115 |
result = tokenizer.batch_decode(outputs)
|
| 116 |
return result[0], gr.update(visible=True, interactive=True)
|
| 117 |
|
| 118 |
-
def save_model(model_name, hub_model_name, hub_token, gguf_16bit, gguf_8bit, gguf_4bit, gguf_custom, gguf_custom_value, merge_16bit, merge_4bit, just_lora, push_to_hub):
|
| 119 |
global model, tokenizer
|
|
|
|
|
|
|
|
|
|
| 120 |
if gguf_custom:
|
| 121 |
gguf_custom_value = gguf_custom_value
|
|
|
|
| 122 |
else:
|
| 123 |
gguf_custom_value = None
|
| 124 |
|
| 125 |
if gguf_16bit:
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
else:
|
| 132 |
-
gguf = None
|
| 133 |
|
| 134 |
if merge_16bit:
|
| 135 |
merge = "16bit"
|
|
@@ -142,14 +154,23 @@ def save_model(model_name, hub_model_name, hub_token, gguf_16bit, gguf_8bit, ggu
|
|
| 142 |
|
| 143 |
#model.push_to_hub_gguf("hf/model", tokenizer, quantization_method = "f16", token = "")
|
| 144 |
if push_to_hub:
|
| 145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
return "Model saved", gr.update(visible=True, interactive=True)
|
| 147 |
|
|
|
|
|
|
|
|
|
|
| 148 |
# Create the Gradio interface
|
| 149 |
with gr.Blocks(title="Unsloth fine-tuning") as demo:
|
|
|
|
|
|
|
|
|
|
| 150 |
with gr.Column():
|
| 151 |
gr.Image("unsloth.png", width="300px", interactive=False, show_download_button=False, show_label=False)
|
| 152 |
-
gr.LoginButton()
|
| 153 |
with gr.Column():
|
| 154 |
gr.Markdown(f"**User:** {hf_user}\n\n**GPU Information:** {gpu_stats.name} ({max_memory} GB)\n\n[Unsloth Docs](http://docs.unsloth.com/)\n\n[Unsloth GitHub](https://github.com/unslothai/unsloth)")
|
| 155 |
with gr.Tab("Base Model Parameters"):
|
|
@@ -282,8 +303,6 @@ with gr.Blocks(title="Unsloth fine-tuning") as demo:
|
|
| 282 |
train_btn.click(train_model, inputs=[model_name, lora_r, lora_alpha, lora_dropout, per_device_train_batch_size, warmup_steps, max_steps, gradient_accumulation_steps, logging_steps, log_to_tensorboard, optim, learning_rate, weight_decay, lr_scheduler_type, seed, output_dir], outputs=[train_output, train_btn])
|
| 283 |
|
| 284 |
with gr.Tab("Save & Push Options"):
|
| 285 |
-
|
| 286 |
-
|
| 287 |
|
| 288 |
with gr.Row():
|
| 289 |
gr.Markdown("### Merging Options")
|
|
|
|
| 23 |
hf_user = None
|
| 24 |
hfApi = HfApi()
|
| 25 |
try:
|
| 26 |
+
hf_user = hfApi.whoami()["name"]
|
| 27 |
except Exception as e:
|
| 28 |
hf_user = "not logged in"
|
| 29 |
|
|
|
|
| 39 |
"unsloth/Phi-3-medium-4k-instruct",
|
| 40 |
"unsloth/mistral-7b-bnb-4bit",
|
| 41 |
"unsloth/gemma-2-9b-bnb-4bit",
|
| 42 |
+
"unsloth/gemma-2-9b-bnb-4bit-instruct",
|
| 43 |
"unsloth/gemma-2-27b-bnb-4bit", # Gemma 2x faster!
|
| 44 |
+
"unsloth/gemma-2-27b-bnb-4bit-instruct", # Gemma 2x faster!
|
| 45 |
+
"unsloth/Qwen2-1.5B-bnb-4bit",
|
| 46 |
+
"unsloth/Qwen2-1.5B-bnb-4bit-instruct",
|
| 47 |
+
"unsloth/Qwen2-7B-bnb-4bit",
|
| 48 |
+
"unsloth/Qwen2-7B-bnb-4bit-instruct",
|
| 49 |
+
"unsloth/Qwen2-72B-bnb-4bit",
|
| 50 |
+
"unsloth/Qwen2-72B-bnb-4bit-instruct",
|
| 51 |
+
"unsloth/yi-6b-bnb-4bit",
|
| 52 |
+
"unsloth/yi-34b-bnb-4bit",
|
| 53 |
]
|
| 54 |
gpu_stats = torch.cuda.get_device_properties(0)
|
| 55 |
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
|
|
|
|
| 125 |
result = tokenizer.batch_decode(outputs)
|
| 126 |
return result[0], gr.update(visible=True, interactive=True)
|
| 127 |
|
| 128 |
+
def save_model(model_name, hub_model_name, hub_token, gguf_16bit, gguf_8bit, gguf_4bit, gguf_custom, gguf_custom_value, merge_16bit, merge_4bit, just_lora, push_to_hub, progress=gr.Progress()):
|
| 129 |
global model, tokenizer
|
| 130 |
+
|
| 131 |
+
quants = []
|
| 132 |
+
|
| 133 |
if gguf_custom:
|
| 134 |
gguf_custom_value = gguf_custom_value
|
| 135 |
+
quants.append(gguf_custom_value)
|
| 136 |
else:
|
| 137 |
gguf_custom_value = None
|
| 138 |
|
| 139 |
if gguf_16bit:
|
| 140 |
+
quants.append("f16")
|
| 141 |
+
if gguf_8bit:
|
| 142 |
+
quants.append("q8_0")
|
| 143 |
+
if gguf_4bit:
|
| 144 |
+
quants.append("q4_k_m")
|
|
|
|
|
|
|
| 145 |
|
| 146 |
if merge_16bit:
|
| 147 |
merge = "16bit"
|
|
|
|
| 154 |
|
| 155 |
#model.push_to_hub_gguf("hf/model", tokenizer, quantization_method = "f16", token = "")
|
| 156 |
if push_to_hub:
|
| 157 |
+
current_quant = 0
|
| 158 |
+
for q in quants:
|
| 159 |
+
progress(current_quant/len(quants), desc=f"Pushing model {model_name} with {q} to HuggingFace Hub")
|
| 160 |
+
model.push_to_hub_gguf(hub_model_name, tokenizer, quantization_method=q, token=hub_token)
|
| 161 |
+
current_quant += 1
|
| 162 |
return "Model saved", gr.update(visible=True, interactive=True)
|
| 163 |
|
| 164 |
+
def username(profile: gr.OAuthProfile | None):
|
| 165 |
+
return profile["name"] if profile else "not logged in"
|
| 166 |
+
|
| 167 |
# Create the Gradio interface
|
| 168 |
with gr.Blocks(title="Unsloth fine-tuning") as demo:
|
| 169 |
+
gr.LoginButton()
|
| 170 |
+
logged_user = gr.Markdown(f"**User:** {hf_user}")
|
| 171 |
+
demo.load(username, inputs=None, outputs=logged_user)
|
| 172 |
with gr.Column():
|
| 173 |
gr.Image("unsloth.png", width="300px", interactive=False, show_download_button=False, show_label=False)
|
|
|
|
| 174 |
with gr.Column():
|
| 175 |
gr.Markdown(f"**User:** {hf_user}\n\n**GPU Information:** {gpu_stats.name} ({max_memory} GB)\n\n[Unsloth Docs](http://docs.unsloth.com/)\n\n[Unsloth GitHub](https://github.com/unslothai/unsloth)")
|
| 176 |
with gr.Tab("Base Model Parameters"):
|
|
|
|
| 303 |
train_btn.click(train_model, inputs=[model_name, lora_r, lora_alpha, lora_dropout, per_device_train_batch_size, warmup_steps, max_steps, gradient_accumulation_steps, logging_steps, log_to_tensorboard, optim, learning_rate, weight_decay, lr_scheduler_type, seed, output_dir], outputs=[train_output, train_btn])
|
| 304 |
|
| 305 |
with gr.Tab("Save & Push Options"):
|
|
|
|
|
|
|
| 306 |
|
| 307 |
with gr.Row():
|
| 308 |
gr.Markdown("### Merging Options")
|