Update app.py
Browse files
app.py
CHANGED
|
@@ -91,11 +91,12 @@ class CustomTrainerCallback(TrainerCallback):
|
|
| 91 |
try:
|
| 92 |
upload_folder(
|
| 93 |
folder_path=ckpt_path,
|
| 94 |
-
path_in_repo=
|
| 95 |
repo_id=self.repo_id,
|
| 96 |
-
token=self.hf_token
|
|
|
|
| 97 |
)
|
| 98 |
-
job.add_log(f"Cloud: Checkpoint {step} synced to
|
| 99 |
except Exception:
|
| 100 |
pass
|
| 101 |
|
|
@@ -200,11 +201,12 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
|
|
| 200 |
original_model = AutoModelForCausalLM.from_pretrained(
|
| 201 |
model_name,
|
| 202 |
trust_remote_code=True,
|
| 203 |
-
|
| 204 |
-
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
|
| 205 |
-
low_cpu_mem_usage=True
|
| 206 |
)
|
| 207 |
|
|
|
|
|
|
|
|
|
|
| 208 |
peft_config = LoraConfig(
|
| 209 |
r=int(lora_r),
|
| 210 |
lora_alpha=int(lora_alpha),
|
|
@@ -260,10 +262,11 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
|
|
| 260 |
model_name,
|
| 261 |
return_dict=True,
|
| 262 |
torch_dtype=torch.float16,
|
| 263 |
-
trust_remote_code=True
|
| 264 |
-
device_map="auto",
|
| 265 |
-
low_cpu_mem_usage=True
|
| 266 |
)
|
|
|
|
|
|
|
|
|
|
| 267 |
|
| 268 |
model_to_merge = PeftModel.from_pretrained(base_reload, output_dir)
|
| 269 |
final_model = model_to_merge.merge_and_unload()
|
|
@@ -286,7 +289,12 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
|
|
| 286 |
inject_json(c_gen, "generation_config.json")
|
| 287 |
|
| 288 |
job.set_progress(0.95, "Network: Uploading final model...")
|
| 289 |
-
upload_folder(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
|
| 291 |
job.repo_url = f"https://huggingface.co/{full_repo_id}"
|
| 292 |
job.status = "COMPLETED"
|
|
@@ -345,7 +353,7 @@ def load_from_url(request: gr.Request):
|
|
| 345 |
pass
|
| 346 |
return gr.update(selected="launch_tab"), ""
|
| 347 |
|
| 348 |
-
with gr.Blocks(title="Nucleus Enterprise") as demo:
|
| 349 |
with gr.Column():
|
| 350 |
gr.Markdown("# ⚛️ NUCLEUS ENTERPRISE")
|
| 351 |
gr.Markdown("Autonomous LLM Foundry | V5.0 Stable")
|
|
|
|
| 91 |
try:
|
| 92 |
upload_folder(
|
| 93 |
folder_path=ckpt_path,
|
| 94 |
+
path_in_repo=".",
|
| 95 |
repo_id=self.repo_id,
|
| 96 |
+
token=self.hf_token,
|
| 97 |
+
commit_message=f"Update from checkpoint {step}"
|
| 98 |
)
|
| 99 |
+
job.add_log(f"Cloud: Checkpoint {step} synced to Root")
|
| 100 |
except Exception:
|
| 101 |
pass
|
| 102 |
|
|
|
|
| 201 |
original_model = AutoModelForCausalLM.from_pretrained(
|
| 202 |
model_name,
|
| 203 |
trust_remote_code=True,
|
| 204 |
+
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
|
|
|
|
|
|
|
| 205 |
)
|
| 206 |
|
| 207 |
+
if torch.cuda.is_available():
|
| 208 |
+
original_model = original_model.cuda()
|
| 209 |
+
|
| 210 |
peft_config = LoraConfig(
|
| 211 |
r=int(lora_r),
|
| 212 |
lora_alpha=int(lora_alpha),
|
|
|
|
| 262 |
model_name,
|
| 263 |
return_dict=True,
|
| 264 |
torch_dtype=torch.float16,
|
| 265 |
+
trust_remote_code=True
|
|
|
|
|
|
|
| 266 |
)
|
| 267 |
+
|
| 268 |
+
if torch.cuda.is_available():
|
| 269 |
+
base_reload = base_reload.cuda()
|
| 270 |
|
| 271 |
model_to_merge = PeftModel.from_pretrained(base_reload, output_dir)
|
| 272 |
final_model = model_to_merge.merge_and_unload()
|
|
|
|
| 289 |
inject_json(c_gen, "generation_config.json")
|
| 290 |
|
| 291 |
job.set_progress(0.95, "Network: Uploading final model...")
|
| 292 |
+
upload_folder(
|
| 293 |
+
folder_path=final_path,
|
| 294 |
+
path_in_repo=".",
|
| 295 |
+
repo_id=full_repo_id,
|
| 296 |
+
token=hf_token
|
| 297 |
+
)
|
| 298 |
|
| 299 |
job.repo_url = f"https://huggingface.co/{full_repo_id}"
|
| 300 |
job.status = "COMPLETED"
|
|
|
|
| 353 |
pass
|
| 354 |
return gr.update(selected="launch_tab"), ""
|
| 355 |
|
| 356 |
+
with gr.Blocks(title="Nucleus Enterprise", theme=gr.themes.Base()) as demo:
|
| 357 |
with gr.Column():
|
| 358 |
gr.Markdown("# ⚛️ NUCLEUS ENTERPRISE")
|
| 359 |
gr.Markdown("Autonomous LLM Foundry | V5.0 Stable")
|