Ksjsjjdj committed on
Commit
425571a
·
verified ·
1 Parent(s): 8163d13

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -11
app.py CHANGED
@@ -91,11 +91,12 @@ class CustomTrainerCallback(TrainerCallback):
91
  try:
92
  upload_folder(
93
  folder_path=ckpt_path,
94
- path_in_repo=ckpt_name,
95
  repo_id=self.repo_id,
96
- token=self.hf_token
 
97
  )
98
- job.add_log(f"Cloud: Checkpoint {step} synced to Hub")
99
  except Exception:
100
  pass
101
 
@@ -200,11 +201,12 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
200
  original_model = AutoModelForCausalLM.from_pretrained(
201
  model_name,
202
  trust_remote_code=True,
203
- device_map="auto",
204
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
205
- low_cpu_mem_usage=True
206
  )
207
 
 
 
 
208
  peft_config = LoraConfig(
209
  r=int(lora_r),
210
  lora_alpha=int(lora_alpha),
@@ -260,10 +262,11 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
260
  model_name,
261
  return_dict=True,
262
  torch_dtype=torch.float16,
263
- trust_remote_code=True,
264
- device_map="auto",
265
- low_cpu_mem_usage=True
266
  )
 
 
 
267
 
268
  model_to_merge = PeftModel.from_pretrained(base_reload, output_dir)
269
  final_model = model_to_merge.merge_and_unload()
@@ -286,7 +289,12 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
286
  inject_json(c_gen, "generation_config.json")
287
 
288
  job.set_progress(0.95, "Network: Uploading final model...")
289
- upload_folder(folder_path=final_path, repo_id=full_repo_id, token=hf_token)
 
 
 
 
 
290
 
291
  job.repo_url = f"https://huggingface.co/{full_repo_id}"
292
  job.status = "COMPLETED"
@@ -345,7 +353,7 @@ def load_from_url(request: gr.Request):
345
  pass
346
  return gr.update(selected="launch_tab"), ""
347
 
348
- with gr.Blocks(title="Nucleus Enterprise") as demo:
349
  with gr.Column():
350
  gr.Markdown("# ⚛️ NUCLEUS ENTERPRISE")
351
  gr.Markdown("Autonomous LLM Foundry | V5.0 Stable")
 
91
  try:
92
  upload_folder(
93
  folder_path=ckpt_path,
94
+ path_in_repo=".",
95
  repo_id=self.repo_id,
96
+ token=self.hf_token,
97
+ commit_message=f"Update from checkpoint {step}"
98
  )
99
+ job.add_log(f"Cloud: Checkpoint {step} synced to Root")
100
  except Exception:
101
  pass
102
 
 
201
  original_model = AutoModelForCausalLM.from_pretrained(
202
  model_name,
203
  trust_remote_code=True,
204
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
 
 
205
  )
206
 
207
+ if torch.cuda.is_available():
208
+ original_model = original_model.cuda()
209
+
210
  peft_config = LoraConfig(
211
  r=int(lora_r),
212
  lora_alpha=int(lora_alpha),
 
262
  model_name,
263
  return_dict=True,
264
  torch_dtype=torch.float16,
265
+ trust_remote_code=True
 
 
266
  )
267
+
268
+ if torch.cuda.is_available():
269
+ base_reload = base_reload.cuda()
270
 
271
  model_to_merge = PeftModel.from_pretrained(base_reload, output_dir)
272
  final_model = model_to_merge.merge_and_unload()
 
289
  inject_json(c_gen, "generation_config.json")
290
 
291
  job.set_progress(0.95, "Network: Uploading final model...")
292
+ upload_folder(
293
+ folder_path=final_path,
294
+ path_in_repo=".",
295
+ repo_id=full_repo_id,
296
+ token=hf_token
297
+ )
298
 
299
  job.repo_url = f"https://huggingface.co/{full_repo_id}"
300
  job.status = "COMPLETED"
 
353
  pass
354
  return gr.update(selected="launch_tab"), ""
355
 
356
+ with gr.Blocks(title="Nucleus Enterprise", theme=gr.themes.Base()) as demo:
357
  with gr.Column():
358
  gr.Markdown("# ⚛️ NUCLEUS ENTERPRISE")
359
  gr.Markdown("Autonomous LLM Foundry | V5.0 Stable")