Ksjsjjdj committed on
Commit
6d6218a
·
verified ·
1 Parent(s): 7701af4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -22
app.py CHANGED
@@ -60,8 +60,10 @@ class JobStatus:
60
  self.add_log(msg)
61
 
62
  class CustomTrainerCallback(TrainerCallback):
63
- def __init__(self, job_id):
64
  self.job_id = job_id
 
 
65
 
66
  def on_step_end(self, args, state, control, **kwargs):
67
  if self.job_id in JOBS:
@@ -77,7 +79,25 @@ class CustomTrainerCallback(TrainerCallback):
77
  def on_save(self, args, state, control, **kwargs):
78
  if self.job_id in JOBS:
79
  job = JOBS[self.job_id]
80
- job.add_log(f"System: Checkpoint saved at step {state.global_step}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  return control
82
 
83
  @spaces.GPU(duration=300)
@@ -100,7 +120,9 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
100
  login(token=hf_token)
101
  try:
102
  username = whoami()["name"]
103
- job.add_log(f"Auth: Verified as {username}")
 
 
104
  except:
105
  raise Exception("Authentication Failed")
106
 
@@ -215,7 +237,7 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
215
  model=peft_model,
216
  train_dataset=dataset_iterable,
217
  args=training_args,
218
- callbacks=[CustomTrainerCallback(job_id)]
219
  )
220
 
221
  job.set_progress(0.2, "Training: Phase initiated...")
@@ -255,12 +277,10 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
255
  inject_json(c_tok, "tokenizer_config.json")
256
  inject_json(c_gen, "generation_config.json")
257
 
258
- job.set_progress(0.95, "Network: Uploading to HuggingFace...")
259
- full_repo = f"{username}/{new_repo_name}"
260
- create_repo(full_repo, token=hf_token, exist_ok=True)
261
- upload_folder(folder_path=final_path, repo_id=full_repo, token=hf_token)
262
 
263
- job.repo_url = f"https://huggingface.co/{full_repo}"
264
  job.status = "COMPLETED"
265
  job.set_progress(1.0, "System: Mission Accomplished")
266
 
@@ -317,19 +337,7 @@ def load_from_url(request: gr.Request):
317
  pass
318
  return gr.update(selected="launch_tab"), ""
319
 
320
- css = """
321
- @import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;600&family=Inter:wght@400;700&display=swap');
322
- body { background: #0b0f19; color: #fff; font-family: 'Inter', sans-serif; }
323
- .gradio-container { border: 1px solid #2d3748; border-radius: 8px; background: #111827; }
324
- h1 { color: #6366f1; text-align: center; font-weight: 800; text-transform: uppercase; letter-spacing: 2px; }
325
- .gr-button.primary { background: #4f46e5; border: none; color: white; font-weight: bold; }
326
- .gr-button.primary:hover { background: #4338ca; }
327
- .gr-input, .gr-textarea, .gr-box { background: #1f2937 !important; border-color: #374151 !important; color: #e5e7eb !important; }
328
- .gr-code { background: #000 !important; color: #0f0 !important; font-family: 'IBM Plex Mono', monospace; border: 1px solid #333; }
329
- #status-badge { font-weight: bold; padding: 4px 8px; border-radius: 4px; background: #374151; display: inline-block; }
330
- """
331
-
332
- with gr.Blocks(title="Nucleus Enterprise", css=css, theme=gr.themes.Base()) as demo:
333
  with gr.Column():
334
  gr.Markdown("# ⚛️ NUCLEUS ENTERPRISE")
335
  gr.Markdown("Autonomous LLM Foundry | V5.0 Stable")
 
60
  self.add_log(msg)
61
 
62
  class CustomTrainerCallback(TrainerCallback):
63
+ def __init__(self, job_id, hf_token, repo_id):
64
  self.job_id = job_id
65
+ self.hf_token = hf_token
66
+ self.repo_id = repo_id
67
 
68
  def on_step_end(self, args, state, control, **kwargs):
69
  if self.job_id in JOBS:
 
79
  def on_save(self, args, state, control, **kwargs):
80
  if self.job_id in JOBS:
81
  job = JOBS[self.job_id]
82
+ step = state.global_step
83
+ ckpt_name = f"checkpoint-{step}"
84
+ ckpt_path = os.path.join(args.output_dir, ckpt_name)
85
+
86
+ job.add_log(f"System: Local checkpoint saved ({ckpt_name})")
87
+
88
+ def _upload_bg():
89
+ try:
90
+ upload_folder(
91
+ folder_path=ckpt_path,
92
+ path_in_repo=ckpt_name,
93
+ repo_id=self.repo_id,
94
+ token=self.hf_token
95
+ )
96
+ job.add_log(f"Cloud: Checkpoint {step} synced to Hub")
97
+ except Exception:
98
+ pass
99
+
100
+ threading.Thread(target=_upload_bg, daemon=True).start()
101
  return control
102
 
103
  @spaces.GPU(duration=300)
 
120
  login(token=hf_token)
121
  try:
122
  username = whoami()["name"]
123
+ full_repo_id = f"{username}/{new_repo_name}"
124
+ create_repo(full_repo_id, token=hf_token, exist_ok=True)
125
+ job.add_log(f"Auth: Verified as {username}. Target: {full_repo_id}")
126
  except:
127
  raise Exception("Authentication Failed")
128
 
 
237
  model=peft_model,
238
  train_dataset=dataset_iterable,
239
  args=training_args,
240
+ callbacks=[CustomTrainerCallback(job_id, hf_token, full_repo_id)]
241
  )
242
 
243
  job.set_progress(0.2, "Training: Phase initiated...")
 
277
  inject_json(c_tok, "tokenizer_config.json")
278
  inject_json(c_gen, "generation_config.json")
279
 
280
+ job.set_progress(0.95, "Network: Uploading final model...")
281
+ upload_folder(folder_path=final_path, repo_id=full_repo_id, token=hf_token)
 
 
282
 
283
+ job.repo_url = f"https://huggingface.co/{full_repo_id}"
284
  job.status = "COMPLETED"
285
  job.set_progress(1.0, "System: Mission Accomplished")
286
 
 
337
  pass
338
  return gr.update(selected="launch_tab"), ""
339
 
340
+ with gr.Blocks(title="Nucleus Enterprise", theme=gr.themes.Base()) as demo:
 
 
 
 
 
 
 
 
 
 
 
 
341
  with gr.Column():
342
  gr.Markdown("# ⚛️ NUCLEUS ENTERPRISE")
343
  gr.Markdown("Autonomous LLM Foundry | V5.0 Stable")