Update app.py
Browse files
app.py
CHANGED
|
@@ -60,8 +60,10 @@ class JobStatus:
|
|
| 60 |
self.add_log(msg)
|
| 61 |
|
| 62 |
class CustomTrainerCallback(TrainerCallback):
|
| 63 |
-
def __init__(self, job_id):
|
| 64 |
self.job_id = job_id
|
|
|
|
|
|
|
| 65 |
|
| 66 |
def on_step_end(self, args, state, control, **kwargs):
|
| 67 |
if self.job_id in JOBS:
|
|
@@ -77,7 +79,25 @@ class CustomTrainerCallback(TrainerCallback):
|
|
| 77 |
def on_save(self, args, state, control, **kwargs):
|
| 78 |
if self.job_id in JOBS:
|
| 79 |
job = JOBS[self.job_id]
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
return control
|
| 82 |
|
| 83 |
@spaces.GPU(duration=300)
|
|
@@ -100,7 +120,9 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
|
|
| 100 |
login(token=hf_token)
|
| 101 |
try:
|
| 102 |
username = whoami()["name"]
|
| 103 |
-
|
|
|
|
|
|
|
| 104 |
except:
|
| 105 |
raise Exception("Authentication Failed")
|
| 106 |
|
|
@@ -215,7 +237,7 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
|
|
| 215 |
model=peft_model,
|
| 216 |
train_dataset=dataset_iterable,
|
| 217 |
args=training_args,
|
| 218 |
-
callbacks=[CustomTrainerCallback(job_id)]
|
| 219 |
)
|
| 220 |
|
| 221 |
job.set_progress(0.2, "Training: Phase initiated...")
|
|
@@ -255,12 +277,10 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
|
|
| 255 |
inject_json(c_tok, "tokenizer_config.json")
|
| 256 |
inject_json(c_gen, "generation_config.json")
|
| 257 |
|
| 258 |
-
job.set_progress(0.95, "Network: Uploading
|
| 259 |
-
|
| 260 |
-
create_repo(full_repo, token=hf_token, exist_ok=True)
|
| 261 |
-
upload_folder(folder_path=final_path, repo_id=full_repo, token=hf_token)
|
| 262 |
|
| 263 |
-
job.repo_url = f"https://huggingface.co/{
|
| 264 |
job.status = "COMPLETED"
|
| 265 |
job.set_progress(1.0, "System: Mission Accomplished")
|
| 266 |
|
|
@@ -317,19 +337,7 @@ def load_from_url(request: gr.Request):
|
|
| 317 |
pass
|
| 318 |
return gr.update(selected="launch_tab"), ""
|
| 319 |
|
| 320 |
-
|
| 321 |
-
@import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;600&family=Inter:wght@400;700&display=swap');
|
| 322 |
-
body { background: #0b0f19; color: #fff; font-family: 'Inter', sans-serif; }
|
| 323 |
-
.gradio-container { border: 1px solid #2d3748; border-radius: 8px; background: #111827; }
|
| 324 |
-
h1 { color: #6366f1; text-align: center; font-weight: 800; text-transform: uppercase; letter-spacing: 2px; }
|
| 325 |
-
.gr-button.primary { background: #4f46e5; border: none; color: white; font-weight: bold; }
|
| 326 |
-
.gr-button.primary:hover { background: #4338ca; }
|
| 327 |
-
.gr-input, .gr-textarea, .gr-box { background: #1f2937 !important; border-color: #374151 !important; color: #e5e7eb !important; }
|
| 328 |
-
.gr-code { background: #000 !important; color: #0f0 !important; font-family: 'IBM Plex Mono', monospace; border: 1px solid #333; }
|
| 329 |
-
#status-badge { font-weight: bold; padding: 4px 8px; border-radius: 4px; background: #374151; display: inline-block; }
|
| 330 |
-
"""
|
| 331 |
-
|
| 332 |
-
with gr.Blocks(title="Nucleus Enterprise", css=css, theme=gr.themes.Base()) as demo:
|
| 333 |
with gr.Column():
|
| 334 |
gr.Markdown("# ⚛️ NUCLEUS ENTERPRISE")
|
| 335 |
gr.Markdown("Autonomous LLM Foundry | V5.0 Stable")
|
|
|
|
| 60 |
self.add_log(msg)
|
| 61 |
|
| 62 |
class CustomTrainerCallback(TrainerCallback):
|
| 63 |
+
def __init__(self, job_id, hf_token, repo_id):
|
| 64 |
self.job_id = job_id
|
| 65 |
+
self.hf_token = hf_token
|
| 66 |
+
self.repo_id = repo_id
|
| 67 |
|
| 68 |
def on_step_end(self, args, state, control, **kwargs):
|
| 69 |
if self.job_id in JOBS:
|
|
|
|
| 79 |
def on_save(self, args, state, control, **kwargs):
|
| 80 |
if self.job_id in JOBS:
|
| 81 |
job = JOBS[self.job_id]
|
| 82 |
+
step = state.global_step
|
| 83 |
+
ckpt_name = f"checkpoint-{step}"
|
| 84 |
+
ckpt_path = os.path.join(args.output_dir, ckpt_name)
|
| 85 |
+
|
| 86 |
+
job.add_log(f"System: Local checkpoint saved ({ckpt_name})")
|
| 87 |
+
|
| 88 |
+
def _upload_bg():
|
| 89 |
+
try:
|
| 90 |
+
upload_folder(
|
| 91 |
+
folder_path=ckpt_path,
|
| 92 |
+
path_in_repo=ckpt_name,
|
| 93 |
+
repo_id=self.repo_id,
|
| 94 |
+
token=self.hf_token
|
| 95 |
+
)
|
| 96 |
+
job.add_log(f"Cloud: Checkpoint {step} synced to Hub")
|
| 97 |
+
except Exception:
|
| 98 |
+
pass
|
| 99 |
+
|
| 100 |
+
threading.Thread(target=_upload_bg, daemon=True).start()
|
| 101 |
return control
|
| 102 |
|
| 103 |
@spaces.GPU(duration=300)
|
|
|
|
| 120 |
login(token=hf_token)
|
| 121 |
try:
|
| 122 |
username = whoami()["name"]
|
| 123 |
+
full_repo_id = f"{username}/{new_repo_name}"
|
| 124 |
+
create_repo(full_repo_id, token=hf_token, exist_ok=True)
|
| 125 |
+
job.add_log(f"Auth: Verified as {username}. Target: {full_repo_id}")
|
| 126 |
except:
|
| 127 |
raise Exception("Authentication Failed")
|
| 128 |
|
|
|
|
| 237 |
model=peft_model,
|
| 238 |
train_dataset=dataset_iterable,
|
| 239 |
args=training_args,
|
| 240 |
+
callbacks=[CustomTrainerCallback(job_id, hf_token, full_repo_id)]
|
| 241 |
)
|
| 242 |
|
| 243 |
job.set_progress(0.2, "Training: Phase initiated...")
|
|
|
|
| 277 |
inject_json(c_tok, "tokenizer_config.json")
|
| 278 |
inject_json(c_gen, "generation_config.json")
|
| 279 |
|
| 280 |
+
job.set_progress(0.95, "Network: Uploading final model...")
|
| 281 |
+
upload_folder(folder_path=final_path, repo_id=full_repo_id, token=hf_token)
|
|
|
|
|
|
|
| 282 |
|
| 283 |
+
job.repo_url = f"https://huggingface.co/{full_repo_id}"
|
| 284 |
job.status = "COMPLETED"
|
| 285 |
job.set_progress(1.0, "System: Mission Accomplished")
|
| 286 |
|
|
|
|
| 337 |
pass
|
| 338 |
return gr.update(selected="launch_tab"), ""
|
| 339 |
|
| 340 |
+
with gr.Blocks(title="Nucleus Enterprise", theme=gr.themes.Base()) as demo:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 341 |
with gr.Column():
|
| 342 |
gr.Markdown("# ⚛️ NUCLEUS ENTERPRISE")
|
| 343 |
gr.Markdown("Autonomous LLM Foundry | V5.0 Stable")
|