Train

Sleeping

App Files Files Community

Ksjsjjdj committed on Nov 30, 2025

Commit

7701af4

verified ·

1 Parent(s): 572dfb0

Update app.py

Browse files

Files changed (1) hide show

app.py +101 -316

app.py CHANGED Viewed

@@ -41,8 +41,8 @@ JOBS = {}
 class JobStatus:
     def __init__(self):
-        self.id = str(uuid.uuid4())[:8]
-        self.status = "IDLE"
         self.progress = 0.0
         self.logs = []
         self.result = None
@@ -68,10 +68,16 @@ class CustomTrainerCallback(TrainerCallback):
             job = JOBS[self.job_id]
             if state.max_steps > 0:
                 prog = state.global_step / state.max_steps
-                job.progress = 0.4 + (prog * 0.5)
-                if state.global_step % 5 == 0:
                     loss = state.log_history[-1].get('loss', 'N/A') if state.log_history else '...'
-                    job.add_log(f"Step {state.global_step}/{state.max_steps} | Loss: {loss}")
         return control
 @spaces.GPU(duration=300)
@@ -80,24 +86,23 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
                           reasoning_mode, c_conf, c_tok, c_gen):
     job = JOBS[job_id]
-    job.status = "ACTIVE"
-    job.add_log("System: Initializing specialized environment...")
     try:
         if not hf_token.startswith("hf_"):
-            raise ValueError("Invalid HuggingFace Token format")
         os.environ["WANDB_DISABLED"] = "true"
         os.environ["HF_TOKEN"] = hf_token
         os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "true"
-        os.environ["TOKENIZERS_PARALLELISM"] = "false"
         login(token=hf_token)
         try:
             username = whoami()["name"]
-            job.add_log(f"Auth Success: Connected as {username}")
         except:
-            raise Exception("Authentication Failed: Check write permissions")
         if not hasattr(torch, 'xla'):
             class DummyXLA:
@@ -109,8 +114,8 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
         dataset_list = [item.strip() for item in raw_items if item.strip()]
         if reasoning_mode:
-            job.add_log("Mode: Reasoning Core Active (Math/Logic Injection)")
-            dataset_list.extend(["gsm8k", "openai/gsm8k", "microsoft/orca-math-word-problems-200k"])
         def load_single(ds_name, cfg):
             try:
@@ -124,7 +129,7 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
                 return None
         streams = []
-        job.set_progress(0.1, "Data: Establishing vector streams...")
         with ThreadPoolExecutor(max_workers=4) as executor:
             futures = []
@@ -137,54 +142,32 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
                     streams.append(res)
         if not streams:
-            raise Exception("Data Error: No valid streams available. Check dataset names.")
-        job.set_progress(0.2, f"Data: {len(streams)} streams locked and ready.")
-        job.add_log("Tokenizer: Loading configuration...")
-        try:
-            tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, padding_side="left", add_eos_token=True, add_bos_token=True)
-            if tokenizer.pad_token is None:
-                tokenizer.pad_token = tokenizer.eos_token
-        except Exception as e:
-            raise Exception(f"Tokenizer Load Failed: {str(e)}")
         def process_stream_generator():
             iterator = chain.from_iterable(streams)
             batch_buffer = []
             for item in iterator:
                 try:
-                    text = ""
-                    if "question" in item and "answer" in item:
-                        text = f"Question: {item['question']}\nAnswer: {item['answer']}"
-                    elif "text" in item:
-                        text = item["text"]
-                    elif "content" in item:
-                        text = item["content"]
-                    else:
-                        text = str(item)
-                    if len(text) < 10:
-                        continue
                     batch_buffer.append(text)
-                    if len(batch_buffer) >= 50:
                         for txt in batch_buffer:
-                            tokens = tokenizer(txt, truncation=True, max_length=2048)
                             tokens["labels"] = tokens["input_ids"].copy()
                             yield tokens
                         batch_buffer = []
                 except:
                     continue
-            for txt in batch_buffer:
-                tokens = tokenizer(txt, truncation=True, max_length=2048)
-                tokens["labels"] = tokens["input_ids"].copy()
-                yield tokens
-        job.set_progress(0.3, "Model: Loading base weights (4-bit/8-bit optimized)...")
         torch.cuda.empty_cache()
         gc.collect()
@@ -196,14 +179,10 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
             torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
         )
-        target_mods = ["q_proj", "k_proj", "v_proj", "dense", "fc1", "fc2", "o_proj"]
-        if reasoning_mode:
-            target_mods.extend(["gate_proj", "up_proj", "down_proj"])
         peft_config = LoraConfig(
-            r=int(lora_r) * 2 if reasoning_mode else int(lora_r),
             lora_alpha=int(lora_alpha),
-            target_modules=target_mods,
             bias="none",
             lora_dropout=lora_dropout,
             task_type="CAUSAL_LM"
@@ -214,25 +193,20 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
         output_dir = f"checkpoints/{job_id}"
-        total_steps = int(train_steps)
-        save_interval = max(10, int(total_steps * 0.2))
         training_args = TrainingArguments(
             output_dir=output_dir,
             per_device_train_batch_size=int(batch_size),
             gradient_accumulation_steps=4,
-            max_steps=total_steps,
             learning_rate=learning_rate,
             optim="adamw_torch",
-            logging_steps=5,
             save_strategy="steps",
-            save_steps=save_interval,
             save_total_limit=2,
             report_to="none",
             fp16=True if torch.cuda.is_available() else False,
-            lr_scheduler_type="cosine" if reasoning_mode else "linear",
-            disable_tqdm=True,
-            dataloader_pin_memory=False
         )
         dataset_iterable = IterableDataset.from_generator(process_stream_generator)
@@ -244,23 +218,19 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
             callbacks=[CustomTrainerCallback(job_id)]
         )
-        job.set_progress(0.4, f"Training: Matrix adaptation started (Checkpointing every {save_interval} steps)...")
         trainer.train()
-        job.set_progress(0.85, "Saving: Serializing adapters...")
-        trainer.save_model(output_dir)
-        job.set_progress(0.9, "Merging: Fusing weights and cleanup...")
         del peft_model
         del original_model
-        del trainer
         torch.cuda.empty_cache()
         gc.collect()
         base_reload = AutoModelForCausalLM.from_pretrained(
             model_name,
             return_dict=True,
-            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
             trust_remote_code=True,
             device_map="auto"
         )
@@ -278,36 +248,34 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
                     data = json.loads(content)
                     with open(os.path.join(final_path, fname), 'w') as f:
                         json.dump(data, f, indent=2)
-                    job.add_log(f"Config: Injected {fname}")
                 except:
-                    job.add_log(f"Config: Failed to inject {fname} (Invalid JSON)")
         inject_json(c_conf, "config.json")
         inject_json(c_tok, "tokenizer_config.json")
         inject_json(c_gen, "generation_config.json")
-        job.set_progress(0.95, "Upload: Pushing artifacts to Hub...")
         full_repo = f"{username}/{new_repo_name}"
         create_repo(full_repo, token=hf_token, exist_ok=True)
         upload_folder(folder_path=final_path, repo_id=full_repo, token=hf_token)
         job.repo_url = f"https://huggingface.co/{full_repo}"
         job.status = "COMPLETED"
-        job.set_progress(1.0, "System: Operation Finalized Successfully")
     except Exception as e:
         job.status = "FAILED"
         job.error = str(e)
         job.add_log(f"CRITICAL ERROR: {str(e)}")
         torch.cuda.empty_cache()
-        gc.collect()
 def start_training_wrapper(hf_token, model_name, new_repo_name, lora_r, lora_alpha, lora_dropout,
                            train_steps, learning_rate, batch_size, datasets_text,
                            reasoning_mode, c_conf, c_tok, c_gen):
     if not hf_token or not model_name:
-        return "ERROR: Missing Credentials", gr.update(visible=False), gr.update(visible=False)
     new_job = JobStatus()
     JOBS[new_job.id] = new_job
@@ -319,44 +287,25 @@ def start_training_wrapper(hf_token, model_name, new_repo_name, lora_r, lora_alp
     )
     thread.daemon = True
     thread.start()
-    return new_job.id, gr.update(visible=True, value=f"SESSION ID: {new_job.id}"), gr.update(visible=True)
 def get_job_update(job_id):
     if job_id not in JOBS:
-        return (
-            "<span style='color: #ef4444'>INVALID SESSION ID</span>",
-            "--:--",
-            "0%",
-            "",
-            gr.update(visible=False)
-        )
     job = JOBS[job_id]
-    log_html = "<br>".join([f"<div class='log-line'>{l}</div>" for l in job.logs[-50:]])
-    progress_html = f"""
-    <div class="p-bar-wrapper">
-        <div class="p-bar-fill" style="width: {job.progress * 100}%"></div>
-    </div>
-    <div class="p-text">{int(job.progress * 100)}% COMPLETE</div>
-    """
-    status_map = {
-        "IDLE": "#94a3b8",
-        "ACTIVE": "#3b82f6",
-        "COMPLETED": "#10b981",
-        "FAILED": "#ef4444"
-    }
-    status_html = f"<span style='color: {status_map.get(job.status, '#fff')}; font-weight: 900; letter-spacing: 1px;'>{job.status}</span>"
     result_comp = gr.update(visible=False)
     if job.status == "COMPLETED" and job.repo_url:
-        result_comp = gr.update(visible=True, value=f"ACCESS MODEL ARTIFACT: {job.repo_url}")
-    return status_html, job.created_at, progress_html, log_html, result_comp
 def load_from_url(request: gr.Request):
     try:
@@ -369,243 +318,79 @@ def load_from_url(request: gr.Request):
     return gr.update(selected="launch_tab"), ""
 css = """
-@import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@300;500;700&family=JetBrains+Mono:wght@400;700&display=swap');
-:root {
-    --bg-dark: #0a0a0f;
-    --panel-dark: #13131f;
-    --primary: #6366f1;
-    --accent: #8b5cf6;
-    --text-main: #e2e8f0;
-    --text-dim: #64748b;
-    --border: #1e1e2e;
-}
-body {
-    background-color: var(--bg-dark) !important;
-    font-family: 'Space Grotesk', sans-serif !important;
-}
-.gradio-container {
-    background-color: transparent !important;
-    max-width: 1400px !important;
-}
-.header-container {
-    text-align: center;
-    padding: 3rem 0;
-    background: radial-gradient(circle at center, rgba(99, 102, 241, 0.05) 0%, transparent 60%);
-    margin-bottom: 2rem;
-    border-bottom: 1px solid var(--border);
-}
-h1 {
-    font-size: 3.5rem;
-    background: linear-gradient(135deg, #fff 0%, #94a3b8 100%);
-    -webkit-background-clip: text;
-    -webkit-text-fill-color: transparent;
-    text-transform: uppercase;
-    letter-spacing: -2px;
-    margin-bottom: 0.5rem;
-}
-.sub-header {
-    font-family: 'JetBrains Mono', monospace;
-    color: var(--primary);
-    font-size: 0.9rem;
-    letter-spacing: 2px;
-    text-transform: uppercase;
-}
-.gr-box, .gr-panel {
-    background: var(--panel-dark) !important;
-    border: 1px solid var(--border) !important;
-    border-radius: 4px !important;
-}
-.gr-input, .gr-textarea, .gr-number, .gr-dropdown {
-    background: #0d0d12 !important;
-    border: 1px solid var(--border) !important;
-    color: var(--text-main) !important;
-    font-family: 'JetBrains Mono', monospace;
-    font-size: 13px;
-    border-radius: 4px !important;
-}
-.gr-input:focus {
-    border-color: var(--primary) !important;
-    box-shadow: 0 0 0 1px var(--primary) !important;
-}
-.primary-btn {
-    background: var(--primary) !important;
-    border: none !important;
-    color: #fff !important;
-    font-family: 'JetBrains Mono', monospace !important;
-    text-transform: uppercase;
-    letter-spacing: 1px;
-    padding: 12px 24px !important;
-    border-radius: 2px !important;
-    transition: all 0.2s ease;
-}
-.primary-btn:hover {
-    background: var(--accent) !important;
-    box-shadow: 0 0 15px rgba(99, 102, 241, 0.3);
-}
-.p-bar-wrapper {
-    width: 100%;
-    height: 4px;
-    background: #1e1e2e;
-    margin-top: 15px;
-}
-.p-bar-fill {
-    height: 100%;
-    background: linear-gradient(90deg, var(--primary), var(--accent));
-    transition: width 0.4s cubic-bezier(0.4, 0, 0.2, 1);
-}
-.p-text {
-    font-family: 'JetBrains Mono', monospace;
-    font-size: 10px;
-    color: var(--primary);
-    text-align: right;
-    margin-top: 5px;
-}
-.log-line {
-    font-family: 'JetBrains Mono', monospace;
-    font-size: 11px;
-    color: var(--text-dim);
-    padding: 2px 0;
-    border-bottom: 1px solid rgba(255,255,255,0.03);
-}
-.session-box {
-    background: rgba(99, 102, 241, 0.1);
-    border: 1px solid var(--primary);
-    color: var(--primary);
-    font-family: 'JetBrains Mono', monospace;
-    padding: 1rem;
-    text-align: center;
-    font-size: 1.2rem;
-    margin: 1rem 0;
-}
-.label-wrap {
-    background: var(--panel-dark) !important;
-    border: 1px solid var(--border);
-    color: var(--text-main) !important;
-}
 """
-with gr.Blocks(title="Nucleus Enterprise") as demo:
-    gr.HTML(f"<style>{css}</style>")
     with gr.Column():
-        gr.HTML("""
-        <div class="header-container">
-            <h1>Nucleus Enterprise</h1>
-            <div class="sub-header">Autonomous Neural Foundry // V.4.0</div>
-        </div>
-        """)
         with gr.Tabs() as main_tabs:
-            with gr.TabItem("DEPLOYMENT", id="launch_tab"):
                 with gr.Row():
                     with gr.Column(scale=2):
                         with gr.Row():
-                            hf_token = gr.Textbox(label="HUGGINGFACE KEY", type="password", value=os.getenv("HF_TOKEN", ""))
-                            model_name = gr.Textbox(label="BASE MODEL ID", placeholder="Qwen/Qwen2.5-0.5B")
-                        repo_name = gr.Textbox(label="TARGET REPOSITORY", value="nucleus-build-v1")
-                        datasets = gr.Textbox(label="DATA STREAMS (CSV)", placeholder="Salesforce/fineweb_deduplicated", lines=4)
-                        reasoning_toggle = gr.Checkbox(label="ENABLE REASONING CORE (INJECTS LOGIC DATASETS)", value=False, elem_id="reasoning-switch")
                     with gr.Column(scale=1):
-                        gr.Markdown("### HYPERPARAMETERS")
-                        train_steps = gr.Number(label="STEPS", value=100)
-                        lr = gr.Number(label="LEARNING RATE", value=2e-4)
-                        batch = gr.Number(label="BATCH SIZE", value=1)
-                        gr.Markdown("### LORA ADAPTERS")
-                        lora_r = gr.Slider(8, 256, 32, step=8, label="RANK")
-                        lora_a = gr.Slider(8, 512, 64, step=8, label="ALPHA")
-                        lora_d = gr.Slider(0, 0.5, 0.05, label="DROPOUT")
-                with gr.Accordion("ADVANCED CONFIGURATION INJECTION", open=False):
-                    with gr.Row():
-                        conf_json = gr.Code(label="CONFIG.JSON", language="json")
-                        tok_json = gr.Code(label="TOKENIZER_CONFIG.JSON", language="json")
-                        gen_json = gr.Code(label="GENERATION_CONFIG.JSON", language="json")
-                launch_btn = gr.Button("INITIALIZE TRAINING SEQUENCE", elem_classes="primary-btn")
-                job_info_area = gr.Group(visible=False)
-                with job_info_area:
-                    new_job_id_display = gr.HTML()
-                    share_link_display = gr.Textbox(label="DIRECT MONITOR UPLINK", interactive=True)
-                    hidden_job_id = gr.Textbox(visible=False)
-            with gr.TabItem("TELEMETRY", id="monitor_tab"):
                 with gr.Row():
-                    input_job_id = gr.Textbox(label="SESSION ID", placeholder="ENTER 8-DIGIT ID")
-                    refresh_btn = gr.Button("ESTABLISH UPLINK", elem_classes="primary-btn")
                 with gr.Row():
-                    with gr.Column(scale=1):
-                        status_display = gr.HTML(label="STATUS")
-                        created_display = gr.Textbox(label="TIMESTAMP", interactive=False)
-                        final_link = gr.Markdown(visible=False)
-                    with gr.Column(scale=2):
-                        progress_display = gr.HTML()
-                        with gr.Accordion("SYSTEM LOGS", open=False):
-                            logs_display = gr.HTML()
-    timer = gr.Timer(3000, active=False)
-    def activate_timer():
-        return gr.Timer(active=True)
-    demo.load(
-        load_from_url,
-        None,
-        [main_tabs, input_job_id]
-    )
-    launch_btn.click(
         start_training_wrapper,
-        inputs=[hf_token, model_name, repo_name, lora_r, lora_a, lora_d, train_steps, lr, batch, datasets, reasoning_toggle, conf_json, tok_json, gen_json],
-        outputs=[hidden_job_id, new_job_id_display, job_info_area]
     ).then(
-        fn=None,
-        inputs=[hidden_job_id],
-        outputs=[share_link_display],
-        js="(id) => { return window.location.protocol + '//' + window.location.host + window.location.pathname + '?job_id=' + id; }"
     ).then(
-        fn=lambda id: f"<div class='session-box'>{id}</div>",
-        inputs=[hidden_job_id],
-        outputs=[new_job_id_display]
     )
-    refresh_btn.click(
-        get_job_update,
-        inputs=[input_job_id],
-        outputs=[status_display, created_display, progress_display, logs_display, final_link]
-    ).then(
-        activate_timer,
-        None,
-        timer
-    )
-    timer.tick(
-        get_job_update,
-        inputs=[input_job_id],
-        outputs=[status_display, created_display, progress_display, logs_display, final_link]
-    )
 if __name__ == "__main__":
     demo.launch(ssr_mode=False)

 class JobStatus:
     def __init__(self):
+        self.id = str(uuid.uuid4())
+        self.status = "INITIALIZING"
         self.progress = 0.0
         self.logs = []
         self.result = None
             job = JOBS[self.job_id]
             if state.max_steps > 0:
                 prog = state.global_step / state.max_steps
+                job.progress = 0.1 + (prog * 0.8)
+                if state.global_step % 1 == 0:
                     loss = state.log_history[-1].get('loss', 'N/A') if state.log_history else '...'
+                    job.add_log(f"Training Step {state.global_step}/{state.max_steps} | Loss: {loss}")
+        return control
+    def on_save(self, args, state, control, **kwargs):
+        if self.job_id in JOBS:
+            job = JOBS[self.job_id]
+            job.add_log(f"System: Checkpoint saved at step {state.global_step}")
         return control
 @spaces.GPU(duration=300)
                           reasoning_mode, c_conf, c_tok, c_gen):
     job = JOBS[job_id]
+    job.status = "RUNNING"
+    job.add_log("System: Starting Neural Forge Engine...")
     try:
         if not hf_token.startswith("hf_"):
+            raise ValueError("Invalid HuggingFace Token")
         os.environ["WANDB_DISABLED"] = "true"
         os.environ["HF_TOKEN"] = hf_token
         os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "true"
         login(token=hf_token)
         try:
             username = whoami()["name"]
+            job.add_log(f"Auth: Verified as {username}")
         except:
+            raise Exception("Authentication Failed")
         if not hasattr(torch, 'xla'):
             class DummyXLA:
         dataset_list = [item.strip() for item in raw_items if item.strip()]
         if reasoning_mode:
+            job.add_log("Config: Reasoning Core Active")
+            dataset_list.extend(["gsm8k", "openai/gsm8k"])
         def load_single(ds_name, cfg):
             try:
                 return None
         streams = []
+        job.set_progress(0.05, "Data: Connecting streams...")
         with ThreadPoolExecutor(max_workers=4) as executor:
             futures = []
                     streams.append(res)
         if not streams:
+            raise Exception("No valid datasets found")
+        job.set_progress(0.1, f"Data: {len(streams)} sources active.")
+        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, padding_side="left", add_eos_token=True, add_bos_token=True)
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
         def process_stream_generator():
             iterator = chain.from_iterable(streams)
             batch_buffer = []
             for item in iterator:
                 try:
+                    text = str(item.get("text", item.get("content", str(item))))
+                    if len(text) < 10: continue
                     batch_buffer.append(text)
+                    if len(batch_buffer) >= 20:
                         for txt in batch_buffer:
+                            tokens = tokenizer(txt, truncation=True, max_length=1024)
                             tokens["labels"] = tokens["input_ids"].copy()
                             yield tokens
                         batch_buffer = []
                 except:
                     continue
+        job.set_progress(0.15, "Model: Loading weights...")
         torch.cuda.empty_cache()
         gc.collect()
             torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
         )
         peft_config = LoraConfig(
+            r=int(lora_r),
             lora_alpha=int(lora_alpha),
+            target_modules=["q_proj", "k_proj", "v_proj", "dense", "fc1", "fc2", "o_proj"],
             bias="none",
             lora_dropout=lora_dropout,
             task_type="CAUSAL_LM"
         output_dir = f"checkpoints/{job_id}"
         training_args = TrainingArguments(
             output_dir=output_dir,
             per_device_train_batch_size=int(batch_size),
             gradient_accumulation_steps=4,
+            max_steps=int(train_steps),
             learning_rate=learning_rate,
             optim="adamw_torch",
+            logging_steps=1,
             save_strategy="steps",
+            save_steps=max(10, int(int(train_steps)/5)),
             save_total_limit=2,
             report_to="none",
             fp16=True if torch.cuda.is_available() else False,
+            disable_tqdm=True
         )
         dataset_iterable = IterableDataset.from_generator(process_stream_generator)
             callbacks=[CustomTrainerCallback(job_id)]
         )
+        job.set_progress(0.2, "Training: Phase initiated...")
         trainer.train()
+        job.set_progress(0.9, "Processing: Merging tensors...")
         del peft_model
         del original_model
         torch.cuda.empty_cache()
         gc.collect()
         base_reload = AutoModelForCausalLM.from_pretrained(
             model_name,
             return_dict=True,
+            torch_dtype=torch.float16,
             trust_remote_code=True,
             device_map="auto"
         )
                     data = json.loads(content)
                     with open(os.path.join(final_path, fname), 'w') as f:
                         json.dump(data, f, indent=2)
                 except:
+                    pass
         inject_json(c_conf, "config.json")
         inject_json(c_tok, "tokenizer_config.json")
         inject_json(c_gen, "generation_config.json")
+        job.set_progress(0.95, "Network: Uploading to HuggingFace...")
         full_repo = f"{username}/{new_repo_name}"
         create_repo(full_repo, token=hf_token, exist_ok=True)
         upload_folder(folder_path=final_path, repo_id=full_repo, token=hf_token)
         job.repo_url = f"https://huggingface.co/{full_repo}"
         job.status = "COMPLETED"
+        job.set_progress(1.0, "System: Mission Accomplished")
     except Exception as e:
         job.status = "FAILED"
         job.error = str(e)
         job.add_log(f"CRITICAL ERROR: {str(e)}")
         torch.cuda.empty_cache()
 def start_training_wrapper(hf_token, model_name, new_repo_name, lora_r, lora_alpha, lora_dropout,
                            train_steps, learning_rate, batch_size, datasets_text,
                            reasoning_mode, c_conf, c_tok, c_gen):
     if not hf_token or not model_name:
+        return None, gr.update(selected="launch_tab")
     new_job = JobStatus()
     JOBS[new_job.id] = new_job
     )
     thread.daemon = True
     thread.start()
+    return new_job.id, gr.update(selected="monitor_tab")
 def get_job_update(job_id):
+    if not job_id:
+        return "Waiting for Job ID...", "", 0, "", gr.update(visible=False)
     if job_id not in JOBS:
+        return "Job ID not found in memory.", "", 0, "", gr.update(visible=False)
     job = JOBS[job_id]
+    log_text = "\n".join(job.logs)
     result_comp = gr.update(visible=False)
     if job.status == "COMPLETED" and job.repo_url:
+        result_comp = gr.update(visible=True, value=f"✅ Model Published: {job.repo_url}")
+    return job.status, job.created_at, job.progress, log_text, result_comp
 def load_from_url(request: gr.Request):
     try:
     return gr.update(selected="launch_tab"), ""
 css = """
+@import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;600&family=Inter:wght@400;700&display=swap');
+body { background: #0b0f19; color: #fff; font-family: 'Inter', sans-serif; }
+.gradio-container { border: 1px solid #2d3748; border-radius: 8px; background: #111827; }
+h1 { color: #6366f1; text-align: center; font-weight: 800; text-transform: uppercase; letter-spacing: 2px; }
+.gr-button.primary { background: #4f46e5; border: none; color: white; font-weight: bold; }
+.gr-button.primary:hover { background: #4338ca; }
+.gr-input, .gr-textarea, .gr-box { background: #1f2937 !important; border-color: #374151 !important; color: #e5e7eb !important; }
+.gr-code { background: #000 !important; color: #0f0 !important; font-family: 'IBM Plex Mono', monospace; border: 1px solid #333; }
+#status-badge { font-weight: bold; padding: 4px 8px; border-radius: 4px; background: #374151; display: inline-block; }
 """
+with gr.Blocks(title="Nucleus Enterprise", css=css, theme=gr.themes.Base()) as demo:
     with gr.Column():
+        gr.Markdown("# ⚛️ NUCLEUS ENTERPRISE")
+        gr.Markdown("Autonomous LLM Foundry | V5.0 Stable")
         with gr.Tabs() as main_tabs:
+            with gr.TabItem("🚀 LAUNCHPAD", id="launch_tab"):
                 with gr.Row():
                     with gr.Column(scale=2):
                         with gr.Row():
+                            hf_token = gr.Textbox(label="HuggingFace Token", type="password", value=os.getenv("HF_TOKEN", ""))
+                            model_name = gr.Textbox(label="Base Model", value="Qwen/Qwen2.5-0.5B")
+                        repo_name = gr.Textbox(label="Output Repository", value="nucleus-model-v1")
+                        datasets = gr.Textbox(label="Datasets (CSV)", value="Salesforce/fineweb_deduplicated", lines=3)
+                        reasoning = gr.Checkbox(label="Inject Reasoning (CoT/Math)", value=False)
                     with gr.Column(scale=1):
+                        steps = gr.Number(label="Steps", value=100)
+                        lr = gr.Number(label="Learning Rate", value=2e-4)
+                        batch = gr.Number(label="Batch Size", value=1)
+                        r = gr.Slider(8, 256, 32, step=8, label="LoRA Rank")
+                        a = gr.Slider(8, 512, 64, step=8, label="LoRA Alpha")
+                        d = gr.Slider(0, 0.5, 0.05, label="Dropout")
+                with gr.Accordion("Advanced Config", open=False):
+                    c_conf = gr.Code(label="config.json", language="json")
+                    c_tok = gr.Code(label="tokenizer_config.json", language="json")
+                    c_gen = gr.Code(label="generation_config.json", language="json")
+                btn_launch = gr.Button("INITIALIZE SYSTEM", variant="primary", size="lg")
+            with gr.TabItem("📡 TELEMETRY", id="monitor_tab"):
                 with gr.Row():
+                    job_id_input = gr.Textbox(label="Active Job ID", interactive=True)
+                    btn_refresh = gr.Button("Refresh Stream")
                 with gr.Row():
+                    status_out = gr.Textbox(label="Status", interactive=False)
+                    time_out = gr.Textbox(label="Start Time", interactive=False)
+                    progress_out = gr.Slider(label="Progress", minimum=0, maximum=1)
+                final_link = gr.Markdown(visible=False)
+                logs_out = gr.Code(label="Real-time Kernel Logs", language="shell", interactive=False, lines=15)
+    timer = gr.Timer(2000, active=False)
+    demo.load(load_from_url, None, [main_tabs, job_id_input]).then(lambda: gr.Timer(active=True), None, timer)
+    btn_launch.click(
         start_training_wrapper,
+        inputs=[hf_token, model_name, repo_name, r, a, d, steps, lr, batch, datasets, reasoning, c_conf, c_tok, c_gen],
+        outputs=[job_id_input, main_tabs]
     ).then(
+        None, [job_id_input], None,
+        js="(id) => { if (id) { const url = new URL(window.location); url.searchParams.set('job_id', id); window.history.pushState({}, '', url); } return id; }"
     ).then(
+        lambda: gr.Timer(active=True), None, timer
     )
+    btn_refresh.click(get_job_update, job_id_input, [status_out, time_out, progress_out, logs_out, final_link])
+    timer.tick(get_job_update, job_id_input, [status_out, time_out, progress_out, logs_out, final_link])
 if __name__ == "__main__":
     demo.launch(ssr_mode=False)