Train_v1

Sleeping

Ksjsjjdj committed on Dec 2, 2025

Commit

b9816b5

verified ·

1 Parent(s): 61fcbc1

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -56,9 +56,19 @@ def weight_quant(w):
 class BitLinear(nn.Linear):
     def forward(self, x):
-        w = weight_quant(self.weight)
-        x = activation_quant(x)
-        return F.linear(x, w, self.bias)
 def convert_to_bitnet(model, copy_weights=False):
     for name, module in model.named_children():
@@ -243,9 +253,6 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name,
         model_size = sum(t.numel() for t in original_model.parameters())
         job.add_log(f"Model Size: {model_size/1000**2:.1f}M Parameters (1.58-bit)")
-        if torch.cuda.is_available():
-            original_model = original_model.to(torch.float16).cuda()
         output_dir = f"checkpoints/{job_id}"
         data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
@@ -388,7 +395,7 @@ def load_from_url(request: gr.Request):
 with gr.Blocks(title="Nucleus Enterprise") as demo:
     with gr.Column():
         gr.Markdown("# ⚛️ NUCLEUS ENTERPRISE")
-        gr.Markdown("Autonomous LLM Foundry | V9.5 BitNet Edition")
         with gr.Tabs() as main_tabs:
             with gr.TabItem("🚀 LAUNCHPAD", id="launch_tab"):

 class BitLinear(nn.Linear):
     def forward(self, x):
+        target_dtype = x.dtype
+        w = self.weight.to(target_dtype)
+        w_quant = weight_quant(w).to(target_dtype)
+        x_quant = activation_quant(x).to(target_dtype)
+        if self.bias is not None:
+            b = self.bias.to(target_dtype)
+        else:
+            b = None
+        return F.linear(x_quant, w_quant, b)
 def convert_to_bitnet(model, copy_weights=False):
     for name, module in model.named_children():
         model_size = sum(t.numel() for t in original_model.parameters())
         job.add_log(f"Model Size: {model_size/1000**2:.1f}M Parameters (1.58-bit)")
         output_dir = f"checkpoints/{job_id}"
         data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
 with gr.Blocks(title="Nucleus Enterprise") as demo:
     with gr.Column():
         gr.Markdown("# ⚛️ NUCLEUS ENTERPRISE")
+        gr.Markdown("Autonomous LLM Foundry | V10.0 BitNet Edition")
         with gr.Tabs() as main_tabs:
             with gr.TabItem("🚀 LAUNCHPAD", id="launch_tab"):