Ksjsjjdj committed on
Commit
b9816b5
·
verified ·
1 Parent(s): 61fcbc1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -7
app.py CHANGED
@@ -56,9 +56,19 @@ def weight_quant(w):
56
 
57
  class BitLinear(nn.Linear):
58
  def forward(self, x):
59
- w = weight_quant(self.weight)
60
- x = activation_quant(x)
61
- return F.linear(x, w, self.bias)
 
 
 
 
 
 
 
 
 
 
62
 
63
  def convert_to_bitnet(model, copy_weights=False):
64
  for name, module in model.named_children():
@@ -243,9 +253,6 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name,
243
  model_size = sum(t.numel() for t in original_model.parameters())
244
  job.add_log(f"Model Size: {model_size/1000**2:.1f}M Parameters (1.58-bit)")
245
 
246
- if torch.cuda.is_available():
247
- original_model = original_model.to(torch.float16).cuda()
248
-
249
  output_dir = f"checkpoints/{job_id}"
250
 
251
  data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
@@ -388,7 +395,7 @@ def load_from_url(request: gr.Request):
388
  with gr.Blocks(title="Nucleus Enterprise") as demo:
389
  with gr.Column():
390
  gr.Markdown("# ⚛️ NUCLEUS ENTERPRISE")
391
- gr.Markdown("Autonomous LLM Foundry | V9.5 BitNet Edition")
392
 
393
  with gr.Tabs() as main_tabs:
394
  with gr.TabItem("🚀 LAUNCHPAD", id="launch_tab"):
 
56
 
57
  class BitLinear(nn.Linear):
58
  def forward(self, x):
59
+ target_dtype = x.dtype
60
+
61
+ w = self.weight.to(target_dtype)
62
+ w_quant = weight_quant(w).to(target_dtype)
63
+
64
+ x_quant = activation_quant(x).to(target_dtype)
65
+
66
+ if self.bias is not None:
67
+ b = self.bias.to(target_dtype)
68
+ else:
69
+ b = None
70
+
71
+ return F.linear(x_quant, w_quant, b)
72
 
73
  def convert_to_bitnet(model, copy_weights=False):
74
  for name, module in model.named_children():
 
253
  model_size = sum(t.numel() for t in original_model.parameters())
254
  job.add_log(f"Model Size: {model_size/1000**2:.1f}M Parameters (1.58-bit)")
255
 
 
 
 
256
  output_dir = f"checkpoints/{job_id}"
257
 
258
  data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
 
395
  with gr.Blocks(title="Nucleus Enterprise") as demo:
396
  with gr.Column():
397
  gr.Markdown("# ⚛️ NUCLEUS ENTERPRISE")
398
+ gr.Markdown("Autonomous LLM Foundry | V10.0 BitNet Edition")
399
 
400
  with gr.Tabs() as main_tabs:
401
  with gr.TabItem("🚀 LAUNCHPAD", id="launch_tab"):