Update app.py
Browse files
app.py
CHANGED
|
@@ -56,9 +56,19 @@ def weight_quant(w):
|
|
| 56 |
|
| 57 |
class BitLinear(nn.Linear):
|
| 58 |
def forward(self, x):
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
def convert_to_bitnet(model, copy_weights=False):
|
| 64 |
for name, module in model.named_children():
|
|
@@ -243,9 +253,6 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name,
|
|
| 243 |
model_size = sum(t.numel() for t in original_model.parameters())
|
| 244 |
job.add_log(f"Model Size: {model_size/1000**2:.1f}M Parameters (1.58-bit)")
|
| 245 |
|
| 246 |
-
if torch.cuda.is_available():
|
| 247 |
-
original_model = original_model.to(torch.float16).cuda()
|
| 248 |
-
|
| 249 |
output_dir = f"checkpoints/{job_id}"
|
| 250 |
|
| 251 |
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
|
|
@@ -388,7 +395,7 @@ def load_from_url(request: gr.Request):
|
|
| 388 |
with gr.Blocks(title="Nucleus Enterprise") as demo:
|
| 389 |
with gr.Column():
|
| 390 |
gr.Markdown("# ⚛️ NUCLEUS ENTERPRISE")
|
| 391 |
-
gr.Markdown("Autonomous LLM Foundry |
|
| 392 |
|
| 393 |
with gr.Tabs() as main_tabs:
|
| 394 |
with gr.TabItem("🚀 LAUNCHPAD", id="launch_tab"):
|
|
|
|
| 56 |
|
| 57 |
class BitLinear(nn.Linear):
|
| 58 |
def forward(self, x):
|
| 59 |
+
target_dtype = x.dtype
|
| 60 |
+
|
| 61 |
+
w = self.weight.to(target_dtype)
|
| 62 |
+
w_quant = weight_quant(w).to(target_dtype)
|
| 63 |
+
|
| 64 |
+
x_quant = activation_quant(x).to(target_dtype)
|
| 65 |
+
|
| 66 |
+
if self.bias is not None:
|
| 67 |
+
b = self.bias.to(target_dtype)
|
| 68 |
+
else:
|
| 69 |
+
b = None
|
| 70 |
+
|
| 71 |
+
return F.linear(x_quant, w_quant, b)
|
| 72 |
|
| 73 |
def convert_to_bitnet(model, copy_weights=False):
|
| 74 |
for name, module in model.named_children():
|
|
|
|
| 253 |
model_size = sum(t.numel() for t in original_model.parameters())
|
| 254 |
job.add_log(f"Model Size: {model_size/1000**2:.1f}M Parameters (1.58-bit)")
|
| 255 |
|
|
|
|
|
|
|
|
|
|
| 256 |
output_dir = f"checkpoints/{job_id}"
|
| 257 |
|
| 258 |
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
|
|
|
|
| 395 |
with gr.Blocks(title="Nucleus Enterprise") as demo:
|
| 396 |
with gr.Column():
|
| 397 |
gr.Markdown("# ⚛️ NUCLEUS ENTERPRISE")
|
| 398 |
+
gr.Markdown("Autonomous LLM Foundry | V10.0 BitNet Edition")
|
| 399 |
|
| 400 |
with gr.Tabs() as main_tabs:
|
| 401 |
with gr.TabItem("🚀 LAUNCHPAD", id="launch_tab"):
|