Adding modeling.py file
Browse files
- modeling.py +3 -3
modeling.py
CHANGED
|
@@ -107,7 +107,7 @@ class MoLM(PreTrainedModel):
|
|
| 107 |
# Expand the expert_mask to match the logits shape (batch_size, 1, 1)
|
| 108 |
expert_mask_expanded = expert_mask.unsqueeze(-1).unsqueeze(-1).float()
|
| 109 |
|
| 110 |
-
expert_output = expert(input_ids, targets=targets, date=date,
|
| 111 |
|
| 112 |
logits = expert_output["logits"]
|
| 113 |
loss_to_log = expert_output["loss_to_log"]
|
|
@@ -234,10 +234,10 @@ class MoLM(PreTrainedModel):
|
|
| 234 |
def generate_from_string(self, in_str, max_new_tokens, date=None, temperature=1.0, top_k=None):
|
| 235 |
idx = (
|
| 236 |
torch.tensor(
|
| 237 |
-
self.tokenizer.encode(in_str
|
| 238 |
)
|
| 239 |
.view(1, -1)
|
| 240 |
-
.to(self.
|
| 241 |
)
|
| 242 |
out_idx = (
|
| 243 |
self.generate(idx, max_new_tokens, date, temperature, top_k)
|
|
|
|
| 107 |
# Expand the expert_mask to match the logits shape (batch_size, 1, 1)
|
| 108 |
expert_mask_expanded = expert_mask.unsqueeze(-1).unsqueeze(-1).float()
|
| 109 |
|
| 110 |
+
expert_output = expert(input_ids, targets=targets, date=date, **kwargs)
|
| 111 |
|
| 112 |
logits = expert_output["logits"]
|
| 113 |
loss_to_log = expert_output["loss_to_log"]
|
|
|
|
| 234 |
def generate_from_string(self, in_str, max_new_tokens, date=None, temperature=1.0, top_k=None):
|
| 235 |
idx = (
|
| 236 |
torch.tensor(
|
| 237 |
+
self.tokenizer.encode(in_str)
|
| 238 |
)
|
| 239 |
.view(1, -1)
|
| 240 |
+
.to(next(self.parameters()).device)
|
| 241 |
)
|
| 242 |
out_idx = (
|
| 243 |
self.generate(idx, max_new_tokens, date, temperature, top_k)
|