ssslakter
/

Slava Chaunin committed on
Commit
0d435a2
·
1 Parent(s): 478b3a3

fix model

Browse files
Files changed (2) hide show
  1. configuration_llada.py +0 -2
  2. modeling_llada.py +2 -4
configuration_llada.py CHANGED
@@ -444,8 +444,6 @@ class LLaDAConfig(PretrainedConfig):
444
  all_kwargs["use_cache"] = use_cache
445
  all_kwargs["architectures"] = all_kwargs.get("architectures", ["LLaDAModelLM"])
446
 
447
- for key, value in kwargs.items():
448
- setattr(self, key, value)
449
  super().__post_init__(**all_kwargs)
450
 
451
  @property
 
444
  all_kwargs["use_cache"] = use_cache
445
  all_kwargs["architectures"] = all_kwargs.get("architectures", ["LLaDAModelLM"])
446
 
 
 
447
  super().__post_init__(**all_kwargs)
448
 
449
  @property
modeling_llada.py CHANGED
@@ -1417,13 +1417,11 @@ class LLaDAModelLM(PreTrainedModel):
1417
  attention_bias: Optional[torch.Tensor] = None,
1418
  past_key_values: Optional[List[torch.FloatTensor]] = None,
1419
  labels: Optional[torch.LongTensor] = None,
1420
- use_cache: Optional[bool] = None,
1421
  output_attentions: Optional[bool] = None,
1422
  output_hidden_states: Optional[bool] = None,
1423
  **kwargs
1424
  ) -> Union[Tuple, CausalLMOutputWithPast]:
1425
- if use_cache is None:
1426
- use_cache = self.config.use_cache
1427
 
1428
  if output_attentions:
1429
  raise ValueError("output_attentions is not yet supported in LLaDA")
@@ -1465,7 +1463,7 @@ class LLaDAModelLM(PreTrainedModel):
1465
  model_inputs = {"input_ids": input_ids, "past_key_values": past_key_values}
1466
 
1467
  model_inputs.update(kwargs)
1468
- model_inputs["use_cache"] = kwargs.pop("use_cache", self.config.use_cache)
1469
  return model_inputs
1470
 
1471
  # TODO: these are required to make the implementation complete.
 
1417
  attention_bias: Optional[torch.Tensor] = None,
1418
  past_key_values: Optional[List[torch.FloatTensor]] = None,
1419
  labels: Optional[torch.LongTensor] = None,
1420
+ use_cache: Optional[bool] = False,
1421
  output_attentions: Optional[bool] = None,
1422
  output_hidden_states: Optional[bool] = None,
1423
  **kwargs
1424
  ) -> Union[Tuple, CausalLMOutputWithPast]:
 
 
1425
 
1426
  if output_attentions:
1427
  raise ValueError("output_attentions is not yet supported in LLaDA")
 
1463
  model_inputs = {"input_ids": input_ids, "past_key_values": past_key_values}
1464
 
1465
  model_inputs.update(kwargs)
1466
+ model_inputs["use_cache"] = kwargs.pop("use_cache", False)
1467
  return model_inputs
1468
 
1469
  # TODO: these are required to make the implementation complete.