fix: compatibility patches for newer transformers (4.50+ / 5.x)

#44
configuration_florence2.py CHANGED
@@ -262,7 +262,7 @@ class Florence2LanguageConfig(PretrainedConfig):
         )
 
         # ensure backward compatibility for BART CNN models
-        if self.forced_bos_token_id is None and kwargs.get("force_bos_token_to_be_generated", False):
+        if getattr(self, "forced_bos_token_id", None) is None and kwargs.get("force_bos_token_to_be_generated", False):
             self.forced_bos_token_id = self.bos_token_id
             warnings.warn(
                 f"Please make sure the config includes `forced_bos_token_id={self.bos_token_id}` in future versions. "
modeling_florence2.py CHANGED
@@ -554,7 +554,13 @@ class DaViT(nn.Module):
         assert self.num_stages == len(self.num_heads) == len(self.num_groups)
 
         num_stages = len(embed_dims)
-        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths)*2)]
+        _n_dpr = sum(depths) * 2
+        if _n_dpr == 0:
+            dpr = []
+        elif _n_dpr == 1:
+            dpr = [0.0]
+        else:
+            dpr = [drop_path_rate * i / (_n_dpr - 1) for i in range(_n_dpr)]
 
         depth_offset = 0
         convs = []
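This hunk swaps the `torch.linspace`-based stochastic-depth schedule for a pure-Python one and handles the degenerate lengths 0 and 1 explicitly, where a `(end - start) / (n - 1)` step is undefined. For `n >= 2` the two formulations produce the same values; a quick check, with illustrative `depths` and `drop_path_rate` (not read from any real config):

```python
# Sanity check: the pure-Python drop-path schedule matches torch.linspace for n >= 2.
import torch

drop_path_rate = 0.1
depths = (1, 1, 9, 1)          # illustrative stage depths
n = sum(depths) * 2

reference = [x.item() for x in torch.linspace(0, drop_path_rate, n)]  # old code path
patched = [drop_path_rate * i / (n - 1) for i in range(n)]            # new code path
assert all(abs(a - b) < 1e-6 for a, b in zip(reference, patched))
print(patched[0], patched[-1])  # 0.0 and drop_path_rate
```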
@@ -2331,21 +2337,8 @@ class Florence2PreTrainedModel(PreTrainedModel):
     supports_gradient_checkpointing = True
     _skip_keys_device_placement = "past_key_values"
 
-    @property
-    def _supports_flash_attn_2(self):
-        """
-        Retrieve language_model's attribute to check whether the model supports
-        Flash Attention 2 or not.
-        """
-        return self.language_model._supports_flash_attn_2
-
-    @property
-    def _supports_sdpa(self):
-        """
-        Retrieve language_model's attribute to check whether the model supports
-        SDPA or not.
-        """
-        return self.language_model._supports_sdpa
+    _supports_flash_attn_2 = False
+    _supports_sdpa = True
 
 
 FLORENCE2_INPUTS_DOCSTRING = r"""
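Recent transformers versions appear to read `_supports_flash_attn_2` and `_supports_sdpa` as plain class-level flags while resolving the attention backend, before any `language_model` submodule exists, so the property-based indirection no longer works and the hunk pins them as class attributes instead. A usage sketch under that assumption (the checkpoint id is the usual Florence-2 repo; adjust as needed):

```python
# Loading with an explicit attention backend; trust_remote_code is required for
# the custom Florence-2 modeling files.
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Florence-2-base",
    trust_remote_code=True,
    attn_implementation="sdpa",  # permitted because _supports_sdpa = True
)
```

With `_supports_flash_attn_2 = False`, requesting `attn_implementation="flash_attention_2"` is expected to be rejected at load time.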
 
processing_florence2.py CHANGED
@@ -84,7 +84,7 @@ class Florence2Processor(ProcessorMixin):
 
         tokens_to_add = {
             'additional_special_tokens': \
-                tokenizer.additional_special_tokens + \
+                list(getattr(tokenizer, 'additional_special_tokens', []) or []) + \
                 ['<od>', '</od>', '<ocr>', '</ocr>'] + \
                 [f'<loc_{x}>' for x in range(1000)] + \
                 ['<cap>', '</cap>', '<ncap>', '</ncap>','<dcap>', '</dcap>', '<grounding>', '</grounding>', '<seg>', '</seg>', '<sep>', '<region_cap>', '</region_cap>', '<region_to_desciption>', '</region_to_desciption>', '<proposal>', '</proposal>', '<poly>', '</poly>', '<and>']
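The guarded form covers tokenizers where `additional_special_tokens` is missing or `None` on newer versions, in which case direct concatenation with the new token list raises `TypeError`. A minimal sketch of the guard, with illustrative variable names:

```python
# What the patched expression protects against: None (or a missing attribute)
# cannot be concatenated with the list of Florence-2 task tokens.
existing = None  # stand-in for tokenizer.additional_special_tokens on some builds

try:
    tokens = existing + ['<od>', '</od>']
except TypeError as err:
    print("unguarded concatenation fails:", err)

tokens = list(existing or []) + ['<od>', '</od>']  # guarded, as in the patch
print(tokens)  # ['<od>', '</od>']
```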