Upload model
Browse files - modeling.py +9 -11
modeling.py
CHANGED
|
@@ -12,25 +12,20 @@ from torch.nn.attention.flex_attention import (
|
|
| 12 |
create_block_mask,
|
| 13 |
flex_attention,
|
| 14 |
)
|
| 15 |
-
from torch.nn.utils.parametrizations import _Orthogonal, orthogonal  # imported names reconstructed (lost in extraction); order unverified
|
| 16 |
from transformers.modeling_utils import PreTrainedModel
|
| 17 |
from transformers.utils.backbone_utils import load_backbone
|
| 18 |
|
| 19 |
from .configuration import LSPDetrConfig, STAConfig
|
| 20 |
|
| 21 |
|
| 22 |
-
def
|
| 23 |
-
if
|
| 24 |
-
# Skip validation; assume OK (unsafe but necessary for meta init)
|
| 25 |
-
return x
|
| 26 |
|
| 27 |
-
return _right_inverse(self, x)
|
| 28 |
|
| 29 |
-
|
| 30 |
-
_right_inverse = _Orthogonal.right_inverse
|
| 31 |
patch(
|
| 32 |
-
"torch.nn.utils.parametrizations._Orthogonal.right_inverse",  # target tail reconstructed (lost in extraction)
|
| 33 |
-
|
| 34 |
).start()
|
| 35 |
|
| 36 |
|
|
@@ -384,7 +379,7 @@ class LSPTransformer(nn.Module):
|
|
| 384 |
def init_weights(self) -> None:
|
| 385 |
prior_prob = 0.01
|
| 386 |
bias_value = -math.log((1 - prior_prob) / prior_prob)
|
| 387 |
-
self.class_head.bias
|
| 388 |
|
| 389 |
# initialize regression layers
|
| 390 |
for head in self.point_head:
|
|
@@ -530,3 +525,6 @@ class LSPDetrModel(PreTrainedModel):
|
|
| 530 |
)
|
| 531 |
|
| 532 |
return self.decode_head(tgt, ref_points, features, h, w)
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
create_block_mask,
|
| 13 |
flex_attention,
|
| 14 |
)
|
| 15 |
+
from torch.nn.utils.parametrizations import _is_orthogonal, orthogonal
|
| 16 |
from transformers.modeling_utils import PreTrainedModel
|
| 17 |
from transformers.utils.backbone_utils import load_backbone
|
| 18 |
|
| 19 |
from .configuration import LSPDetrConfig, STAConfig
|
| 20 |
|
| 21 |
|
| 22 |
+
def _meta_safe_is_orthogonal(Q, eps=None):
|
| 23 |
+
return True if Q.device == torch.device("meta") else _is_orthogonal(Q, eps=eps)
|
|
|
|
|
|
|
| 24 |
|
|
|
|
| 25 |
|
|
|
|
|
|
|
| 26 |
patch(
|
| 27 |
+
"torch.nn.utils.parametrizations._is_orthogonal",
|
| 28 |
+
_meta_safe_is_orthogonal,
|
| 29 |
).start()
|
| 30 |
|
| 31 |
|
|
|
|
| 379 |
def init_weights(self) -> None:
|
| 380 |
prior_prob = 0.01
|
| 381 |
bias_value = -math.log((1 - prior_prob) / prior_prob)
|
| 382 |
+
nn.init.constant_(self.class_head.bias, bias_value)
|
| 383 |
|
| 384 |
# initialize regression layers
|
| 385 |
for head in self.point_head:
|
|
|
|
| 525 |
)
|
| 526 |
|
| 527 |
return self.decode_head(tgt, ref_points, features, h, w)
|
| 528 |
+
|
| 529 |
+
|
| 530 |
+
# Manual smoke test: load the published checkpoint only when this file is
# executed directly. Left unguarded at module level, this call would run on
# every import of the module (including the import performed when the model
# is loaded with trust_remote_code=True), triggering a checkpoint fetch and a
# full model instantiation whose result is immediately discarded.
if __name__ == "__main__":
    LSPDetrModel.from_pretrained("RationAI/LSP-DETR", trust_remote_code=True)
|