Upload folder using huggingface_hub
Browse files
- __init__.py +2 -0
- modeling_onevision_encoder.py +3 -3
__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .configuration_onevision_encoder import OneVisionEncoderConfig
|
| 2 |
+
from .modeling_onevision_encoder import OneVisionEncoderModel
|
modeling_onevision_encoder.py
CHANGED
|
@@ -564,7 +564,7 @@ class OneVisionEncoderModel(OneVisionEncoderPreTrainedModel):
|
|
| 564 |
self,
|
| 565 |
pixel_values: torch.Tensor,
|
| 566 |
visible_indices: Optional[torch.Tensor] = None,
|
| 567 |
-
|
| 568 |
output_attentions: Optional[bool] = None,
|
| 569 |
output_hidden_states: Optional[bool] = None,
|
| 570 |
return_dict: Optional[bool] = None,
|
|
@@ -618,8 +618,8 @@ class OneVisionEncoderModel(OneVisionEncoderPreTrainedModel):
|
|
| 618 |
)
|
| 619 |
|
| 620 |
# 3. RoPE Construction
|
| 621 |
-
if
|
| 622 |
-
freqs_visible = self.video_rope.forward_from_positions(
|
| 623 |
else:
|
| 624 |
freqs_full = self.video_rope(
|
| 625 |
t=t_frames,
|
|
|
|
| 564 |
self,
|
| 565 |
pixel_values: torch.Tensor,
|
| 566 |
visible_indices: Optional[torch.Tensor] = None,
|
| 567 |
+
patch_positions: Optional[torch.Tensor] = None,
|
| 568 |
output_attentions: Optional[bool] = None,
|
| 569 |
output_hidden_states: Optional[bool] = None,
|
| 570 |
return_dict: Optional[bool] = None,
|
|
|
|
| 618 |
)
|
| 619 |
|
| 620 |
# 3. RoPE Construction
|
| 621 |
+
if patch_positions is not None:
|
| 622 |
+
freqs_visible = self.video_rope.forward_from_positions(patch_positions)
|
| 623 |
else:
|
| 624 |
freqs_full = self.video_rope(
|
| 625 |
t=t_frames,
|