allenai
/

MolmoPoint-GUI-8B

Image-Text-to-Text

Model card · Files and versions

chrisc36 committed on Mar 17

Commit

448b75b

·

verified ·

1 Parent(s): c57624d

Update modeling_molmo_point.py

Files changed (1) hide show

modeling_molmo_point.py +1 -1

modeling_molmo_point.py CHANGED Viewed

@@ -1497,7 +1497,7 @@ class MolmoPointModel(MolmoPointPreTrainedModel):
                 assert last_predicted_patch_id is not None, "Patch should always be generated before a subpatch"
                 for_patches = (last_predicted_patch_id.view(batch_size) + image_token_offset)[input_subpatch_ids.view(batch_size) >= 0]
                 vit_features_to_embed = vit_features_flat[for_patches, input_subpatch_ids]
-                x.view(-1, dim)[is_subpatch.view(-1)] = self.build_vit_embedding(vit_features_to_embed).to(device=device)
         # shape: (batch_size, seq_len, d_model)
         x = self.transformer.emb_drop(x)  # type: ignore

                 assert last_predicted_patch_id is not None, "Patch should always be generated before a subpatch"
                 for_patches = (last_predicted_patch_id.view(batch_size) + image_token_offset)[input_subpatch_ids.view(batch_size) >= 0]
                 vit_features_to_embed = vit_features_flat[for_patches, input_subpatch_ids]
+                x.view(-1, dim)[is_subpatch.view(-1)] = self.build_vit_embedding(vit_features_to_embed).to(device=device, dtype=x.dtype)
         # shape: (batch_size, seq_len, d_model)
         x = self.transformer.emb_drop(x)  # type: ignore