Commit ·
865e4c1
1
Parent(s): 5a34584
Upload model
Browse files
- config.json +6 -2
- modeling_veld.py +8 -1
config.json
CHANGED
|
@@ -15,6 +15,7 @@
|
|
| 15 |
"T5DualDecoderDoubleHeadsModel"
|
| 16 |
],
|
| 17 |
"bad_words_ids": null,
|
|
|
|
| 18 |
"bos_token_id": null,
|
| 19 |
"chunk_size_feed_forward": 0,
|
| 20 |
"cross_attention_hidden_size": null,
|
|
@@ -73,6 +74,7 @@
|
|
| 73 |
"return_dict": true,
|
| 74 |
"return_dict_in_generate": false,
|
| 75 |
"sep_token_id": null,
|
|
|
|
| 76 |
"task_specific_params": null,
|
| 77 |
"temperature": 1.0,
|
| 78 |
"tf_legacy_loss": false,
|
|
@@ -83,7 +85,7 @@
|
|
| 83 |
"top_p": 1.0,
|
| 84 |
"torch_dtype": null,
|
| 85 |
"torchscript": false,
|
| 86 |
-
"transformers_version": "4.
|
| 87 |
"typical_p": 1.0,
|
| 88 |
"use_bfloat16": false,
|
| 89 |
"use_cache": true,
|
|
@@ -97,6 +99,7 @@
|
|
| 97 |
],
|
| 98 |
"attention_probs_dropout_prob": 0.0,
|
| 99 |
"bad_words_ids": null,
|
|
|
|
| 100 |
"bos_token_id": null,
|
| 101 |
"chunk_size_feed_forward": 0,
|
| 102 |
"cross_attention_hidden_size": null,
|
|
@@ -153,6 +156,7 @@
|
|
| 153 |
"return_dict": true,
|
| 154 |
"return_dict_in_generate": false,
|
| 155 |
"sep_token_id": null,
|
|
|
|
| 156 |
"task_specific_params": null,
|
| 157 |
"temperature": 1.0,
|
| 158 |
"tf_legacy_loss": false,
|
|
@@ -163,7 +167,7 @@
|
|
| 163 |
"top_p": 1.0,
|
| 164 |
"torch_dtype": null,
|
| 165 |
"torchscript": false,
|
| 166 |
-
"transformers_version": "4.
|
| 167 |
"typical_p": 1.0,
|
| 168 |
"use_bfloat16": false
|
| 169 |
},
|
|
|
|
| 15 |
"T5DualDecoderDoubleHeadsModel"
|
| 16 |
],
|
| 17 |
"bad_words_ids": null,
|
| 18 |
+
"begin_suppress_tokens": null,
|
| 19 |
"bos_token_id": null,
|
| 20 |
"chunk_size_feed_forward": 0,
|
| 21 |
"cross_attention_hidden_size": null,
|
|
|
|
| 74 |
"return_dict": true,
|
| 75 |
"return_dict_in_generate": false,
|
| 76 |
"sep_token_id": null,
|
| 77 |
+
"suppress_tokens": null,
|
| 78 |
"task_specific_params": null,
|
| 79 |
"temperature": 1.0,
|
| 80 |
"tf_legacy_loss": false,
|
|
|
|
| 85 |
"top_p": 1.0,
|
| 86 |
"torch_dtype": null,
|
| 87 |
"torchscript": false,
|
| 88 |
+
"transformers_version": "4.24.0",
|
| 89 |
"typical_p": 1.0,
|
| 90 |
"use_bfloat16": false,
|
| 91 |
"use_cache": true,
|
|
|
|
| 99 |
],
|
| 100 |
"attention_probs_dropout_prob": 0.0,
|
| 101 |
"bad_words_ids": null,
|
| 102 |
+
"begin_suppress_tokens": null,
|
| 103 |
"bos_token_id": null,
|
| 104 |
"chunk_size_feed_forward": 0,
|
| 105 |
"cross_attention_hidden_size": null,
|
|
|
|
| 156 |
"return_dict": true,
|
| 157 |
"return_dict_in_generate": false,
|
| 158 |
"sep_token_id": null,
|
| 159 |
+
"suppress_tokens": null,
|
| 160 |
"task_specific_params": null,
|
| 161 |
"temperature": 1.0,
|
| 162 |
"tf_legacy_loss": false,
|
|
|
|
| 167 |
"top_p": 1.0,
|
| 168 |
"torch_dtype": null,
|
| 169 |
"torchscript": false,
|
| 170 |
+
"transformers_version": "4.24.0",
|
| 171 |
"typical_p": 1.0,
|
| 172 |
"use_bfloat16": false
|
| 173 |
},
|
modeling_veld.py
CHANGED
|
@@ -1886,7 +1886,7 @@ class VELDModel(PreTrainedModel):
|
|
| 1886 |
|
| 1887 |
encoder_hidden_states = None if encoder_outputs is None else encoder_outputs[0]
|
| 1888 |
pooler_output_local = None if encoder_outputs is None else self.local_pooling(encoder_hidden_states)
|
| 1889 |
-
pooler_output_global = None if encoder_outputs is None else self.global_pooling(pooler_output_local).squeeze(1)
|
| 1890 |
|
| 1891 |
# optionally project encoder_hidden_states
|
| 1892 |
if (
|
|
@@ -2083,6 +2083,13 @@ if __name__ == "__main__":
|
|
| 2083 |
print(outputs.loss)
|
| 2084 |
print(outputs.c_loss)
|
| 2085 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2086 |
# print(outputs)
|
| 2087 |
|
| 2088 |
# outputs = model.generate(
|
|
|
|
| 1886 |
|
| 1887 |
encoder_hidden_states = None if encoder_outputs is None else encoder_outputs[0]
|
| 1888 |
pooler_output_local = None if encoder_outputs is None else self.local_pooling(encoder_hidden_states)
|
| 1889 |
+
pooler_output_global = None if encoder_outputs is None or return_contrastive_loss is None else self.global_pooling(pooler_output_local).squeeze(1)
|
| 1890 |
|
| 1891 |
# optionally project encoder_hidden_states
|
| 1892 |
if (
|
|
|
|
| 2083 |
print(outputs.loss)
|
| 2084 |
print(outputs.c_loss)
|
| 2085 |
|
| 2086 |
+
outputs = model(
|
| 2087 |
+
pixel_values=pixel_values,
|
| 2088 |
+
labels=inps.input_ids,
|
| 2089 |
+
decoder_attention_mask=inps.attention_mask)
|
| 2090 |
+
print(outputs.loss)
|
| 2091 |
+
print(outputs.c_loss)
|
| 2092 |
+
|
| 2093 |
# print(outputs)
|
| 2094 |
|
| 2095 |
# outputs = model.generate(
|