Add Neuron-optimized files for deepset/roberta-base-squad2
Browse files
🤖 Neuron Export Bot: Adding AWS Neuron-optimized model files.
Original model: [deepset/roberta-base-squad2](https://huggingface.co/deepset/roberta-base-squad2)
Task: question-answering
Generated by: [badaoui](https://huggingface.co/badaoui)
Generated using: [Optimum Neuron Compiler Space](https://huggingface.co/spaces/optimum/neuron-export)
These files have been pre-compiled for AWS Neuron devices (Inferentia/Trainium) and should provide improved inference performance.
- config.json +7 -13
- model.neuron +2 -2
config.json
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
{
|
| 2 |
-
"_attn_implementation_autoset": true,
|
| 3 |
"architectures": [
|
| 4 |
"RobertaForQuestionAnswering"
|
| 5 |
],
|
|
@@ -19,26 +18,21 @@
|
|
| 19 |
"model_type": "roberta",
|
| 20 |
"name": "Roberta",
|
| 21 |
"neuron": {
|
| 22 |
-
"auto_cast":
|
| 23 |
-
"auto_cast_type":
|
| 24 |
"compiler_type": "neuronx-cc",
|
| 25 |
-
"compiler_version": "2.
|
| 26 |
"disable_fallback": false,
|
| 27 |
"disable_fast_relayout": false,
|
| 28 |
"dynamic_batch_size": false,
|
|
|
|
| 29 |
"inline_weights_to_neff": true,
|
| 30 |
-
"
|
| 31 |
-
|
| 32 |
-
"attention_mask"
|
| 33 |
-
],
|
| 34 |
"model_type": "roberta",
|
| 35 |
"optlevel": "2",
|
| 36 |
"output_attentions": false,
|
| 37 |
"output_hidden_states": false,
|
| 38 |
-
"output_names": [
|
| 39 |
-
"start_logits",
|
| 40 |
-
"end_logits"
|
| 41 |
-
],
|
| 42 |
"static_batch_size": 1,
|
| 43 |
"static_sequence_length": 128,
|
| 44 |
"task": "question-answering",
|
|
@@ -50,7 +44,7 @@
|
|
| 50 |
"position_embedding_type": "absolute",
|
| 51 |
"torch_dtype": "float32",
|
| 52 |
"torchscript": true,
|
| 53 |
-
"transformers_version": "4.
|
| 54 |
"type_vocab_size": 1,
|
| 55 |
"use_cache": true,
|
| 56 |
"vocab_size": 50265
|
|
|
|
| 1 |
{
|
|
|
|
| 2 |
"architectures": [
|
| 3 |
"RobertaForQuestionAnswering"
|
| 4 |
],
|
|
|
|
| 18 |
"model_type": "roberta",
|
| 19 |
"name": "Roberta",
|
| 20 |
"neuron": {
|
| 21 |
+
"auto_cast": "matmul",
|
| 22 |
+
"auto_cast_type": "bf16",
|
| 23 |
"compiler_type": "neuronx-cc",
|
| 24 |
+
"compiler_version": "2.21.18209.0+043b1bf7",
|
| 25 |
"disable_fallback": false,
|
| 26 |
"disable_fast_relayout": false,
|
| 27 |
"dynamic_batch_size": false,
|
| 28 |
+
"float_dtype": "fp32",
|
| 29 |
"inline_weights_to_neff": true,
|
| 30 |
+
"instance_type": "inf2",
|
| 31 |
+
"int_dtype": "int64",
|
|
|
|
|
|
|
| 32 |
"model_type": "roberta",
|
| 33 |
"optlevel": "2",
|
| 34 |
"output_attentions": false,
|
| 35 |
"output_hidden_states": false,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
"static_batch_size": 1,
|
| 37 |
"static_sequence_length": 128,
|
| 38 |
"task": "question-answering",
|
|
|
|
| 44 |
"position_embedding_type": "absolute",
|
| 45 |
"torch_dtype": "float32",
|
| 46 |
"torchscript": true,
|
| 47 |
+
"transformers_version": "4.55.4",
|
| 48 |
"type_vocab_size": 1,
|
| 49 |
"use_cache": true,
|
| 50 |
"vocab_size": 50265
|
model.neuron
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bbedbfc13a8c22987b82ee88926e7b066919f766720e1f01b59be128d38176e1
|
| 3 |
+
size 281147642
|