badaoui HF Staff committed on
Commit
399149d
verified
1 Parent(s): 9455243

Add Neuron-optimized files for deepset/roberta-base-squad2

Browse files

馃 Neuron Export Bot: Adding AWS Neuron-optimized model files.

Original model: [deepset/roberta-base-squad2](https://huggingface.co/deepset/roberta-base-squad2)
Task: question-answering
Generated by: [badaoui](https://huggingface.co/badaoui)
Generated using: [Optimum Neuron Compiler Space](https://huggingface.co/spaces/optimum/neuron-export)

These files have been pre-compiled for AWS Neuron devices (Inferentia/Trainium) and should provide improved inference performance.

Files changed (2) hide show
  1. config.json +7 -13
  2. model.neuron +2 -2
config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "_attn_implementation_autoset": true,
3
  "architectures": [
4
  "RobertaForQuestionAnswering"
5
  ],
@@ -19,26 +18,21 @@
19
  "model_type": "roberta",
20
  "name": "Roberta",
21
  "neuron": {
22
- "auto_cast": null,
23
- "auto_cast_type": null,
24
  "compiler_type": "neuronx-cc",
25
- "compiler_version": "2.19.8089.0+8ab9f450",
26
  "disable_fallback": false,
27
  "disable_fast_relayout": false,
28
  "dynamic_batch_size": false,
 
29
  "inline_weights_to_neff": true,
30
- "input_names": [
31
- "input_ids",
32
- "attention_mask"
33
- ],
34
  "model_type": "roberta",
35
  "optlevel": "2",
36
  "output_attentions": false,
37
  "output_hidden_states": false,
38
- "output_names": [
39
- "start_logits",
40
- "end_logits"
41
- ],
42
  "static_batch_size": 1,
43
  "static_sequence_length": 128,
44
  "task": "question-answering",
@@ -50,7 +44,7 @@
50
  "position_embedding_type": "absolute",
51
  "torch_dtype": "float32",
52
  "torchscript": true,
53
- "transformers_version": "4.51.3",
54
  "type_vocab_size": 1,
55
  "use_cache": true,
56
  "vocab_size": 50265
 
1
  {
 
2
  "architectures": [
3
  "RobertaForQuestionAnswering"
4
  ],
 
18
  "model_type": "roberta",
19
  "name": "Roberta",
20
  "neuron": {
21
+ "auto_cast": "matmul",
22
+ "auto_cast_type": "bf16",
23
  "compiler_type": "neuronx-cc",
24
+ "compiler_version": "2.21.18209.0+043b1bf7",
25
  "disable_fallback": false,
26
  "disable_fast_relayout": false,
27
  "dynamic_batch_size": false,
28
+ "float_dtype": "fp32",
29
  "inline_weights_to_neff": true,
30
+ "instance_type": "inf2",
31
+ "int_dtype": "int64",
 
 
32
  "model_type": "roberta",
33
  "optlevel": "2",
34
  "output_attentions": false,
35
  "output_hidden_states": false,
 
 
 
 
36
  "static_batch_size": 1,
37
  "static_sequence_length": 128,
38
  "task": "question-answering",
 
44
  "position_embedding_type": "absolute",
45
  "torch_dtype": "float32",
46
  "torchscript": true,
47
+ "transformers_version": "4.55.4",
48
  "type_vocab_size": 1,
49
  "use_cache": true,
50
  "vocab_size": 50265
model.neuron CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4097955ee0bf7f34ed6765fb716654da572fdc436ca5ce3119b916b0febbfeb5
3
- size 461190778
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbedbfc13a8c22987b82ee88926e7b066919f766720e1f01b59be128d38176e1
3
+ size 281147642