Jingya HF Staff committed on
Commit
36361a2
1 Parent(s): cf74ca1
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ model.neuron filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,5 @@
1
  ---
2
  license: cc-by-4.0
3
  ---
 
 
 
1
  ---
2
  license: cc-by-4.0
3
  ---
4
+
5
+ This repo contains the artifacts of [deepset/roberta-base-squad2](https://huggingface.co/deepset/roberta-base-squad2/tree/main), converted to the neuronx format, which is compatible with INF2 and TRN1 devices.
all_metrics.csv ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ timestamp,run_id,name,subgraph,scope,sub_scope,value,unit,
2
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,CanonicalizeIR,0.017635822296142578,Seconds
3
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,ExpandBatchNorm,0.018537044525146484,Seconds
4
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,ResolveComplicatePredicates,0.014629840850830078,Seconds
5
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,AffinePredicateResolution,0.016721248626708984,Seconds
6
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,EliminateDivs,0.01678752899169922,Seconds
7
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,TensorOpFusion,0.01868891716003418,Seconds
8
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,TensorOpTransform,0.07107663154602051,Seconds
9
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,LowerTensorOp,0.022431373596191406,Seconds
10
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,MemcpyElimination,1.7832224369049072,Seconds
11
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,DeadStoreElimination,1.376033067703247,Seconds
12
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,SimplifySlice,0.005755901336669922,Seconds
13
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,PadElimination,0.0004241466522216797,Seconds
14
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,LoopFusion,0.7519557476043701,Seconds
15
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,ValueNumbering,0.051157236099243164,Seconds
16
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,TCTransform,0.03212857246398926,Seconds
17
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,CommuteConcat,0.01803421974182129,Seconds
18
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,RecognizeOpIdiom,0.02785181999206543,Seconds
19
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,MaskPropagation,0.020214080810546875,Seconds
20
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,Recompute,0.0019626617431640625,Seconds
21
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,DoNothing,9.822845458984375e-05,Seconds
22
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,MutateDataType,0.004687070846557617,Seconds
23
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,AutoCastTCInputs,0.008150339126586914,Seconds
24
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,GenericAccessSimplifier,0.02242302894592285,Seconds
25
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,Simplifier,0.8739330768585205,Seconds
26
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,Delinearization,0.0665123462677002,Seconds
27
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,DelinearIndices,0.02688312530517578,Seconds
28
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,DeadCodeElimination,0.013089179992675781,Seconds
29
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,LegalizeCCOpLayout,0.005460023880004883,Seconds
30
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,InferIntrinsicOnCC,0.10268187522888184,Seconds
31
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,ResolveAccessConflict,0.050325870513916016,Seconds
32
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,LocalLayoutOpt,0.043558597564697266,Seconds
33
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,GlobalLayoutOpt,0.6888353824615479,Seconds
34
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,CanonicalizeDAG,0.014380216598510742,Seconds
35
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,FlattenAxesForTiling,0.015676259994506836,Seconds
36
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,SundaSizeTiling,0.9510927200317383,Seconds
37
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,InferTongaTensor,0.34070658683776855,Seconds
38
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,RewriteReplicationMatmul,0.01287698745727539,Seconds
39
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,DataLocalityOpt,2.770150661468506,Seconds
40
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,TilingProfiler,0.016517162322998047,Seconds
41
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,LegalizeSundaMacro,0.04187893867492676,Seconds
42
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,PerfectLoopNest,0.03031635284423828,Seconds
43
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,RewriteWeights,1.1454477310180664,Seconds
44
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,ReshapeWeights,0.008269309997558594,Seconds
45
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,FlattenMacroLoop,0.1312716007232666,Seconds
46
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,SimplifyPredicates,0.46277832984924316,Seconds
47
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,InferInitValue,1.5945630073547363,Seconds
48
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,TongaSimplifier,0.13767409324645996,Seconds
49
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,SimplifyTensor,0.06466341018676758,Seconds
50
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,LICM,0.19189667701721191,Seconds
51
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,SundaISel,0.376575231552124,Seconds
52
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,TongaLoopFusion,0.2005167007446289,Seconds
53
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,FactorizeBlkDims,0.14684295654296875,Seconds
54
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,TongaValueNumbering,0.044389963150024414,Seconds
55
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,TongaInstComb,0.5948424339294434,Seconds
56
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,VectorizeMatMult,0.004179239273071289,Seconds
57
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,VectorizeDMA,0.034056663513183594,Seconds
58
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,LegalizePartitionReduce,0.007035493850708008,Seconds
59
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,PartialLoopFusion,0.2123870849609375,Seconds
60
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,LowerTranspose,0.08278250694274902,Seconds
61
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,LateTongaInstComb,0.3932936191558838,Seconds
62
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,LowerTongaBatchId,0.004044294357299805,Seconds
63
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,SplitAccGrp,0.007302999496459961,Seconds
64
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,SpillPSum,0.144026517868042,Seconds
65
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,TongaLICM,0.08118033409118652,Seconds
66
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,InferPSumTensor,0.11676788330078125,Seconds
67
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,WeightCoalescing,0.03023529052734375,Seconds
68
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,LowerIntrinsics,0.007231950759887695,Seconds
69
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,LegalizeSundaAccess,0.07146430015563965,Seconds
70
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,TongaAffineLoopXform,0.021844863891601562,Seconds
71
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,VectorizeAllReduce,0.007181406021118164,Seconds
72
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,RelaxPredicates,0.012954950332641602,Seconds
73
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,TensorInitialization,0.007806539535522461,Seconds
74
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,TongaSimplifyPredicates,0.03478431701660156,Seconds
75
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,ExpandISAMacro,0.08819413185119629,Seconds
76
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,LegalizeType,0.01977705955505371,Seconds
77
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,SimplifyTongaTensor,0.02720475196838379,Seconds
78
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,DMALocalityOpt,0.005189418792724609,Seconds
79
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,DataStreaming,0.02120208740234375,Seconds
80
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,PSUMModuloAlloc,0.6114835739135742,Seconds
81
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,LegalizeStreamShuffle,0.009519338607788086,Seconds
82
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,CoalesceCCOp,0.009008407592773438,Seconds
83
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,SimpleAllReduceTiling,0.008081197738647461,Seconds
84
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,StaticProfiler,0.03094625473022461,Seconds
85
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,SplitAPUnionSets,0.16402959823608398,Seconds
86
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,SundaLowerGenericAccess,0.007534503936767578,Seconds
87
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,DumpGraphAndMetadata,0.02582526206970215,Seconds
88
+ ,roberta-base-squad2-neuronx,CompilationTime,sg0000,Tensorizer,BirCodeGenLoop,0.31493163108825684,Seconds
89
+ ,roberta-base-squad2-neuronx,CompilationTime,root,Tensorizer,All,19.963484287261963,Seconds
90
+ ,roberta-base-squad2-neuronx,TPBCount,None,KELP,None,1,Count
91
+ ,roberta-base-squad2-neuronx,CompilationTime,root,Frontend,0,20.938761949539185,Seconds
92
+ ,roberta-base-squad2-neuronx,CompilationTime,all,Frontend,0,20.943183660507202,Seconds
93
+ ,roberta-base-squad2-neuronx,CompilationTime,sg00,HHChecker,0,1.2636184692382812e-05,Seconds
94
+ ,roberta-base-squad2-neuronx,CompilationTime,all,HHChecker,0,0.004761934280395508,Seconds
95
+ ,roberta-base-squad2-neuronx,CompilationTime,sg00,WalrusDriver,0,1.3777515888214111,Seconds
96
+ ,roberta-base-squad2-neuronx,CompilationTime,all,WalrusDriver,0,1.3848273754119873,Seconds
97
+ ,roberta-base-squad2-neuronx,EstimatedLowerBoundLatency,None,KELP,None,0,Milliseconds
98
+ ,roberta-base-squad2-neuronx,EstimatedUpperBoundThroughput,None,KELP,None,0,Count/Second
99
+ ,roberta-base-squad2-neuronx,InferentiaDRAMUsage,None,KELP,None,0,Bytes
100
+ ,roberta-base-squad2-neuronx,CompilationTime,all,Kelper,0,4.875930070877075,Seconds
101
+ ,roberta-base-squad2-neuronx,CompilationTime,root,Pipeline,0,27.236693382263184,Seconds
102
+ ,roberta-base-squad2-neuronx,CompilationTime,all,production_total,None,27.20870304107666,Seconds
config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "deepset/roberta-base-squad2",
3
+ "architectures": [
4
+ "RobertaForQuestionAnswering"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "auto_cast": null,
8
+ "auto_cast_type": null,
9
+ "bos_token_id": 0,
10
+ "classifier_dropout": null,
11
+ "dynamic_batch_size": true,
12
+ "eos_token_id": 2,
13
+ "gradient_checkpointing": false,
14
+ "hidden_act": "gelu",
15
+ "hidden_dropout_prob": 0.1,
16
+ "hidden_size": 768,
17
+ "initializer_range": 0.02,
18
+ "input_names": [
19
+ "input_ids",
20
+ "attention_mask"
21
+ ],
22
+ "intermediate_size": 3072,
23
+ "language": "english",
24
+ "layer_norm_eps": 1e-05,
25
+ "max_position_embeddings": 514,
26
+ "model_type": "roberta",
27
+ "name": "Roberta",
28
+ "neuron_batch_size": 1,
29
+ "neuron_sequence_length": 128,
30
+ "num_attention_heads": 12,
31
+ "num_hidden_layers": 12,
32
+ "output_names": [
33
+ "start_logits",
34
+ "end_logits"
35
+ ],
36
+ "pad_token_id": 1,
37
+ "position_embedding_type": "absolute",
38
+ "torchscript": true,
39
+ "transformers_version": "4.29.2",
40
+ "type_vocab_size": 1,
41
+ "use_cache": true,
42
+ "vocab_size": 50265
43
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.neuron ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf85505e1d991b69f63a776a19faa901900c8b6a1c1ac36900580d060e8419aa
3
+ size 460435000
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": {
4
+ "__type": "AddedToken",
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false
10
+ },
11
+ "clean_up_tokenization_spaces": true,
12
+ "cls_token": {
13
+ "__type": "AddedToken",
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "do_lower_case": false,
21
+ "eos_token": {
22
+ "__type": "AddedToken",
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": true,
26
+ "rstrip": false,
27
+ "single_word": false
28
+ },
29
+ "errors": "replace",
30
+ "full_tokenizer_file": null,
31
+ "mask_token": {
32
+ "__type": "AddedToken",
33
+ "content": "<mask>",
34
+ "lstrip": true,
35
+ "normalized": true,
36
+ "rstrip": false,
37
+ "single_word": false
38
+ },
39
+ "model_max_length": 512,
40
+ "pad_token": {
41
+ "__type": "AddedToken",
42
+ "content": "<pad>",
43
+ "lstrip": false,
44
+ "normalized": true,
45
+ "rstrip": false,
46
+ "single_word": false
47
+ },
48
+ "sep_token": {
49
+ "__type": "AddedToken",
50
+ "content": "</s>",
51
+ "lstrip": false,
52
+ "normalized": true,
53
+ "rstrip": false,
54
+ "single_word": false
55
+ },
56
+ "tokenizer_class": "RobertaTokenizer",
57
+ "trim_offsets": true,
58
+ "unk_token": {
59
+ "__type": "AddedToken",
60
+ "content": "<unk>",
61
+ "lstrip": false,
62
+ "normalized": true,
63
+ "rstrip": false,
64
+ "single_word": false
65
+ }
66
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff