oneryalcin's picture
Upload ONNX FP32 + INT8 quantized sparse encoder for financial documents
fb65c5c verified
{
"seq_len": 512,
"threads": 1,
"iters": 30,
"pytorch_fp32": {
"p50_ms": 186.2685834785225,
"p95_ms": 192.7833059511613,
"mean_ms": 187.10802356363274
},
"onnx_fp32": {
"p50_ms": 211.71175049676094,
"p95_ms": 218.8744774510269,
"mean_ms": 211.95321109941383
},
"onnx_int8": {
"p50_ms": 164.41352099354845,
"p95_ms": 166.8924230907578,
"mean_ms": 163.78664293248826
}
}