oneryalcin's picture
Upload ONNX FP32 + INT8 quantized sparse encoder for financial documents
fb65c5c verified
{
"base_model": "opensearch-project/opensearch-neural-sparse-encoding-doc-v3-gte",
"finetuned_from": "oneryalcin/fin-sparse-encoder-doc-v1",
"max_seq_length": 512,
"vocab_size": 30522,
"activation": "log1p_relu",
"pooling": "max",
"splade_postprocess": "log(1 + log(1 + ReLU(max_over_seq(logits * attention_mask))))",
"files": {
"model.onnx": "Full precision FP32 ONNX model",
"model_quantized.onnx": "Dynamic INT8 quantized ONNX model (recommended for CPU)"
}
}