Matryoshka Representation Learning
Paper • 2205.13147 • Published • 27
How to use shatonix/granite-embedding-math-cs with sentence-transformers:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("shatonix/granite-embedding-math-cs")
sentences = [
"You are tasked with creating a module in Elm to render an article document. The module should include functions to render the body, navigation, main content, titles, headings, subheadings, paragraphs, code blocks, images, and references. You need to define the necessary types and functions to achieve this.",
"4605",
"Context: \nAnswer: ```elm\nmodule ArticleRenderer exposing (Rendered, renderDocument, body, navigation, mainContent, title, heading, subheading, paragraph, codeBlock, imageBlock, renderReference)\n\nimport Html exposing (Html, div, nav, main, h1, h2, h3, p, img, pre, figure, a, span)\nimport Html.Attributes exposing (style, src, href, target, rel, hidden, type_)\nimport Html.Styled.Attributes as Attributes exposing (css)\nimport Document exposing (Inline, FlatInline, Key, Text, Link, Reference, Code, Image)\n\n\ntype alias Rendered msg =\n Html msg\n\n\n-- Navigation\nnavigation : Rendered msg\nnavigation =\n Html.nav\n [ css [ Css.marginBottom (rem 1) ]\n ]\n [ navLink \"Go back to overview\" Route.Index\n ]\n\n\nnavLink : String -> Route -> Rendered msg\nnavLink text route =\n viewLink\n { url = Route.toPath route\n , text = [ Html.text text ]\n , styles = [ Css.fontStyle Css.italic ]\n }\n\n\n-- Document rendering\nrenderDocument : Article -> Rendered msg\nrenderDocument article =\n article.document\n |> List.map renderBlock\n |> document\n\n\nrenderBlock : Block -> Rendered msg\nrenderBlock block =\n case block of\n Title content ->\n title content\n\n Heading contents ->\n heading contents\n\n Subheading contents ->\n subheading contents\n\n Paragraph contents ->\n paragraph [] contents\n\n CodeBlock code ->\n codeBlock code\n\n ImageBlock image ->\n imageBlock image\n\n\n-- Landmarks\nbody : List (Rendered msg) -> Rendered msg\nbody content =\n Html.div\n [ css\n [ Css.padding (rem 1)\n , Css.maxWidth (rem 48)\n , Css.margin Css.auto\n ]\n ]\n content\n\n\nmainContent : List (Rendered msg) -> Rendered msg\nmainContent contents =\n Html.main_ [] contents\n\n\n-- Text\ntitle : String -> Rendered msg\ntitle text =\n Html.h1\n [ css\n [ headingStyle\n , Css.fontSize (rem 1.5)\n ]\n ]\n [ Html.text text ]\n\n\nheading : List (Inline Path) -> Rendered msg\nheading contents =\n Html.h2\n [ css\n [ headingStyle\n , Css.fontSize (rem 1.25)\n ]\n ]\n (List.map 
renderInline contents)\n\n\nsubheading : List (Inline Path) -> Rendered msg\nsubheading contents =\n Html.h3\n [ css\n [ headingStyle\n , Css.fontSize (rem 1.1)\n ]\n ]\n (List.map renderInline contents)\n\n\nparagraph : List Css.Style -> List (Inline Path) -> Rendered msg\nparagraph styles content =\n Html.p\n [ css (paragraphStyle :: styles)\n ]\n (List.map renderInline content)\n\n\ncodeBlock : Document.Code -> Rendered msg\ncodeBlock code =\n Html.pre [ css [ codeBackgroundStyle, Css.padding (em 1) ] ]\n [ Html.code [ css [ codeFontStyle ] ] [ Html.text code.src ] ]\n\n\nimageBlock : Document.Image Path -> Rendered msg\nimageBlock image =\n Html.figure\n [ css\n [ Css.margin2 paragraphSpacing zero\n , framedStyle\n ]\n ]\n [ Html.a\n [ Attributes.href <| Path.toAbsolute image.fallbackSource.source.src\n , Attributes.target \"_blank\"\n , Attributes.rel \"noopener\"\n ]\n [ Html.img\n [ Attributes.src (Path.toAbsolute image.fallbackSource.source.src)\n , Attributes.alt image.alt\n , Attributes.width image.fallbackSource.source.width\n , Attributes.height image.fallbackSource.source.height\n , css\n [ Css.display Css.block\n , Css.maxWidth (pct 100)\n , Css.width (pct 100)\n , Css.height Css.auto\n ]\n ]\n ]\n , Html.figcaption\n [ css\n [ Css.boxSizing Css.borderBox\n , Css.width (pct 100)\n , Css.padding (rem 0.5)\n ]\n ]\n [ Html.text image.caption ]\n ]\n\n\n-- Text styles\nheadingStyle : Css.Style\nheadingStyle =\n Css.batch\n [ headingFontFamily\n , Css.fontWeight Css.bold\n , Css.margin zero\n , Css.lineHeight (num 1.2)\n ]\n\n\nparagraphStyle : Css.Style\nparagraphStyle =\n Css.batch\n [ paragraphFontStyle\n , Css.margin zero\n ]\n\n\ncodeFontStyle : Css.Style\ncodeFontStyle =\n Css.batch\n [ Css.whiteSpace Css.preWrap\n , Css.fontFamilies [ \"Source Code Pro\", \"monospace\" ]\n ]\n\n\ncodeBackgroundStyle : Css.Style\ncodeBackgroundStyle =\n Css.batch\n [ Css.borderRadius (em 0.2)\n , Css.backgroundColor (Css.hsla 0 0 0.5 0.15)\n , Css.padding2 (em 0.05) 
(em 0.2)\n , Css.fontSize (em 0.95)\n ]\n\n\nframedStyle : Css.Style\nframedStyle =\n let\n spacing =\n 0.5\n in\n Css.batch\n [ framedBorderStyle\n , Css.borderRadius (rem spacing)\n ]\n\n\nframedBorderStyle : Css.Style\nframedBorderStyle =\n Css.border3 (px 1) Css.solid borderColor\n\n\nparagraphSpacing : Css.Rem\nparagraphSpacing =\n rem 1\n\n\nborderColor : Css.Color\nborderColor =\n Css.hsla 0 0 0 0.25\n\n\nparagraphFontStyle : Css.Style\nparagraphFontStyle =\n Css.batch\n [ Css.lineHeight (num 1.35)\n ]\n\n\n-- Helper functions\nrenderInline : Inline Path -> Rendered msg\nrenderInline inline =\n case inline of\n TextInline text ->\n renderText [] text\n\n LinkInline link ->\n renderLink link\n\n ReferenceInline reference ->\n renderReference reference\n\n CodeInline code ->\n renderCode code\n\n KeysInline keys ->\n renderKeys keys\n\n\nrenderText : List Css.Style -> Text -> Rendered msg\nrenderText extraStyles text =\n let\n italic =\n if text.style.emphasized then\n [ Css.fontStyle Css.italic ]\n\n else\n []\n\n styles =\n italic ++ extraStyles\n in\n if text.style.emphasized then\n Html.em [ css styles ] [ Html.text text.content ]\n\n else if List.isEmpty styles then\n Html.text text.content\n\n else\n Html.span [ css styles ] [ Html.text text.content ]\n\n\nrenderLink : Link -> Rendered msg\nrenderLink link =\n viewLink\n { text = List.map (renderText []) link.text\n , url = Url.toString link.url\n , styles = []\n }\n\n\nviewLink : { text : List (Rendered msg), url : String, styles : List Css.Style } -> Rendered msg\nviewLink { text, url, styles } =\n let\n unvisitedColor =\n Css.rgb 22 22 162\n\n visitedColor =\n Css.inherit\n in\n Html.a\n [ Attributes.href url\n , css\n ([ Css.color unvisitedColor\n , Css.visited\n [ Css.color visitedColor\n ]\n , hover\n [ Css.textDecorationStyle Css.dotted\n ]\n ]\n ++ styles\n )\n ]\n text\n\n\nrenderReference : Reference Path -> Rendered msg\nrenderReference reference =\n viewLink\n { text =\n List.map\n 
(renderText\n [ Css.fontWeight Css.bold\n , Css.fontSize (em 0.8)\n ]\n )\n reference.text\n , url = Path.toAbsolute reference.path\n , styles = []\n }\n\n\nrenderCode : Code -> Rendered msg\nrenderCode code =\n Html.code\n [ css [ codeFontStyle, codeBackgroundStyle ]\n ]\n [ Html.text code.src ]\n\n\nrenderKeys : Keys -> Rendered msg\nrenderKeys keys =\n case keys of\n ( first, [] ) ->\n renderKey first\n\n ( first, rest ) ->\n Html.kbd [ css [ Css.whiteSpace Css.preWrap ] ]\n (List.map renderKey (first :: rest)\n |> List.intersperse (Html.text \"+\\u{200B}\")\n )\n\n\nrenderKey : Key -> Rendered msg\nrenderKey key =\n let\n keyBorderColor =\n Css.hsl 0 0 0.75\n\n keyText =\n case key of\n Letter l ->\n String.fromChar l\n\n Ctrl ->\n \"Ctrl\"\n\n Shift ->\n \"Shift\\u{00A0}⇧\"\n\n Enter ->\n \"Enter\\u{00A0}↵\"\n\n Tab ->\n \"Tab\\u{00A0}↹\"\n\n Up ->\n \"↑\\u{00A0}up\"\n\n Down ->\n \"↓\\u{00A0}down\"\n in\n Html.kbd\n [ css\n [ codeFontStyle\n , Css.fontSize (em 0.8)\n , Css.padding2 (em 0) (em 0.1)\n , Css.border3 (px 1) Css.solid keyBorderColor\n , Css.borderRadius (em 0.2)\n , Css.boxShadow5 Css.inset zero (px -1) zero keyBorderColor\n , Css.verticalAlign Css.center\n , Css.whiteSpace Css.pre\n ]\n ]\n [ Html.text keyText ]\n```",
"Context: \nAnswer: 40"
]
embeddings = model.encode(sentences)
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [4, 4]
This is a sentence-transformers model finetuned from ibm-granite/granite-embedding-english-r2. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
SentenceTransformer(
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
First install the Sentence Transformers library:
pip install -U sentence-transformers
Then you can load this model and run inference.
from sentence_transformers import SentenceTransformer
# Download from the 🤗 Hub
model = SentenceTransformer("shatonix/granite-embedding-math-cs")
# Run inference
sentences = [
'Calculate $(-1)^{47} + 2^{(3^3+4^2-6^2)}$.',
'Context: \nAnswer: 127',
'4750',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]
# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities)
# tensor([[ 1.0000, 0.5650, -0.0154],
# [ 0.5650, 1.0000, -0.0246],
# [-0.0154, -0.0246, 1.0000]])
dim_768 — evaluated with InformationRetrievalEvaluator with these parameters:
{
"truncate_dim": 768
}
| Metric | Value |
|---|---|
| cosine_accuracy@1 | 0.626 |
| cosine_accuracy@3 | 0.706 |
| cosine_accuracy@5 | 0.726 |
| cosine_accuracy@10 | 0.758 |
| cosine_precision@1 | 0.626 |
| cosine_precision@3 | 0.2353 |
| cosine_precision@5 | 0.1452 |
| cosine_precision@10 | 0.0758 |
| cosine_recall@1 | 0.626 |
| cosine_recall@3 | 0.706 |
| cosine_recall@5 | 0.726 |
| cosine_recall@10 | 0.758 |
| cosine_ndcg@10 | 0.6916 |
| cosine_mrr@10 | 0.6704 |
| cosine_map@100 | 0.6751 |
dim_512 — evaluated with InformationRetrievalEvaluator with these parameters:
{
"truncate_dim": 512
}
| Metric | Value |
|---|---|
| cosine_accuracy@1 | 0.636 |
| cosine_accuracy@3 | 0.7 |
| cosine_accuracy@5 | 0.724 |
| cosine_accuracy@10 | 0.758 |
| cosine_precision@1 | 0.636 |
| cosine_precision@3 | 0.2333 |
| cosine_precision@5 | 0.1448 |
| cosine_precision@10 | 0.0758 |
| cosine_recall@1 | 0.636 |
| cosine_recall@3 | 0.7 |
| cosine_recall@5 | 0.724 |
| cosine_recall@10 | 0.758 |
| cosine_ndcg@10 | 0.694 |
| cosine_mrr@10 | 0.6739 |
| cosine_map@100 | 0.6785 |
dim_256 — evaluated with InformationRetrievalEvaluator with these parameters:
{
"truncate_dim": 256
}
| Metric | Value |
|---|---|
| cosine_accuracy@1 | 0.638 |
| cosine_accuracy@3 | 0.698 |
| cosine_accuracy@5 | 0.712 |
| cosine_accuracy@10 | 0.75 |
| cosine_precision@1 | 0.638 |
| cosine_precision@3 | 0.2327 |
| cosine_precision@5 | 0.1424 |
| cosine_precision@10 | 0.075 |
| cosine_recall@1 | 0.638 |
| cosine_recall@3 | 0.698 |
| cosine_recall@5 | 0.712 |
| cosine_recall@10 | 0.75 |
| cosine_ndcg@10 | 0.6915 |
| cosine_mrr@10 | 0.6731 |
| cosine_map@100 | 0.6781 |
dim_128 — evaluated with InformationRetrievalEvaluator with these parameters:
{
"truncate_dim": 128
}
| Metric | Value |
|---|---|
| cosine_accuracy@1 | 0.636 |
| cosine_accuracy@3 | 0.698 |
| cosine_accuracy@5 | 0.716 |
| cosine_accuracy@10 | 0.74 |
| cosine_precision@1 | 0.636 |
| cosine_precision@3 | 0.2327 |
| cosine_precision@5 | 0.1432 |
| cosine_precision@10 | 0.074 |
| cosine_recall@1 | 0.636 |
| cosine_recall@3 | 0.698 |
| cosine_recall@5 | 0.716 |
| cosine_recall@10 | 0.74 |
| cosine_ndcg@10 | 0.6863 |
| cosine_mrr@10 | 0.6693 |
| cosine_map@100 | 0.6739 |
dim_64 — evaluated with InformationRetrievalEvaluator with these parameters:
{
"truncate_dim": 64
}
| Metric | Value |
|---|---|
| cosine_accuracy@1 | 0.628 |
| cosine_accuracy@3 | 0.692 |
| cosine_accuracy@5 | 0.714 |
| cosine_accuracy@10 | 0.734 |
| cosine_precision@1 | 0.628 |
| cosine_precision@3 | 0.2307 |
| cosine_precision@5 | 0.1428 |
| cosine_precision@10 | 0.0734 |
| cosine_recall@1 | 0.628 |
| cosine_recall@3 | 0.692 |
| cosine_recall@5 | 0.714 |
| cosine_recall@10 | 0.734 |
| cosine_ndcg@10 | 0.6806 |
| cosine_mrr@10 | 0.6635 |
| cosine_map@100 | 0.6681 |
Columns: anchor, positive, and id

| | anchor | positive | id |
|---|---|---|---|
| type | string | string | string |
| details | | | |
| anchor | positive | id |
|---|---|---|
| Stella’s antique shop has 3 dolls, 2 clocks and 5 glasses for sale. She sells the dolls for $5 each. The clocks are priced at $15 each. The glasses are priced at $4 each. If she spent $40 to buy everything and she sells all of her merchandise, how much profit will she make? | Context: | 3430 |
| You are tasked with creating a Ruby program that defines a service for creating a project in a Continuous Integration (CI) system. The service should be able to execute with valid parameters and handle specific scenarios. | Context: | 656 |
| Why is the Insertion Sort algorithm considered optimal for nearly sorted datasets, and how does its time complexity compare to other quadratic sorting algorithms? | Context: | 1305 |
MatryoshkaLoss with these parameters:
{
"loss": "MultipleNegativesRankingLoss",
"matryoshka_dims": [
768,
512,
256,
128,
64
],
"matryoshka_weights": [
1,
1,
1,
1,
1
],
"n_dims_per_step": -1
}
Non-default hyperparameters:
- eval_strategy: epoch
- per_device_train_batch_size: 64
- per_device_eval_batch_size: 64
- gradient_accumulation_steps: 2
- num_train_epochs: 10
- lr_scheduler_type: cosine
- warmup_ratio: 0.1
- bf16: True
- tf32: True
- dataloader_num_workers: 4
- load_best_model_at_end: True
- batch_sampler: no_duplicates

All hyperparameters:
- overwrite_output_dir: False
- do_predict: False
- eval_strategy: epoch
- prediction_loss_only: True
- per_device_train_batch_size: 64
- per_device_eval_batch_size: 64
- per_gpu_train_batch_size: None
- per_gpu_eval_batch_size: None
- gradient_accumulation_steps: 2
- eval_accumulation_steps: None
- torch_empty_cache_steps: None
- learning_rate: 5e-05
- weight_decay: 0.0
- adam_beta1: 0.9
- adam_beta2: 0.999
- adam_epsilon: 1e-08
- max_grad_norm: 1.0
- num_train_epochs: 10
- max_steps: -1
- lr_scheduler_type: cosine
- lr_scheduler_kwargs: {}
- warmup_ratio: 0.1
- warmup_steps: 0
- log_level: passive
- log_level_replica: warning
- log_on_each_node: True
- logging_nan_inf_filter: True
- save_safetensors: True
- save_on_each_node: False
- save_only_model: False
- restore_callback_states_from_checkpoint: False
- no_cuda: False
- use_cpu: False
- use_mps_device: False
- seed: 42
- data_seed: None
- jit_mode_eval: False
- bf16: True
- fp16: False
- fp16_opt_level: O1
- half_precision_backend: auto
- bf16_full_eval: False
- fp16_full_eval: False
- tf32: True
- local_rank: 0
- ddp_backend: None
- tpu_num_cores: None
- tpu_metrics_debug: False
- debug: []
- dataloader_drop_last: False
- dataloader_num_workers: 4
- dataloader_prefetch_factor: None
- past_index: -1
- disable_tqdm: False
- remove_unused_columns: True
- label_names: None
- load_best_model_at_end: True
- ignore_data_skip: False
- fsdp: []
- fsdp_min_num_params: 0
- fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- fsdp_transformer_layer_cls_to_wrap: None
- accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- parallelism_config: None
- deepspeed: None
- label_smoothing_factor: 0.0
- optim: adamw_torch_fused
- optim_args: None
- adafactor: False
- group_by_length: False
- length_column_name: length
- project: huggingface
- trackio_space_id: trackio
- ddp_find_unused_parameters: None
- ddp_bucket_cap_mb: None
- ddp_broadcast_buffers: False
- dataloader_pin_memory: True
- dataloader_persistent_workers: False
- skip_memory_metrics: True
- use_legacy_prediction_loop: False
- push_to_hub: False
- resume_from_checkpoint: None
- hub_model_id: None
- hub_strategy: every_save
- hub_private_repo: None
- hub_always_push: False
- hub_revision: None
- gradient_checkpointing: False
- gradient_checkpointing_kwargs: None
- include_inputs_for_metrics: False
- include_for_metrics: []
- eval_do_concat_batches: True
- fp16_backend: auto
- push_to_hub_model_id: None
- push_to_hub_organization: None
- mp_parameters:
- auto_find_batch_size: False
- full_determinism: False
- torchdynamo: None
- ray_scope: last
- ddp_timeout: 1800
- torch_compile: False
- torch_compile_backend: None
- torch_compile_mode: None
- include_tokens_per_second: False
- include_num_input_tokens_seen: no
- neftune_noise_alpha: None
- optim_target_modules: None
- batch_eval_metrics: False
- eval_on_start: False
- use_liger_kernel: False
- liger_kernel_config: None
- eval_use_gather_object: False
- average_tokens_across_devices: True
- prompts: None
- batch_sampler: no_duplicates
- multi_dataset_batch_sampler: proportional
- router_mapping: {}
- learning_rate_mapping: {}

| Epoch | Step | Training Loss | dim_768_cosine_ndcg@10 | dim_512_cosine_ndcg@10 | dim_256_cosine_ndcg@10 | dim_128_cosine_ndcg@10 | dim_64_cosine_ndcg@10 |
|---|---|---|---|---|---|---|---|
| -1 | -1 | - | 0.6227 | 0.6213 | 0.6163 | 0.6036 | 0.5905 |
| 0.2817 | 10 | 10.3671 | - | - | - | - | - |
| 0.5634 | 20 | 8.1302 | - | - | - | - | - |
| 0.8451 | 30 | 6.6781 | - | - | - | - | - |
| 1.0 | 36 | - | 0.6371 | 0.6373 | 0.6368 | 0.6384 | 0.6297 |
| 1.1127 | 40 | 5.6041 | - | - | - | - | - |
| 1.3944 | 50 | 5.3589 | - | - | - | - | - |
| 1.6761 | 60 | 5.2615 | - | - | - | - | - |
| 1.9577 | 70 | 5.1322 | - | - | - | - | - |
| 2.0 | 72 | - | 0.6584 | 0.6599 | 0.6567 | 0.6590 | 0.6588 |
| 2.2254 | 80 | 4.2222 | - | - | - | - | - |
| 2.5070 | 90 | 3.6282 | - | - | - | - | - |
| 2.7887 | 100 | 3.5652 | - | - | - | - | - |
| 3.0 | 108 | - | 0.6679 | 0.6724 | 0.6750 | 0.6699 | 0.6645 |
| 3.0563 | 110 | 3.1212 | - | - | - | - | - |
| 3.3380 | 120 | 1.8016 | - | - | - | - | - |
| 3.6197 | 130 | 1.8941 | - | - | - | - | - |
| 3.9014 | 140 | 1.8576 | - | - | - | - | - |
| 4.0 | 144 | - | 0.6900 | 0.6923 | 0.6937 | 0.6863 | 0.6771 |
| 4.1690 | 150 | 1.0872 | - | - | - | - | - |
| 4.4507 | 160 | 0.7482 | - | - | - | - | - |
| 4.7324 | 170 | 0.7307 | - | - | - | - | - |
| 5.0 | 180 | 0.8322 | 0.6909 | 0.6988 | 0.6947 | 0.6873 | 0.6800 |
| 5.2817 | 190 | 0.329 | - | - | - | - | - |
| 5.5634 | 200 | 0.3246 | - | - | - | - | - |
| 5.8451 | 210 | 0.274 | - | - | - | - | - |
| 6.0 | 216 | - | 0.6898 | 0.6929 | 0.6904 | 0.6900 | 0.6801 |
| 6.1127 | 220 | 0.2161 | - | - | - | - | - |
| 6.3944 | 230 | 0.1178 | - | - | - | - | - |
| 6.6761 | 240 | 0.1418 | - | - | - | - | - |
| 6.9577 | 250 | 0.1319 | - | - | - | - | - |
| 7.0 | 252 | - | 0.6920 | 0.6890 | 0.6910 | 0.6880 | 0.6789 |
| 7.2254 | 260 | 0.0979 | - | - | - | - | - |
| 7.5070 | 270 | 0.0653 | - | - | - | - | - |
| 7.7887 | 280 | 0.0852 | - | - | - | - | - |
| 8.0 | 288 | - | 0.6934 | 0.69 | 0.6934 | 0.6877 | 0.6825 |
| 8.0563 | 290 | 0.08 | - | - | - | - | - |
| 8.3380 | 300 | 0.0526 | - | - | - | - | - |
| 8.6197 | 310 | 0.066 | - | - | - | - | - |
| 8.9014 | 320 | 0.0549 | - | - | - | - | - |
| 9.0 | 324 | - | 0.6911 | 0.6929 | 0.6905 | 0.6858 | 0.6802 |
| 9.1690 | 330 | 0.0384 | - | - | - | - | - |
| 9.4507 | 340 | 0.0523 | - | - | - | - | - |
| 9.7324 | 350 | 0.0333 | - | - | - | - | - |
| 10.0 | 360 | 0.0488 | 0.6916 | 0.6940 | 0.6915 | 0.6863 | 0.6806 |
@inproceedings{reimers-2019-sentence-bert,
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
author = "Reimers, Nils and Gurevych, Iryna",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
month = "11",
year = "2019",
publisher = "Association for Computational Linguistics",
url = "https://arxiv.org/abs/1908.10084",
}
@misc{kusupati2024matryoshka,
title={Matryoshka Representation Learning},
author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
year={2024},
eprint={2205.13147},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@misc{henderson2017efficient,
title={Efficient Natural Language Response Suggestion for Smart Reply},
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
year={2017},
eprint={1705.00652},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
Base model
ibm-granite/granite-embedding-english-r2