Shubha Ruidas committed on
Commit
9594580
·
verified ·
1 Parent(s): 659463a

Add new SentenceTransformer model

Browse files
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,922 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ tags:
6
+ - sentence-transformers
7
+ - sentence-similarity
8
+ - feature-extraction
9
+ - dense
10
+ - generated_from_trainer
11
+ - dataset_size:900
12
+ - loss:MatryoshkaLoss
13
+ - loss:MultipleNegativesRankingLoss
14
+ base_model: microsoft/codebert-base
15
+ widget:
16
+ - source_sentence: Best practices for _invocation_params
17
+ sentences:
18
+ - "def after_model(self, state: StateT, runtime: Runtime[ContextT]) -> dict[str,\
19
+ \ Any] | None:\n \"\"\"Logic to run after the model is called.\n\n \
20
+ \ Args:\n state: The current agent state.\n runtime:\
21
+ \ The runtime context.\n\n Returns:\n Agent state updates to\
22
+ \ apply after model call.\n \"\"\""
23
+ - "def _get_trace_callbacks(\n project_name: str | None = None,\n example_id:\
24
+ \ str | UUID | None = None,\n callback_manager: CallbackManager | AsyncCallbackManager\
25
+ \ | None = None,\n) -> Callbacks:\n if _tracing_v2_is_enabled():\n project_name_\
26
+ \ = project_name or _get_tracer_project()\n tracer = tracing_v2_callback_var.get()\
27
+ \ or LangChainTracer(\n project_name=project_name_,\n example_id=example_id,\n\
28
+ \ )\n if callback_manager is None:\n cb = cast(\"Callbacks\"\
29
+ , [tracer])\n else:\n if not any(\n isinstance(handler,\
30
+ \ LangChainTracer)\n for handler in callback_manager.handlers\n\
31
+ \ ):\n callback_manager.add_handler(tracer)\n \
32
+ \ # If it already has a LangChainTracer, we don't need to add another\
33
+ \ one.\n # this would likely mess up the trace hierarchy.\n \
34
+ \ cb = callback_manager\n else:\n cb = None\n return cb"
35
+ - "def _invocation_params(self) -> dict[str, Any]:\n params: dict = {\"model\"\
36
+ : self.model, **self.model_kwargs}\n if self.dimensions is not None:\n\
37
+ \ params[\"dimensions\"] = self.dimensions\n return params"
38
+ - source_sentence: How does _approximate_token_counter work in Python?
39
+ sentences:
40
+ - "def _approximate_token_counter(messages: Sequence[BaseMessage]) -> int:\n \
41
+ \ \"\"\"Wrapper for `count_tokens_approximately` that matches expected signature.\"\
42
+ \"\"\n return count_tokens_approximately(messages)"
43
+ - "def remove_request_headers(request: Any) -> Any:\n for k in request.headers:\n\
44
+ \ request.headers[k] = \"**REDACTED**\"\n return request"
45
+ - "def get_format_instructions(self) -> str:\n \"\"\"Returns formatting instructions\
46
+ \ for the given output parser.\"\"\"\n return self.format_instructions"
47
+ - source_sentence: How to implement _create_thread_and_run?
48
+ sentences:
49
+ - "async def on_retriever_end(\n self, documents: Sequence[Document], **kwargs:\
50
+ \ Any\n ) -> None:\n \"\"\"Run when the retriever ends running.\n\n\
51
+ \ Args:\n documents: The retrieved documents.\n **kwargs:\
52
+ \ Additional keyword arguments.\n\n \"\"\"\n if not self.handlers:\n\
53
+ \ return\n await ahandle_event(\n self.handlers,\n\
54
+ \ \"on_retriever_end\",\n \"ignore_retriever\",\n \
55
+ \ documents,\n run_id=self.run_id,\n parent_run_id=self.parent_run_id,\n\
56
+ \ tags=self.tags,\n **kwargs,\n )"
57
+ - "def _create_thread_and_run(self, input_dict: dict, thread: dict) -> Any:\n \
58
+ \ params = {\n k: v\n for k, v in input_dict.items()\n\
59
+ \ if k\n in (\n \"instructions\",\n \
60
+ \ \"model\",\n \"tools\",\n \"parallel_tool_calls\"\
61
+ ,\n \"top_p\",\n \"temperature\",\n \
62
+ \ \"max_completion_tokens\",\n \"max_prompt_tokens\",\n \
63
+ \ \"run_metadata\",\n )\n }\n return self.client.beta.threads.create_and_run(\n\
64
+ \ assistant_id=self.assistant_id,\n thread=thread,\n \
65
+ \ **params,\n )"
66
+ - "def test_pandas_output_parser_col_no_array() -> None:\n with pytest.raises(OutputParserException):\n\
67
+ \ parser.parse(\"column:num_legs\")"
68
+ - source_sentence: Explain the get_token_ids logic
69
+ sentences:
70
+ - "def _runnable(inputs: dict[str, Any]) -> str:\n if inputs[\"text\"] == \"\
71
+ foo\":\n return \"first\"\n if \"exception\" not in inputs:\n \
72
+ \ msg = \"missing exception\"\n raise ValueError(msg)\n if inputs[\"\
73
+ text\"] == \"bar\":\n return \"second\"\n if isinstance(inputs[\"exception\"\
74
+ ], ValueError):\n raise RuntimeError # noqa: TRY004\n return \"third\""
75
+ - "def validate_params(cls, values: dict) -> dict:\n \"\"\"Validate similarity\
76
+ \ parameters.\"\"\"\n if values[\"k\"] is None and values[\"similarity_threshold\"\
77
+ ] is None:\n msg = \"Must specify one of `k` or `similarity_threshold`.\"\
78
+ \n raise ValueError(msg)\n return values"
79
+ - "def get_token_ids(self, text: str) -> list[int]:\n \"\"\"Return the ordered\
80
+ \ IDs of the tokens in a text.\n\n Args:\n text: The string\
81
+ \ input to tokenize.\n\n Returns:\n A list of IDs corresponding\
82
+ \ to the tokens in the text, in order they occur\n in the text.\n\
83
+ \ \"\"\"\n if self.custom_get_token_ids is not None:\n \
84
+ \ return self.custom_get_token_ids(text)\n return _get_token_ids_default_method(text)"
85
+ - source_sentence: How does __init__ work in Python?
86
+ sentences:
87
+ - "def test_loading_few_shot_prompt_from_json() -> None:\n \"\"\"Test loading\
88
+ \ few shot prompt from json.\"\"\"\n with change_directory(EXAMPLE_DIR):\n\
89
+ \ prompt = load_prompt(\"few_shot_prompt.json\")\n expected_prompt\
90
+ \ = FewShotPromptTemplate(\n input_variables=[\"adjective\"],\n \
91
+ \ prefix=\"Write antonyms for the following words.\",\n example_prompt=PromptTemplate(\n\
92
+ \ input_variables=[\"input\", \"output\"],\n template=\"\
93
+ Input: {input}\\nOutput: {output}\",\n ),\n examples=[\n\
94
+ \ {\"input\": \"happy\", \"output\": \"sad\"},\n \
95
+ \ {\"input\": \"tall\", \"output\": \"short\"},\n ],\n \
96
+ \ suffix=\"Input: {adjective}\\nOutput:\",\n )\n assert prompt ==\
97
+ \ expected_prompt"
98
+ - "def __init__(\n self,\n encoding_name: str = \"gpt2\",\n \
99
+ \ model_name: str | None = None,\n allowed_special: Literal[\"all\"] |\
100
+ \ AbstractSet[str] = set(),\n disallowed_special: Literal[\"all\"] | Collection[str]\
101
+ \ = \"all\",\n **kwargs: Any,\n ) -> None:\n \"\"\"Create a new\
102
+ \ `TextSplitter`.\n\n Args:\n encoding_name: The name of the\
103
+ \ tiktoken encoding to use.\n model_name: The name of the model to\
104
+ \ use. If provided, this will\n override the `encoding_name`.\n\
105
+ \ allowed_special: Special tokens that are allowed during encoding.\n\
106
+ \ disallowed_special: Special tokens that are disallowed during encoding.\n\
107
+ \n Raises:\n ImportError: If the tiktoken package is not installed.\n\
108
+ \ \"\"\"\n super().__init__(**kwargs)\n if not _HAS_TIKTOKEN:\n\
109
+ \ msg = (\n \"Could not import tiktoken python package.\
110
+ \ \"\n \"This is needed in order to for TokenTextSplitter. \"\n\
111
+ \ \"Please install it with `pip install tiktoken`.\"\n \
112
+ \ )\n raise ImportError(msg)\n\n if model_name is not None:\n\
113
+ \ enc = tiktoken.encoding_for_model(model_name)\n else:\n \
114
+ \ enc = tiktoken.get_encoding(encoding_name)\n self._tokenizer\
115
+ \ = enc\n self._allowed_special = allowed_special\n self._disallowed_special\
116
+ \ = disallowed_special"
117
+ - "def test_fixed_message_response_when_docs_found() -> None:\n fixed_resp =\
118
+ \ \"I don't know\"\n answer = \"I know the answer!\"\n llm = FakeListLLM(responses=[answer])\n\
119
+ \ retriever = SequentialRetriever(\n sequential_responses=[[Document(page_content=answer)]],\n\
120
+ \ )\n memory = ConversationBufferMemory(\n k=1,\n output_key=\"\
121
+ answer\",\n memory_key=\"chat_history\",\n return_messages=True,\n\
122
+ \ )\n qa_chain = ConversationalRetrievalChain.from_llm(\n llm=llm,\n\
123
+ \ memory=memory,\n retriever=retriever,\n return_source_documents=True,\n\
124
+ \ rephrase_question=False,\n response_if_no_docs_found=fixed_resp,\n\
125
+ \ verbose=True,\n )\n got = qa_chain(\"What is the answer?\")\n \
126
+ \ assert got[\"chat_history\"][1].content == answer\n assert got[\"answer\"\
127
+ ] == answer"
128
+ pipeline_tag: sentence-similarity
129
+ library_name: sentence-transformers
130
+ metrics:
131
+ - cosine_accuracy@1
132
+ - cosine_accuracy@3
133
+ - cosine_accuracy@5
134
+ - cosine_accuracy@10
135
+ - cosine_precision@1
136
+ - cosine_precision@3
137
+ - cosine_precision@5
138
+ - cosine_precision@10
139
+ - cosine_recall@1
140
+ - cosine_recall@3
141
+ - cosine_recall@5
142
+ - cosine_recall@10
143
+ - cosine_ndcg@10
144
+ - cosine_mrr@10
145
+ - cosine_map@100
146
+ model-index:
147
+ - name: codeBert Base
148
+ results:
149
+ - task:
150
+ type: information-retrieval
151
+ name: Information Retrieval
152
+ dataset:
153
+ name: dim 768
154
+ type: dim_768
155
+ metrics:
156
+ - type: cosine_accuracy@1
157
+ value: 0.83
158
+ name: Cosine Accuracy@1
159
+ - type: cosine_accuracy@3
160
+ value: 0.85
161
+ name: Cosine Accuracy@3
162
+ - type: cosine_accuracy@5
163
+ value: 0.86
164
+ name: Cosine Accuracy@5
165
+ - type: cosine_accuracy@10
166
+ value: 0.94
167
+ name: Cosine Accuracy@10
168
+ - type: cosine_precision@1
169
+ value: 0.83
170
+ name: Cosine Precision@1
171
+ - type: cosine_precision@3
172
+ value: 0.83
173
+ name: Cosine Precision@3
174
+ - type: cosine_precision@5
175
+ value: 0.83
176
+ name: Cosine Precision@5
177
+ - type: cosine_precision@10
178
+ value: 0.45299999999999996
179
+ name: Cosine Precision@10
180
+ - type: cosine_recall@1
181
+ value: 0.16599999999999998
182
+ name: Cosine Recall@1
183
+ - type: cosine_recall@3
184
+ value: 0.498
185
+ name: Cosine Recall@3
186
+ - type: cosine_recall@5
187
+ value: 0.83
188
+ name: Cosine Recall@5
189
+ - type: cosine_recall@10
190
+ value: 0.9059999999999999
191
+ name: Cosine Recall@10
192
+ - type: cosine_ndcg@10
193
+ value: 0.8712089918828809
194
+ name: Cosine Ndcg@10
195
+ - type: cosine_mrr@10
196
+ value: 0.8532738095238095
197
+ name: Cosine Mrr@10
198
+ - type: cosine_map@100
199
+ value: 0.861635686929646
200
+ name: Cosine Map@100
201
+ - task:
202
+ type: information-retrieval
203
+ name: Information Retrieval
204
+ dataset:
205
+ name: dim 512
206
+ type: dim_512
207
+ metrics:
208
+ - type: cosine_accuracy@1
209
+ value: 0.85
210
+ name: Cosine Accuracy@1
211
+ - type: cosine_accuracy@3
212
+ value: 0.86
213
+ name: Cosine Accuracy@3
214
+ - type: cosine_accuracy@5
215
+ value: 0.87
216
+ name: Cosine Accuracy@5
217
+ - type: cosine_accuracy@10
218
+ value: 0.95
219
+ name: Cosine Accuracy@10
220
+ - type: cosine_precision@1
221
+ value: 0.85
222
+ name: Cosine Precision@1
223
+ - type: cosine_precision@3
224
+ value: 0.84
225
+ name: Cosine Precision@3
226
+ - type: cosine_precision@5
227
+ value: 0.8419999999999999
228
+ name: Cosine Precision@5
229
+ - type: cosine_precision@10
230
+ value: 0.45299999999999996
231
+ name: Cosine Precision@10
232
+ - type: cosine_recall@1
233
+ value: 0.16999999999999996
234
+ name: Cosine Recall@1
235
+ - type: cosine_recall@3
236
+ value: 0.504
237
+ name: Cosine Recall@3
238
+ - type: cosine_recall@5
239
+ value: 0.8419999999999999
240
+ name: Cosine Recall@5
241
+ - type: cosine_recall@10
242
+ value: 0.9059999999999999
243
+ name: Cosine Recall@10
244
+ - type: cosine_ndcg@10
245
+ value: 0.8775797199885595
246
+ name: Cosine Ndcg@10
247
+ - type: cosine_mrr@10
248
+ value: 0.8699404761904762
249
+ name: Cosine Mrr@10
250
+ - type: cosine_map@100
251
+ value: 0.8692738075020783
252
+ name: Cosine Map@100
253
+ - task:
254
+ type: information-retrieval
255
+ name: Information Retrieval
256
+ dataset:
257
+ name: dim 256
258
+ type: dim_256
259
+ metrics:
260
+ - type: cosine_accuracy@1
261
+ value: 0.86
262
+ name: Cosine Accuracy@1
263
+ - type: cosine_accuracy@3
264
+ value: 0.89
265
+ name: Cosine Accuracy@3
266
+ - type: cosine_accuracy@5
267
+ value: 0.9
268
+ name: Cosine Accuracy@5
269
+ - type: cosine_accuracy@10
270
+ value: 0.93
271
+ name: Cosine Accuracy@10
272
+ - type: cosine_precision@1
273
+ value: 0.86
274
+ name: Cosine Precision@1
275
+ - type: cosine_precision@3
276
+ value: 0.85
277
+ name: Cosine Precision@3
278
+ - type: cosine_precision@5
279
+ value: 0.85
280
+ name: Cosine Precision@5
281
+ - type: cosine_precision@10
282
+ value: 0.45
283
+ name: Cosine Precision@10
284
+ - type: cosine_recall@1
285
+ value: 0.17199999999999996
286
+ name: Cosine Recall@1
287
+ - type: cosine_recall@3
288
+ value: 0.51
289
+ name: Cosine Recall@3
290
+ - type: cosine_recall@5
291
+ value: 0.85
292
+ name: Cosine Recall@5
293
+ - type: cosine_recall@10
294
+ value: 0.9
295
+ name: Cosine Recall@10
296
+ - type: cosine_ndcg@10
297
+ value: 0.8789938349894767
298
+ name: Cosine Ndcg@10
299
+ - type: cosine_mrr@10
300
+ value: 0.8805952380952381
301
+ name: Cosine Mrr@10
302
+ - type: cosine_map@100
303
+ value: 0.8726611807317667
304
+ name: Cosine Map@100
305
+ - task:
306
+ type: information-retrieval
307
+ name: Information Retrieval
308
+ dataset:
309
+ name: dim 128
310
+ type: dim_128
311
+ metrics:
312
+ - type: cosine_accuracy@1
313
+ value: 0.84
314
+ name: Cosine Accuracy@1
315
+ - type: cosine_accuracy@3
316
+ value: 0.87
317
+ name: Cosine Accuracy@3
318
+ - type: cosine_accuracy@5
319
+ value: 0.88
320
+ name: Cosine Accuracy@5
321
+ - type: cosine_accuracy@10
322
+ value: 0.93
323
+ name: Cosine Accuracy@10
324
+ - type: cosine_precision@1
325
+ value: 0.84
326
+ name: Cosine Precision@1
327
+ - type: cosine_precision@3
328
+ value: 0.8366666666666667
329
+ name: Cosine Precision@3
330
+ - type: cosine_precision@5
331
+ value: 0.8419999999999999
332
+ name: Cosine Precision@5
333
+ - type: cosine_precision@10
334
+ value: 0.455
335
+ name: Cosine Precision@10
336
+ - type: cosine_recall@1
337
+ value: 0.16799999999999998
338
+ name: Cosine Recall@1
339
+ - type: cosine_recall@3
340
+ value: 0.502
341
+ name: Cosine Recall@3
342
+ - type: cosine_recall@5
343
+ value: 0.8419999999999999
344
+ name: Cosine Recall@5
345
+ - type: cosine_recall@10
346
+ value: 0.91
347
+ name: Cosine Recall@10
348
+ - type: cosine_ndcg@10
349
+ value: 0.8777095006932575
350
+ name: Cosine Ndcg@10
351
+ - type: cosine_mrr@10
352
+ value: 0.8630000000000001
353
+ name: Cosine Mrr@10
354
+ - type: cosine_map@100
355
+ value: 0.8661619081282643
356
+ name: Cosine Map@100
357
+ - task:
358
+ type: information-retrieval
359
+ name: Information Retrieval
360
+ dataset:
361
+ name: dim 64
362
+ type: dim_64
363
+ metrics:
364
+ - type: cosine_accuracy@1
365
+ value: 0.78
366
+ name: Cosine Accuracy@1
367
+ - type: cosine_accuracy@3
368
+ value: 0.81
369
+ name: Cosine Accuracy@3
370
+ - type: cosine_accuracy@5
371
+ value: 0.81
372
+ name: Cosine Accuracy@5
373
+ - type: cosine_accuracy@10
374
+ value: 0.93
375
+ name: Cosine Accuracy@10
376
+ - type: cosine_precision@1
377
+ value: 0.78
378
+ name: Cosine Precision@1
379
+ - type: cosine_precision@3
380
+ value: 0.7866666666666667
381
+ name: Cosine Precision@3
382
+ - type: cosine_precision@5
383
+ value: 0.7859999999999999
384
+ name: Cosine Precision@5
385
+ - type: cosine_precision@10
386
+ value: 0.44799999999999995
387
+ name: Cosine Precision@10
388
+ - type: cosine_recall@1
389
+ value: 0.15599999999999997
390
+ name: Cosine Recall@1
391
+ - type: cosine_recall@3
392
+ value: 0.472
393
+ name: Cosine Recall@3
394
+ - type: cosine_recall@5
395
+ value: 0.7859999999999999
396
+ name: Cosine Recall@5
397
+ - type: cosine_recall@10
398
+ value: 0.8959999999999999
399
+ name: Cosine Recall@10
400
+ - type: cosine_ndcg@10
401
+ value: 0.8445404597381452
402
+ name: Cosine Ndcg@10
403
+ - type: cosine_mrr@10
404
+ value: 0.8120634920634922
405
+ name: Cosine Mrr@10
406
+ - type: cosine_map@100
407
+ value: 0.8308457034802883
408
+ name: Cosine Map@100
409
+ ---
410
+
411
+ # codeBert Base
412
+
413
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [microsoft/codebert-base](https://huggingface.co/microsoft/codebert-base). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
414
+
415
+ ## Model Details
416
+
417
+ ### Model Description
418
+ - **Model Type:** Sentence Transformer
419
+ - **Base model:** [microsoft/codebert-base](https://huggingface.co/microsoft/codebert-base) <!-- at revision 3b0952feddeffad0063f274080e3c23d75e7eb39 -->
420
+ - **Maximum Sequence Length:** 512 tokens
421
+ - **Output Dimensionality:** 768 dimensions
422
+ - **Similarity Function:** Cosine Similarity
423
+ <!-- - **Training Dataset:** Unknown -->
424
+ - **Language:** en
425
+ - **License:** apache-2.0
426
+
427
+ ### Model Sources
428
+
429
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
430
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/huggingface/sentence-transformers)
431
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
432
+
433
+ ### Full Model Architecture
434
+
435
+ ```
436
+ SentenceTransformer(
437
+ (0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'RobertaModel'})
438
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
439
+ )
440
+ ```
441
+
442
+ ## Usage
443
+
444
+ ### Direct Usage (Sentence Transformers)
445
+
446
+ First install the Sentence Transformers library:
447
+
448
+ ```bash
449
+ pip install -U sentence-transformers
450
+ ```
451
+
452
+ Then you can load this model and run inference.
453
+ ```python
454
+ from sentence_transformers import SentenceTransformer
455
+
456
+ # Download from the 🤗 Hub
457
+ model = SentenceTransformer("killdollar/codebert-embed-base-dense-retriever")
458
+ # Run inference
459
+ sentences = [
460
+ 'How does __init__ work in Python?',
461
+ 'def __init__(\n self,\n encoding_name: str = "gpt2",\n model_name: str | None = None,\n allowed_special: Literal["all"] | AbstractSet[str] = set(),\n disallowed_special: Literal["all"] | Collection[str] = "all",\n **kwargs: Any,\n ) -> None:\n """Create a new `TextSplitter`.\n\n Args:\n encoding_name: The name of the tiktoken encoding to use.\n model_name: The name of the model to use. If provided, this will\n override the `encoding_name`.\n allowed_special: Special tokens that are allowed during encoding.\n disallowed_special: Special tokens that are disallowed during encoding.\n\n Raises:\n ImportError: If the tiktoken package is not installed.\n """\n super().__init__(**kwargs)\n if not _HAS_TIKTOKEN:\n msg = (\n "Could not import tiktoken python package. "\n "This is needed in order to for TokenTextSplitter. "\n "Please install it with `pip install tiktoken`."\n )\n raise ImportError(msg)\n\n if model_name is not None:\n enc = tiktoken.encoding_for_model(model_name)\n else:\n enc = tiktoken.get_encoding(encoding_name)\n self._tokenizer = enc\n self._allowed_special = allowed_special\n self._disallowed_special = disallowed_special',
462
+ 'def test_fixed_message_response_when_docs_found() -> None:\n fixed_resp = "I don\'t know"\n answer = "I know the answer!"\n llm = FakeListLLM(responses=[answer])\n retriever = SequentialRetriever(\n sequential_responses=[[Document(page_content=answer)]],\n )\n memory = ConversationBufferMemory(\n k=1,\n output_key="answer",\n memory_key="chat_history",\n return_messages=True,\n )\n qa_chain = ConversationalRetrievalChain.from_llm(\n llm=llm,\n memory=memory,\n retriever=retriever,\n return_source_documents=True,\n rephrase_question=False,\n response_if_no_docs_found=fixed_resp,\n verbose=True,\n )\n got = qa_chain("What is the answer?")\n assert got["chat_history"][1].content == answer\n assert got["answer"] == answer',
463
+ ]
464
+ embeddings = model.encode(sentences)
465
+ print(embeddings.shape)
466
+ # [3, 768]
467
+
468
+ # Get the similarity scores for the embeddings
469
+ similarities = model.similarity(embeddings, embeddings)
470
+ print(similarities)
471
+ # tensor([[1.0000, 0.7336, 0.0979],
472
+ # [0.7336, 1.0000, 0.1742],
473
+ # [0.0979, 0.1742, 1.0000]])
474
+ ```
475
+
476
+ <!--
477
+ ### Direct Usage (Transformers)
478
+
479
+ <details><summary>Click to see the direct usage in Transformers</summary>
480
+
481
+ </details>
482
+ -->
483
+
484
+ <!--
485
+ ### Downstream Usage (Sentence Transformers)
486
+
487
+ You can finetune this model on your own dataset.
488
+
489
+ <details><summary>Click to expand</summary>
490
+
491
+ </details>
492
+ -->
493
+
494
+ <!--
495
+ ### Out-of-Scope Use
496
+
497
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
498
+ -->
499
+
500
+ ## Evaluation
501
+
502
+ ### Metrics
503
+
504
+ #### Information Retrieval
505
+
506
+ * Dataset: `dim_768`
507
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
508
+ ```json
509
+ {
510
+ "truncate_dim": 768
511
+ }
512
+ ```
513
+
514
+ | Metric | Value |
515
+ |:--------------------|:-----------|
516
+ | cosine_accuracy@1 | 0.83 |
517
+ | cosine_accuracy@3 | 0.85 |
518
+ | cosine_accuracy@5 | 0.86 |
519
+ | cosine_accuracy@10 | 0.94 |
520
+ | cosine_precision@1 | 0.83 |
521
+ | cosine_precision@3 | 0.83 |
522
+ | cosine_precision@5 | 0.83 |
523
+ | cosine_precision@10 | 0.453 |
524
+ | cosine_recall@1 | 0.166 |
525
+ | cosine_recall@3 | 0.498 |
526
+ | cosine_recall@5 | 0.83 |
527
+ | cosine_recall@10 | 0.906 |
528
+ | **cosine_ndcg@10** | **0.8712** |
529
+ | cosine_mrr@10 | 0.8533 |
530
+ | cosine_map@100 | 0.8616 |
531
+
532
+ #### Information Retrieval
533
+
534
+ * Dataset: `dim_512`
535
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
536
+ ```json
537
+ {
538
+ "truncate_dim": 512
539
+ }
540
+ ```
541
+
542
+ | Metric | Value |
543
+ |:--------------------|:-----------|
544
+ | cosine_accuracy@1 | 0.85 |
545
+ | cosine_accuracy@3 | 0.86 |
546
+ | cosine_accuracy@5 | 0.87 |
547
+ | cosine_accuracy@10 | 0.95 |
548
+ | cosine_precision@1 | 0.85 |
549
+ | cosine_precision@3 | 0.84 |
550
+ | cosine_precision@5 | 0.842 |
551
+ | cosine_precision@10 | 0.453 |
552
+ | cosine_recall@1 | 0.17 |
553
+ | cosine_recall@3 | 0.504 |
554
+ | cosine_recall@5 | 0.842 |
555
+ | cosine_recall@10 | 0.906 |
556
+ | **cosine_ndcg@10** | **0.8776** |
557
+ | cosine_mrr@10 | 0.8699 |
558
+ | cosine_map@100 | 0.8693 |
559
+
560
+ #### Information Retrieval
561
+
562
+ * Dataset: `dim_256`
563
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
564
+ ```json
565
+ {
566
+ "truncate_dim": 256
567
+ }
568
+ ```
569
+
570
+ | Metric | Value |
571
+ |:--------------------|:----------|
572
+ | cosine_accuracy@1 | 0.86 |
573
+ | cosine_accuracy@3 | 0.89 |
574
+ | cosine_accuracy@5 | 0.9 |
575
+ | cosine_accuracy@10 | 0.93 |
576
+ | cosine_precision@1 | 0.86 |
577
+ | cosine_precision@3 | 0.85 |
578
+ | cosine_precision@5 | 0.85 |
579
+ | cosine_precision@10 | 0.45 |
580
+ | cosine_recall@1 | 0.172 |
581
+ | cosine_recall@3 | 0.51 |
582
+ | cosine_recall@5 | 0.85 |
583
+ | cosine_recall@10 | 0.9 |
584
+ | **cosine_ndcg@10** | **0.879** |
585
+ | cosine_mrr@10 | 0.8806 |
586
+ | cosine_map@100 | 0.8727 |
587
+
588
+ #### Information Retrieval
589
+
590
+ * Dataset: `dim_128`
591
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
592
+ ```json
593
+ {
594
+ "truncate_dim": 128
595
+ }
596
+ ```
597
+
598
+ | Metric | Value |
599
+ |:--------------------|:-----------|
600
+ | cosine_accuracy@1 | 0.84 |
601
+ | cosine_accuracy@3 | 0.87 |
602
+ | cosine_accuracy@5 | 0.88 |
603
+ | cosine_accuracy@10 | 0.93 |
604
+ | cosine_precision@1 | 0.84 |
605
+ | cosine_precision@3 | 0.8367 |
606
+ | cosine_precision@5 | 0.842 |
607
+ | cosine_precision@10 | 0.455 |
608
+ | cosine_recall@1 | 0.168 |
609
+ | cosine_recall@3 | 0.502 |
610
+ | cosine_recall@5 | 0.842 |
611
+ | cosine_recall@10 | 0.91 |
612
+ | **cosine_ndcg@10** | **0.8777** |
613
+ | cosine_mrr@10 | 0.863 |
614
+ | cosine_map@100 | 0.8662 |
615
+
616
+ #### Information Retrieval
617
+
618
+ * Dataset: `dim_64`
619
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
620
+ ```json
621
+ {
622
+ "truncate_dim": 64
623
+ }
624
+ ```
625
+
626
+ | Metric | Value |
627
+ |:--------------------|:-----------|
628
+ | cosine_accuracy@1 | 0.78 |
629
+ | cosine_accuracy@3 | 0.81 |
630
+ | cosine_accuracy@5 | 0.81 |
631
+ | cosine_accuracy@10 | 0.93 |
632
+ | cosine_precision@1 | 0.78 |
633
+ | cosine_precision@3 | 0.7867 |
634
+ | cosine_precision@5 | 0.786 |
635
+ | cosine_precision@10 | 0.448 |
636
+ | cosine_recall@1 | 0.156 |
637
+ | cosine_recall@3 | 0.472 |
638
+ | cosine_recall@5 | 0.786 |
639
+ | cosine_recall@10 | 0.896 |
640
+ | **cosine_ndcg@10** | **0.8445** |
641
+ | cosine_mrr@10 | 0.8121 |
642
+ | cosine_map@100 | 0.8308 |
643
+
644
+ <!--
645
+ ## Bias, Risks and Limitations
646
+
647
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
648
+ -->
649
+
650
+ <!--
651
+ ### Recommendations
652
+
653
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
654
+ -->
655
+
656
+ ## Training Details
657
+
658
+ ### Training Dataset
659
+
660
+ #### Unnamed Dataset
661
+
662
+ * Size: 900 training samples
663
+ * Columns: <code>anchor</code> and <code>positive</code>
664
+ * Approximate statistics based on the first 900 samples:
665
+ | | anchor | positive |
666
+ |:--------|:----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
667
+ | type | string | string |
668
+ | details | <ul><li>min: 6 tokens</li><li>mean: 13.15 tokens</li><li>max: 42 tokens</li></ul> | <ul><li>min: 25 tokens</li><li>mean: 239.87 tokens</li><li>max: 512 tokens</li></ul> |
669
+ * Samples:
670
+ | anchor | positive |
671
+ |:-----------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
672
+ | <code>Explain the test_qdrant_similarity_search_with_relevance_scores logic</code> | <code>def test_qdrant_similarity_search_with_relevance_scores(<br> batch_size: int,<br> content_payload_key: str,<br> metadata_payload_key: str,<br> vector_name: str \| None,<br>) -> None:<br> """Test end to end construction and search."""<br> texts = ["foo", "bar", "baz"]<br> docsearch = Qdrant.from_texts(<br> texts,<br> ConsistentFakeEmbeddings(),<br> location=":memory:",<br> content_payload_key=content_payload_key,<br> metadata_payload_key=metadata_payload_key,<br> batch_size=batch_size,<br> vector_name=vector_name,<br> )<br> output = docsearch.similarity_search_with_relevance_scores("foo", k=3)<br><br> assert all(<br> (score <= 1 or np.isclose(score, 1)) and score >= 0 for _, score in output<br> )</code> |
673
+ | <code>How to implement LangChainPendingDeprecationWarning?</code> | <code>class LangChainPendingDeprecationWarning(PendingDeprecationWarning):<br> """A class for issuing deprecation warnings for LangChain users."""</code> |
674
+ | <code>Example usage of random_name</code> | <code>def random_name() -> str:<br> """Generate a random name."""<br> adjective = random.choice(adjectives) # noqa: S311<br> noun = random.choice(nouns) # noqa: S311<br> number = random.randint(1, 100) # noqa: S311<br> return f"{adjective}-{noun}-{number}"</code> |
675
+ * Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
676
+ ```json
677
+ {
678
+ "loss": "MultipleNegativesRankingLoss",
679
+ "matryoshka_dims": [
680
+ 768,
681
+ 512,
682
+ 256,
683
+ 128,
684
+ 64
685
+ ],
686
+ "matryoshka_weights": [
687
+ 1,
688
+ 1,
689
+ 1,
690
+ 1,
691
+ 1
692
+ ],
693
+ "n_dims_per_step": -1
694
+ }
695
+ ```
696
+
697
+ ### Training Hyperparameters
698
+ #### Non-Default Hyperparameters
699
+
700
+ - `eval_strategy`: epoch
701
+ - `per_device_train_batch_size`: 4
702
+ - `per_device_eval_batch_size`: 4
703
+ - `gradient_accumulation_steps`: 16
704
+ - `learning_rate`: 2e-05
705
+ - `num_train_epochs`: 4
706
+ - `lr_scheduler_type`: cosine
707
+ - `warmup_ratio`: 0.1
708
+ - `fp16`: True
709
+ - `load_best_model_at_end`: True
710
+ - `optim`: adamw_torch
711
+ - `batch_sampler`: no_duplicates
712
+
713
+ #### All Hyperparameters
714
+ <details><summary>Click to expand</summary>
715
+
716
+ - `overwrite_output_dir`: False
717
+ - `do_predict`: False
718
+ - `eval_strategy`: epoch
719
+ - `prediction_loss_only`: True
720
+ - `per_device_train_batch_size`: 4
721
+ - `per_device_eval_batch_size`: 4
722
+ - `per_gpu_train_batch_size`: None
723
+ - `per_gpu_eval_batch_size`: None
724
+ - `gradient_accumulation_steps`: 16
725
+ - `eval_accumulation_steps`: None
726
+ - `torch_empty_cache_steps`: None
727
+ - `learning_rate`: 2e-05
728
+ - `weight_decay`: 0.0
729
+ - `adam_beta1`: 0.9
730
+ - `adam_beta2`: 0.999
731
+ - `adam_epsilon`: 1e-08
732
+ - `max_grad_norm`: 1.0
733
+ - `num_train_epochs`: 4
734
+ - `max_steps`: -1
735
+ - `lr_scheduler_type`: cosine
736
+ - `lr_scheduler_kwargs`: {}
737
+ - `warmup_ratio`: 0.1
738
+ - `warmup_steps`: 0
739
+ - `log_level`: passive
740
+ - `log_level_replica`: warning
741
+ - `log_on_each_node`: True
742
+ - `logging_nan_inf_filter`: True
743
+ - `save_safetensors`: True
744
+ - `save_on_each_node`: False
745
+ - `save_only_model`: False
746
+ - `restore_callback_states_from_checkpoint`: False
747
+ - `no_cuda`: False
748
+ - `use_cpu`: False
749
+ - `use_mps_device`: False
750
+ - `seed`: 42
751
+ - `data_seed`: None
752
+ - `jit_mode_eval`: False
753
+ - `bf16`: False
754
+ - `fp16`: True
755
+ - `fp16_opt_level`: O1
756
+ - `half_precision_backend`: auto
757
+ - `bf16_full_eval`: False
758
+ - `fp16_full_eval`: False
759
+ - `tf32`: None
760
+ - `local_rank`: 0
761
+ - `ddp_backend`: None
762
+ - `tpu_num_cores`: None
763
+ - `tpu_metrics_debug`: False
764
+ - `debug`: []
765
+ - `dataloader_drop_last`: False
766
+ - `dataloader_num_workers`: 0
767
+ - `dataloader_prefetch_factor`: None
768
+ - `past_index`: -1
769
+ - `disable_tqdm`: False
770
+ - `remove_unused_columns`: True
771
+ - `label_names`: None
772
+ - `load_best_model_at_end`: True
773
+ - `ignore_data_skip`: False
774
+ - `fsdp`: []
775
+ - `fsdp_min_num_params`: 0
776
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
777
+ - `fsdp_transformer_layer_cls_to_wrap`: None
778
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
779
+ - `parallelism_config`: None
780
+ - `deepspeed`: None
781
+ - `label_smoothing_factor`: 0.0
782
+ - `optim`: adamw_torch
783
+ - `optim_args`: None
784
+ - `adafactor`: False
785
+ - `group_by_length`: False
786
+ - `length_column_name`: length
787
+ - `project`: huggingface
788
+ - `trackio_space_id`: trackio
789
+ - `ddp_find_unused_parameters`: None
790
+ - `ddp_bucket_cap_mb`: None
791
+ - `ddp_broadcast_buffers`: False
792
+ - `dataloader_pin_memory`: True
793
+ - `dataloader_persistent_workers`: False
794
+ - `skip_memory_metrics`: True
795
+ - `use_legacy_prediction_loop`: False
796
+ - `push_to_hub`: False
797
+ - `resume_from_checkpoint`: None
798
+ - `hub_model_id`: None
799
+ - `hub_strategy`: every_save
800
+ - `hub_private_repo`: None
801
+ - `hub_always_push`: False
802
+ - `hub_revision`: None
803
+ - `gradient_checkpointing`: False
804
+ - `gradient_checkpointing_kwargs`: None
805
+ - `include_inputs_for_metrics`: False
806
+ - `include_for_metrics`: []
807
+ - `eval_do_concat_batches`: True
808
+ - `fp16_backend`: auto
809
+ - `push_to_hub_model_id`: None
810
+ - `push_to_hub_organization`: None
811
+ - `mp_parameters`:
812
+ - `auto_find_batch_size`: False
813
+ - `full_determinism`: False
814
+ - `torchdynamo`: None
815
+ - `ray_scope`: last
816
+ - `ddp_timeout`: 1800
817
+ - `torch_compile`: False
818
+ - `torch_compile_backend`: None
819
+ - `torch_compile_mode`: None
820
+ - `include_tokens_per_second`: False
821
+ - `include_num_input_tokens_seen`: no
822
+ - `neftune_noise_alpha`: None
823
+ - `optim_target_modules`: None
824
+ - `batch_eval_metrics`: False
825
+ - `eval_on_start`: False
826
+ - `use_liger_kernel`: False
827
+ - `liger_kernel_config`: None
828
+ - `eval_use_gather_object`: False
829
+ - `average_tokens_across_devices`: True
830
+ - `prompts`: None
831
+ - `batch_sampler`: no_duplicates
832
+ - `multi_dataset_batch_sampler`: proportional
833
+ - `router_mapping`: {}
834
+ - `learning_rate_mapping`: {}
835
+
836
+ </details>
837
+
838
+ ### Training Logs
839
+ | Epoch | Step | Training Loss | dim_768_cosine_ndcg@10 | dim_512_cosine_ndcg@10 | dim_256_cosine_ndcg@10 | dim_128_cosine_ndcg@10 | dim_64_cosine_ndcg@10 |
840
+ |:-------:|:------:|:-------------:|:----------------------:|:----------------------:|:----------------------:|:----------------------:|:---------------------:|
841
+ | 0.7111 | 10 | 6.8447 | - | - | - | - | - |
842
+ | 1.0 | 15 | - | 0.1025 | 0.0367 | 0.0548 | 0.0502 | 0.1185 |
843
+ | 0.7111 | 10 | 4.8545 | - | - | - | - | - |
844
+ | 1.0 | 15 | - | 0.2250 | 0.3047 | 0.2895 | 0.2892 | 0.3178 |
845
+ | 0.7111 | 10 | 1.9011 | - | - | - | - | - |
846
+ | 1.0 | 15 | - | 0.6530 | 0.6393 | 0.6269 | 0.6631 | 0.6658 |
847
+ | 1.3556 | 20 | 0.6349 | - | - | - | - | - |
848
+ | 2.0 | 30 | 0.1887 | 0.8480 | 0.8643 | 0.8641 | 0.8532 | 0.7974 |
849
+ | 2.7111 | 40 | 0.0959 | - | - | - | - | - |
850
+ | 3.0 | 45 | - | 0.8688 | 0.8774 | 0.8754 | 0.8725 | 0.8457 |
851
+ | 3.3556 | 50 | 0.0359 | - | - | - | - | - |
852
+ | **4.0** | **60** | **0.0515** | **0.8712** | **0.8776** | **0.879** | **0.8777** | **0.8445** |
853
+
854
+ * The bold row denotes the saved checkpoint.
855
+
856
+ ### Framework Versions
857
+ - Python: 3.12.12
858
+ - Sentence Transformers: 5.2.0
859
+ - Transformers: 4.57.3
860
+ - PyTorch: 2.9.0+cu126
861
+ - Accelerate: 1.12.0
862
+ - Datasets: 4.0.0
863
+ - Tokenizers: 0.22.2
864
+
865
+ ## Citation
866
+
867
+ ### BibTeX
868
+
869
+ #### Sentence Transformers
870
+ ```bibtex
871
+ @inproceedings{reimers-2019-sentence-bert,
872
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
873
+ author = "Reimers, Nils and Gurevych, Iryna",
874
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
875
+ month = "11",
876
+ year = "2019",
877
+ publisher = "Association for Computational Linguistics",
878
+ url = "https://arxiv.org/abs/1908.10084",
879
+ }
880
+ ```
881
+
882
+ #### MatryoshkaLoss
883
+ ```bibtex
884
+ @misc{kusupati2024matryoshka,
885
+ title={Matryoshka Representation Learning},
886
+ author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
887
+ year={2024},
888
+ eprint={2205.13147},
889
+ archivePrefix={arXiv},
890
+ primaryClass={cs.LG}
891
+ }
892
+ ```
893
+
894
+ #### MultipleNegativesRankingLoss
895
+ ```bibtex
896
+ @misc{henderson2017efficient,
897
+ title={Efficient Natural Language Response Suggestion for Smart Reply},
898
+ author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
899
+ year={2017},
900
+ eprint={1705.00652},
901
+ archivePrefix={arXiv},
902
+ primaryClass={cs.CL}
903
+ }
904
+ ```
905
+
906
+ <!--
907
+ ## Glossary
908
+
909
+ *Clearly define terms in order to be accessible across audiences.*
910
+ -->
911
+
912
+ <!--
913
+ ## Model Card Authors
914
+
915
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
916
+ -->
917
+
918
+ <!--
919
+ ## Model Card Contact
920
+
921
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
922
+ -->
config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "RobertaModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 514,
17
+ "model_type": "roberta",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "output_past": true,
21
+ "pad_token_id": 1,
22
+ "position_embedding_type": "absolute",
23
+ "transformers_version": "4.57.3",
24
+ "type_vocab_size": 1,
25
+ "use_cache": true,
26
+ "vocab_size": 50265
27
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "SentenceTransformer",
3
+ "__version__": {
4
+ "sentence_transformers": "5.2.0",
5
+ "transformers": "4.57.3",
6
+ "pytorch": "2.9.0+cu126"
7
+ },
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a25cab114301581bb32a078cb3d4af63721738dbd380069070ca2605c7b4b44
3
+ size 498604904
modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": false,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "extra_special_tokens": {},
51
+ "mask_token": "<mask>",
52
+ "model_max_length": 512,
53
+ "pad_token": "<pad>",
54
+ "sep_token": "</s>",
55
+ "tokenizer_class": "RobertaTokenizer",
56
+ "trim_offsets": true,
57
+ "unk_token": "<unk>"
58
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff