anaghaj111 commited on
Commit
aeb55ce
·
verified ·
1 Parent(s): f5a06eb

Add new SentenceTransformer model

Browse files
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,969 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ tags:
6
+ - sentence-transformers
7
+ - sentence-similarity
8
+ - feature-extraction
9
+ - dense
10
+ - generated_from_trainer
11
+ - dataset_size:180
12
+ - loss:MatryoshkaLoss
13
+ - loss:MultipleNegativesRankingLoss
14
+ base_model: shubharuidas/codebert-embed-base-dense-retriever
15
+ widget:
16
+ - source_sentence: Explain the tool1 logic
17
+ sentences:
18
+ - "def stream(\n self,\n thread_id: str,\n assistant_id: str,\n\
19
+ \ *,\n input: Input | None = None,\n command: Command | None\
20
+ \ = None,\n stream_mode: StreamMode | Sequence[StreamMode] = \"values\"\
21
+ ,\n stream_subgraphs: bool = False,\n stream_resumable: bool = False,\n\
22
+ \ metadata: Mapping[str, Any] | None = None,\n config: Config |\
23
+ \ None = None,\n context: Context | None = None,\n checkpoint: Checkpoint\
24
+ \ | None = None,\n checkpoint_id: str | None = None,\n checkpoint_during:\
25
+ \ bool | None = None,\n interrupt_before: All | Sequence[str] | None =\
26
+ \ None,\n interrupt_after: All | Sequence[str] | None = None,\n \
27
+ \ feedback_keys: Sequence[str] | None = None,\n on_disconnect: DisconnectMode\
28
+ \ | None = None,\n webhook: str | None = None,\n multitask_strategy:\
29
+ \ MultitaskStrategy | None = None,\n if_not_exists: IfNotExists | None\
30
+ \ = None,\n after_seconds: int | None = None,\n headers: Mapping[str,\
31
+ \ str] | None = None,\n params: QueryParamTypes | None = None,\n \
32
+ \ on_run_created: Callable[[RunCreateMetadata], None] | None = None,\n ) ->\
33
+ \ AsyncIterator[StreamPart]: ..."
34
+ - "def tool1(some_val: int, some_other_val: str) -> str:\n \"\"\"Tool 1 docstring.\"\
35
+ \"\"\n if some_val == 0:\n msg = \"Test error\"\n raise ValueError(msg)\n\
36
+ \ return f\"{some_val} - {some_other_val}\""
37
+ - "class IndexConfig(TypedDict, total=False):\n \"\"\"Configuration for indexing\
38
+ \ documents for semantic search in the store.\n\n If not provided to the store,\
39
+ \ the store will not support vector search.\n In that case, all `index` arguments\
40
+ \ to `put()` and `aput()` operations will be ignored.\n \"\"\"\n\n dims:\
41
+ \ int\n \"\"\"Number of dimensions in the embedding vectors.\n \n Common\
42
+ \ embedding models have the following dimensions:\n - `openai:text-embedding-3-large`:\
43
+ \ `3072`\n - `openai:text-embedding-3-small`: `1536`\n - `openai:text-embedding-ada-002`:\
44
+ \ `1536`\n - `cohere:embed-english-v3.0`: `1024`\n - `cohere:embed-english-light-v3.0`:\
45
+ \ `384`\n - `cohere:embed-multilingual-v3.0`: `1024`\n - `cohere:embed-multilingual-light-v3.0`:\
46
+ \ `384`\n \"\"\"\n\n embed: Embeddings | EmbeddingsFunc | AEmbeddingsFunc\
47
+ \ | str\n \"\"\"Optional function to generate embeddings from text.\n \n\
48
+ \ Can be specified in three ways:\n 1. A LangChain `Embeddings` instance\n\
49
+ \ 2. A synchronous embedding function (`EmbeddingsFunc`)\n 3. An\
50
+ \ asynchronous embedding function (`AEmbeddingsFunc`)\n 4. A provider string\
51
+ \ (e.g., `\"openai:text-embedding-3-small\"`)\n \n ???+ example \"Examples\"\
52
+ \n\n Using LangChain's initialization with `InMemoryStore`:\n\n \
53
+ \ ```python\n from langchain.embeddings import init_embeddings\n \
54
+ \ from langgraph.store.memory import InMemoryStore\n \n store =\
55
+ \ InMemoryStore(\n index={\n \"dims\": 1536,\n \
56
+ \ \"embed\": init_embeddings(\"openai:text-embedding-3-small\")\n \
57
+ \ }\n )\n ```\n \n Using a custom embedding\
58
+ \ function with `InMemoryStore`:\n\n ```python\n from openai import\
59
+ \ OpenAI\n from langgraph.store.memory import InMemoryStore\n \n\
60
+ \ client = OpenAI()\n \n def embed_texts(texts: list[str])\
61
+ \ -> list[list[float]]:\n response = client.embeddings.create(\n \
62
+ \ model=\"text-embedding-3-small\",\n input=texts\n\
63
+ \ )\n return [e.embedding for e in response.data]\n \
64
+ \ \n store = InMemoryStore(\n index={\n \
65
+ \ \"dims\": 1536,\n \"embed\": embed_texts\n }\n \
66
+ \ )\n ```\n \n Using an asynchronous embedding function\
67
+ \ with `InMemoryStore`:\n\n ```python\n from openai import AsyncOpenAI\n\
68
+ \ from langgraph.store.memory import InMemoryStore\n \n client\
69
+ \ = AsyncOpenAI()\n \n async def aembed_texts(texts: list[str])\
70
+ \ -> list[list[float]]:\n response = await client.embeddings.create(\n\
71
+ \ model=\"text-embedding-3-small\",\n input=texts\n\
72
+ \ )\n return [e.embedding for e in response.data]\n \
73
+ \ \n store = InMemoryStore(\n index={\n \
74
+ \ \"dims\": 1536,\n \"embed\": aembed_texts\n }\n\
75
+ \ )\n ```\n \"\"\"\n\n fields: list[str] | None\n \"\"\"\
76
+ Fields to extract text from for embedding generation.\n \n Controls which\
77
+ \ parts of stored items are embedded for semantic search. Follows JSON path syntax:\n\
78
+ \n - `[\"$\"]`: Embeds the entire JSON object as one vector (default)\n \
79
+ \ - `[\"field1\", \"field2\"]`: Embeds specific top-level fields\n - `[\"\
80
+ parent.child\"]`: Embeds nested fields using dot notation\n - `[\"array[*].field\"\
81
+ ]`: Embeds field from each array element separately\n \n Note:\n \
82
+ \ You can always override this behavior when storing an item using the\n \
83
+ \ `index` parameter in the `put` or `aput` operations.\n \n ???+ example\
84
+ \ \"Examples\"\n\n ```python\n # Embed entire document (default)\n\
85
+ \ fields=[\"$\"]\n \n # Embed specific fields\n fields=[\"\
86
+ text\", \"summary\"]\n \n # Embed nested fields\n fields=[\"\
87
+ metadata.title\", \"content.body\"]\n \n # Embed from arrays\n \
88
+ \ fields=[\"messages[*].content\"] # Each message content separately\n\
89
+ \ fields=[\"context[0].text\"] # First context item's text\n \
90
+ \ ```\n \n Note:\n - Fields missing from a document are skipped\n\
91
+ \ - Array notation creates separate embeddings for each element\n \
92
+ \ - Complex nested paths are supported (e.g., `\"a.b[*].c.d\"`)\n \"\"\""
93
+ - source_sentence: Explain the UpdateType logic
94
+ sentences:
95
+ - "def test_subgraph_checkpoint_true(\n sync_checkpointer: BaseCheckpointSaver,\
96
+ \ durability: Durability\n) -> None:\n class InnerState(TypedDict):\n \
97
+ \ my_key: Annotated[str, operator.add]\n my_other_key: str\n\n def\
98
+ \ inner_1(state: InnerState):\n return {\"my_key\": \" got here\", \"my_other_key\"\
99
+ : state[\"my_key\"]}\n\n def inner_2(state: InnerState):\n return {\"\
100
+ my_key\": \" and there\"}\n\n inner = StateGraph(InnerState)\n inner.add_node(\"\
101
+ inner_1\", inner_1)\n inner.add_node(\"inner_2\", inner_2)\n inner.add_edge(\"\
102
+ inner_1\", \"inner_2\")\n inner.set_entry_point(\"inner_1\")\n inner.set_finish_point(\"\
103
+ inner_2\")\n\n class State(TypedDict):\n my_key: str\n\n graph =\
104
+ \ StateGraph(State)\n graph.add_node(\"inner\", inner.compile(checkpointer=True))\n\
105
+ \ graph.add_edge(START, \"inner\")\n graph.add_conditional_edges(\n \
106
+ \ \"inner\", lambda s: \"inner\" if s[\"my_key\"].count(\"there\") < 2 else\
107
+ \ END\n )\n app = graph.compile(checkpointer=sync_checkpointer)\n\n config\
108
+ \ = {\"configurable\": {\"thread_id\": \"2\"}}\n assert [\n c\n \
109
+ \ for c in app.stream(\n {\"my_key\": \"\"}, config, subgraphs=True,\
110
+ \ durability=durability\n )\n ] == [\n ((\"inner\",), {\"inner_1\"\
111
+ : {\"my_key\": \" got here\", \"my_other_key\": \"\"}}),\n ((\"inner\"\
112
+ ,), {\"inner_2\": {\"my_key\": \" and there\"}}),\n ((), {\"inner\": {\"\
113
+ my_key\": \" got here and there\"}}),\n (\n (\"inner\",),\n\
114
+ \ {\n \"inner_1\": {\n \"my_key\"\
115
+ : \" got here\",\n \"my_other_key\": \" got here and there\
116
+ \ got here and there\",\n }\n },\n ),\n \
117
+ \ ((\"inner\",), {\"inner_2\": {\"my_key\": \" and there\"}}),\n (\n\
118
+ \ (),\n {\n \"inner\": {\n \
119
+ \ \"my_key\": \" got here and there got here and there got here and there\"\
120
+ \n }\n },\n ),\n ]\n\n checkpoints = list(app.get_state_history(config))\n\
121
+ \ if durability != \"exit\":\n assert len(checkpoints) == 4\n else:\n\
122
+ \ assert len(checkpoints) == 1"
123
+ - "def is_available(self) -> bool:\n return self.value is not MISSING"
124
+ - "def UpdateType(self) -> type[Value]:\n \"\"\"The type of the update received\
125
+ \ by the channel.\"\"\"\n return self.typ"
126
+ - source_sentence: "Example usage of ToolOutputMixin: # type: ignore[no-redef]\n\
127
+ \ pass"
128
+ sentences:
129
+ - 'def task(__func_or_none__: Callable[P, Awaitable[T]]) -> _TaskFunction[P, T]:
130
+ ...'
131
+ - "def test_graph_with_jitter_retry_policy():\n \"\"\"Test a graph with a RetryPolicy\
132
+ \ that uses jitter.\"\"\"\n\n class State(TypedDict):\n foo: str\n\n\
133
+ \ attempt_count = 0\n\n def failing_node(state):\n nonlocal attempt_count\n\
134
+ \ attempt_count += 1\n if attempt_count < 2: # Fail the first attempt\n\
135
+ \ raise ValueError(\"Intentional failure\")\n return {\"foo\"\
136
+ : \"success\"}\n\n # Create a retry policy with jitter enabled\n retry_policy\
137
+ \ = RetryPolicy(\n max_attempts=3,\n initial_interval=0.01,\n \
138
+ \ jitter=True, # Enable jitter for randomized backoff\n retry_on=ValueError,\n\
139
+ \ )\n\n # Create and compile the graph\n graph = (\n StateGraph(State)\n\
140
+ \ .add_node(\"failing_node\", failing_node, retry_policy=retry_policy)\n\
141
+ \ .add_edge(START, \"failing_node\")\n .compile()\n )\n\n \
142
+ \ # Test graph execution with mocked random and sleep\n with (\n patch(\"\
143
+ random.uniform\", return_value=0.05) as mock_random,\n patch(\"time.sleep\"\
144
+ ) as mock_sleep,\n ):\n result = graph.invoke({\"foo\": \"\"})\n\n \
145
+ \ # Verify retry behavior\n assert attempt_count == 2 # The node should\
146
+ \ have been tried twice\n assert result[\"foo\"] == \"success\"\n\n # Verify\
147
+ \ jitter was applied\n mock_random.assert_called_with(0, 1) # Jitter should\
148
+ \ use random.uniform(0, 1)\n mock_sleep.assert_called_with(0.01 + 0.05)"
149
+ - "class ToolOutputMixin: # type: ignore[no-redef]\n pass"
150
+ - source_sentence: Best practices for async test_async_entrypoint_without_checkpointer
151
+ sentences:
152
+ - "def __init__(\n self,\n assistant_id: str, # graph_id\n \
153
+ \ /,\n *,\n url: str | None = None,\n api_key: str | None\
154
+ \ = None,\n headers: dict[str, str] | None = None,\n client: LangGraphClient\
155
+ \ | None = None,\n sync_client: SyncLangGraphClient | None = None,\n \
156
+ \ config: RunnableConfig | None = None,\n name: str | None = None,\n\
157
+ \ distributed_tracing: bool = False,\n ):\n \"\"\"Specify `url`,\
158
+ \ `api_key`, and/or `headers` to create default sync and async clients.\n\n \
159
+ \ If `client` or `sync_client` are provided, they will be used instead of\
160
+ \ the default clients.\n See `LangGraphClient` and `SyncLangGraphClient`\
161
+ \ for details on the default clients. At least\n one of `url`, `client`,\
162
+ \ or `sync_client` must be provided.\n\n Args:\n assistant_id:\
163
+ \ The assistant ID or graph name of the remote graph to use.\n url:\
164
+ \ The URL of the remote API.\n api_key: The API key to use for authentication.\
165
+ \ If not provided, it will be read from the environment (`LANGGRAPH_API_KEY`,\
166
+ \ `LANGSMITH_API_KEY`, or `LANGCHAIN_API_KEY`).\n headers: Additional\
167
+ \ headers to include in the requests.\n client: A `LangGraphClient`\
168
+ \ instance to use instead of creating a default client.\n sync_client:\
169
+ \ A `SyncLangGraphClient` instance to use instead of creating a default client.\n\
170
+ \ config: An optional `RunnableConfig` instance with additional configuration.\n\
171
+ \ name: Human-readable name to attach to the RemoteGraph instance.\n\
172
+ \ This is useful for adding `RemoteGraph` as a subgraph via `graph.add_node(remote_graph)`.\n\
173
+ \ If not provided, defaults to the assistant ID.\n distributed_tracing:\
174
+ \ Whether to enable sending LangSmith distributed tracing headers.\n \"\
175
+ \"\"\n self.assistant_id = assistant_id\n if name is None:\n \
176
+ \ self.name = assistant_id\n else:\n self.name = name\n\
177
+ \ self.config = config\n self.distributed_tracing = distributed_tracing\n\
178
+ \n if client is None and url is not None:\n client = get_client(url=url,\
179
+ \ api_key=api_key, headers=headers)\n self.client = client\n\n if\
180
+ \ sync_client is None and url is not None:\n sync_client = get_sync_client(url=url,\
181
+ \ api_key=api_key, headers=headers)\n self.sync_client = sync_client"
182
+ - "async def test_async_entrypoint_without_checkpointer() -> None:\n \"\"\"Test\
183
+ \ no checkpointer.\"\"\"\n states = []\n config = {\"configurable\": {\"\
184
+ thread_id\": \"1\"}}\n\n # Test without previous\n @entrypoint()\n async\
185
+ \ def foo(inputs: Any) -> Any:\n states.append(inputs)\n return\
186
+ \ inputs\n\n assert (await foo.ainvoke({\"a\": \"1\"}, config)) == {\"a\":\
187
+ \ \"1\"}\n\n @entrypoint()\n async def foo(inputs: Any, *, previous: Any)\
188
+ \ -> Any:\n states.append(previous)\n return {\"previous\": previous,\
189
+ \ \"current\": inputs}\n\n assert (await foo.ainvoke({\"a\": \"1\"}, config))\
190
+ \ == {\n \"current\": {\"a\": \"1\"},\n \"previous\": None,\n \
191
+ \ }\n assert (await foo.ainvoke({\"a\": \"1\"}, config)) == {\n \"\
192
+ current\": {\"a\": \"1\"},\n \"previous\": None,\n }"
193
+ - "class _InjectedStatePydanticV2Schema(BaseModel):\n messages: list\n foo:\
194
+ \ str"
195
+ - source_sentence: Explain the validate_autoresponse logic
196
+ sentences:
197
+ - "def task_path_str(tup: str | int | tuple) -> str:\n \"\"\"Generate a string\
198
+ \ representation of the task path.\"\"\"\n return (\n f\"~{', '.join(task_path_str(x)\
199
+ \ for x in tup)}\"\n if isinstance(tup, (tuple, list))\n else f\"\
200
+ {tup:010d}\"\n if isinstance(tup, int)\n else str(tup)\n )"
201
+ - "def ValueType(self) -> type[Value]:\n \"\"\"The type of the value stored\
202
+ \ in the channel.\"\"\"\n return self.typ"
203
+ - "def validate_autoresponse(cls, v):\n if v is not None and not isinstance(v,\
204
+ \ dict):\n raise TypeError(\"autoresponse must be a dict or None\"\
205
+ )\n return v"
206
+ pipeline_tag: sentence-similarity
207
+ library_name: sentence-transformers
208
+ metrics:
209
+ - cosine_accuracy@1
210
+ - cosine_accuracy@3
211
+ - cosine_accuracy@5
212
+ - cosine_accuracy@10
213
+ - cosine_precision@1
214
+ - cosine_precision@3
215
+ - cosine_precision@5
216
+ - cosine_precision@10
217
+ - cosine_recall@1
218
+ - cosine_recall@3
219
+ - cosine_recall@5
220
+ - cosine_recall@10
221
+ - cosine_ndcg@10
222
+ - cosine_mrr@10
223
+ - cosine_map@100
224
+ model-index:
225
+ name: CodeBERT dense retriever
226
+ results:
227
+ - task:
228
+ type: information-retrieval
229
+ name: Information Retrieval
230
+ dataset:
231
+ name: dim 768
232
+ type: dim_768
233
+ metrics:
234
+ - type: cosine_accuracy@1
235
+ value: 0.65
236
+ name: Cosine Accuracy@1
237
+ - type: cosine_accuracy@3
238
+ value: 0.8
239
+ name: Cosine Accuracy@3
240
+ - type: cosine_accuracy@5
241
+ value: 0.85
242
+ name: Cosine Accuracy@5
243
+ - type: cosine_accuracy@10
244
+ value: 1.0
245
+ name: Cosine Accuracy@10
246
+ - type: cosine_precision@1
247
+ value: 0.65
248
+ name: Cosine Precision@1
249
+ - type: cosine_precision@3
250
+ value: 0.2666666666666666
251
+ name: Cosine Precision@3
252
+ - type: cosine_precision@5
253
+ value: 0.17000000000000007
254
+ name: Cosine Precision@5
255
+ - type: cosine_precision@10
256
+ value: 0.10000000000000002
257
+ name: Cosine Precision@10
258
+ - type: cosine_recall@1
259
+ value: 0.65
260
+ name: Cosine Recall@1
261
+ - type: cosine_recall@3
262
+ value: 0.8
263
+ name: Cosine Recall@3
264
+ - type: cosine_recall@5
265
+ value: 0.85
266
+ name: Cosine Recall@5
267
+ - type: cosine_recall@10
268
+ value: 1.0
269
+ name: Cosine Recall@10
270
+ - type: cosine_ndcg@10
271
+ value: 0.8047507161733674
272
+ name: Cosine Ndcg@10
273
+ - type: cosine_mrr@10
274
+ value: 0.7455555555555555
275
+ name: Cosine Mrr@10
276
+ - type: cosine_map@100
277
+ value: 0.7455555555555555
278
+ name: Cosine Map@100
279
+ - task:
280
+ type: information-retrieval
281
+ name: Information Retrieval
282
+ dataset:
283
+ name: dim 512
284
+ type: dim_512
285
+ metrics:
286
+ - type: cosine_accuracy@1
287
+ value: 0.7
288
+ name: Cosine Accuracy@1
289
+ - type: cosine_accuracy@3
290
+ value: 0.75
291
+ name: Cosine Accuracy@3
292
+ - type: cosine_accuracy@5
293
+ value: 0.85
294
+ name: Cosine Accuracy@5
295
+ - type: cosine_accuracy@10
296
+ value: 0.95
297
+ name: Cosine Accuracy@10
298
+ - type: cosine_precision@1
299
+ value: 0.7
300
+ name: Cosine Precision@1
301
+ - type: cosine_precision@3
302
+ value: 0.24999999999999994
303
+ name: Cosine Precision@3
304
+ - type: cosine_precision@5
305
+ value: 0.17000000000000007
306
+ name: Cosine Precision@5
307
+ - type: cosine_precision@10
308
+ value: 0.09500000000000001
309
+ name: Cosine Precision@10
310
+ - type: cosine_recall@1
311
+ value: 0.7
312
+ name: Cosine Recall@1
313
+ - type: cosine_recall@3
314
+ value: 0.75
315
+ name: Cosine Recall@3
316
+ - type: cosine_recall@5
317
+ value: 0.85
318
+ name: Cosine Recall@5
319
+ - type: cosine_recall@10
320
+ value: 0.95
321
+ name: Cosine Recall@10
322
+ - type: cosine_ndcg@10
323
+ value: 0.7959488813947496
324
+ name: Cosine Ndcg@10
325
+ - type: cosine_mrr@10
326
+ value: 0.7499999999999999
327
+ name: Cosine Mrr@10
328
+ - type: cosine_map@100
329
+ value: 0.7545454545454545
330
+ name: Cosine Map@100
331
+ - task:
332
+ type: information-retrieval
333
+ name: Information Retrieval
334
+ dataset:
335
+ name: dim 256
336
+ type: dim_256
337
+ metrics:
338
+ - type: cosine_accuracy@1
339
+ value: 0.65
340
+ name: Cosine Accuracy@1
341
+ - type: cosine_accuracy@3
342
+ value: 0.75
343
+ name: Cosine Accuracy@3
344
+ - type: cosine_accuracy@5
345
+ value: 0.8
346
+ name: Cosine Accuracy@5
347
+ - type: cosine_accuracy@10
348
+ value: 0.95
349
+ name: Cosine Accuracy@10
350
+ - type: cosine_precision@1
351
+ value: 0.65
352
+ name: Cosine Precision@1
353
+ - type: cosine_precision@3
354
+ value: 0.24999999999999994
355
+ name: Cosine Precision@3
356
+ - type: cosine_precision@5
357
+ value: 0.16000000000000006
358
+ name: Cosine Precision@5
359
+ - type: cosine_precision@10
360
+ value: 0.09500000000000001
361
+ name: Cosine Precision@10
362
+ - type: cosine_recall@1
363
+ value: 0.65
364
+ name: Cosine Recall@1
365
+ - type: cosine_recall@3
366
+ value: 0.75
367
+ name: Cosine Recall@3
368
+ - type: cosine_recall@5
369
+ value: 0.8
370
+ name: Cosine Recall@5
371
+ - type: cosine_recall@10
372
+ value: 0.95
373
+ name: Cosine Recall@10
374
+ - type: cosine_ndcg@10
375
+ value: 0.7682506698908595
376
+ name: Cosine Ndcg@10
377
+ - type: cosine_mrr@10
378
+ value: 0.7141666666666666
379
+ name: Cosine Mrr@10
380
+ - type: cosine_map@100
381
+ value: 0.7180128205128204
382
+ name: Cosine Map@100
383
+ - task:
384
+ type: information-retrieval
385
+ name: Information Retrieval
386
+ dataset:
387
+ name: dim 128
388
+ type: dim_128
389
+ metrics:
390
+ - type: cosine_accuracy@1
391
+ value: 0.6
392
+ name: Cosine Accuracy@1
393
+ - type: cosine_accuracy@3
394
+ value: 0.75
395
+ name: Cosine Accuracy@3
396
+ - type: cosine_accuracy@5
397
+ value: 0.9
398
+ name: Cosine Accuracy@5
399
+ - type: cosine_accuracy@10
400
+ value: 0.9
401
+ name: Cosine Accuracy@10
402
+ - type: cosine_precision@1
403
+ value: 0.6
404
+ name: Cosine Precision@1
405
+ - type: cosine_precision@3
406
+ value: 0.24999999999999994
407
+ name: Cosine Precision@3
408
+ - type: cosine_precision@5
409
+ value: 0.18000000000000005
410
+ name: Cosine Precision@5
411
+ - type: cosine_precision@10
412
+ value: 0.09000000000000002
413
+ name: Cosine Precision@10
414
+ - type: cosine_recall@1
415
+ value: 0.6
416
+ name: Cosine Recall@1
417
+ - type: cosine_recall@3
418
+ value: 0.75
419
+ name: Cosine Recall@3
420
+ - type: cosine_recall@5
421
+ value: 0.9
422
+ name: Cosine Recall@5
423
+ - type: cosine_recall@10
424
+ value: 0.9
425
+ name: Cosine Recall@10
426
+ - type: cosine_ndcg@10
427
+ value: 0.7417655963056966
428
+ name: Cosine Ndcg@10
429
+ - type: cosine_mrr@10
430
+ value: 0.6908333333333333
431
+ name: Cosine Mrr@10
432
+ - type: cosine_map@100
433
+ value: 0.6987121212121211
434
+ name: Cosine Map@100
435
+ - task:
436
+ type: information-retrieval
437
+ name: Information Retrieval
438
+ dataset:
439
+ name: dim 64
440
+ type: dim_64
441
+ metrics:
442
+ - type: cosine_accuracy@1
443
+ value: 0.55
444
+ name: Cosine Accuracy@1
445
+ - type: cosine_accuracy@3
446
+ value: 0.7
447
+ name: Cosine Accuracy@3
448
+ - type: cosine_accuracy@5
449
+ value: 0.75
450
+ name: Cosine Accuracy@5
451
+ - type: cosine_accuracy@10
452
+ value: 0.95
453
+ name: Cosine Accuracy@10
454
+ - type: cosine_precision@1
455
+ value: 0.55
456
+ name: Cosine Precision@1
457
+ - type: cosine_precision@3
458
+ value: 0.2333333333333333
459
+ name: Cosine Precision@3
460
+ - type: cosine_precision@5
461
+ value: 0.15000000000000005
462
+ name: Cosine Precision@5
463
+ - type: cosine_precision@10
464
+ value: 0.09500000000000001
465
+ name: Cosine Precision@10
466
+ - type: cosine_recall@1
467
+ value: 0.55
468
+ name: Cosine Recall@1
469
+ - type: cosine_recall@3
470
+ value: 0.7
471
+ name: Cosine Recall@3
472
+ - type: cosine_recall@5
473
+ value: 0.75
474
+ name: Cosine Recall@5
475
+ - type: cosine_recall@10
476
+ value: 0.95
477
+ name: Cosine Recall@10
478
+ - type: cosine_ndcg@10
479
+ value: 0.7155704014087189
480
+ name: Cosine Ndcg@10
481
+ - type: cosine_mrr@10
482
+ value: 0.6454166666666665
483
+ name: Cosine Mrr@10
484
+ - type: cosine_map@100
485
+ value: 0.647202380952381
486
+ name: Cosine Map@100
487
+ ---
488
+
489
+ # CodeBERT dense retriever
490
+
491
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [shubharuidas/codebert-embed-base-dense-retriever](https://huggingface.co/shubharuidas/codebert-embed-base-dense-retriever). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
492
+
493
+ ## Model Details
494
+
495
+ ### Model Description
496
+ - **Model Type:** Sentence Transformer
497
+ - **Base model:** [shubharuidas/codebert-embed-base-dense-retriever](https://huggingface.co/shubharuidas/codebert-embed-base-dense-retriever) <!-- at revision 9594580ae943039d0b85feb304404f9b2bb203ce -->
498
+ - **Maximum Sequence Length:** 512 tokens
499
+ - **Output Dimensionality:** 768 dimensions
500
+ - **Similarity Function:** Cosine Similarity
501
+ <!-- - **Training Dataset:** Unknown -->
502
+ - **Language:** en
503
+ - **License:** apache-2.0
504
+
505
+ ### Model Sources
506
+
507
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
508
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/huggingface/sentence-transformers)
509
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
510
+
511
+ ### Full Model Architecture
512
+
513
+ ```
514
+ SentenceTransformer(
515
+ (0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'RobertaModel'})
516
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
517
+ )
518
+ ```
519
+
520
+ ## Usage
521
+
522
+ ### Direct Usage (Sentence Transformers)
523
+
524
+ First install the Sentence Transformers library:
525
+
526
+ ```bash
527
+ pip install -U sentence-transformers
528
+ ```
529
+
530
+ Then you can load this model and run inference.
531
+ ```python
532
+ from sentence_transformers import SentenceTransformer
533
+
534
+ # Download from the 🤗 Hub
535
+ model = SentenceTransformer("anaghaj111/codebert-base-code-embed-mrl-langchain-langgraph")
536
+ # Run inference
537
+ sentences = [
538
+ 'Explain the validate_autoresponse logic',
539
+ 'def validate_autoresponse(cls, v):\n if v is not None and not isinstance(v, dict):\n raise TypeError("autoresponse must be a dict or None")\n return v',
540
+ 'def task_path_str(tup: str | int | tuple) -> str:\n """Generate a string representation of the task path."""\n return (\n f"~{\', \'.join(task_path_str(x) for x in tup)}"\n if isinstance(tup, (tuple, list))\n else f"{tup:010d}"\n if isinstance(tup, int)\n else str(tup)\n )',
541
+ ]
542
+ embeddings = model.encode(sentences)
543
+ print(embeddings.shape)
544
+ # [3, 768]
545
+
546
+ # Get the similarity scores for the embeddings
547
+ similarities = model.similarity(embeddings, embeddings)
548
+ print(similarities)
549
+ # tensor([[1.0000, 0.8070, 0.2282],
550
+ # [0.8070, 1.0000, 0.3158],
551
+ # [0.2282, 0.3158, 1.0000]])
552
+ ```
553
+
554
+ <!--
555
+ ### Direct Usage (Transformers)
556
+
557
+ <details><summary>Click to see the direct usage in Transformers</summary>
558
+
559
+ </details>
560
+ -->
561
+
562
+ <!--
563
+ ### Downstream Usage (Sentence Transformers)
564
+
565
+ You can finetune this model on your own dataset.
566
+
567
+ <details><summary>Click to expand</summary>
568
+
569
+ </details>
570
+ -->
571
+
572
+ <!--
573
+ ### Out-of-Scope Use
574
+
575
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
576
+ -->
577
+
578
+ ## Evaluation
579
+
580
+ ### Metrics
581
+
582
+ #### Information Retrieval
583
+
584
+ * Dataset: `dim_768`
585
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
586
+ ```json
587
+ {
588
+ "truncate_dim": 768
589
+ }
590
+ ```
591
+
592
+ | Metric | Value |
593
+ |:--------------------|:-----------|
594
+ | cosine_accuracy@1 | 0.65 |
595
+ | cosine_accuracy@3 | 0.8 |
596
+ | cosine_accuracy@5 | 0.85 |
597
+ | cosine_accuracy@10 | 1.0 |
598
+ | cosine_precision@1 | 0.65 |
599
+ | cosine_precision@3 | 0.2667 |
600
+ | cosine_precision@5 | 0.17 |
601
+ | cosine_precision@10 | 0.1 |
602
+ | cosine_recall@1 | 0.65 |
603
+ | cosine_recall@3 | 0.8 |
604
+ | cosine_recall@5 | 0.85 |
605
+ | cosine_recall@10 | 1.0 |
606
+ | **cosine_ndcg@10** | **0.8048** |
607
+ | cosine_mrr@10 | 0.7456 |
608
+ | cosine_map@100 | 0.7456 |
609
+
610
+ #### Information Retrieval
611
+
612
+ * Dataset: `dim_512`
613
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
614
+ ```json
615
+ {
616
+ "truncate_dim": 512
617
+ }
618
+ ```
619
+
620
+ | Metric | Value |
621
+ |:--------------------|:-----------|
622
+ | cosine_accuracy@1 | 0.7 |
623
+ | cosine_accuracy@3 | 0.75 |
624
+ | cosine_accuracy@5 | 0.85 |
625
+ | cosine_accuracy@10 | 0.95 |
626
+ | cosine_precision@1 | 0.7 |
627
+ | cosine_precision@3 | 0.25 |
628
+ | cosine_precision@5 | 0.17 |
629
+ | cosine_precision@10 | 0.095 |
630
+ | cosine_recall@1 | 0.7 |
631
+ | cosine_recall@3 | 0.75 |
632
+ | cosine_recall@5 | 0.85 |
633
+ | cosine_recall@10 | 0.95 |
634
+ | **cosine_ndcg@10** | **0.7959** |
635
+ | cosine_mrr@10 | 0.75 |
636
+ | cosine_map@100 | 0.7545 |
637
+
638
+ #### Information Retrieval
639
+
640
+ * Dataset: `dim_256`
641
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
642
+ ```json
643
+ {
644
+ "truncate_dim": 256
645
+ }
646
+ ```
647
+
648
+ | Metric | Value |
649
+ |:--------------------|:-----------|
650
+ | cosine_accuracy@1 | 0.65 |
651
+ | cosine_accuracy@3 | 0.75 |
652
+ | cosine_accuracy@5 | 0.8 |
653
+ | cosine_accuracy@10 | 0.95 |
654
+ | cosine_precision@1 | 0.65 |
655
+ | cosine_precision@3 | 0.25 |
656
+ | cosine_precision@5 | 0.16 |
657
+ | cosine_precision@10 | 0.095 |
658
+ | cosine_recall@1 | 0.65 |
659
+ | cosine_recall@3 | 0.75 |
660
+ | cosine_recall@5 | 0.8 |
661
+ | cosine_recall@10 | 0.95 |
662
+ | **cosine_ndcg@10** | **0.7683** |
663
+ | cosine_mrr@10 | 0.7142 |
664
+ | cosine_map@100 | 0.718 |
665
+
666
+ #### Information Retrieval
667
+
668
+ * Dataset: `dim_128`
669
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
670
+ ```json
671
+ {
672
+ "truncate_dim": 128
673
+ }
674
+ ```
675
+
676
+ | Metric | Value |
677
+ |:--------------------|:-----------|
678
+ | cosine_accuracy@1 | 0.6 |
679
+ | cosine_accuracy@3 | 0.75 |
680
+ | cosine_accuracy@5 | 0.9 |
681
+ | cosine_accuracy@10 | 0.9 |
682
+ | cosine_precision@1 | 0.6 |
683
+ | cosine_precision@3 | 0.25 |
684
+ | cosine_precision@5 | 0.18 |
685
+ | cosine_precision@10 | 0.09 |
686
+ | cosine_recall@1 | 0.6 |
687
+ | cosine_recall@3 | 0.75 |
688
+ | cosine_recall@5 | 0.9 |
689
+ | cosine_recall@10 | 0.9 |
690
+ | **cosine_ndcg@10** | **0.7418** |
691
+ | cosine_mrr@10 | 0.6908 |
692
+ | cosine_map@100 | 0.6987 |
693
+
694
+ #### Information Retrieval
695
+
696
+ * Dataset: `dim_64`
697
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
698
+ ```json
699
+ {
700
+ "truncate_dim": 64
701
+ }
702
+ ```
703
+
704
+ | Metric | Value |
705
+ |:--------------------|:-----------|
706
+ | cosine_accuracy@1 | 0.55 |
707
+ | cosine_accuracy@3 | 0.7 |
708
+ | cosine_accuracy@5 | 0.75 |
709
+ | cosine_accuracy@10 | 0.95 |
710
+ | cosine_precision@1 | 0.55 |
711
+ | cosine_precision@3 | 0.2333 |
712
+ | cosine_precision@5 | 0.15 |
713
+ | cosine_precision@10 | 0.095 |
714
+ | cosine_recall@1 | 0.55 |
715
+ | cosine_recall@3 | 0.7 |
716
+ | cosine_recall@5 | 0.75 |
717
+ | cosine_recall@10 | 0.95 |
718
+ | **cosine_ndcg@10** | **0.7156** |
719
+ | cosine_mrr@10 | 0.6454 |
720
+ | cosine_map@100 | 0.6472 |
721
+
722
+ <!--
723
+ ## Bias, Risks and Limitations
724
+
725
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
726
+ -->
727
+
728
+ <!--
729
+ ### Recommendations
730
+
731
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
732
+ -->
733
+
734
+ ## Training Details
735
+
736
+ ### Training Dataset
737
+
738
+ #### Unnamed Dataset
739
+
740
+ * Size: 180 training samples
741
+ * Columns: <code>anchor</code> and <code>positive</code>
742
+ * Approximate statistics based on the first 180 samples:
743
+ | | anchor | positive |
744
+ |:--------|:-----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
745
+ | type | string | string |
746
+ | details | <ul><li>min: 6 tokens</li><li>mean: 14.07 tokens</li><li>max: 354 tokens</li></ul> | <ul><li>min: 14 tokens</li><li>mean: 272.19 tokens</li><li>max: 512 tokens</li></ul> |
747
+ * Samples:
748
+ | anchor | positive |
749
+ |:--------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
750
+ | <code>Best practices for test_search_items</code> | <code>def test_search_items(<br> fake_embeddings: CharacterEmbeddings,<br>) -> None:<br> """Test search_items functionality by calling store methods directly."""<br> base = "test_search_items"<br> test_namespaces = [<br> (base, "documents", "user1"),<br> (base, "documents", "user2"),<br> (base, "reports", "department1"),<br> (base, "reports", "department2"),<br> ]<br> test_items = [<br> {"title": "Doc 1", "author": "John Doe", "tags": ["important"]},<br> {"title": "Doc 2", "author": "Jane Smith", "tags": ["draft"]},<br> {"title": "Report A", "author": "John Doe", "tags": ["final"]},<br> {"title": "Report B", "author": "Alice Johnson", "tags": ["draft"]},<br> ]<br><br> with create_vector_store(<br> fake_embeddings, text_fields=["key0", "key1", "key3"]<br> ) as store:<br> # Insert test data<br> for ns, item in zip(test_namespaces, test_items, strict=False):<br> key = f"item_{ns[-1]}"<br> store.put(ns, key, item)<br><br> # 1. Search documen...</code> |
751
+ | <code>How does async store work in Python?</code> | <code>async def store(request) -> AsyncIterator[AsyncPostgresStore]:<br> database = f"test_{uuid.uuid4().hex[:16]}"<br> uri_parts = DEFAULT_URI.split("/")<br> uri_base = "/".join(uri_parts[:-1])<br> query_params = ""<br> if "?" in uri_parts[-1]:<br> db_name, query_params = uri_parts[-1].split("?", 1)<br> query_params = "?" + query_params<br><br> conn_string = f"{uri_base}/{database}{query_params}"<br> admin_conn_string = DEFAULT_URI<br> ttl_config = {<br> "default_ttl": TTL_MINUTES,<br> "refresh_on_read": True,<br> "sweep_interval_minutes": TTL_MINUTES / 2,<br> }<br> async with await AsyncConnection.connect(<br> admin_conn_string, autocommit=True<br> ) as conn:<br> await conn.execute(f"CREATE DATABASE {database}")<br> try:<br> async with AsyncPostgresStore.from_conn_string(<br> conn_string, ttl=ttl_config<br> ) as store:<br> store.MIGRATIONS = [<br> (<br> mig.replace("ttl_minutes INT;", "ttl_minutes FLOAT;")<br> ...</code> |
752
+ | <code>How to implement list?</code> | <code>def list(<br> self,<br> config: RunnableConfig \| None,<br> *,<br> filter: dict[str, Any] \| None = None,<br> before: RunnableConfig \| None = None,<br> limit: int \| None = None,<br> ) -> Iterator[CheckpointTuple]:<br> """List checkpoints from the database.<br><br> This method retrieves a list of checkpoint tuples from the Postgres database based<br> on the provided config. For ShallowPostgresSaver, this method returns a list with<br> ONLY the most recent checkpoint.<br> """<br> aiter_ = self.alist(config, filter=filter, before=before, limit=limit)<br> while True:<br> try:<br> yield asyncio.run_coroutine_threadsafe(<br> anext(aiter_), # type: ignore[arg-type] # noqa: F821<br> self.loop,<br> ).result()<br> except StopAsyncIteration:<br> break</code> |
753
+ * Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
754
+ ```json
755
+ {
756
+ "loss": "MultipleNegativesRankingLoss",
757
+ "matryoshka_dims": [
758
+ 768,
759
+ 512,
760
+ 256,
761
+ 128,
762
+ 64
763
+ ],
764
+ "matryoshka_weights": [
765
+ 1,
766
+ 1,
767
+ 1,
768
+ 1,
769
+ 1
770
+ ],
771
+ "n_dims_per_step": -1
772
+ }
773
+ ```
774
+
775
+ ### Training Hyperparameters
776
+ #### Non-Default Hyperparameters
777
+
778
+ - `eval_strategy`: epoch
779
+ - `per_device_train_batch_size`: 4
780
+ - `per_device_eval_batch_size`: 4
781
+ - `gradient_accumulation_steps`: 16
782
+ - `learning_rate`: 2e-05
783
+ - `num_train_epochs`: 2
784
+ - `lr_scheduler_type`: cosine
785
+ - `warmup_ratio`: 0.1
786
+ - `warmup_steps`: 0.1
787
+ - `fp16`: True
788
+ - `load_best_model_at_end`: True
789
+ - `optim`: adamw_torch
790
+ - `batch_sampler`: no_duplicates
791
+
792
+ #### All Hyperparameters
793
+ <details><summary>Click to expand</summary>
794
+
795
+ - `do_predict`: False
796
+ - `eval_strategy`: epoch
797
+ - `prediction_loss_only`: True
798
+ - `per_device_train_batch_size`: 4
799
+ - `per_device_eval_batch_size`: 4
800
+ - `gradient_accumulation_steps`: 16
801
+ - `eval_accumulation_steps`: None
802
+ - `torch_empty_cache_steps`: None
803
+ - `learning_rate`: 2e-05
804
+ - `weight_decay`: 0.0
805
+ - `adam_beta1`: 0.9
806
+ - `adam_beta2`: 0.999
807
+ - `adam_epsilon`: 1e-08
808
+ - `max_grad_norm`: 1.0
809
+ - `num_train_epochs`: 2
810
+ - `max_steps`: -1
811
+ - `lr_scheduler_type`: cosine
812
+ - `lr_scheduler_kwargs`: None
813
+ - `warmup_ratio`: 0.1
814
+ - `warmup_steps`: 0.1
815
+ - `log_level`: passive
816
+ - `log_level_replica`: warning
817
+ - `log_on_each_node`: True
818
+ - `logging_nan_inf_filter`: True
819
+ - `enable_jit_checkpoint`: False
820
+ - `save_on_each_node`: False
821
+ - `save_only_model`: False
822
+ - `restore_callback_states_from_checkpoint`: False
823
+ - `use_cpu`: False
824
+ - `seed`: 42
825
+ - `data_seed`: None
826
+ - `bf16`: False
827
+ - `fp16`: True
828
+ - `bf16_full_eval`: False
829
+ - `fp16_full_eval`: False
830
+ - `tf32`: None
831
+ - `local_rank`: -1
832
+ - `ddp_backend`: None
833
+ - `debug`: []
834
+ - `dataloader_drop_last`: False
835
+ - `dataloader_num_workers`: 0
836
+ - `dataloader_prefetch_factor`: None
837
+ - `disable_tqdm`: False
838
+ - `remove_unused_columns`: True
839
+ - `label_names`: None
840
+ - `load_best_model_at_end`: True
841
+ - `ignore_data_skip`: False
842
+ - `fsdp`: []
843
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
844
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
845
+ - `parallelism_config`: None
846
+ - `deepspeed`: None
847
+ - `label_smoothing_factor`: 0.0
848
+ - `optim`: adamw_torch
849
+ - `optim_args`: None
850
+ - `group_by_length`: False
851
+ - `length_column_name`: length
852
+ - `project`: huggingface
853
+ - `trackio_space_id`: trackio
854
+ - `ddp_find_unused_parameters`: None
855
+ - `ddp_bucket_cap_mb`: None
856
+ - `ddp_broadcast_buffers`: False
857
+ - `dataloader_pin_memory`: True
858
+ - `dataloader_persistent_workers`: False
859
+ - `skip_memory_metrics`: True
860
+ - `push_to_hub`: False
861
+ - `resume_from_checkpoint`: None
862
+ - `hub_model_id`: None
863
+ - `hub_strategy`: every_save
864
+ - `hub_private_repo`: None
865
+ - `hub_always_push`: False
866
+ - `hub_revision`: None
867
+ - `gradient_checkpointing`: False
868
+ - `gradient_checkpointing_kwargs`: None
869
+ - `include_for_metrics`: []
870
+ - `eval_do_concat_batches`: True
871
+ - `auto_find_batch_size`: False
872
+ - `full_determinism`: False
873
+ - `ddp_timeout`: 1800
874
+ - `torch_compile`: False
875
+ - `torch_compile_backend`: None
876
+ - `torch_compile_mode`: None
877
+ - `include_num_input_tokens_seen`: no
878
+ - `neftune_noise_alpha`: None
879
+ - `optim_target_modules`: None
880
+ - `batch_eval_metrics`: False
881
+ - `eval_on_start`: False
882
+ - `use_liger_kernel`: False
883
+ - `liger_kernel_config`: None
884
+ - `eval_use_gather_object`: False
885
+ - `average_tokens_across_devices`: True
886
+ - `use_cache`: False
887
+ - `prompts`: None
888
+ - `batch_sampler`: no_duplicates
889
+ - `multi_dataset_batch_sampler`: proportional
890
+ - `router_mapping`: {}
891
+ - `learning_rate_mapping`: {}
892
+
893
+ </details>
894
+
895
+ ### Training Logs
896
+ | Epoch | Step | dim_768_cosine_ndcg@10 | dim_512_cosine_ndcg@10 | dim_256_cosine_ndcg@10 | dim_128_cosine_ndcg@10 | dim_64_cosine_ndcg@10 |
897
+ |:-------:|:-----:|:----------------------:|:----------------------:|:----------------------:|:----------------------:|:---------------------:|
898
+ | 1.0 | 3 | 0.7612 | 0.7137 | 0.7083 | 0.6926 | 0.6624 |
899
+ | **2.0** | **6** | **0.8048** | **0.7959** | **0.7683** | **0.7418** | **0.7156** |
900
+
901
+ * The bold row denotes the saved checkpoint.
902
+
903
+ ### Framework Versions
904
+ - Python: 3.14.0
905
+ - Sentence Transformers: 5.2.1
906
+ - Transformers: 5.0.0
907
+ - PyTorch: 2.10.0
908
+ - Accelerate: 1.12.0
909
+ - Datasets: 4.5.0
910
+ - Tokenizers: 0.22.2
911
+
912
+ ## Citation
913
+
914
+ ### BibTeX
915
+
916
+ #### Sentence Transformers
917
+ ```bibtex
918
+ @inproceedings{reimers-2019-sentence-bert,
919
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
920
+ author = "Reimers, Nils and Gurevych, Iryna",
921
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
922
+ month = "11",
923
+ year = "2019",
924
+ publisher = "Association for Computational Linguistics",
925
+ url = "https://arxiv.org/abs/1908.10084",
926
+ }
927
+ ```
928
+
929
+ #### MatryoshkaLoss
930
+ ```bibtex
931
+ @misc{kusupati2024matryoshka,
932
+ title={Matryoshka Representation Learning},
933
+ author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
934
+ year={2024},
935
+ eprint={2205.13147},
936
+ archivePrefix={arXiv},
937
+ primaryClass={cs.LG}
938
+ }
939
+ ```
940
+
941
+ #### MultipleNegativesRankingLoss
942
+ ```bibtex
943
+ @misc{henderson2017efficient,
944
+ title={Efficient Natural Language Response Suggestion for Smart Reply},
945
+ author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
946
+ year={2017},
947
+ eprint={1705.00652},
948
+ archivePrefix={arXiv},
949
+ primaryClass={cs.CL}
950
+ }
951
+ ```
952
+
953
+ <!--
954
+ ## Glossary
955
+
956
+ *Clearly define terms in order to be accessible across audiences.*
957
+ -->
958
+
959
+ <!--
960
+ ## Model Card Authors
961
+
962
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
963
+ -->
964
+
965
+ <!--
966
+ ## Model Card Contact
967
+
968
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
969
+ -->
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_cross_attention": false,
3
+ "architectures": [
4
+ "RobertaModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "dtype": "float32",
10
+ "eos_token_id": 2,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "is_decoder": false,
17
+ "layer_norm_eps": 1e-05,
18
+ "max_position_embeddings": 514,
19
+ "model_type": "roberta",
20
+ "num_attention_heads": 12,
21
+ "num_hidden_layers": 12,
22
+ "output_past": true,
23
+ "pad_token_id": 1,
24
+ "position_embedding_type": "absolute",
25
+ "tie_word_embeddings": true,
26
+ "transformers_version": "5.0.0",
27
+ "type_vocab_size": 1,
28
+ "use_cache": true,
29
+ "vocab_size": 50265
30
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "SentenceTransformer",
3
+ "__version__": {
4
+ "sentence_transformers": "5.2.1",
5
+ "transformers": "5.0.0",
6
+ "pytorch": "2.10.0"
7
+ },
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:180fd754acf3876c3ede6d2ba40e6ba3eaecee1115d82481953408051451d3ca
3
+ size 498604880
modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": "<s>",
5
+ "clean_up_tokenization_spaces": false,
6
+ "cls_token": "<s>",
7
+ "eos_token": "</s>",
8
+ "errors": "replace",
9
+ "is_local": false,
10
+ "mask_token": "<mask>",
11
+ "model_max_length": 512,
12
+ "model_specific_special_tokens": {},
13
+ "pad_token": "<pad>",
14
+ "sep_token": "</s>",
15
+ "tokenizer_class": "RobertaTokenizer",
16
+ "trim_offsets": true,
17
+ "unk_token": "<unk>"
18
+ }