itsanan committed on
Commit
28ae705
·
verified ·
1 Parent(s): 68e2da0

Add new SentenceTransformer model

Browse files
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,925 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ tags:
6
+ - sentence-transformers
7
+ - sentence-similarity
8
+ - feature-extraction
9
+ - dense
10
+ - generated_from_trainer
11
+ - dataset_size:900
12
+ - loss:MatryoshkaLoss
13
+ - loss:MultipleNegativesRankingLoss
14
+ base_model: microsoft/codebert-base
15
+ widget:
16
+ - source_sentence: How to implement __del__?
17
+ sentences:
18
+ - "class SampleMultiCrewFlow(Flow[SimpleState]):\n @start()\n def\
19
+ \ first_crew(self):\n \"\"\"Run first crew.\"\"\"\n agent\
20
+ \ = Agent(\n role=\"first agent\",\n goal=\"first\
21
+ \ task\",\n backstory=\"first agent\",\n llm=mock_llm_1,\n\
22
+ \ )\n task = Task(\n description=\"First\
23
+ \ task\",\n expected_output=\"first result\",\n \
24
+ \ agent=agent,\n )\n crew = Crew(\n agents=[agent],\n\
25
+ \ tasks=[task],\n share_crew=True,\n \
26
+ \ )\n\n result = crew.kickoff()\n\n assert crew._execution_span\
27
+ \ is not None\n return str(result.raw)\n\n @listen(first_crew)\n\
28
+ \ def second_crew(self, first_result: str):\n \"\"\"Run second\
29
+ \ crew.\"\"\"\n agent = Agent(\n role=\"second agent\"\
30
+ ,\n goal=\"second task\",\n backstory=\"second agent\"\
31
+ ,\n llm=mock_llm_2,\n )\n task = Task(\n\
32
+ \ description=\"Second task\",\n expected_output=\"\
33
+ second result\",\n agent=agent,\n )\n crew\
34
+ \ = Crew(\n agents=[agent],\n tasks=[task],\n \
35
+ \ share_crew=True,\n )\n\n result = crew.kickoff()\n\
36
+ \n assert crew._execution_span is not None\n\n self.state.result\
37
+ \ = f\"{first_result} + {result.raw}\"\n return self.state.result"
38
+ - "async def test_anthropic_async_with_tools():\n \"\"\"Test async call with\
39
+ \ tools.\"\"\"\n llm = AnthropicCompletion(model=\"claude-sonnet-4-0\")\n\n\
40
+ \ tools = [\n {\n \"type\": \"function\",\n \"\
41
+ function\": {\n \"name\": \"get_weather\",\n \"\
42
+ description\": \"Get the current weather for a location\",\n \"\
43
+ parameters\": {\n \"type\": \"object\",\n \
44
+ \ \"properties\": {\n \"location\": {\n \
45
+ \ \"type\": \"string\",\n \"description\"\
46
+ : \"The city and state, e.g. San Francisco, CA\"\n }\n\
47
+ \ },\n \"required\": [\"location\"]\n \
48
+ \ }\n }\n }\n ]\n\n result = await llm.acall(\n\
49
+ \ \"What's the weather in San Francisco?\",\n tools=tools\n )\n\
50
+ \ logging.debug(\"result: %s\", result)\n\n assert result is not None\n\
51
+ \ assert isinstance(result, str)"
52
+ - "def __del__(self):\n \"\"\"Cleanup connections on deletion.\"\"\"\n \
53
+ \ try:\n if self._connection_pool:\n for conn in\
54
+ \ self._connection_pool:\n try:\n conn.close()\n\
55
+ \ except Exception: # noqa: PERF203, S110\n \
56
+ \ pass\n if self._thread_pool:\n self._thread_pool.shutdown()\n\
57
+ \ except Exception: # noqa: S110\n pass"
58
+ - source_sentence: How does route_to_cycle work in Python?
59
+ sentences:
60
+ - "def route_to_cycle(self):\n execution_log.append(\"router_initial\"\
61
+ )\n return \"loop\""
62
+ - "def _register_system_event_handlers(self, event_bus: CrewAIEventsBus) -> None:\n\
63
+ \ \"\"\"Register handlers for system signal events (SIGTERM, SIGINT, etc.).\"\
64
+ \"\"\n\n @on_signal\n def handle_signal(source: Any, event: SignalEvent)\
65
+ \ -> None:\n \"\"\"Flush trace batch on system signals to prevent data\
66
+ \ loss.\"\"\"\n if self.batch_manager.is_batch_initialized():\n \
67
+ \ self.batch_manager.finalize_batch()"
68
+ - "async def aadd(self) -> None:\n \"\"\"Add JSON file content asynchronously.\"\
69
+ \"\"\n content_str = (\n str(self.content) if isinstance(self.content,\
70
+ \ dict) else self.content\n )\n new_chunks = self._chunk_text(content_str)\n\
71
+ \ self.chunks.extend(new_chunks)\n await self._asave_documents()"
72
+ - source_sentence: Explain the test_evaluate logic
73
+ sentences:
74
+ - "def test_flow_copy_state_with_unpickleable_objects():\n \"\"\"Test that _copy_state\
75
+ \ handles unpickleable objects like RLock.\n\n Regression test for issue #3828:\
76
+ \ Flow should not crash when state contains\n objects that cannot be deep copied\
77
+ \ (like threading.RLock).\n \"\"\"\n\n class StateWithRLock(BaseModel):\n\
78
+ \ counter: int = 0\n lock: Optional[threading.RLock] = None\n\n\
79
+ \ class FlowWithRLock(Flow[StateWithRLock]):\n @start()\n def\
80
+ \ step_1(self):\n self.state.counter += 1\n\n @listen(step_1)\n\
81
+ \ def step_2(self):\n self.state.counter += 1\n\n flow =\
82
+ \ FlowWithRLock(initial_state=StateWithRLock())\n flow._state.lock = threading.RLock()\n\
83
+ \n copied_state = flow._copy_state()\n assert copied_state.counter == 0\n\
84
+ \ assert copied_state.lock is not None"
85
+ - "def test_evaluate(self, crew_planner):\n task_output = TaskOutput(\n \
86
+ \ description=\"Task 1\", agent=str(crew_planner.crew.agents[0])\n \
87
+ \ )\n\n with mock.patch.object(Task, \"execute_sync\") as execute:\n\
88
+ \ execute().pydantic = TaskEvaluationPydanticOutput(quality=9.5)\n\
89
+ \ crew_planner.evaluate(task_output)\n assert crew_planner.tasks_scores[0]\
90
+ \ == [9.5]"
91
+ - "class SlowAsyncTool(BaseTool):\n name: str = \"slow_async\"\n \
92
+ \ description: str = \"Simulates slow I/O\"\n\n def _run(self,\
93
+ \ task_id: int, delay: float) -> str:\n return f\"Task {task_id}\
94
+ \ done\"\n\n async def _arun(self, task_id: int, delay: float) -> str:\n\
95
+ \ await asyncio.sleep(delay)\n return f\"Task {task_id}\
96
+ \ done\""
97
+ - source_sentence: Explain the test_clean_action_no_formatting logic
98
+ sentences:
99
+ - "def test_task_interpolation_with_hyphens():\n agent = Agent(\n role=\"\
100
+ Researcher\",\n goal=\"be an assistant that responds with {interpolation-with-hyphens}\"\
101
+ ,\n backstory=\"You're an expert researcher, specialized in technology,\
102
+ \ software engineering, AI and startups. You work as a freelancer and is now working\
103
+ \ on doing research and analysis for a new customer.\",\n allow_delegation=False,\n\
104
+ \ )\n task = Task(\n description=\"be an assistant that responds\
105
+ \ with {interpolation-with-hyphens}\",\n expected_output=\"The response\
106
+ \ should be addressing: {interpolation-with-hyphens}\",\n agent=agent,\n\
107
+ \ )\n crew = Crew(\n agents=[agent],\n tasks=[task],\n \
108
+ \ verbose=True,\n )\n result = crew.kickoff(inputs={\"interpolation-with-hyphens\"\
109
+ : \"say hello world\"})\n assert \"say hello world\" in task.prompt()\n\n \
110
+ \ assert result.raw == \"Hello, World!\""
111
+ - "class LLMCallCompletedEvent(LLMEventBase):\n \"\"\"Event emitted when a LLM\
112
+ \ call completes\"\"\"\n\n type: str = \"llm_call_completed\"\n messages:\
113
+ \ str | list[dict[str, Any]] | None = None\n response: Any\n call_type:\
114
+ \ LLMCallType\n model: str | None = None"
115
+ - "def test_clean_action_no_formatting():\n action = \"Ask question to senior\
116
+ \ researcher\"\n cleaned_action = parser._clean_action(action)\n assert\
117
+ \ cleaned_action == \"Ask question to senior researcher\""
118
+ - source_sentence: Example usage of test_status_code_and_content_type
119
+ sentences:
120
+ - "class NavigateBackToolInput(BaseModel):\n \"\"\"Input for NavigateBackTool.\"\
121
+ \"\"\n\n thread_id: str = Field(\n default=\"default\", description=\"\
122
+ Thread ID for the browser session\"\n )"
123
+ - "def test_status_code_and_content_type(self, mock_bs, mock_get):\n for\
124
+ \ status in [200, 201, 301]:\n mock_get.return_value = self.setup_mock_response(\n\
125
+ \ f\"<html><body>Status {status}</body></html>\", status_code=status\n\
126
+ \ )\n mock_bs.return_value = self.setup_mock_soup(f\"Status\
127
+ \ {status}\")\n result = WebPageLoader().load(\n SourceContent(f\"\
128
+ https://example.com/{status}\")\n )\n assert result.metadata[\"\
129
+ status_code\"] == status\n\n for ctype in [\"text/html\", \"text/plain\"\
130
+ , \"application/xhtml+xml\"]:\n mock_get.return_value = self.setup_mock_response(\n\
131
+ \ \"<html><body>Content</body></html>\", content_type=ctype\n \
132
+ \ )\n mock_bs.return_value = self.setup_mock_soup(\"Content\"\
133
+ )\n result = WebPageLoader().load(SourceContent(\"https://example.com\"\
134
+ ))\n assert result.metadata[\"content_type\"] == ctype"
135
+ - "def set_crew(self, crew: Any) -> Memory:\n \"\"\"Set the crew for this\
136
+ \ memory instance.\"\"\"\n self.crew = crew\n return self"
137
+ pipeline_tag: sentence-similarity
138
+ library_name: sentence-transformers
139
+ metrics:
140
+ - cosine_accuracy@1
141
+ - cosine_accuracy@3
142
+ - cosine_accuracy@5
143
+ - cosine_accuracy@10
144
+ - cosine_precision@1
145
+ - cosine_precision@3
146
+ - cosine_precision@5
147
+ - cosine_precision@10
148
+ - cosine_recall@1
149
+ - cosine_recall@3
150
+ - cosine_recall@5
151
+ - cosine_recall@10
152
+ - cosine_ndcg@10
153
+ - cosine_mrr@10
154
+ - cosine_map@100
155
+ model-index:
156
+ - name: CodeBERT Fine-tuned on CrewAI (LR=2e-05)
157
+ results:
158
+ - task:
159
+ type: information-retrieval
160
+ name: Information Retrieval
161
+ dataset:
162
+ name: dim 768
163
+ type: dim_768
164
+ metrics:
165
+ - type: cosine_accuracy@1
166
+ value: 0.04
167
+ name: Cosine Accuracy@1
168
+ - type: cosine_accuracy@3
169
+ value: 0.04
170
+ name: Cosine Accuracy@3
171
+ - type: cosine_accuracy@5
172
+ value: 0.04
173
+ name: Cosine Accuracy@5
174
+ - type: cosine_accuracy@10
175
+ value: 0.06
176
+ name: Cosine Accuracy@10
177
+ - type: cosine_precision@1
178
+ value: 0.04
179
+ name: Cosine Precision@1
180
+ - type: cosine_precision@3
181
+ value: 0.04
182
+ name: Cosine Precision@3
183
+ - type: cosine_precision@5
184
+ value: 0.04
185
+ name: Cosine Precision@5
186
+ - type: cosine_precision@10
187
+ value: 0.03
188
+ name: Cosine Precision@10
189
+ - type: cosine_recall@1
190
+ value: 0.008
191
+ name: Cosine Recall@1
192
+ - type: cosine_recall@3
193
+ value: 0.024
194
+ name: Cosine Recall@3
195
+ - type: cosine_recall@5
196
+ value: 0.04
197
+ name: Cosine Recall@5
198
+ - type: cosine_recall@10
199
+ value: 0.06
200
+ name: Cosine Recall@10
201
+ - type: cosine_ndcg@10
202
+ value: 0.050819890355577976
203
+ name: Cosine Ndcg@10
204
+ - type: cosine_mrr@10
205
+ value: 0.04333333333333334
206
+ name: Cosine Mrr@10
207
+ - type: cosine_map@100
208
+ value: 0.06130275691848844
209
+ name: Cosine Map@100
210
+ - task:
211
+ type: information-retrieval
212
+ name: Information Retrieval
213
+ dataset:
214
+ name: dim 512
215
+ type: dim_512
216
+ metrics:
217
+ - type: cosine_accuracy@1
218
+ value: 0.01
219
+ name: Cosine Accuracy@1
220
+ - type: cosine_accuracy@3
221
+ value: 0.01
222
+ name: Cosine Accuracy@3
223
+ - type: cosine_accuracy@5
224
+ value: 0.01
225
+ name: Cosine Accuracy@5
226
+ - type: cosine_accuracy@10
227
+ value: 0.01
228
+ name: Cosine Accuracy@10
229
+ - type: cosine_precision@1
230
+ value: 0.01
231
+ name: Cosine Precision@1
232
+ - type: cosine_precision@3
233
+ value: 0.01
234
+ name: Cosine Precision@3
235
+ - type: cosine_precision@5
236
+ value: 0.01
237
+ name: Cosine Precision@5
238
+ - type: cosine_precision@10
239
+ value: 0.005
240
+ name: Cosine Precision@10
241
+ - type: cosine_recall@1
242
+ value: 0.002
243
+ name: Cosine Recall@1
244
+ - type: cosine_recall@3
245
+ value: 0.006
246
+ name: Cosine Recall@3
247
+ - type: cosine_recall@5
248
+ value: 0.01
249
+ name: Cosine Recall@5
250
+ - type: cosine_recall@10
251
+ value: 0.01
252
+ name: Cosine Recall@10
253
+ - type: cosine_ndcg@10
254
+ value: 0.01
255
+ name: Cosine Ndcg@10
256
+ - type: cosine_mrr@10
257
+ value: 0.01
258
+ name: Cosine Mrr@10
259
+ - type: cosine_map@100
260
+ value: 0.019316331411936505
261
+ name: Cosine Map@100
262
+ - task:
263
+ type: information-retrieval
264
+ name: Information Retrieval
265
+ dataset:
266
+ name: dim 256
267
+ type: dim_256
268
+ metrics:
269
+ - type: cosine_accuracy@1
270
+ value: 0.01
271
+ name: Cosine Accuracy@1
272
+ - type: cosine_accuracy@3
273
+ value: 0.01
274
+ name: Cosine Accuracy@3
275
+ - type: cosine_accuracy@5
276
+ value: 0.01
277
+ name: Cosine Accuracy@5
278
+ - type: cosine_accuracy@10
279
+ value: 0.03
280
+ name: Cosine Accuracy@10
281
+ - type: cosine_precision@1
282
+ value: 0.01
283
+ name: Cosine Precision@1
284
+ - type: cosine_precision@3
285
+ value: 0.01
286
+ name: Cosine Precision@3
287
+ - type: cosine_precision@5
288
+ value: 0.01
289
+ name: Cosine Precision@5
290
+ - type: cosine_precision@10
291
+ value: 0.015
292
+ name: Cosine Precision@10
293
+ - type: cosine_recall@1
294
+ value: 0.002
295
+ name: Cosine Recall@1
296
+ - type: cosine_recall@3
297
+ value: 0.006
298
+ name: Cosine Recall@3
299
+ - type: cosine_recall@5
300
+ value: 0.01
301
+ name: Cosine Recall@5
302
+ - type: cosine_recall@10
303
+ value: 0.03
304
+ name: Cosine Recall@10
305
+ - type: cosine_ndcg@10
306
+ value: 0.020819890355577977
307
+ name: Cosine Ndcg@10
308
+ - type: cosine_mrr@10
309
+ value: 0.013333333333333334
310
+ name: Cosine Mrr@10
311
+ - type: cosine_map@100
312
+ value: 0.028978936077832484
313
+ name: Cosine Map@100
314
+ - task:
315
+ type: information-retrieval
316
+ name: Information Retrieval
317
+ dataset:
318
+ name: dim 128
319
+ type: dim_128
320
+ metrics:
321
+ - type: cosine_accuracy@1
322
+ value: 0.01
323
+ name: Cosine Accuracy@1
324
+ - type: cosine_accuracy@3
325
+ value: 0.01
326
+ name: Cosine Accuracy@3
327
+ - type: cosine_accuracy@5
328
+ value: 0.01
329
+ name: Cosine Accuracy@5
330
+ - type: cosine_accuracy@10
331
+ value: 0.01
332
+ name: Cosine Accuracy@10
333
+ - type: cosine_precision@1
334
+ value: 0.01
335
+ name: Cosine Precision@1
336
+ - type: cosine_precision@3
337
+ value: 0.01
338
+ name: Cosine Precision@3
339
+ - type: cosine_precision@5
340
+ value: 0.01
341
+ name: Cosine Precision@5
342
+ - type: cosine_precision@10
343
+ value: 0.005
344
+ name: Cosine Precision@10
345
+ - type: cosine_recall@1
346
+ value: 0.002
347
+ name: Cosine Recall@1
348
+ - type: cosine_recall@3
349
+ value: 0.006
350
+ name: Cosine Recall@3
351
+ - type: cosine_recall@5
352
+ value: 0.01
353
+ name: Cosine Recall@5
354
+ - type: cosine_recall@10
355
+ value: 0.01
356
+ name: Cosine Recall@10
357
+ - type: cosine_ndcg@10
358
+ value: 0.01
359
+ name: Cosine Ndcg@10
360
+ - type: cosine_mrr@10
361
+ value: 0.01
362
+ name: Cosine Mrr@10
363
+ - type: cosine_map@100
364
+ value: 0.027544667112101906
365
+ name: Cosine Map@100
366
+ - task:
367
+ type: information-retrieval
368
+ name: Information Retrieval
369
+ dataset:
370
+ name: dim 64
371
+ type: dim_64
372
+ metrics:
373
+ - type: cosine_accuracy@1
374
+ value: 0.05
375
+ name: Cosine Accuracy@1
376
+ - type: cosine_accuracy@3
377
+ value: 0.05
378
+ name: Cosine Accuracy@3
379
+ - type: cosine_accuracy@5
380
+ value: 0.05
381
+ name: Cosine Accuracy@5
382
+ - type: cosine_accuracy@10
383
+ value: 0.07
384
+ name: Cosine Accuracy@10
385
+ - type: cosine_precision@1
386
+ value: 0.05
387
+ name: Cosine Precision@1
388
+ - type: cosine_precision@3
389
+ value: 0.05
390
+ name: Cosine Precision@3
391
+ - type: cosine_precision@5
392
+ value: 0.05
393
+ name: Cosine Precision@5
394
+ - type: cosine_precision@10
395
+ value: 0.035
396
+ name: Cosine Precision@10
397
+ - type: cosine_recall@1
398
+ value: 0.01
399
+ name: Cosine Recall@1
400
+ - type: cosine_recall@3
401
+ value: 0.03
402
+ name: Cosine Recall@3
403
+ - type: cosine_recall@5
404
+ value: 0.05
405
+ name: Cosine Recall@5
406
+ - type: cosine_recall@10
407
+ value: 0.07
408
+ name: Cosine Recall@10
409
+ - type: cosine_ndcg@10
410
+ value: 0.06081989035557797
411
+ name: Cosine Ndcg@10
412
+ - type: cosine_mrr@10
413
+ value: 0.05333333333333334
414
+ name: Cosine Mrr@10
415
+ - type: cosine_map@100
416
+ value: 0.0838507480466874
417
+ name: Cosine Map@100
418
+ ---
419
+
420
+ # CodeBERT Fine-tuned on CrewAI (LR=2e-05)
421
+
422
+ This is a [sentence-transformers](https://www.SBERT.net) model fine-tuned from [microsoft/codebert-base](https://huggingface.co/microsoft/codebert-base). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
423
+
424
+ ## Model Details
425
+
426
+ ### Model Description
427
+ - **Model Type:** Sentence Transformer
428
+ - **Base model:** [microsoft/codebert-base](https://huggingface.co/microsoft/codebert-base) <!-- at revision 3b0952feddeffad0063f274080e3c23d75e7eb39 -->
429
+ - **Maximum Sequence Length:** 512 tokens
430
+ - **Output Dimensionality:** 768 dimensions
431
+ - **Similarity Function:** Cosine Similarity
432
+ <!-- - **Training Dataset:** Unknown -->
433
+ - **Language:** en
434
+ - **License:** apache-2.0
435
+
436
+ ### Model Sources
437
+
438
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
439
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/huggingface/sentence-transformers)
440
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
441
+
442
+ ### Full Model Architecture
443
+
444
+ ```
445
+ SentenceTransformer(
446
+ (0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'RobertaModel'})
447
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
448
+ )
449
+ ```
450
+
451
+ ## Usage
452
+
453
+ ### Direct Usage (Sentence Transformers)
454
+
455
+ First install the Sentence Transformers library:
456
+
457
+ ```bash
458
+ pip install -U sentence-transformers
459
+ ```
460
+
461
+ Then you can load this model and run inference.
462
+ ```python
463
+ from sentence_transformers import SentenceTransformer
464
+
465
+ # Download from the 🤗 Hub
466
+ model = SentenceTransformer("itsanan/codebert-finetuned-crewai-base")
467
+ # Run inference
468
+ sentences = [
469
+ 'Example usage of test_status_code_and_content_type',
470
+ 'def test_status_code_and_content_type(self, mock_bs, mock_get):\n for status in [200, 201, 301]:\n mock_get.return_value = self.setup_mock_response(\n f"<html><body>Status {status}</body></html>", status_code=status\n )\n mock_bs.return_value = self.setup_mock_soup(f"Status {status}")\n result = WebPageLoader().load(\n SourceContent(f"https://example.com/{status}")\n )\n assert result.metadata["status_code"] == status\n\n for ctype in ["text/html", "text/plain", "application/xhtml+xml"]:\n mock_get.return_value = self.setup_mock_response(\n "<html><body>Content</body></html>", content_type=ctype\n )\n mock_bs.return_value = self.setup_mock_soup("Content")\n result = WebPageLoader().load(SourceContent("https://example.com"))\n assert result.metadata["content_type"] == ctype',
471
+ 'def set_crew(self, crew: Any) -> Memory:\n """Set the crew for this memory instance."""\n self.crew = crew\n return self',
472
+ ]
473
+ embeddings = model.encode(sentences)
474
+ print(embeddings.shape)
475
+ # (3, 768)
476
+
477
+ # Get the similarity scores for the embeddings
478
+ similarities = model.similarity(embeddings, embeddings)
479
+ print(similarities)
480
+ # tensor([[1.0000, 0.9009, 0.9087],
481
+ # [0.9009, 1.0000, 0.9053],
482
+ # [0.9087, 0.9053, 1.0000]])
483
+ ```
484
+
485
+ <!--
486
+ ### Direct Usage (Transformers)
487
+
488
+ <details><summary>Click to see the direct usage in Transformers</summary>
489
+
490
+ </details>
491
+ -->
492
+
493
+ <!--
494
+ ### Downstream Usage (Sentence Transformers)
495
+
496
+ You can finetune this model on your own dataset.
497
+
498
+ <details><summary>Click to expand</summary>
499
+
500
+ </details>
501
+ -->
502
+
503
+ <!--
504
+ ### Out-of-Scope Use
505
+
506
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
507
+ -->
508
+
509
+ ## Evaluation
510
+
511
+ ### Metrics
512
+
513
+ #### Information Retrieval
514
+
515
+ * Dataset: `dim_768`
516
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
517
+ ```json
518
+ {
519
+ "truncate_dim": 768
520
+ }
521
+ ```
522
+
523
+ | Metric | Value |
524
+ |:--------------------|:-----------|
525
+ | cosine_accuracy@1 | 0.04 |
526
+ | cosine_accuracy@3 | 0.04 |
527
+ | cosine_accuracy@5 | 0.04 |
528
+ | cosine_accuracy@10 | 0.06 |
529
+ | cosine_precision@1 | 0.04 |
530
+ | cosine_precision@3 | 0.04 |
531
+ | cosine_precision@5 | 0.04 |
532
+ | cosine_precision@10 | 0.03 |
533
+ | cosine_recall@1 | 0.008 |
534
+ | cosine_recall@3 | 0.024 |
535
+ | cosine_recall@5 | 0.04 |
536
+ | cosine_recall@10 | 0.06 |
537
+ | **cosine_ndcg@10** | **0.0508** |
538
+ | cosine_mrr@10 | 0.0433 |
539
+ | cosine_map@100 | 0.0613 |
540
+
541
+ #### Information Retrieval
542
+
543
+ * Dataset: `dim_512`
544
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
545
+ ```json
546
+ {
547
+ "truncate_dim": 512
548
+ }
549
+ ```
550
+
551
+ | Metric | Value |
552
+ |:--------------------|:---------|
553
+ | cosine_accuracy@1 | 0.01 |
554
+ | cosine_accuracy@3 | 0.01 |
555
+ | cosine_accuracy@5 | 0.01 |
556
+ | cosine_accuracy@10 | 0.01 |
557
+ | cosine_precision@1 | 0.01 |
558
+ | cosine_precision@3 | 0.01 |
559
+ | cosine_precision@5 | 0.01 |
560
+ | cosine_precision@10 | 0.005 |
561
+ | cosine_recall@1 | 0.002 |
562
+ | cosine_recall@3 | 0.006 |
563
+ | cosine_recall@5 | 0.01 |
564
+ | cosine_recall@10 | 0.01 |
565
+ | **cosine_ndcg@10** | **0.01** |
566
+ | cosine_mrr@10 | 0.01 |
567
+ | cosine_map@100 | 0.0193 |
568
+
569
+ #### Information Retrieval
570
+
571
+ * Dataset: `dim_256`
572
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
573
+ ```json
574
+ {
575
+ "truncate_dim": 256
576
+ }
577
+ ```
578
+
579
+ | Metric | Value |
580
+ |:--------------------|:-----------|
581
+ | cosine_accuracy@1 | 0.01 |
582
+ | cosine_accuracy@3 | 0.01 |
583
+ | cosine_accuracy@5 | 0.01 |
584
+ | cosine_accuracy@10 | 0.03 |
585
+ | cosine_precision@1 | 0.01 |
586
+ | cosine_precision@3 | 0.01 |
587
+ | cosine_precision@5 | 0.01 |
588
+ | cosine_precision@10 | 0.015 |
589
+ | cosine_recall@1 | 0.002 |
590
+ | cosine_recall@3 | 0.006 |
591
+ | cosine_recall@5 | 0.01 |
592
+ | cosine_recall@10 | 0.03 |
593
+ | **cosine_ndcg@10** | **0.0208** |
594
+ | cosine_mrr@10 | 0.0133 |
595
+ | cosine_map@100 | 0.029 |
596
+
597
+ #### Information Retrieval
598
+
599
+ * Dataset: `dim_128`
600
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
601
+ ```json
602
+ {
603
+ "truncate_dim": 128
604
+ }
605
+ ```
606
+
607
+ | Metric | Value |
608
+ |:--------------------|:---------|
609
+ | cosine_accuracy@1 | 0.01 |
610
+ | cosine_accuracy@3 | 0.01 |
611
+ | cosine_accuracy@5 | 0.01 |
612
+ | cosine_accuracy@10 | 0.01 |
613
+ | cosine_precision@1 | 0.01 |
614
+ | cosine_precision@3 | 0.01 |
615
+ | cosine_precision@5 | 0.01 |
616
+ | cosine_precision@10 | 0.005 |
617
+ | cosine_recall@1 | 0.002 |
618
+ | cosine_recall@3 | 0.006 |
619
+ | cosine_recall@5 | 0.01 |
620
+ | cosine_recall@10 | 0.01 |
621
+ | **cosine_ndcg@10** | **0.01** |
622
+ | cosine_mrr@10 | 0.01 |
623
+ | cosine_map@100 | 0.0275 |
624
+
625
+ #### Information Retrieval
626
+
627
+ * Dataset: `dim_64`
628
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
629
+ ```json
630
+ {
631
+ "truncate_dim": 64
632
+ }
633
+ ```
634
+
635
+ | Metric | Value |
636
+ |:--------------------|:-----------|
637
+ | cosine_accuracy@1 | 0.05 |
638
+ | cosine_accuracy@3 | 0.05 |
639
+ | cosine_accuracy@5 | 0.05 |
640
+ | cosine_accuracy@10 | 0.07 |
641
+ | cosine_precision@1 | 0.05 |
642
+ | cosine_precision@3 | 0.05 |
643
+ | cosine_precision@5 | 0.05 |
644
+ | cosine_precision@10 | 0.035 |
645
+ | cosine_recall@1 | 0.01 |
646
+ | cosine_recall@3 | 0.03 |
647
+ | cosine_recall@5 | 0.05 |
648
+ | cosine_recall@10 | 0.07 |
649
+ | **cosine_ndcg@10** | **0.0608** |
650
+ | cosine_mrr@10 | 0.0533 |
651
+ | cosine_map@100 | 0.0839 |
652
+
653
+ <!--
654
+ ## Bias, Risks and Limitations
655
+
656
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
657
+ -->
658
+
659
+ <!--
660
+ ### Recommendations
661
+
662
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
663
+ -->
664
+
665
+ ## Training Details
666
+
667
+ ### Training Dataset
668
+
669
+ #### Unnamed Dataset
670
+
671
+ * Size: 900 training samples
672
+ * Columns: <code>anchor</code> and <code>positive</code>
673
+ * Approximate statistics based on the first 900 samples:
674
+ | | anchor | positive |
675
+ |:--------|:-----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
676
+ | type | string | string |
677
+ | details | <ul><li>min: 6 tokens</li><li>mean: 13.86 tokens</li><li>max: 141 tokens</li></ul> | <ul><li>min: 20 tokens</li><li>mean: 253.07 tokens</li><li>max: 512 tokens</li></ul> |
678
+ * Samples:
679
+ | anchor | positive |
680
+ |:-------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
681
+ | <code>How to implement LLMCallCompletedEvent?</code> | <code>class LLMCallCompletedEvent(LLMEventBase):<br> """Event emitted when a LLM call completes"""<br><br> type: str = "llm_call_completed"<br> messages: str \| list[dict[str, Any]] \| None = None<br> response: Any<br> call_type: LLMCallType<br> model: str \| None = None</code> |
682
+ | <code>How does get_llm_response work in Python?</code> | <code>def get_llm_response(<br> llm: LLM \| BaseLLM,<br> messages: list[LLMMessage],<br> callbacks: list[TokenCalcHandler],<br> printer: Printer,<br> from_task: Task \| None = None,<br> from_agent: Agent \| LiteAgent \| None = None,<br> response_model: type[BaseModel] \| None = None,<br> executor_context: CrewAgentExecutor \| LiteAgent \| None = None,<br>) -> str:<br> """Call the LLM and return the response, handling any invalid responses.<br><br> Args:<br> llm: The LLM instance to call.<br> messages: The messages to send to the LLM.<br> callbacks: List of callbacks for the LLM call.<br> printer: Printer instance for output.<br> from_task: Optional task context for the LLM call.<br> from_agent: Optional agent context for the LLM call.<br> response_model: Optional Pydantic model for structured outputs.<br> executor_context: Optional executor context for hook invocation.<br><br> Returns:<br> The response from the LLM as a string.<br><br> Raises:<br> Exception: If an error ...</code> |
683
+ | <code>Example usage of _run</code> | <code>def _run(<br> self,<br> **kwargs: Any,<br> ) -> Any:<br> website_url: str \| None = kwargs.get("website_url", self.website_url)<br> if website_url is None:<br> raise ValueError("Website URL must be provided.")<br><br> page = requests.get(<br> website_url,<br> timeout=15,<br> headers=self.headers,<br> cookies=self.cookies if self.cookies else {},<br> )<br><br> page.encoding = page.apparent_encoding<br> parsed = BeautifulSoup(page.text, "html.parser")<br><br> text = "The following text is scraped website content:\n\n"<br> text += parsed.get_text(" ")<br> text = re.sub("[ \t]+", " ", text)<br> return re.sub("\\s+\n\\s+", "\n", text)</code> |
684
+ * Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
685
+ ```json
686
+ {
687
+ "loss": "MultipleNegativesRankingLoss",
688
+ "matryoshka_dims": [
689
+ 768,
690
+ 512,
691
+ 256,
692
+ 128,
693
+ 64
694
+ ],
695
+ "matryoshka_weights": [
696
+ 1,
697
+ 1,
698
+ 1,
699
+ 1,
700
+ 1
701
+ ],
702
+ "n_dims_per_step": -1
703
+ }
704
+ ```
705
+
706
+ ### Training Hyperparameters
707
+ #### Non-Default Hyperparameters
708
+
709
+ - `eval_strategy`: steps
710
+ - `per_device_train_batch_size`: 4
711
+ - `per_device_eval_batch_size`: 4
712
+ - `gradient_accumulation_steps`: 32
713
+ - `learning_rate`: 2e-05
714
+ - `weight_decay`: 0.01
715
+ - `num_train_epochs`: 20
716
+ - `lr_scheduler_type`: cosine
717
+ - `warmup_ratio`: 0.1
718
+ - `fp16`: True
719
+ - `load_best_model_at_end`: True
720
+ - `optim`: adamw_torch
721
+ - `batch_sampler`: no_duplicates
722
+
723
+ #### All Hyperparameters
724
+ <details><summary>Click to expand</summary>
725
+
726
+ - `overwrite_output_dir`: False
727
+ - `do_predict`: False
728
+ - `eval_strategy`: steps
729
+ - `prediction_loss_only`: True
730
+ - `per_device_train_batch_size`: 4
731
+ - `per_device_eval_batch_size`: 4
732
+ - `per_gpu_train_batch_size`: None
733
+ - `per_gpu_eval_batch_size`: None
734
+ - `gradient_accumulation_steps`: 32
735
+ - `eval_accumulation_steps`: None
736
+ - `torch_empty_cache_steps`: None
737
+ - `learning_rate`: 2e-05
738
+ - `weight_decay`: 0.01
739
+ - `adam_beta1`: 0.9
740
+ - `adam_beta2`: 0.999
741
+ - `adam_epsilon`: 1e-08
742
+ - `max_grad_norm`: 1.0
743
+ - `num_train_epochs`: 20
744
+ - `max_steps`: -1
745
+ - `lr_scheduler_type`: cosine
746
+ - `lr_scheduler_kwargs`: None
747
+ - `warmup_ratio`: 0.1
748
+ - `warmup_steps`: 0
749
+ - `log_level`: passive
750
+ - `log_level_replica`: warning
751
+ - `log_on_each_node`: True
752
+ - `logging_nan_inf_filter`: True
753
+ - `save_safetensors`: True
754
+ - `save_on_each_node`: False
755
+ - `save_only_model`: False
756
+ - `restore_callback_states_from_checkpoint`: False
757
+ - `no_cuda`: False
758
+ - `use_cpu`: False
759
+ - `use_mps_device`: False
760
+ - `seed`: 42
761
+ - `data_seed`: None
762
+ - `jit_mode_eval`: False
763
+ - `bf16`: False
764
+ - `fp16`: True
765
+ - `fp16_opt_level`: O1
766
+ - `half_precision_backend`: auto
767
+ - `bf16_full_eval`: False
768
+ - `fp16_full_eval`: False
769
+ - `tf32`: None
770
+ - `local_rank`: 0
771
+ - `ddp_backend`: None
772
+ - `tpu_num_cores`: None
773
+ - `tpu_metrics_debug`: False
774
+ - `debug`: []
775
+ - `dataloader_drop_last`: False
776
+ - `dataloader_num_workers`: 0
777
+ - `dataloader_prefetch_factor`: None
778
+ - `past_index`: -1
779
+ - `disable_tqdm`: False
780
+ - `remove_unused_columns`: True
781
+ - `label_names`: None
782
+ - `load_best_model_at_end`: True
783
+ - `ignore_data_skip`: False
784
+ - `fsdp`: []
785
+ - `fsdp_min_num_params`: 0
786
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
787
+ - `fsdp_transformer_layer_cls_to_wrap`: None
788
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
789
+ - `parallelism_config`: None
790
+ - `deepspeed`: None
791
+ - `label_smoothing_factor`: 0.0
792
+ - `optim`: adamw_torch
793
+ - `optim_args`: None
794
+ - `adafactor`: False
795
+ - `group_by_length`: False
796
+ - `length_column_name`: length
797
+ - `project`: huggingface
798
+ - `trackio_space_id`: trackio
799
+ - `ddp_find_unused_parameters`: None
800
+ - `ddp_bucket_cap_mb`: None
801
+ - `ddp_broadcast_buffers`: False
802
+ - `dataloader_pin_memory`: True
803
+ - `dataloader_persistent_workers`: False
804
+ - `skip_memory_metrics`: True
805
+ - `use_legacy_prediction_loop`: False
806
+ - `push_to_hub`: False
807
+ - `resume_from_checkpoint`: None
808
+ - `hub_model_id`: None
809
+ - `hub_strategy`: every_save
810
+ - `hub_private_repo`: None
811
+ - `hub_always_push`: False
812
+ - `hub_revision`: None
813
+ - `gradient_checkpointing`: False
814
+ - `gradient_checkpointing_kwargs`: None
815
+ - `include_inputs_for_metrics`: False
816
+ - `include_for_metrics`: []
817
+ - `eval_do_concat_batches`: True
818
+ - `fp16_backend`: auto
819
+ - `push_to_hub_model_id`: None
820
+ - `push_to_hub_organization`: None
821
+ - `mp_parameters`:
822
+ - `auto_find_batch_size`: False
823
+ - `full_determinism`: False
824
+ - `torchdynamo`: None
825
+ - `ray_scope`: last
826
+ - `ddp_timeout`: 1800
827
+ - `torch_compile`: False
828
+ - `torch_compile_backend`: None
829
+ - `torch_compile_mode`: None
830
+ - `include_tokens_per_second`: False
831
+ - `include_num_input_tokens_seen`: no
832
+ - `neftune_noise_alpha`: None
833
+ - `optim_target_modules`: None
834
+ - `batch_eval_metrics`: False
835
+ - `eval_on_start`: False
836
+ - `use_liger_kernel`: False
837
+ - `liger_kernel_config`: None
838
+ - `eval_use_gather_object`: False
839
+ - `average_tokens_across_devices`: True
840
+ - `prompts`: None
841
+ - `batch_sampler`: no_duplicates
842
+ - `multi_dataset_batch_sampler`: proportional
843
+ - `router_mapping`: {}
844
+ - `learning_rate_mapping`: {}
845
+
846
+ </details>
847
+
848
+ ### Training Logs
849
+ | Epoch | Step | Training Loss | dim_768_cosine_ndcg@10 | dim_512_cosine_ndcg@10 | dim_256_cosine_ndcg@10 | dim_128_cosine_ndcg@10 | dim_64_cosine_ndcg@10 |
850
+ |:----------:|:-----:|:-------------:|:----------------------:|:----------------------:|:----------------------:|:----------------------:|:---------------------:|
851
+ | **0.9956** | **7** | **-** | **0.04** | **0.04** | **0.03** | **0.0262** | **0.0308** |
852
+ | 1.2844 | 10 | 7.098 | - | - | - | - | - |
853
+ | 1.8533 | 14 | - | 0.0362 | 0.02 | 0.0354 | 0.0154 | 0.0508 |
854
+ | 2.5689 | 20 | 6.5515 | - | - | - | - | - |
855
+ | 2.7111 | 21 | - | 0.0508 | 0.01 | 0.0208 | 0.01 | 0.0608 |
856
+
857
+ * The bold row denotes the saved checkpoint.
858
+
859
+ ### Framework Versions
860
+ - Python: 3.12.12
861
+ - Sentence Transformers: 5.2.2
862
+ - Transformers: 4.57.6
863
+ - PyTorch: 2.9.0+cu126
864
+ - Accelerate: 1.12.0
865
+ - Datasets: 4.0.0
866
+ - Tokenizers: 0.22.2
867
+
868
+ ## Citation
869
+
870
+ ### BibTeX
871
+
872
+ #### Sentence Transformers
873
+ ```bibtex
874
+ @inproceedings{reimers-2019-sentence-bert,
875
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
876
+ author = "Reimers, Nils and Gurevych, Iryna",
877
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
878
+ month = "11",
879
+ year = "2019",
880
+ publisher = "Association for Computational Linguistics",
881
+ url = "https://arxiv.org/abs/1908.10084",
882
+ }
883
+ ```
884
+
885
+ #### MatryoshkaLoss
886
+ ```bibtex
887
+ @misc{kusupati2024matryoshka,
888
+ title={Matryoshka Representation Learning},
889
+ author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
890
+ year={2024},
891
+ eprint={2205.13147},
892
+ archivePrefix={arXiv},
893
+ primaryClass={cs.LG}
894
+ }
895
+ ```
896
+
897
+ #### MultipleNegativesRankingLoss
898
+ ```bibtex
899
+ @misc{henderson2017efficient,
900
+ title={Efficient Natural Language Response Suggestion for Smart Reply},
901
+ author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
902
+ year={2017},
903
+ eprint={1705.00652},
904
+ archivePrefix={arXiv},
905
+ primaryClass={cs.CL}
906
+ }
907
+ ```
908
+
909
+ <!--
910
+ ## Glossary
911
+
912
+ *Clearly define terms in order to be accessible across audiences.*
913
+ -->
914
+
915
+ <!--
916
+ ## Model Card Authors
917
+
918
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
919
+ -->
920
+
921
+ <!--
922
+ ## Model Card Contact
923
+
924
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
925
+ -->
config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "RobertaModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 514,
17
+ "model_type": "roberta",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "output_past": true,
21
+ "pad_token_id": 1,
22
+ "position_embedding_type": "absolute",
23
+ "transformers_version": "4.57.6",
24
+ "type_vocab_size": 1,
25
+ "use_cache": true,
26
+ "vocab_size": 50265
27
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "SentenceTransformer",
3
+ "__version__": {
4
+ "sentence_transformers": "5.2.2",
5
+ "transformers": "4.57.6",
6
+ "pytorch": "2.9.0+cu126"
7
+ },
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5537e7b6f78ecfc3d89e96962b30e25d1242b3d138de128fa2dd5cbaf314f8ec
3
+ size 498604904
modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": false,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "extra_special_tokens": {},
51
+ "mask_token": "<mask>",
52
+ "max_length": 512,
53
+ "model_max_length": 512,
54
+ "pad_to_multiple_of": null,
55
+ "pad_token": "<pad>",
56
+ "pad_token_type_id": 0,
57
+ "padding_side": "right",
58
+ "sep_token": "</s>",
59
+ "stride": 0,
60
+ "tokenizer_class": "RobertaTokenizer",
61
+ "trim_offsets": true,
62
+ "truncation_side": "right",
63
+ "truncation_strategy": "longest_first",
64
+ "unk_token": "<unk>"
65
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff