language:
  - en
license: apache-2.0
tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - dense
  - generated_from_trainer
  - dataset_size:900
  - loss:MatryoshkaLoss
  - loss:MultipleNegativesRankingLoss
base_model: shubharuidas/codebert-embed-base-dense-retriever
widget:
  - source_sentence: Best practices for __init__
    sentences:
      - |-
        def close(self) -> None:
                self.sync()
                self.clear()
      - |-
        class MyClass:
                def __call__(self, state):
                    return

                def class_method(self, state):
                    return
      - |-
        def __init__(self, name: str):
                    self.name = name
                    self.lock = threading.Lock()
  - source_sentence: Explain the close logic
    sentences:
      - |-
        def close(self) -> None:
                self.sync()
                self.clear()
      - >-
        def attach_node(self, key: str, node: StateNodeSpec[Any, ContextT] |
        None) -> None:
                if key == START:
                    output_keys = [
                        k
                        for k, v in self.builder.schemas[self.builder.input_schema].items()
                        if not is_managed_value(v)
                    ]
                else:
                    output_keys = list(self.builder.channels) + [
                        k for k, v in self.builder.managed.items()
                    ]

                def _get_updates(
                    input: None | dict | Any,
                ) -> Sequence[tuple[str, Any]] | None:
                    if input is None:
                        return None
                    elif isinstance(input, dict):
                        return [(k, v) for k, v in input.items() if k in output_keys]
                    elif isinstance(input, Command):
                        if input.graph == Command.PARENT:
                            return None
                        return [
                            (k, v) for k, v in input._update_as_tuples() if k in output_keys
                        ]
                    elif (
                        isinstance(input, (list, tuple))
                        and input
                        and any(isinstance(i, Command) for i in input)
                    ):
                        updates: list[tuple[str, Any]] = []
                        for i in input:
                            if isinstance(i, Command):
                                if i.graph == Command.PARENT:
                                    continue
                                updates.extend(
                                    (k, v) for k, v in i._update_as_tuples() if k in output_keys
                                )
                            else:
                                updates.extend(_get_updates(i) or ())
                        return updates
                    elif (t := type(input)) and get_cached_annotated_keys(t):
                        return get_update_as_tuples(input, output_keys)
                    else:
                        msg = create_error_message(
                            message=f"Expected dict, got {input}",
                            error_code=ErrorCode.INVALID_GRAPH_NODE_RETURN_VALUE,
                        )
                        raise InvalidUpdateError(msg)

                # state updaters
                write_entries: tuple[ChannelWriteEntry | ChannelWriteTupleEntry, ...] = (
                    ChannelWriteTupleEntry(
                        mapper=_get_root if output_keys == ["__root__"] else _get_updates
                    ),
                    ChannelWriteTupleEntry(
                        mapper=_control_branch,
                        static=_control_static(node.ends)
                        if node is not None and node.ends is not None
                        else None,
                    ),
                )

                # add node and output channel
                if key == START:
                    self.nodes[key] = PregelNode(
                        tags=[TAG_HIDDEN],
                        triggers=[START],
                        channels=START,
                        writers=[ChannelWrite(write_entries)],
                    )
                elif node is not None:
                    input_schema = node.input_schema if node else self.builder.state_schema
                    input_channels = list(self.builder.schemas[input_schema])
                    is_single_input = len(input_channels) == 1 and "__root__" in input_channels
                    if input_schema in self.schema_to_mapper:
                        mapper = self.schema_to_mapper[input_schema]
                    else:
                        mapper = _pick_mapper(input_channels, input_schema)
                        self.schema_to_mapper[input_schema] = mapper

                    branch_channel = _CHANNEL_BRANCH_TO.format(key)
                    self.channels[branch_channel] = (
                        LastValueAfterFinish(Any)
                        if node.defer
                        else EphemeralValue(Any, guard=False)
                    )
                    self.nodes[key] = PregelNode(
                        triggers=[branch_channel],
                        # read state keys and managed values
                        channels=("__root__" if is_single_input else input_channels),
                        # coerce state dict to schema class (eg. pydantic model)
                        mapper=mapper,
                        # publish to state keys
                        writers=[ChannelWrite(write_entries)],
                        metadata=node.metadata,
                        retry_policy=node.retry_policy,
                        cache_policy=node.cache_policy,
                        bound=node.runnable,  # type: ignore[arg-type]
                    )
                else:
                    raise RuntimeError
      - |-
        def tick(
                self,
                tasks: Iterable[PregelExecutableTask],
                *,
                reraise: bool = True,
                timeout: float | None = None,
                retry_policy: Sequence[RetryPolicy] | None = None,
                get_waiter: Callable[[], concurrent.futures.Future[None]] | None = None,
                schedule_task: Callable[
                    [PregelExecutableTask, int, Call | None],
                    PregelExecutableTask | None,
                ],
            ) -> Iterator[None]:
                tasks = tuple(tasks)
                futures = FuturesDict(
                    callback=weakref.WeakMethod(self.commit),
                    event=threading.Event(),
                    future_type=concurrent.futures.Future,
                )
                # give control back to the caller
                yield
                # fast path if single task with no timeout and no waiter
                if len(tasks) == 0:
                    return
                elif len(tasks) == 1 and timeout is None and get_waiter is None:
                    t = tasks[0]
                    try:
                        run_with_retry(
                            t,
                            retry_policy,
                            configurable={
                                CONFIG_KEY_CALL: partial(
                                    _call,
                                    weakref.ref(t),
                                    retry_policy=retry_policy,
                                    futures=weakref.ref(futures),
                                    schedule_task=schedule_task,
                                    submit=self.submit,
                                ),
                            },
                        )
                        self.commit(t, None)
                    except Exception as exc:
                        self.commit(t, exc)
                        if reraise and futures:
                            # will be re-raised after futures are done
                            fut: concurrent.futures.Future = concurrent.futures.Future()
                            fut.set_exception(exc)
                            futures.done.add(fut)
                        elif reraise:
                            if tb := exc.__traceback__:
                                while tb.tb_next is not None and any(
                                    tb.tb_frame.f_code.co_filename.endswith(name)
                                    for name in EXCLUDED_FRAME_FNAMES
                                ):
                                    tb = tb.tb_next
                                exc.__traceback__ = tb
                            raise
                    if not futures:  # maybe `t` scheduled another task
                        return
                    else:
                        tasks = ()  # don't reschedule this task
                # add waiter task if requested
                if get_waiter is not None:
                    futures[get_waiter()] = None
                # schedule tasks
                for t in tasks:
                    fut = self.submit()(  # type: ignore[misc]
                        run_with_retry,
                        t,
                        retry_policy,
                        configurable={
                            CONFIG_KEY_CALL: partial(
                                _call,
                                weakref.ref(t),
                                retry_policy=retry_policy,
                                futures=weakref.ref(futures),
                                schedule_task=schedule_task,
                                submit=self.submit,
                            ),
                        },
                        __reraise_on_exit__=reraise,
                    )
                    futures[fut] = t
                # execute tasks, and wait for one to fail or all to finish.
                # each task is independent from all other concurrent tasks
                # yield updates/debug output as each task finishes
                end_time = timeout + time.monotonic() if timeout else None
                while len(futures) > (1 if get_waiter is not None else 0):
                    done, inflight = concurrent.futures.wait(
                        futures,
                        return_when=concurrent.futures.FIRST_COMPLETED,
                        timeout=(max(0, end_time - time.monotonic()) if end_time else None),
                    )
                    if not done:
                        break  # timed out
                    for fut in done:
                        task = futures.pop(fut)
                        if task is None:
                            # waiter task finished, schedule another
                            if inflight and get_waiter is not None:
                                futures[get_waiter()] = None
                    else:
                        # remove references to loop vars
                        del fut, task
                    # maybe stop other tasks
                    if _should_stop_others(done):
                        break
                    # give control back to the caller
                    yield
                # wait for done callbacks
                futures.event.wait(
                    timeout=(max(0, end_time - time.monotonic()) if end_time else None)
                )
                # give control back to the caller
                yield
                # panic on failure or timeout
                try:
                    _panic_or_proceed(
                        futures.done.union(f for f, t in futures.items() if t is not None),
                        panic=reraise,
                    )
                except Exception as exc:
                    if tb := exc.__traceback__:
                        while tb.tb_next is not None and any(
                            tb.tb_frame.f_code.co_filename.endswith(name)
                            for name in EXCLUDED_FRAME_FNAMES
                        ):
                            tb = tb.tb_next
                        exc.__traceback__ = tb
                    raise
  - source_sentence: Explain the async aupdate_state logic
    sentences:
      - |-
        class MyClass:
                def __call__(self, state):
                    return

                def class_method(self, state):
                    return
      - |-
        async def aupdate_state(
                self,
                config: RunnableConfig,
                values: dict[str, Any] | Any | None,
                as_node: str | None = None,
                *,
                headers: dict[str, str] | None = None,
                params: QueryParamTypes | None = None,
            ) -> RunnableConfig:
                """Update the state of a thread.

                This method calls `POST /threads/{thread_id}/state`.

                Args:
                    config: A `RunnableConfig` that includes `thread_id` in the
                        `configurable` field.
                    values: Values to update to the state.
                    as_node: Update the state as if this node had just executed.

                Returns:
                    `RunnableConfig` for the updated thread.
                """
                client = self._validate_client()
                merged_config = merge_configs(self.config, config)

                response: dict = await client.threads.update_state(  # type: ignore
                    thread_id=merged_config["configurable"]["thread_id"],
                    values=values,
                    as_node=as_node,
                    checkpoint=self._get_checkpoint(merged_config),
                    headers=headers,
                    params=params,
                )
                return self._get_config(response["checkpoint"])
      - |-
        def __init__(self, typ: Any, guard: bool = True) -> None:
                super().__init__(typ)
                self.guard = guard
                self.value = MISSING
  - source_sentence: How to implement langchain_to_openai_messages?
    sentences:
      - |-
        def __init__(
                self,
                message: str,
                *args: object,
                since: tuple[int, int],
                expected_removal: tuple[int, int] | None = None,
            ) -> None:
                super().__init__(message, *args)
                self.message = message.rstrip(".")
                self.since = since
                self.expected_removal = (
                    expected_removal if expected_removal is not None else (since[0] + 1, 0)
                )
      - |-
        def test_batch_get_ops(store: PostgresStore) -> None:
            # Setup test data
            store.put(("test",), "key1", {"data": "value1"})
            store.put(("test",), "key2", {"data": "value2"})

            ops = [
                GetOp(namespace=("test",), key="key1"),
                GetOp(namespace=("test",), key="key2"),
                GetOp(namespace=("test",), key="key3"),  # Non-existent key
            ]

            results = store.batch(ops)

            assert len(results) == 3
            assert results[0] is not None
            assert results[1] is not None
            assert results[2] is None
            assert results[0].key == "key1"
            assert results[1].key == "key2"
      - |-
        def langchain_to_openai_messages(messages: List[BaseMessage]):
            """
            Convert a list of langchain base messages to a list of openai messages.

            Parameters:
                messages (List[BaseMessage]): A list of langchain base messages.

            Returns:
                List[dict]: A list of openai messages.
            """

            return [
                convert_message_to_dict(m) if isinstance(m, BaseMessage) else m
                for m in messages
            ]
  - source_sentence: Explain the CheckpointPayload logic
    sentences:
      - |-
        class LocalDeps(NamedTuple):
            """A container for referencing and managing local Python dependencies.

            A "local dependency" is any entry in the config's `dependencies` list
            that starts with "." (dot), denoting a relative path
            to a local directory containing Python code.

            For each local dependency, the system inspects its directory to
            determine how it should be installed inside the Docker container.

            Specifically, we detect:

            - **Real packages**: Directories containing a `pyproject.toml` or a `setup.py`.
              These can be installed with pip as a regular Python package.
            - **Faux packages**: Directories that do not include a `pyproject.toml` or
              `setup.py` but do contain Python files and possibly an `__init__.py`. For
              these, the code dynamically generates a minimal `pyproject.toml` in the
              Docker image so that they can still be installed with pip.
            - **Requirements files**: If a local dependency directory
              has a `requirements.txt`, it is tracked so that those dependencies
              can be installed within the Docker container before installing the local package.

            Attributes:
                pip_reqs: A list of (host_requirements_path, container_requirements_path)
                    tuples. Each entry points to a local `requirements.txt` file and where
                    it should be placed inside the Docker container before running `pip install`.

                real_pkgs: A dictionary mapping a local directory path (host side) to a
                    tuple of (dependency_string, container_package_path). These directories
                    contain the necessary files (e.g., `pyproject.toml` or `setup.py`) to be
                    installed as a standard Python package with pip.

                faux_pkgs: A dictionary mapping a local directory path (host side) to a
                    tuple of (dependency_string, container_package_path). For these
                    directories—called "faux packages"—the code will generate a minimal
                    `pyproject.toml` inside the Docker image. This ensures that pip
                    recognizes them as installable packages, even though they do not
                    natively include packaging metadata.

                working_dir: The path inside the Docker container to use as the working
                    directory. If the local dependency `"."` is present in the config, this
                    field captures the path where that dependency will appear in the
                    container (e.g., `/deps/<name>` or similar). Otherwise, it may be `None`.

                additional_contexts: A list of paths to directories that contain local
                    dependencies in parent directories. These directories are added to the
                    Docker build context to ensure that the Dockerfile can access them.
            """

            pip_reqs: list[tuple[pathlib.Path, str]]
            real_pkgs: dict[pathlib.Path, tuple[str, str]]
            faux_pkgs: dict[pathlib.Path, tuple[str, str]]
            # if . is in dependencies, use it as working_dir
            working_dir: str | None = None
            # if there are local dependencies in parent directories, use additional_contexts
            additional_contexts: list[pathlib.Path] = None
      - |-
        class CheckpointPayload(TypedDict):
            config: RunnableConfig | None
            metadata: CheckpointMetadata
            values: dict[str, Any]
            next: list[str]
            parent_config: RunnableConfig | None
            tasks: list[CheckpointTask]
      - |-
        class _RuntimeOverrides(TypedDict, Generic[ContextT], total=False):
            context: ContextT
            store: BaseStore | None
            stream_writer: StreamWriter
            previous: Any
pipeline_tag: sentence-similarity
library_name: sentence-transformers
metrics:
  - cosine_accuracy@1
  - cosine_accuracy@3
  - cosine_accuracy@5
  - cosine_accuracy@10
  - cosine_precision@1
  - cosine_precision@3
  - cosine_precision@5
  - cosine_precision@10
  - cosine_recall@1
  - cosine_recall@3
  - cosine_recall@5
  - cosine_recall@10
  - cosine_ndcg@10
  - cosine_mrr@10
  - cosine_map@100
model-index:
  - name: codeBert dense retriever
    results:
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 768
          type: dim_768
        metrics:
          - type: cosine_accuracy@1
            value: 0.84
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0.84
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0.84
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.93
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0.84
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0.84
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0.84
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.465
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0.168
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0.504
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0.84
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.93
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.8886895066001008
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.855
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.877942533867708
            name: Cosine Map@100
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 512
          type: dim_512
        metrics:
          - type: cosine_accuracy@1
            value: 0.88
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0.88
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0.88
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.93
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0.88
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0.88
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0.88
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.465
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0.176
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0.528
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0.88
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.93
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.907049725888945
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.8883333333333333
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.9038835868016827
            name: Cosine Map@100
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 256
          type: dim_256
        metrics:
          - type: cosine_accuracy@1
            value: 0.87
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0.87
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0.87
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.92
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0.87
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0.87
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0.87
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.46
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0.174
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0.522
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0.87
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.92
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.8970497258889449
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.8783333333333334
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.8959313741265157
            name: Cosine Map@100
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 128
          type: dim_128
        metrics:
          - type: cosine_accuracy@1
            value: 0.86
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0.86
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0.86
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.95
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0.86
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0.86
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0.86
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.475
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0.172
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0.516
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0.86
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.95
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.9086895066001008
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.875
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.8949791356739454
            name: Cosine Map@100
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 64
          type: dim_64
        metrics:
          - type: cosine_accuracy@1
            value: 0.84
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0.84
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0.84
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.93
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0.84
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0.84
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0.84
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.465
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0.168
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0.504
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0.84
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.93
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.8886895066001008
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.855
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.8791923582191525
            name: Cosine Map@100

codeBert dense retriever

This is a sentence-transformers model finetuned from shubharuidas/codebert-embed-base-dense-retriever. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

Model Details

Model Description

  • Model Type: Sentence Transformer
  • Base model: shubharuidas/codebert-embed-base-dense-retriever
  • Maximum Sequence Length: 512 tokens
  • Output Dimensionality: 768 dimensions
  • Similarity Function: Cosine Similarity
  • Language: en
  • License: apache-2.0

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'RobertaModel'})
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
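The Pooling module above averages token embeddings over non-padding positions (`pooling_mode_mean_tokens: True`). A minimal numpy sketch of that mean-pooling operation on hypothetical toy values (not the library's internal code):

```python
import numpy as np

def mean_pooling(token_embeddings: np.ndarray, attention_mask: np.ndarray) -> np.ndarray:
    """Average token embeddings over non-padding positions.

    token_embeddings: (batch, seq_len, dim)
    attention_mask:   (batch, seq_len) with 1 for real tokens, 0 for padding
    """
    mask = attention_mask[..., None].astype(token_embeddings.dtype)  # (batch, seq_len, 1)
    summed = (token_embeddings * mask).sum(axis=1)                   # (batch, dim)
    counts = np.clip(mask.sum(axis=1), 1e-9, None)                   # avoid division by zero
    return summed / counts

# Toy example: batch of 1, seq_len 3 (last position is padding), dim 2
emb = np.array([[[1.0, 2.0], [3.0, 4.0], [100.0, 100.0]]])
mask = np.array([[1, 1, 0]])
print(mean_pooling(emb, mask))  # [[2. 3.]] — padding position is ignored
```

Because padded positions are masked out, the pooled sentence embedding depends only on real tokens, regardless of batch padding length.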

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

Then you can load this model and run inference.

from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("shubharuidas/codebert-base-code-embed-mrl-langchain-langgraph")
# Run inference
sentences = [
    'Explain the CheckpointPayload logic',
    'class CheckpointPayload(TypedDict):\n    config: RunnableConfig | None\n    metadata: CheckpointMetadata\n    values: dict[str, Any]\n    next: list[str]\n    parent_config: RunnableConfig | None\n    tasks: list[CheckpointTask]',
    'class _RuntimeOverrides(TypedDict, Generic[ContextT], total=False):\n    context: ContextT\n    store: BaseStore | None\n    stream_writer: StreamWriter\n    previous: Any',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities)
# tensor([[1.0000, 0.7282, 0.2122],
#         [0.7282, 1.0000, 0.3511],
#         [0.2122, 0.3511, 1.0000]])
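Since the model was trained with MatryoshkaLoss and reports metrics at 768/512/256/128/64 dimensions, the full 768-d embeddings can be truncated to a smaller prefix and re-normalized before computing cosine similarity. A minimal numpy sketch of that post-processing step (independent of the model itself; the random vectors stand in for real embeddings):

```python
import numpy as np

def truncate_and_normalize(embeddings: np.ndarray, dim: int) -> np.ndarray:
    """Keep the first `dim` components of each embedding, then L2-normalize."""
    truncated = embeddings[:, :dim]
    norms = np.linalg.norm(truncated, axis=1, keepdims=True)
    return truncated / np.clip(norms, 1e-12, None)

# Hypothetical 768-d embeddings for 3 sentences
rng = np.random.default_rng(0)
full = rng.standard_normal((3, 768))

small = truncate_and_normalize(full, 256)
print(small.shape)  # (3, 256)

# Cosine similarity is now a plain dot product of the normalized vectors
sims = small @ small.T
print(np.allclose(np.diag(sims), 1.0))  # True
```

Recent versions of Sentence Transformers also accept a `truncate_dim` argument when loading the model, which applies the same truncation automatically at encode time.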

Evaluation

Metrics

Information Retrieval (dataset: dim_768)

| Metric               | Value  |
|:---------------------|-------:|
| cosine_accuracy@1    | 0.84   |
| cosine_accuracy@3    | 0.84   |
| cosine_accuracy@5    | 0.84   |
| cosine_accuracy@10   | 0.93   |
| cosine_precision@1   | 0.84   |
| cosine_precision@3   | 0.84   |
| cosine_precision@5   | 0.84   |
| cosine_precision@10  | 0.465  |
| cosine_recall@1      | 0.168  |
| cosine_recall@3      | 0.504  |
| cosine_recall@5      | 0.84   |
| cosine_recall@10     | 0.93   |
| cosine_ndcg@10       | 0.8887 |
| cosine_mrr@10        | 0.855  |
| cosine_map@100       | 0.8779 |

Information Retrieval (dataset: dim_512)

| Metric               | Value  |
|:---------------------|-------:|
| cosine_accuracy@1    | 0.88   |
| cosine_accuracy@3    | 0.88   |
| cosine_accuracy@5    | 0.88   |
| cosine_accuracy@10   | 0.93   |
| cosine_precision@1   | 0.88   |
| cosine_precision@3   | 0.88   |
| cosine_precision@5   | 0.88   |
| cosine_precision@10  | 0.465  |
| cosine_recall@1      | 0.176  |
| cosine_recall@3      | 0.528  |
| cosine_recall@5      | 0.88   |
| cosine_recall@10     | 0.93   |
| cosine_ndcg@10       | 0.907  |
| cosine_mrr@10        | 0.8883 |
| cosine_map@100       | 0.9039 |

Information Retrieval (dataset: dim_256)

| Metric               | Value  |
|:---------------------|-------:|
| cosine_accuracy@1    | 0.87   |
| cosine_accuracy@3    | 0.87   |
| cosine_accuracy@5    | 0.87   |
| cosine_accuracy@10   | 0.92   |
| cosine_precision@1   | 0.87   |
| cosine_precision@3   | 0.87   |
| cosine_precision@5   | 0.87   |
| cosine_precision@10  | 0.46   |
| cosine_recall@1      | 0.174  |
| cosine_recall@3      | 0.522  |
| cosine_recall@5      | 0.87   |
| cosine_recall@10     | 0.92   |
| cosine_ndcg@10       | 0.897  |
| cosine_mrr@10        | 0.8783 |
| cosine_map@100       | 0.8959 |

#### Information Retrieval (`dim_128`)

| Metric | Value |
|:-------------------|-------:|
| cosine_accuracy@1 | 0.86 |
| cosine_accuracy@3 | 0.86 |
| cosine_accuracy@5 | 0.86 |
| cosine_accuracy@10 | 0.95 |
| cosine_precision@1 | 0.86 |
| cosine_precision@3 | 0.86 |
| cosine_precision@5 | 0.86 |
| cosine_precision@10 | 0.475 |
| cosine_recall@1 | 0.172 |
| cosine_recall@3 | 0.516 |
| cosine_recall@5 | 0.86 |
| cosine_recall@10 | 0.95 |
| cosine_ndcg@10 | 0.9087 |
| cosine_mrr@10 | 0.875 |
| cosine_map@100 | 0.895 |

#### Information Retrieval (`dim_64`)

| Metric | Value |
|:-------------------|-------:|
| cosine_accuracy@1 | 0.84 |
| cosine_accuracy@3 | 0.84 |
| cosine_accuracy@5 | 0.84 |
| cosine_accuracy@10 | 0.93 |
| cosine_precision@1 | 0.84 |
| cosine_precision@3 | 0.84 |
| cosine_precision@5 | 0.84 |
| cosine_precision@10 | 0.465 |
| cosine_recall@1 | 0.168 |
| cosine_recall@3 | 0.504 |
| cosine_recall@5 | 0.84 |
| cosine_recall@10 | 0.93 |
| cosine_ndcg@10 | 0.8887 |
| cosine_mrr@10 | 0.855 |
| cosine_map@100 | 0.8792 |
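A note on reading these tables: accuracy@k asks whether any relevant snippet appears in the top k, precision@k divides the hits by k, and recall@k divides the hits by the number of relevant snippets per query (recall@1 being exactly one fifth of precision@1 above is consistent with five relevant snippets per query). A toy sketch with a hypothetical ranking and hypothetical document IDs:

```python
def metrics_at_k(ranked_ids, relevant_ids, k):
    """accuracy@k, precision@k, recall@k for a single query."""
    top_k = ranked_ids[:k]
    hits = sum(1 for doc in top_k if doc in relevant_ids)
    accuracy = 1.0 if hits > 0 else 0.0
    precision = hits / k
    recall = hits / len(relevant_ids)
    return accuracy, precision, recall

# Hypothetical query: 5 relevant docs (d1-d5), 4 of them retrieved in the top 5
ranked = ["d1", "d2", "x1", "d3", "d4", "x2", "d5", "x3", "x4", "x5"]
relevant = {"d1", "d2", "d3", "d4", "d5"}
print(metrics_at_k(ranked, relevant, 5))   # (1.0, 0.8, 0.8)
print(metrics_at_k(ranked, relevant, 10))  # (1.0, 0.5, 1.0)
```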

## Training Details

### Training Dataset

#### Unnamed Dataset

  • Size: 900 training samples
  • Columns: <code>anchor</code> and <code>positive</code>
  • Approximate statistics based on the first 900 samples:

    | | anchor | positive |
    |:--|:--|:--|
    | type | string | string |
    | details | min: 6 tokens<br>mean: 13.77 tokens<br>max: 356 tokens | min: 14 tokens<br>mean: 267.71 tokens<br>max: 512 tokens |
  • Samples:

    | anchor | positive |
    |:-------|:---------|
    | <code>How does put_item work in Python?</code> | <code>def put_item(<br>self,<br>namespace: Sequence[str],<br>/,<br>key: str,<br>value: Mapping[str, Any],<br>index: Literal[False] \| list[str] \| None = None,<br>ttl: int \| None = None,<br>headers: Mapping[str, str] \| None = None,<br>params: QueryParamTypes \| None = None,<br>) -> None:<br>"""Store or update an item.<br><br>Args:<br>namespace: A list of strings representing the namespace path.<br>key: The unique identifier for the item within the namespace.<br>value: A dictionary containing the item's data.<br>index: Controls search indexing - None (use defaults), False (disable), or list of field paths to index.<br>ttl: Optional time-to-live in minutes for the item, or None for no expiration.<br>headers: Optional custom headers to include with the request.<br>params: Optional query parameters to include with the request.<br><br>Returns:<br>None<br><br>???+ example...</code> |
    | <code>Explain the RunsClient:<br>"""Client for managing runs in LangGraph.<br><br>A run is a single assistant invocation with optional input, config, context, and metadata.<br>This client manages runs, which can be stateful logic</code> | <code>class RunsClient:<br>"""Client for managing runs in LangGraph.<br><br>A run is a single assistant invocation with optional input, config, context, and metadata.<br>This client manages runs, which can be stateful (on threads) or stateless.<br><br>???+ example "Example"<br><br>python<br> client = get_client(url="http://localhost:2024")<br> run = await client.runs.create(assistant_id="asst_123", thread_id="thread_456", input={"query": "Hello"})<br><br>"""<br><br>def __init__(self, http: HttpClient) -> None:<br>self.http = http<br><br>@overload<br>def stream(<br>self,<br>thread_id: str,<br>assistant_id: str,<br>*,<br>input: Input \| None = None,<br>command: Command \| None = None,<br>stream_mode: StreamMode \| Sequence[StreamMode] = "values",<br>stream_subgraphs: bool = False,<br>stream_resumable: bool = False,<br>metadata: Mapping[str, Any] \| None = None,<br>config: Config \| None = None,<br>context: Context \| N...</code> |
    | <code>Best practices for MyChildDict</code> | <code>class MyChildDict(MyBaseTypedDict):<br>val_11: int<br>val_11b: int \| None<br>val_11c: int \| None \| str</code> |
  • Loss: MatryoshkaLoss with these parameters:

    ```json
    {
        "loss": "MultipleNegativesRankingLoss",
        "matryoshka_dims": [
            768,
            512,
            256,
            128,
            64
        ],
        "matryoshka_weights": [
            1,
            1,
            1,
            1,
            1
        ],
        "n_dims_per_step": -1
    }
    ```
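Because the model is trained with MatryoshkaLoss over dims [768, 512, 256, 128, 64], the leading components of an embedding remain useful on their own: you can truncate to any of those sizes and renormalize before computing cosine similarity. A minimal NumPy sketch (random vectors stand in for real model output):

```python
import numpy as np

def truncate_and_renormalize(embeddings: np.ndarray, dim: int) -> np.ndarray:
    """Keep the first `dim` components of each embedding, then L2-normalize
    so cosine similarities stay well-defined at the smaller size."""
    truncated = embeddings[:, :dim]
    norms = np.linalg.norm(truncated, axis=1, keepdims=True)
    return truncated / norms

# Stand-in for model output: 3 random 768-d embeddings
rng = np.random.default_rng(0)
emb = rng.normal(size=(3, 768))
for dim in (768, 512, 256, 128, 64):
    small = truncate_and_renormalize(emb, dim)
    print(dim, small.shape)
```

sentence-transformers also exposes this directly via the `truncate_dim` argument to `SentenceTransformer`, if you prefer not to truncate manually.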

### Training Hyperparameters

#### Non-Default Hyperparameters

  • eval_strategy: epoch
  • per_device_train_batch_size: 4
  • per_device_eval_batch_size: 4
  • gradient_accumulation_steps: 16
  • learning_rate: 2e-05
  • num_train_epochs: 2
  • lr_scheduler_type: cosine
  • warmup_ratio: 0.1
  • fp16: True
  • load_best_model_at_end: True
  • optim: adamw_torch
  • batch_sampler: no_duplicates

#### All Hyperparameters

  • overwrite_output_dir: False
  • do_predict: False
  • eval_strategy: epoch
  • prediction_loss_only: True
  • per_device_train_batch_size: 4
  • per_device_eval_batch_size: 4
  • per_gpu_train_batch_size: None
  • per_gpu_eval_batch_size: None
  • gradient_accumulation_steps: 16
  • eval_accumulation_steps: None
  • torch_empty_cache_steps: None
  • learning_rate: 2e-05
  • weight_decay: 0.0
  • adam_beta1: 0.9
  • adam_beta2: 0.999
  • adam_epsilon: 1e-08
  • max_grad_norm: 1.0
  • num_train_epochs: 2
  • max_steps: -1
  • lr_scheduler_type: cosine
  • lr_scheduler_kwargs: None
  • warmup_ratio: 0.1
  • warmup_steps: 0
  • log_level: passive
  • log_level_replica: warning
  • log_on_each_node: True
  • logging_nan_inf_filter: True
  • save_safetensors: True
  • save_on_each_node: False
  • save_only_model: False
  • restore_callback_states_from_checkpoint: False
  • no_cuda: False
  • use_cpu: False
  • use_mps_device: False
  • seed: 42
  • data_seed: None
  • jit_mode_eval: False
  • bf16: False
  • fp16: True
  • fp16_opt_level: O1
  • half_precision_backend: auto
  • bf16_full_eval: False
  • fp16_full_eval: False
  • tf32: None
  • local_rank: 0
  • ddp_backend: None
  • tpu_num_cores: None
  • tpu_metrics_debug: False
  • debug: []
  • dataloader_drop_last: False
  • dataloader_num_workers: 0
  • dataloader_prefetch_factor: None
  • past_index: -1
  • disable_tqdm: False
  • remove_unused_columns: True
  • label_names: None
  • load_best_model_at_end: True
  • ignore_data_skip: False
  • fsdp: []
  • fsdp_min_num_params: 0
  • fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
  • fsdp_transformer_layer_cls_to_wrap: None
  • accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
  • parallelism_config: None
  • deepspeed: None
  • label_smoothing_factor: 0.0
  • optim: adamw_torch
  • optim_args: None
  • adafactor: False
  • group_by_length: False
  • length_column_name: length
  • project: huggingface
  • trackio_space_id: trackio
  • ddp_find_unused_parameters: None
  • ddp_bucket_cap_mb: None
  • ddp_broadcast_buffers: False
  • dataloader_pin_memory: True
  • dataloader_persistent_workers: False
  • skip_memory_metrics: True
  • use_legacy_prediction_loop: False
  • push_to_hub: False
  • resume_from_checkpoint: None
  • hub_model_id: None
  • hub_strategy: every_save
  • hub_private_repo: None
  • hub_always_push: False
  • hub_revision: None
  • gradient_checkpointing: False
  • gradient_checkpointing_kwargs: None
  • include_inputs_for_metrics: False
  • include_for_metrics: []
  • eval_do_concat_batches: True
  • fp16_backend: auto
  • push_to_hub_model_id: None
  • push_to_hub_organization: None
  • mp_parameters:
  • auto_find_batch_size: False
  • full_determinism: False
  • torchdynamo: None
  • ray_scope: last
  • ddp_timeout: 1800
  • torch_compile: False
  • torch_compile_backend: None
  • torch_compile_mode: None
  • include_tokens_per_second: False
  • include_num_input_tokens_seen: no
  • neftune_noise_alpha: None
  • optim_target_modules: None
  • batch_eval_metrics: False
  • eval_on_start: False
  • use_liger_kernel: False
  • liger_kernel_config: None
  • eval_use_gather_object: False
  • average_tokens_across_devices: True
  • prompts: None
  • batch_sampler: no_duplicates
  • multi_dataset_batch_sampler: proportional
  • router_mapping: {}
  • learning_rate_mapping: {}

### Training Logs

| Epoch | Step | Training Loss | dim_768_cosine_ndcg@10 | dim_512_cosine_ndcg@10 | dim_256_cosine_ndcg@10 | dim_128_cosine_ndcg@10 | dim_64_cosine_ndcg@10 |
|:------:|:----:|:-------------:|:---:|:---:|:---:|:---:|:---:|
| 0.7111 | 10 | 0.6327 | - | - | - | - | - |
| 1.0 | 15 | - | 0.8970 | 0.8979 | 0.8925 | 0.8979 | 0.8641 |
| 1.3556 | 20 | 0.2227 | - | - | - | - | - |
| **2.0** | **30** | **0.1692** | **0.8887** | **0.907** | **0.897** | **0.9087** | **0.8887** |
  • The bold row denotes the saved checkpoint.
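The step counts follow from the dataset and batch settings: 900 samples with a per-device batch size of 4 and 16 gradient-accumulation steps give an effective batch of 64, i.e. 15 optimizer steps per epoch, matching the epoch boundaries at steps 15 and 30. A quick check:

```python
import math

samples = 900
per_device_batch = 4
grad_accum = 16

micro_batches_per_epoch = math.ceil(samples / per_device_batch)    # 225
steps_per_epoch = math.ceil(micro_batches_per_epoch / grad_accum)  # 15
print(steps_per_epoch, 2 * steps_per_epoch)  # 15 30
```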

### Framework Versions

  • Python: 3.12.12
  • Sentence Transformers: 5.2.0
  • Transformers: 4.57.6
  • PyTorch: 2.9.0+cu126
  • Accelerate: 1.12.0
  • Datasets: 4.0.0
  • Tokenizers: 0.22.2

## Citation

### BibTeX

#### Sentence Transformers

```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### MatryoshkaLoss

```bibtex
@misc{kusupati2024matryoshka,
    title={Matryoshka Representation Learning},
    author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
    year={2024},
    eprint={2205.13147},
    archivePrefix={arXiv},
    primaryClass={cs.LG}
}
```

#### MultipleNegativesRankingLoss

```bibtex
@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
```