anaghaj111 commited on
Commit
aeb55ce
·
verified ·
1 Parent(s): f5a06eb

Add new SentenceTransformer model

Browse files
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,969 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ tags:
6
+ - sentence-transformers
7
+ - sentence-similarity
8
+ - feature-extraction
9
+ - dense
10
+ - generated_from_trainer
11
+ - dataset_size:180
12
+ - loss:MatryoshkaLoss
13
+ - loss:MultipleNegativesRankingLoss
14
+ base_model: shubharuidas/codebert-embed-base-dense-retriever
15
+ widget:
16
+ - source_sentence: Explain the tool1 logic
17
+ sentences:
18
+ - "def stream(\n self,\n thread_id: str,\n assistant_id: str,\n\
19
+ \ *,\n input: Input | None = None,\n command: Command | None\
20
+ \ = None,\n stream_mode: StreamMode | Sequence[StreamMode] = \"values\"\
21
+ ,\n stream_subgraphs: bool = False,\n stream_resumable: bool = False,\n\
22
+ \ metadata: Mapping[str, Any] | None = None,\n config: Config |\
23
+ \ None = None,\n context: Context | None = None,\n checkpoint: Checkpoint\
24
+ \ | None = None,\n checkpoint_id: str | None = None,\n checkpoint_during:\
25
+ \ bool | None = None,\n interrupt_before: All | Sequence[str] | None =\
26
+ \ None,\n interrupt_after: All | Sequence[str] | None = None,\n \
27
+ \ feedback_keys: Sequence[str] | None = None,\n on_disconnect: DisconnectMode\
28
+ \ | None = None,\n webhook: str | None = None,\n multitask_strategy:\
29
+ \ MultitaskStrategy | None = None,\n if_not_exists: IfNotExists | None\
30
+ \ = None,\n after_seconds: int | None = None,\n headers: Mapping[str,\
31
+ \ str] | None = None,\n params: QueryParamTypes | None = None,\n \
32
+ \ on_run_created: Callable[[RunCreateMetadata], None] | None = None,\n ) ->\
33
+ \ AsyncIterator[StreamPart]: ..."
34
+ - "def tool1(some_val: int, some_other_val: str) -> str:\n \"\"\"Tool 1 docstring.\"\
35
+ \"\"\n if some_val == 0:\n msg = \"Test error\"\n raise ValueError(msg)\n\
36
+ \ return f\"{some_val} - {some_other_val}\""
37
+ - "class IndexConfig(TypedDict, total=False):\n \"\"\"Configuration for indexing\
38
+ \ documents for semantic search in the store.\n\n If not provided to the store,\
39
+ \ the store will not support vector search.\n In that case, all `index` arguments\
40
+ \ to `put()` and `aput()` operations will be ignored.\n \"\"\"\n\n dims:\
41
+ \ int\n \"\"\"Number of dimensions in the embedding vectors.\n \n Common\
42
+ \ embedding models have the following dimensions:\n - `openai:text-embedding-3-large`:\
43
+ \ `3072`\n - `openai:text-embedding-3-small`: `1536`\n - `openai:text-embedding-ada-002`:\
44
+ \ `1536`\n - `cohere:embed-english-v3.0`: `1024`\n - `cohere:embed-english-light-v3.0`:\
45
+ \ `384`\n - `cohere:embed-multilingual-v3.0`: `1024`\n - `cohere:embed-multilingual-light-v3.0`:\
46
+ \ `384`\n \"\"\"\n\n embed: Embeddings | EmbeddingsFunc | AEmbeddingsFunc\
47
+ \ | str\n \"\"\"Optional function to generate embeddings from text.\n \n\
48
+ \ Can be specified in three ways:\n 1. A LangChain `Embeddings` instance\n\
49
+ \ 2. A synchronous embedding function (`EmbeddingsFunc`)\n 3. An\
50
+ \ asynchronous embedding function (`AEmbeddingsFunc`)\n 4. A provider string\
51
+ \ (e.g., `\"openai:text-embedding-3-small\"`)\n \n ???+ example \"Examples\"\
52
+ \n\n Using LangChain's initialization with `InMemoryStore`:\n\n \
53
+ \ ```python\n from langchain.embeddings import init_embeddings\n \
54
+ \ from langgraph.store.memory import InMemoryStore\n \n store =\
55
+ \ InMemoryStore(\n index={\n \"dims\": 1536,\n \
56
+ \ \"embed\": init_embeddings(\"openai:text-embedding-3-small\")\n \
57
+ \ }\n )\n ```\n \n Using a custom embedding\
58
+ \ function with `InMemoryStore`:\n\n ```python\n from openai import\
59
+ \ OpenAI\n from langgraph.store.memory import InMemoryStore\n \n\
60
+ \ client = OpenAI()\n \n def embed_texts(texts: list[str])\
61
+ \ -> list[list[float]]:\n response = client.embeddings.create(\n \
62
+ \ model=\"text-embedding-3-small\",\n input=texts\n\
63
+ \ )\n return [e.embedding for e in response.data]\n \
64
+ \ \n store = InMemoryStore(\n index={\n \
65
+ \ \"dims\": 1536,\n \"embed\": embed_texts\n }\n \
66
+ \ )\n ```\n \n Using an asynchronous embedding function\
67
+ \ with `InMemoryStore`:\n\n ```python\n from openai import AsyncOpenAI\n\
68
+ \ from langgraph.store.memory import InMemoryStore\n \n client\
69
+ \ = AsyncOpenAI()\n \n async def aembed_texts(texts: list[str])\
70
+ \ -> list[list[float]]:\n response = await client.embeddings.create(\n\
71
+ \ model=\"text-embedding-3-small\",\n input=texts\n\
72
+ \ )\n return [e.embedding for e in response.data]\n \
73
+ \ \n store = InMemoryStore(\n index={\n \
74
+ \ \"dims\": 1536,\n \"embed\": aembed_texts\n }\n\
75
+ \ )\n ```\n \"\"\"\n\n fields: list[str] | None\n \"\"\"\
76
+ Fields to extract text from for embedding generation.\n \n Controls which\
77
+ \ parts of stored items are embedded for semantic search. Follows JSON path syntax:\n\
78
+ \n - `[\"$\"]`: Embeds the entire JSON object as one vector (default)\n \
79
+ \ - `[\"field1\", \"field2\"]`: Embeds specific top-level fields\n - `[\"\
80
+ parent.child\"]`: Embeds nested fields using dot notation\n - `[\"array[*].field\"\
81
+ ]`: Embeds field from each array element separately\n \n Note:\n \
82
+ \ You can always override this behavior when storing an item using the\n \
83
+ \ `index` parameter in the `put` or `aput` operations.\n \n ???+ example\
84
+ \ \"Examples\"\n\n ```python\n # Embed entire document (default)\n\
85
+ \ fields=[\"$\"]\n \n # Embed specific fields\n fields=[\"\
86
+ text\", \"summary\"]\n \n # Embed nested fields\n fields=[\"\
87
+ metadata.title\", \"content.body\"]\n \n # Embed from arrays\n \
88
+ \ fields=[\"messages[*].content\"] # Each message content separately\n\
89
+ \ fields=[\"context[0].text\"] # First context item's text\n \
90
+ \ ```\n \n Note:\n - Fields missing from a document are skipped\n\
91
+ \ - Array notation creates separate embeddings for each element\n \
92
+ \ - Complex nested paths are supported (e.g., `\"a.b[*].c.d\"`)\n \"\"\""
93
+ - source_sentence: Explain the UpdateType logic
94
+ sentences:
95
+ - "def test_subgraph_checkpoint_true(\n sync_checkpointer: BaseCheckpointSaver,\
96
+ \ durability: Durability\n) -> None:\n class InnerState(TypedDict):\n \
97
+ \ my_key: Annotated[str, operator.add]\n my_other_key: str\n\n def\
98
+ \ inner_1(state: InnerState):\n return {\"my_key\": \" got here\", \"my_other_key\"\
99
+ : state[\"my_key\"]}\n\n def inner_2(state: InnerState):\n return {\"\
100
+ my_key\": \" and there\"}\n\n inner = StateGraph(InnerState)\n inner.add_node(\"\
101
+ inner_1\", inner_1)\n inner.add_node(\"inner_2\", inner_2)\n inner.add_edge(\"\
102
+ inner_1\", \"inner_2\")\n inner.set_entry_point(\"inner_1\")\n inner.set_finish_point(\"\
103
+ inner_2\")\n\n class State(TypedDict):\n my_key: str\n\n graph =\
104
+ \ StateGraph(State)\n graph.add_node(\"inner\", inner.compile(checkpointer=True))\n\
105
+ \ graph.add_edge(START, \"inner\")\n graph.add_conditional_edges(\n \
106
+ \ \"inner\", lambda s: \"inner\" if s[\"my_key\"].count(\"there\") < 2 else\
107
+ \ END\n )\n app = graph.compile(checkpointer=sync_checkpointer)\n\n config\
108
+ \ = {\"configurable\": {\"thread_id\": \"2\"}}\n assert [\n c\n \
109
+ \ for c in app.stream(\n {\"my_key\": \"\"}, config, subgraphs=True,\
110
+ \ durability=durability\n )\n ] == [\n ((\"inner\",), {\"inner_1\"\
111
+ : {\"my_key\": \" got here\", \"my_other_key\": \"\"}}),\n ((\"inner\"\
112
+ ,), {\"inner_2\": {\"my_key\": \" and there\"}}),\n ((), {\"inner\": {\"\
113
+ my_key\": \" got here and there\"}}),\n (\n (\"inner\",),\n\
114
+ \ {\n \"inner_1\": {\n \"my_key\"\
115
+ : \" got here\",\n \"my_other_key\": \" got here and there\
116
+ \ got here and there\",\n }\n },\n ),\n \
117
+ \ ((\"inner\",), {\"inner_2\": {\"my_key\": \" and there\"}}),\n (\n\
118
+ \ (),\n {\n \"inner\": {\n \
119
+ \ \"my_key\": \" got here and there got here and there got here and there\"\
120
+ \n }\n },\n ),\n ]\n\n checkpoints = list(app.get_state_history(config))\n\
121
+ \ if durability != \"exit\":\n assert len(checkpoints) == 4\n else:\n\
122
+ \ assert len(checkpoints) == 1"
123
+ - "def is_available(self) -> bool:\n return self.value is not MISSING"
124
+ - "def UpdateType(self) -> type[Value]:\n \"\"\"The type of the update received\
125
+ \ by the channel.\"\"\"\n return self.typ"
126
+ - source_sentence: "Example usage of ToolOutputMixin: # type: ignore[no-redef]\n\
127
+ \ pass"
128
+ sentences:
129
+ - 'def task(__func_or_none__: Callable[P, Awaitable[T]]) -> _TaskFunction[P, T]:
130
+ ...'
131
+ - "def test_graph_with_jitter_retry_policy():\n \"\"\"Test a graph with a RetryPolicy\
132
+ \ that uses jitter.\"\"\"\n\n class State(TypedDict):\n foo: str\n\n\
133
+ \ attempt_count = 0\n\n def failing_node(state):\n nonlocal attempt_count\n\
134
+ \ attempt_count += 1\n if attempt_count < 2: # Fail the first attempt\n\
135
+ \ raise ValueError(\"Intentional failure\")\n return {\"foo\"\
136
+ : \"success\"}\n\n # Create a retry policy with jitter enabled\n retry_policy\
137
+ \ = RetryPolicy(\n max_attempts=3,\n initial_interval=0.01,\n \
138
+ \ jitter=True, # Enable jitter for randomized backoff\n retry_on=ValueError,\n\
139
+ \ )\n\n # Create and compile the graph\n graph = (\n StateGraph(State)\n\
140
+ \ .add_node(\"failing_node\", failing_node, retry_policy=retry_policy)\n\
141
+ \ .add_edge(START, \"failing_node\")\n .compile()\n )\n\n \
142
+ \ # Test graph execution with mocked random and sleep\n with (\n patch(\"\
143
+ random.uniform\", return_value=0.05) as mock_random,\n patch(\"time.sleep\"\
144
+ ) as mock_sleep,\n ):\n result = graph.invoke({\"foo\": \"\"})\n\n \
145
+ \ # Verify retry behavior\n assert attempt_count == 2 # The node should\
146
+ \ have been tried twice\n assert result[\"foo\"] == \"success\"\n\n # Verify\
147
+ \ jitter was applied\n mock_random.assert_called_with(0, 1) # Jitter should\
148
+ \ use random.uniform(0, 1)\n mock_sleep.assert_called_with(0.01 + 0.05)"
149
+ - "class ToolOutputMixin: # type: ignore[no-redef]\n pass"
150
+ - source_sentence: Best practices for async test_async_entrypoint_without_checkpointer
151
+ sentences:
152
+ - "def __init__(\n self,\n assistant_id: str, # graph_id\n \
153
+ \ /,\n *,\n url: str | None = None,\n api_key: str | None\
154
+ \ = None,\n headers: dict[str, str] | None = None,\n client: LangGraphClient\
155
+ \ | None = None,\n sync_client: SyncLangGraphClient | None = None,\n \
156
+ \ config: RunnableConfig | None = None,\n name: str | None = None,\n\
157
+ \ distributed_tracing: bool = False,\n ):\n \"\"\"Specify `url`,\
158
+ \ `api_key`, and/or `headers` to create default sync and async clients.\n\n \
159
+ \ If `client` or `sync_client` are provided, they will be used instead of\
160
+ \ the default clients.\n See `LangGraphClient` and `SyncLangGraphClient`\
161
+ \ for details on the default clients. At least\n one of `url`, `client`,\
162
+ \ or `sync_client` must be provided.\n\n Args:\n assistant_id:\
163
+ \ The assistant ID or graph name of the remote graph to use.\n url:\
164
+ \ The URL of the remote API.\n api_key: The API key to use for authentication.\
165
+ \ If not provided, it will be read from the environment (`LANGGRAPH_API_KEY`,\
166
+ \ `LANGSMITH_API_KEY`, or `LANGCHAIN_API_KEY`).\n headers: Additional\
167
+ \ headers to include in the requests.\n client: A `LangGraphClient`\
168
+ \ instance to use instead of creating a default client.\n sync_client:\
169
+ \ A `SyncLangGraphClient` instance to use instead of creating a default client.\n\
170
+ \ config: An optional `RunnableConfig` instance with additional configuration.\n\
171
+ \ name: Human-readable name to attach to the RemoteGraph instance.\n\
172
+ \ This is useful for adding `RemoteGraph` as a subgraph via `graph.add_node(remote_graph)`.\n\
173
+ \ If not provided, defaults to the assistant ID.\n distributed_tracing:\
174
+ \ Whether to enable sending LangSmith distributed tracing headers.\n \"\
175
+ \"\"\n self.assistant_id = assistant_id\n if name is None:\n \
176
+ \ self.name = assistant_id\n else:\n self.name = name\n\
177
+ \ self.config = config\n self.distributed_tracing = distributed_tracing\n\
178
+ \n if client is None and url is not None:\n client = get_client(url=url,\
179
+ \ api_key=api_key, headers=headers)\n self.client = client\n\n if\
180
+ \ sync_client is None and url is not None:\n sync_client = get_sync_client(url=url,\
181
+ \ api_key=api_key, headers=headers)\n self.sync_client = sync_client"
182
+ - "async def test_async_entrypoint_without_checkpointer() -> None:\n \"\"\"Test\
183
+ \ no checkpointer.\"\"\"\n states = []\n config = {\"configurable\": {\"\
184
+ thread_id\": \"1\"}}\n\n # Test without previous\n @entrypoint()\n async\
185
+ \ def foo(inputs: Any) -> Any:\n states.append(inputs)\n return\
186
+ \ inputs\n\n assert (await foo.ainvoke({\"a\": \"1\"}, config)) == {\"a\":\
187
+ \ \"1\"}\n\n @entrypoint()\n async def foo(inputs: Any, *, previous: Any)\
188
+ \ -> Any:\n states.append(previous)\n return {\"previous\": previous,\
189
+ \ \"current\": inputs}\n\n assert (await foo.ainvoke({\"a\": \"1\"}, config))\
190
+ \ == {\n \"current\": {\"a\": \"1\"},\n \"previous\": None,\n \
191
+ \ }\n assert (await foo.ainvoke({\"a\": \"1\"}, config)) == {\n \"\
192
+ current\": {\"a\": \"1\"},\n \"previous\": None,\n }"
193
+ - "class _InjectedStatePydanticV2Schema(BaseModel):\n messages: list\n foo:\
194
+ \ str"
195
+ - source_sentence: Explain the validate_autoresponse logic
196
+ sentences:
197
+ - "def task_path_str(tup: str | int | tuple) -> str:\n \"\"\"Generate a string\
198
+ \ representation of the task path.\"\"\"\n return (\n f\"~{', '.join(task_path_str(x)\
199
+ \ for x in tup)}\"\n if isinstance(tup, (tuple, list))\n else f\"\
200
+ {tup:010d}\"\n if isinstance(tup, int)\n else str(tup)\n )"
201
+ - "def ValueType(self) -> type[Value]:\n \"\"\"The type of the value stored\
202
+ \ in the channel.\"\"\"\n return self.typ"
203
+ - "def validate_autoresponse(cls, v):\n if v is not None and not isinstance(v,\
204
+ \ dict):\n raise TypeError(\"autoresponse must be a dict or None\"\
205
+ )\n return v"
206
+ pipeline_tag: sentence-similarity
207
+ library_name: sentence-transformers
208
+ metrics:
209
+ - cosine_accuracy@1
210
+ - cosine_accuracy@3
211
+ - cosine_accuracy@5
212
+ - cosine_accuracy@10
213
+ - cosine_precision@1
214
+ - cosine_precision@3
215
+ - cosine_precision@5
216
+ - cosine_precision@10
217
+ - cosine_recall@1
218
+ - cosine_recall@3
219
+ - cosine_recall@5
220
+ - cosine_recall@10
221
+ - cosine_ndcg@10
222
+ - cosine_mrr@10
223
+ - cosine_map@100
224
+ model-index:
225
+ name: CodeBERT dense retriever
226
+ results:
227
+ - task:
228
+ type: information-retrieval
229
+ name: Information Retrieval
230
+ dataset:
231
+ name: dim 768
232
+ type: dim_768
233
+ metrics:
234
+ - type: cosine_accuracy@1
235
+ value: 0.65
236
+ name: Cosine Accuracy@1
237
+ - type: cosine_accuracy@3
238
+ value: 0.8
239
+ name: Cosine Accuracy@3
240
+ - type: cosine_accuracy@5
241
+ value: 0.85
242
+ name: Cosine Accuracy@5
243
+ - type: cosine_accuracy@10
244
+ value: 1.0
245
+ name: Cosine Accuracy@10
246
+ - type: cosine_precision@1
247
+ value: 0.65
248
+ name: Cosine Precision@1
249
+ - type: cosine_precision@3
250
+ value: 0.2666666666666666
251
+ name: Cosine Precision@3
252
+ - type: cosine_precision@5
253
+ value: 0.17000000000000007
254
+ name: Cosine Precision@5
255
+ - type: cosine_precision@10
256
+ value: 0.10000000000000002
257
+ name: Cosine Precision@10
258
+ - type: cosine_recall@1
259
+ value: 0.65
260
+ name: Cosine Recall@1
261
+ - type: cosine_recall@3
262
+ value: 0.8
263
+ name: Cosine Recall@3
264
+ - type: cosine_recall@5
265
+ value: 0.85
266
+ name: Cosine Recall@5
267
+ - type: cosine_recall@10
268
+ value: 1.0
269
+ name: Cosine Recall@10
270
+ - type: cosine_ndcg@10
271
+ value: 0.8047507161733674
272
+ name: Cosine Ndcg@10
273
+ - type: cosine_mrr@10
274
+ value: 0.7455555555555555
275
+ name: Cosine Mrr@10
276
+ - type: cosine_map@100
277
+ value: 0.7455555555555555
278
+ name: Cosine Map@100
279
+ - task:
280
+ type: information-retrieval
281
+ name: Information Retrieval
282
+ dataset:
283
+ name: dim 512
284
+ type: dim_512
285
+ metrics:
286
+ - type: cosine_accuracy@1
287
+ value: 0.7
288
+ name: Cosine Accuracy@1
289
+ - type: cosine_accuracy@3
290
+ value: 0.75
291
+ name: Cosine Accuracy@3
292
+ - type: cosine_accuracy@5
293
+ value: 0.85
294
+ name: Cosine Accuracy@5
295
+ - type: cosine_accuracy@10
296
+ value: 0.95
297
+ name: Cosine Accuracy@10
298
+ - type: cosine_precision@1
299
+ value: 0.7
300
+ name: Cosine Precision@1
301
+ - type: cosine_precision@3
302
+ value: 0.24999999999999994
303
+ name: Cosine Precision@3
304
+ - type: cosine_precision@5
305
+ value: 0.17000000000000007
306
+ name: Cosine Precision@5
307
+ - type: cosine_precision@10
308
+ value: 0.09500000000000001
309
+ name: Cosine Precision@10
310
+ - type: cosine_recall@1
311
+ value: 0.7
312
+ name: Cosine Recall@1
313
+ - type: cosine_recall@3
314
+ value: 0.75
315
+ name: Cosine Recall@3
316
+ - type: cosine_recall@5
317
+ value: 0.85
318
+ name: Cosine Recall@5
319
+ - type: cosine_recall@10
320
+ value: 0.95
321
+ name: Cosine Recall@10
322
+ - type: cosine_ndcg@10
323
+ value: 0.7959488813947496
324
+ name: Cosine Ndcg@10
325
+ - type: cosine_mrr@10
326
+ value: 0.7499999999999999
327
+ name: Cosine Mrr@10
328
+ - type: cosine_map@100
329
+ value: 0.7545454545454545
330
+ name: Cosine Map@100
331
+ - task:
332
+ type: information-retrieval
333
+ name: Information Retrieval
334
+ dataset:
335
+ name: dim 256
336
+ type: dim_256
337
+ metrics:
338
+ - type: cosine_accuracy@1
339
+ value: 0.65
340
+ name: Cosine Accuracy@1
341
+ - type: cosine_accuracy@3
342
+ value: 0.75
343
+ name: Cosine Accuracy@3
344
+ - type: cosine_accuracy@5
345
+ value: 0.8
346
+ name: Cosine Accuracy@5
347
+ - type: cosine_accuracy@10
348
+ value: 0.95
349
+ name: Cosine Accuracy@10
350
+ - type: cosine_precision@1
351
+ value: 0.65
352
+ name: Cosine Precision@1
353
+ - type: cosine_precision@3
354
+ value: 0.24999999999999994
355
+ name: Cosine Precision@3
356
+ - type: cosine_precision@5
357
+ value: 0.16000000000000006
358
+ name: Cosine Precision@5
359
+ - type: cosine_precision@10
360
+ value: 0.09500000000000001
361
+ name: Cosine Precision@10
362
+ - type: cosine_recall@1
363
+ value: 0.65
364
+ name: Cosine Recall@1
365
+ - type: cosine_recall@3
366
+ value: 0.75
367
+ name: Cosine Recall@3
368
+ - type: cosine_recall@5
369
+ value: 0.8
370
+ name: Cosine Recall@5
371
+ - type: cosine_recall@10
372
+ value: 0.95
373
+ name: Cosine Recall@10
374
+ - type: cosine_ndcg@10
375
+ value: 0.7682506698908595
376
+ name: Cosine Ndcg@10
377
+ - type: cosine_mrr@10
378
+ value: 0.7141666666666666
379
+ name: Cosine Mrr@10
380
+ - type: cosine_map@100
381
+ value: 0.7180128205128204
382
+ name: Cosine Map@100
383
+ - task:
384
+ type: information-retrieval
385
+ name: Information Retrieval
386
+ dataset:
387
+ name: dim 128
388
+ type: dim_128
389
+ metrics:
390
+ - type: cosine_accuracy@1
391
+ value: 0.6
392
+ name: Cosine Accuracy@1
393
+ - type: cosine_accuracy@3
394
+ value: 0.75
395
+ name: Cosine Accuracy@3
396
+ - type: cosine_accuracy@5
397
+ value: 0.9
398
+ name: Cosine Accuracy@5
399
+ - type: cosine_accuracy@10
400
+ value: 0.9
401
+ name: Cosine Accuracy@10
402
+ - type: cosine_precision@1
403
+ value: 0.6
404
+ name: Cosine Precision@1
405
+ - type: cosine_precision@3
406
+ value: 0.24999999999999994
407
+ name: Cosine Precision@3
408
+ - type: cosine_precision@5
409
+ value: 0.18000000000000005
410
+ name: Cosine Precision@5
411
+ - type: cosine_precision@10
412
+ value: 0.09000000000000002
413
+ name: Cosine Precision@10
414
+ - type: cosine_recall@1
415
+ value: 0.6
416
+ name: Cosine Recall@1
417
+ - type: cosine_recall@3
418
+ value: 0.75
419
+ name: Cosine Recall@3
420
+ - type: cosine_recall@5
421
+ value: 0.9
422
+ name: Cosine Recall@5
423
+ - type: cosine_recall@10
424
+ value: 0.9
425
+ name: Cosine Recall@10
426
+ - type: cosine_ndcg@10
427
+ value: 0.7417655963056966
428
+ name: Cosine Ndcg@10
429
+ - type: cosine_mrr@10
430
+ value: 0.6908333333333333
431
+ name: Cosine Mrr@10
432
+ - type: cosine_map@100
433
+ value: 0.6987121212121211
434
+ name: Cosine Map@100
435
+ - task:
436
+ type: information-retrieval
437
+ name: Information Retrieval
438
+ dataset:
439
+ name: dim 64
440
+ type: dim_64
441
+ metrics:
442
+ - type: cosine_accuracy@1
443
+ value: 0.55
444
+ name: Cosine Accuracy@1
445
+ - type: cosine_accuracy@3
446
+ value: 0.7
447
+ name: Cosine Accuracy@3
448
+ - type: cosine_accuracy@5
449
+ value: 0.75
450
+ name: Cosine Accuracy@5
451
+ - type: cosine_accuracy@10
452
+ value: 0.95
453
+ name: Cosine Accuracy@10
454
+ - type: cosine_precision@1
455
+ value: 0.55
456
+ name: Cosine Precision@1
457
+ - type: cosine_precision@3
458
+ value: 0.2333333333333333
459
+ name: Cosine Precision@3
460
+ - type: cosine_precision@5
461
+ value: 0.15000000000000005
462
+ name: Cosine Precision@5
463
+ - type: cosine_precision@10
464
+ value: 0.09500000000000001
465
+ name: Cosine Precision@10
466
+ - type: cosine_recall@1
467
+ value: 0.55
468
+ name: Cosine Recall@1
469
+ - type: cosine_recall@3
470
+ value: 0.7
471
+ name: Cosine Recall@3
472
+ - type: cosine_recall@5
473
+ value: 0.75
474
+ name: Cosine Recall@5
475
+ - type: cosine_recall@10
476
+ value: 0.95
477
+ name: Cosine Recall@10
478
+ - type: cosine_ndcg@10
479
+ value: 0.7155704014087189
480
+ name: Cosine Ndcg@10
481
+ - type: cosine_mrr@10
482
+ value: 0.6454166666666665
483
+ name: Cosine Mrr@10
484
+ - type: cosine_map@100
485
+ value: 0.647202380952381
486
+ name: Cosine Map@100
487
+ ---
488
+
489
+ # CodeBERT dense retriever
490
+
491
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [shubharuidas/codebert-embed-base-dense-retriever](https://huggingface.co/shubharuidas/codebert-embed-base-dense-retriever). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
492
+
493
+ ## Model Details
494
+
495
+ ### Model Description
496
+ - **Model Type:** Sentence Transformer
497
+ - **Base model:** [shubharuidas/codebert-embed-base-dense-retriever](https://huggingface.co/shubharuidas/codebert-embed-base-dense-retriever) <!-- at revision 9594580ae943039d0b85feb304404f9b2bb203ce -->
498
+ - **Maximum Sequence Length:** 512 tokens
499
+ - **Output Dimensionality:** 768 dimensions
500
+ - **Similarity Function:** Cosine Similarity
501
+ <!-- - **Training Dataset:** Unknown -->
502
+ - **Language:** en
503
+ - **License:** apache-2.0
504
+
505
+ ### Model Sources
506
+
507
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
508
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/huggingface/sentence-transformers)
509
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
510
+
511
+ ### Full Model Architecture
512
+
513
+ ```
514
+ SentenceTransformer(
515
+ (0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'RobertaModel'})
516
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
517
+ )
518
+ ```
519
+
520
+ ## Usage
521
+
522
+ ### Direct Usage (Sentence Transformers)
523
+
524
+ First install the Sentence Transformers library:
525
+
526
+ ```bash
527
+ pip install -U sentence-transformers
528
+ ```
529
+
530
+ Then you can load this model and run inference.
531
+ ```python
532
+ from sentence_transformers import SentenceTransformer
533
+
534
+ # Download from the 🤗 Hub
535
+ model = SentenceTransformer("anaghaj111/codebert-base-code-embed-mrl-langchain-langgraph")
536
+ # Run inference
537
+ sentences = [
538
+ 'Explain the validate_autoresponse logic',
539
+ 'def validate_autoresponse(cls, v):\n if v is not None and not isinstance(v, dict):\n raise TypeError("autoresponse must be a dict or None")\n return v',
540
+ 'def task_path_str(tup: str | int | tuple) -> str:\n """Generate a string representation of the task path."""\n return (\n f"~{\', \'.join(task_path_str(x) for x in tup)}"\n if isinstance(tup, (tuple, list))\n else f"{tup:010d}"\n if isinstance(tup, int)\n else str(tup)\n )',
541
+ ]
542
+ embeddings = model.encode(sentences)
543
+ print(embeddings.shape)
544
+ # [3, 768]
545
+
546
+ # Get the similarity scores for the embeddings
547
+ similarities = model.similarity(embeddings, embeddings)
548
+ print(similarities)
549
+ # tensor([[1.0000, 0.8070, 0.2282],
550
+ # [0.8070, 1.0000, 0.3158],
551
+ # [0.2282, 0.3158, 1.0000]])
552
+ ```
553
+
554
+ <!--
555
+ ### Direct Usage (Transformers)
556
+
557
+ <details><summary>Click to see the direct usage in Transformers</summary>
558
+
559
+ </details>
560
+ -->
561
+
562
+ <!--
563
+ ### Downstream Usage (Sentence Transformers)
564
+
565
+ You can finetune this model on your own dataset.
566
+
567
+ <details><summary>Click to expand</summary>
568
+
569
+ </details>
570
+ -->
571
+
572
+ <!--
573
+ ### Out-of-Scope Use
574
+
575
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
576
+ -->
577
+
578
+ ## Evaluation
579
+
580
+ ### Metrics
581
+
582
+ #### Information Retrieval
583
+
584
+ * Dataset: `dim_768`
585
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
586
+ ```json
587
+ {
588
+ "truncate_dim": 768
589
+ }
590
+ ```
591
+
592
+ | Metric | Value |
593
+ |:--------------------|:-----------|
594
+ | cosine_accuracy@1 | 0.65 |
595
+ | cosine_accuracy@3 | 0.8 |
596
+ | cosine_accuracy@5 | 0.85 |
597
+ | cosine_accuracy@10 | 1.0 |
598
+ | cosine_precision@1 | 0.65 |
599
+ | cosine_precision@3 | 0.2667 |
600
+ | cosine_precision@5 | 0.17 |
601
+ | cosine_precision@10 | 0.1 |
602
+ | cosine_recall@1 | 0.65 |
603
+ | cosine_recall@3 | 0.8 |
604
+ | cosine_recall@5 | 0.85 |
605
+ | cosine_recall@10 | 1.0 |
606
+ | **cosine_ndcg@10** | **0.8048** |
607
+ | cosine_mrr@10 | 0.7456 |
608
+ | cosine_map@100 | 0.7456 |
609
+
610
+ #### Information Retrieval
611
+
612
+ * Dataset: `dim_512`
613
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
614
+ ```json
615
+ {
616
+ "truncate_dim": 512
617
+ }
618
+ ```
619
+
620
+ | Metric | Value |
621
+ |:--------------------|:-----------|
622
+ | cosine_accuracy@1 | 0.7 |
623
+ | cosine_accuracy@3 | 0.75 |
624
+ | cosine_accuracy@5 | 0.85 |
625
+ | cosine_accuracy@10 | 0.95 |
626
+ | cosine_precision@1 | 0.7 |
627
+ | cosine_precision@3 | 0.25 |
628
+ | cosine_precision@5 | 0.17 |
629
+ | cosine_precision@10 | 0.095 |
630
+ | cosine_recall@1 | 0.7 |
631
+ | cosine_recall@3 | 0.75 |
632
+ | cosine_recall@5 | 0.85 |
633
+ | cosine_recall@10 | 0.95 |
634
+ | **cosine_ndcg@10** | **0.7959** |
635
+ | cosine_mrr@10 | 0.75 |
636
+ | cosine_map@100 | 0.7545 |
637
+
638
+ #### Information Retrieval
639
+
640
+ * Dataset: `dim_256`
641
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
642
+ ```json
643
+ {
644
+ "truncate_dim": 256
645
+ }
646
+ ```
647
+
648
+ | Metric | Value |
649
+ |:--------------------|:-----------|
650
+ | cosine_accuracy@1 | 0.65 |
651
+ | cosine_accuracy@3 | 0.75 |
652
+ | cosine_accuracy@5 | 0.8 |
653
+ | cosine_accuracy@10 | 0.95 |
654
+ | cosine_precision@1 | 0.65 |
655
+ | cosine_precision@3 | 0.25 |
656
+ | cosine_precision@5 | 0.16 |
657
+ | cosine_precision@10 | 0.095 |
658
+ | cosine_recall@1 | 0.65 |
659
+ | cosine_recall@3 | 0.75 |
660
+ | cosine_recall@5 | 0.8 |
661
+ | cosine_recall@10 | 0.95 |
662
+ | **cosine_ndcg@10** | **0.7683** |
663
+ | cosine_mrr@10 | 0.7142 |
664
+ | cosine_map@100 | 0.718 |
665
+
666
+ #### Information Retrieval
667
+
668
+ * Dataset: `dim_128`
669
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
670
+ ```json
671
+ {
672
+ "truncate_dim": 128
673
+ }
674
+ ```
675
+
676
+ | Metric | Value |
677
+ |:--------------------|:-----------|
678
+ | cosine_accuracy@1 | 0.6 |
679
+ | cosine_accuracy@3 | 0.75 |
680
+ | cosine_accuracy@5 | 0.9 |
681
+ | cosine_accuracy@10 | 0.9 |
682
+ | cosine_precision@1 | 0.6 |
683
+ | cosine_precision@3 | 0.25 |
684
+ | cosine_precision@5 | 0.18 |
685
+ | cosine_precision@10 | 0.09 |
686
+ | cosine_recall@1 | 0.6 |
687
+ | cosine_recall@3 | 0.75 |
688
+ | cosine_recall@5 | 0.9 |
689
+ | cosine_recall@10 | 0.9 |
690
+ | **cosine_ndcg@10** | **0.7418** |
691
+ | cosine_mrr@10 | 0.6908 |
692
+ | cosine_map@100 | 0.6987 |
693
+
694
+ #### Information Retrieval
695
+
696
+ * Dataset: `dim_64`
697
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
698
+ ```json
699
+ {
700
+ "truncate_dim": 64
701
+ }
702
+ ```
703
+
704
+ | Metric | Value |
705
+ |:--------------------|:-----------|
706
+ | cosine_accuracy@1 | 0.55 |
707
+ | cosine_accuracy@3 | 0.7 |
708
+ | cosine_accuracy@5 | 0.75 |
709
+ | cosine_accuracy@10 | 0.95 |
710
+ | cosine_precision@1 | 0.55 |
711
+ | cosine_precision@3 | 0.2333 |
712
+ | cosine_precision@5 | 0.15 |
713
+ | cosine_precision@10 | 0.095 |
714
+ | cosine_recall@1 | 0.55 |
715
+ | cosine_recall@3 | 0.7 |
716
+ | cosine_recall@5 | 0.75 |
717
+ | cosine_recall@10 | 0.95 |
718
+ | **cosine_ndcg@10** | **0.7156** |
719
+ | cosine_mrr@10 | 0.6454 |
720
+ | cosine_map@100 | 0.6472 |
721
+
722
+ <!--
723
+ ## Bias, Risks and Limitations
724
+
725
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
726
+ -->
727
+
728
+ <!--
729
+ ### Recommendations
730
+
731
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
732
+ -->
733
+
734
+ ## Training Details
735
+
736
+ ### Training Dataset
737
+
738
+ #### Unnamed Dataset
739
+
740
+ * Size: 180 training samples
741
+ * Columns: <code>anchor</code> and <code>positive</code>
742
+ * Approximate statistics based on the first 180 samples:
743
+ | | anchor | positive |
744
+ |:--------|:-----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
745
+ | type | string | string |
746
+ | details | <ul><li>min: 6 tokens</li><li>mean: 14.07 tokens</li><li>max: 354 tokens</li></ul> | <ul><li>min: 14 tokens</li><li>mean: 272.19 tokens</li><li>max: 512 tokens</li></ul> |
747
+ * Samples:
748
+ | anchor | positive |
749
+ |:--------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
750
+ | <code>Best practices for test_search_items</code> | <code>def test_search_items(<br> fake_embeddings: CharacterEmbeddings,<br>) -> None:<br> """Test search_items functionality by calling store methods directly."""<br> base = "test_search_items"<br> test_namespaces = [<br> (base, "documents", "user1"),<br> (base, "documents", "user2"),<br> (base, "reports", "department1"),<br> (base, "reports", "department2"),<br> ]<br> test_items = [<br> {"title": "Doc 1", "author": "John Doe", "tags": ["important"]},<br> {"title": "Doc 2", "author": "Jane Smith", "tags": ["draft"]},<br> {"title": "Report A", "author": "John Doe", "tags": ["final"]},<br> {"title": "Report B", "author": "Alice Johnson", "tags": ["draft"]},<br> ]<br><br> with create_vector_store(<br> fake_embeddings, text_fields=["key0", "key1", "key3"]<br> ) as store:<br> # Insert test data<br> for ns, item in zip(test_namespaces, test_items, strict=False):<br> key = f"item_{ns[-1]}"<br> store.put(ns, key, item)<br><br> # 1. Search documen...</code> |
751
+ | <code>How does async store work in Python?</code> | <code>async def store(request) -> AsyncIterator[AsyncPostgresStore]:<br> database = f"test_{uuid.uuid4().hex[:16]}"<br> uri_parts = DEFAULT_URI.split("/")<br> uri_base = "/".join(uri_parts[:-1])<br> query_params = ""<br> if "?" in uri_parts[-1]:<br> db_name, query_params = uri_parts[-1].split("?", 1)<br> query_params = "?" + query_params<br><br> conn_string = f"{uri_base}/{database}{query_params}"<br> admin_conn_string = DEFAULT_URI<br> ttl_config = {<br> "default_ttl": TTL_MINUTES,<br> "refresh_on_read": True,<br> "sweep_interval_minutes": TTL_MINUTES / 2,<br> }<br> async with await AsyncConnection.connect(<br> admin_conn_string, autocommit=True<br> ) as conn:<br> await conn.execute(f"CREATE DATABASE {database}")<br> try:<br> async with AsyncPostgresStore.from_conn_string(<br> conn_string, ttl=ttl_config<br> ) as store:<br> store.MIGRATIONS = [<br> (<br> mig.replace("ttl_minutes INT;", "ttl_minutes FLOAT;")<br> ...</code> |
752
+ | <code>How to implement list?</code> | <code>def list(<br> self,<br> config: RunnableConfig \| None,<br> *,<br> filter: dict[str, Any] \| None = None,<br> before: RunnableConfig \| None = None,<br> limit: int \| None = None,<br> ) -> Iterator[CheckpointTuple]:<br> """List checkpoints from the database.<br><br> This method retrieves a list of checkpoint tuples from the Postgres database based<br> on the provided config. For ShallowPostgresSaver, this method returns a list with<br> ONLY the most recent checkpoint.<br> """<br> aiter_ = self.alist(config, filter=filter, before=before, limit=limit)<br> while True:<br> try:<br> yield asyncio.run_coroutine_threadsafe(<br> anext(aiter_), # type: ignore[arg-type] # noqa: F821<br> self.loop,<br> ).result()<br> except StopAsyncIteration:<br> break</code> |
753
+ * Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
754
+ ```json
755
+ {
756
+ "loss": "MultipleNegativesRankingLoss",
757
+ "matryoshka_dims": [
758
+ 768,
759
+ 512,
760
+ 256,
761
+ 128,
762
+ 64
763
+ ],
764
+ "matryoshka_weights": [
765
+ 1,
766
+ 1,
767
+ 1,
768
+ 1,
769
+ 1
770
+ ],
771
+ "n_dims_per_step": -1
772
+ }
773
+ ```
774
+
775
+ ### Training Hyperparameters
776
+ #### Non-Default Hyperparameters
777
+
778
+ - `eval_strategy`: epoch
779
+ - `per_device_train_batch_size`: 4
780
+ - `per_device_eval_batch_size`: 4
781
+ - `gradient_accumulation_steps`: 16
782
+ - `learning_rate`: 2e-05
783
+ - `num_train_epochs`: 2
784
+ - `lr_scheduler_type`: cosine
785
+ - `warmup_ratio`: 0.1
786
+ - `warmup_steps`: 0.1
787
+ - `fp16`: True
788
+ - `load_best_model_at_end`: True
789
+ - `optim`: adamw_torch
790
+ - `batch_sampler`: no_duplicates
791
+
792
+ #### All Hyperparameters
793
+ <details><summary>Click to expand</summary>
794
+
795
+ - `do_predict`: False
796
+ - `eval_strategy`: epoch
797
+ - `prediction_loss_only`: True
798
+ - `per_device_train_batch_size`: 4
799
+ - `per_device_eval_batch_size`: 4
800
+ - `gradient_accumulation_steps`: 16
801
+ - `eval_accumulation_steps`: None
802
+ - `torch_empty_cache_steps`: None
803
+ - `learning_rate`: 2e-05
804
+ - `weight_decay`: 0.0
805
+ - `adam_beta1`: 0.9
806
+ - `adam_beta2`: 0.999
807
+ - `adam_epsilon`: 1e-08
808
+ - `max_grad_norm`: 1.0
809
+ - `num_train_epochs`: 2
810
+ - `max_steps`: -1
811
+ - `lr_scheduler_type`: cosine
812
+ - `lr_scheduler_kwargs`: None
813
+ - `warmup_ratio`: 0.1
814
+ - `warmup_steps`: 0.1
815
+ - `log_level`: passive
816
+ - `log_level_replica`: warning
817
+ - `log_on_each_node`: True
818
+ - `logging_nan_inf_filter`: True
819
+ - `enable_jit_checkpoint`: False
820
+ - `save_on_each_node`: False
821
+ - `save_only_model`: False
822
+ - `restore_callback_states_from_checkpoint`: False
823
+ - `use_cpu`: False
824
+ - `seed`: 42
825
+ - `data_seed`: None
826
+ - `bf16`: False
827
+ - `fp16`: True
828
+ - `bf16_full_eval`: False
829
+ - `fp16_full_eval`: False
830
+ - `tf32`: None
831
+ - `local_rank`: -1
832
+ - `ddp_backend`: None
833
+ - `debug`: []
834
+ - `dataloader_drop_last`: False
835
+ - `dataloader_num_workers`: 0
836
+ - `dataloader_prefetch_factor`: None
837
+ - `disable_tqdm`: False
838
+ - `remove_unused_columns`: True
839
+ - `label_names`: None
840
+ - `load_best_model_at_end`: True
841
+ - `ignore_data_skip`: False
842
+ - `fsdp`: []
843
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
844
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
845
+ - `parallelism_config`: None
846
+ - `deepspeed`: None
847
+ - `label_smoothing_factor`: 0.0
848
+ - `optim`: adamw_torch
849
+ - `optim_args`: None
850
+ - `group_by_length`: False
851
+ - `length_column_name`: length
852
+ - `project`: huggingface
853
+ - `trackio_space_id`: trackio
854
+ - `ddp_find_unused_parameters`: None
855
+ - `ddp_bucket_cap_mb`: None
856
+ - `ddp_broadcast_buffers`: False
857
+ - `dataloader_pin_memory`: True
858
+ - `dataloader_persistent_workers`: False
859
+ - `skip_memory_metrics`: True
860
+ - `push_to_hub`: False
861
+ - `resume_from_checkpoint`: None
862
+ - `hub_model_id`: None
863
+ - `hub_strategy`: every_save
864
+ - `hub_private_repo`: None
865
+ - `hub_always_push`: False
866
+ - `hub_revision`: None
867
+ - `gradient_checkpointing`: False
868
+ - `gradient_checkpointing_kwargs`: None
869
+ - `include_for_metrics`: []
870
+ - `eval_do_concat_batches`: True
871
+ - `auto_find_batch_size`: False
872
+ - `full_determinism`: False
873
+ - `ddp_timeout`: 1800
874
+ - `torch_compile`: False
875
+ - `torch_compile_backend`: None
876
+ - `torch_compile_mode`: None
877
+ - `include_num_input_tokens_seen`: no
878
+ - `neftune_noise_alpha`: None
879
+ - `optim_target_modules`: None
880
+ - `batch_eval_metrics`: False
881
+ - `eval_on_start`: False
882
+ - `use_liger_kernel`: False
883
+ - `liger_kernel_config`: None
884
+ - `eval_use_gather_object`: False
885
+ - `average_tokens_across_devices`: True
886
+ - `use_cache`: False
887
+ - `prompts`: None
888
+ - `batch_sampler`: no_duplicates
889
+ - `multi_dataset_batch_sampler`: proportional
890
+ - `router_mapping`: {}
891
+ - `learning_rate_mapping`: {}
892
+
893
+ </details>
894
+
895
+ ### Training Logs
896
+ | Epoch | Step | dim_768_cosine_ndcg@10 | dim_512_cosine_ndcg@10 | dim_256_cosine_ndcg@10 | dim_128_cosine_ndcg@10 | dim_64_cosine_ndcg@10 |
897
+ |:-------:|:-----:|:----------------------:|:----------------------:|:----------------------:|:----------------------:|:---------------------:|
898
+ | 1.0 | 3 | 0.7612 | 0.7137 | 0.7083 | 0.6926 | 0.6624 |
899
+ | **2.0** | **6** | **0.8048** | **0.7959** | **0.7683** | **0.7418** | **0.7156** |
900
+
901
+ * The bold row denotes the saved checkpoint.
902
+
903
+ ### Framework Versions
904
+ - Python: 3.14.0
905
+ - Sentence Transformers: 5.2.1
906
+ - Transformers: 5.0.0
907
+ - PyTorch: 2.10.0
908
+ - Accelerate: 1.12.0
909
+ - Datasets: 4.5.0
910
+ - Tokenizers: 0.22.2
911
+
912
+ ## Citation
913
+
914
+ ### BibTeX
915
+
916
+ #### Sentence Transformers
917
+ ```bibtex
918
+ @inproceedings{reimers-2019-sentence-bert,
919
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
920
+ author = "Reimers, Nils and Gurevych, Iryna",
921
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
922
+ month = "11",
923
+ year = "2019",
924
+ publisher = "Association for Computational Linguistics",
925
+ url = "https://arxiv.org/abs/1908.10084",
926
+ }
927
+ ```
928
+
929
+ #### MatryoshkaLoss
930
+ ```bibtex
931
+ @misc{kusupati2024matryoshka,
932
+ title={Matryoshka Representation Learning},
933
+ author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
934
+ year={2024},
935
+ eprint={2205.13147},
936
+ archivePrefix={arXiv},
937
+ primaryClass={cs.LG}
938
+ }
939
+ ```
940
+
941
+ #### MultipleNegativesRankingLoss
942
+ ```bibtex
943
+ @misc{henderson2017efficient,
944
+ title={Efficient Natural Language Response Suggestion for Smart Reply},
945
+ author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
946
+ year={2017},
947
+ eprint={1705.00652},
948
+ archivePrefix={arXiv},
949
+ primaryClass={cs.CL}
950
+ }
951
+ ```
952
+
953
+ <!--
954
+ ## Glossary
955
+
956
+ *Clearly define terms in order to be accessible across audiences.*
957
+ -->
958
+
959
+ <!--
960
+ ## Model Card Authors
961
+
962
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
963
+ -->
964
+
965
+ <!--
966
+ ## Model Card Contact
967
+
968
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
969
+ -->
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_cross_attention": false,
3
+ "architectures": [
4
+ "RobertaModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "dtype": "float32",
10
+ "eos_token_id": 2,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "is_decoder": false,
17
+ "layer_norm_eps": 1e-05,
18
+ "max_position_embeddings": 514,
19
+ "model_type": "roberta",
20
+ "num_attention_heads": 12,
21
+ "num_hidden_layers": 12,
22
+ "output_past": true,
23
+ "pad_token_id": 1,
24
+ "position_embedding_type": "absolute",
25
+ "tie_word_embeddings": true,
26
+ "transformers_version": "5.0.0",
27
+ "type_vocab_size": 1,
28
+ "use_cache": true,
29
+ "vocab_size": 50265
30
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "SentenceTransformer",
3
+ "__version__": {
4
+ "sentence_transformers": "5.2.1",
5
+ "transformers": "5.0.0",
6
+ "pytorch": "2.10.0"
7
+ },
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:180fd754acf3876c3ede6d2ba40e6ba3eaecee1115d82481953408051451d3ca
3
+ size 498604880
modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": "<s>",
5
+ "clean_up_tokenization_spaces": false,
6
+ "cls_token": "<s>",
7
+ "eos_token": "</s>",
8
+ "errors": "replace",
9
+ "is_local": false,
10
+ "mask_token": "<mask>",
11
+ "model_max_length": 512,
12
+ "model_specific_special_tokens": {},
13
+ "pad_token": "<pad>",
14
+ "sep_token": "</s>",
15
+ "tokenizer_class": "RobertaTokenizer",
16
+ "trim_offsets": true,
17
+ "unk_token": "<unk>"
18
+ }