ndsanjana committed on
Commit 3dc2670 · verified · 1 parent: c81747e

Add new SentenceTransformer model
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+     "word_embedding_dimension": 768,
+     "pooling_mode_cls_token": false,
+     "pooling_mode_mean_tokens": true,
+     "pooling_mode_max_tokens": false,
+     "pooling_mode_mean_sqrt_len_tokens": false,
+     "pooling_mode_weightedmean_tokens": false,
+     "pooling_mode_lasttoken": false,
+     "include_prompt": true
+ }
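The pooling config above enables only `pooling_mode_mean_tokens`. As a rough illustration of what that mode does (a sketch, not the sentence-transformers internal code), mean pooling averages the token embeddings while masking out padding positions:

```python
import torch

def mean_pool(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    """Average token embeddings, ignoring padded positions (pooling_mode_mean_tokens)."""
    # (batch, seq, dim) * (batch, seq, 1) zeroes out embeddings at padded positions
    mask = attention_mask.unsqueeze(-1).float()
    summed = (token_embeddings * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1e-9)  # number of real tokens per sentence
    return summed / counts

tokens = torch.randn(2, 5, 768)  # toy batch: 2 sentences, 5 tokens, 768-dim
mask = torch.tensor([[1, 1, 1, 0, 0], [1, 1, 1, 1, 1]])
pooled = mean_pool(tokens, mask)
print(pooled.shape)  # torch.Size([2, 768])
```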
2_Dense/config.json ADDED
@@ -0,0 +1,6 @@
+ {
+     "in_features": 768,
+     "out_features": 3072,
+     "bias": false,
+     "activation_function": "torch.nn.modules.linear.Identity"
+ }
2_Dense/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b6fe0bff2c4fc9b269f3de9d67244d99fe8166fc74518cd83df4183d8e06a9ed
+ size 9437272
3_Dense/config.json ADDED
@@ -0,0 +1,6 @@
+ {
+     "in_features": 3072,
+     "out_features": 768,
+     "bias": false,
+     "activation_function": "torch.nn.modules.linear.Identity"
+ }
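The two Dense configs describe a bias-free 768 → 3072 → 768 projection with identity activations. A minimal sketch with plain `torch.nn.Linear` stand-ins (random weights, not the checkpoint's); note that each `model.safetensors` here is 9,437,272 bytes, consistent with one 768×3072 float32 matrix (9,437,184 bytes) plus the small safetensors header:

```python
import torch
import torch.nn as nn

# Illustrative stand-ins for modules 2_Dense and 3_Dense: bias-free linear
# layers whose "activation" is the identity, i.e. pure matrix multiplies.
dense_up = nn.Linear(768, 3072, bias=False)
dense_down = nn.Linear(3072, 768, bias=False)

x = torch.randn(4, 768)        # pooled sentence embeddings
y = dense_down(dense_up(x))    # round trip through the 3072-dim bottleneck
print(y.shape)                 # torch.Size([4, 768])
```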
3_Dense/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d08c869c071243ed84412b3099683304e93ba849245a155ee16debbb2afc9f92
+ size 9437272
README.md ADDED
@@ -0,0 +1,438 @@
+ ---
+ tags:
+ - sentence-transformers
+ - sentence-similarity
+ - feature-extraction
+ - dense
+ - generated_from_trainer
+ - dataset_size:1000
+ - loss:MultipleNegativesRankingLoss
+ base_model: google/embeddinggemma-300m
+ widget:
+ - source_sentence: 'Theme: Dystopian surveillance and control, Ethical implications
+     of autonomous warfare, Human agency versus machine dominance, Resistance against
+     dehumanization, Unintended consequences of technological advancement, Manipulation
+     and hidden agendas, Redemption and moral choice'
+   sentences:
+   - 'Theme: Discovery of ancient mysteries, Conflict between community values and
+     greed, Sacrifice for the greater good, Renewal and hope through art, The power
+     of collective action'
+   - unknown
+   - 'Theme: AI-driven warfare and its ethical implications, Human agency versus technological
+     determinism, Surveillance and the hunt for dissent, Rebellion against oppressive
+     systems, The moral dilemma of dismantling versus repurposing destructive technology,
+     Hidden sabotage and the foresight of architects, The fragility of global security
+     in a tech‑centric world'
+   - 96_theme_cross
+ - source_sentence: 'Theme: Harmony with nature, Mystical forces and ancient traditions,
+     Hidden threats and the struggle against darkness, Courage and personal growth,
+     Connection to the land, Community resilience and cooperation, Restoration of balance'
+   sentences:
+   - 'Actions: Elara discovers a hidden grove where forest spirits gather. -> She learns
+     that a dark entity, long imprisoned beneath the village, is stirring. -> Guided
+     by the wise elder Thorne and a mysterious amulet, she prepares for a perilous
+     journey. -> Elara embarks on the journey, facing trials that test her courage
+     and resolve. -> During the trials, she discovers the true power of her connection
+     to the land. -> With the help of her fellow villagers and the spirits of the forest,
+     she seals the entity away once more. -> The village’s balance is restored and
+     prosperity is ensured.'
+   - unknown
+   - 'Theme: coexistence with nature, supernatural forces, bravery and determination,
+     destiny and personal growth, community support, renewal and protection of heritage,
+     bond with the land'
+   - 167_theme_vs_action
+ - source_sentence: 'Theme: Immortality versus isolation, Ethical implications of scientific
+     discovery, The cost of eternal youth, Power and exploitation of knowledge, Sacrifice
+     to prevent misuse'
+   sentences:
+   - 'Theme: The paradox of immortality versus the inevitability of death, Isolation
+     that accompanies prolonged life, Ethical dilemmas surrounding the use of natural
+     wonders for profit, The tension between scientific curiosity and personal sacrifice,
+     The cost of preserving nature’s secrets'
+   - unknown
+   - 'Actions: Discover a rare plant in a remote jungle that can halt aging. -> Develop
+     an experimental serum based on the plant. -> Test the serum on herself, successfully
+     stopping her physical aging. -> Live for decades while watching loved ones age
+     and die. -> A ruthless biotech corporation uncovers her secret. -> Engage in a
+     tense confrontation with the corporation. -> Destroy her research and the last
+     sample of the plant to prevent misuse. -> Walk away from the laboratory, resigned
+     to eternal youth and solitude.'
+   - 56_theme_vs_action
+ - source_sentence: 'Outcomes: Sarah and Alex discover that companionship, whether
+     human or artificial, can transcend conventional boundaries, leaving both transformed
+     and redefining connection in an increasingly digital world.'
+   sentences:
+   - unknown
+   - 'Outcomes: Mia and Orion both experience profound personal change. Mia overcomes
+     her fear of solitude and gains a deeper understanding of human connection. Orion
+     attains a form of independence while maintaining its role as a companion. Their
+     relationship demonstrates that companionship, whether human or artificial, can
+     surpass conventional limits.'
+   - 'Outcomes: Ollie learns that true strength lies in collaboration and understanding.
+     He forges an unbreakable bond between the living and the spirits. The united realms
+     leave a lasting legacy for future generations.'
+   - 67_outcome_cross
+ - source_sentence: 'Theme: Ethics of de‑extinction and scientific responsibility,
+     Human ambition versus natural limits, Emergence of higher intelligence in extinct
+     species, Corporate militarization of biological research, Coexistence and harmony
+     between ancient and modern life forms'
+   sentences:
+   - unknown
+   - 'Actions: Dr. Sarah Chen extracts viable DNA from a Triceratops fossil. -> She
+     creates the first living dinosaur in 65 million years, nicknamed Trinity. -> The
+     creature is publicly revealed, sparking global debate on de‑extinction ethics.
+     -> Trinity exhibits unexpected higher intelligence. -> Biotech magnate Marcus
+     Voss attempts to weaponize the research for military use. -> A confrontation occurs
+     at the research facility. -> Trinity escapes into the nearby wilderness and encounters
+     modern wildlife. -> Dr. Chen decides to destroy her research data to prevent further
+     exploitation. -> Trinity disappears into a remote forest preserve. -> Final scene
+     shows Trinity peacefully coexisting with a herd of elk.'
+   - 85_theme_vs_action
+   - 'Theme: The ethical limits of scientific ambition, The moral implications of resurrecting
+     extinct species, The clash between corporate exploitation and scientific integrity,
+     The unexpected cognitive complexity of prehistoric life, The possibility of coexistence
+     between past and present ecosystems'
+ pipeline_tag: sentence-similarity
+ library_name: sentence-transformers
+ ---
+
+ # SentenceTransformer based on google/embeddinggemma-300m
+
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [google/embeddinggemma-300m](https://huggingface.co/google/embeddinggemma-300m). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+
+ ## Model Details
+
+ ### Model Description
+ - **Model Type:** Sentence Transformer
+ - **Base model:** [google/embeddinggemma-300m](https://huggingface.co/google/embeddinggemma-300m) <!-- at revision 57c266a740f537b4dc058e1b0cda161fd15afa75 -->
+ - **Maximum Sequence Length:** 2048 tokens
+ - **Output Dimensionality:** 768 dimensions
+ - **Similarity Function:** Cosine Similarity
+ <!-- - **Training Dataset:** Unknown -->
+ <!-- - **Language:** Unknown -->
+ <!-- - **License:** Unknown -->
+
+ ### Model Sources
+
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/huggingface/sentence-transformers)
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+
+ ### Full Model Architecture
+
+ ```
+ SentenceTransformer(
+   (0): Transformer({'max_seq_length': 2048, 'do_lower_case': False, 'architecture': 'Gemma3TextModel'})
+   (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+   (2): Dense({'in_features': 768, 'out_features': 3072, 'bias': False, 'activation_function': 'torch.nn.modules.linear.Identity'})
+   (3): Dense({'in_features': 3072, 'out_features': 768, 'bias': False, 'activation_function': 'torch.nn.modules.linear.Identity'})
+   (4): Normalize()
+ )
+ ```
+
+ ## Usage
+
+ ### Direct Usage (Sentence Transformers)
+
+ First install the Sentence Transformers library:
+
+ ```bash
+ pip install -U sentence-transformers
+ ```
+
+ Then you can load this model and run inference.
+ ```python
+ from sentence_transformers import SentenceTransformer
+
+ # Download from the 🤗 Hub
+ model = SentenceTransformer("ndsanjana/embedgemma_ns")
+ # Run inference
+ queries = [
+     "Theme: Ethics of de\u2011extinction and scientific responsibility, Human ambition versus natural limits, Emergence of higher intelligence in extinct species, Corporate militarization of biological research, Coexistence and harmony between ancient and modern life forms",
+ ]
+ documents = [
+     'Theme: The ethical limits of scientific ambition, The moral implications of resurrecting extinct species, The clash between corporate exploitation and scientific integrity, The unexpected cognitive complexity of prehistoric life, The possibility of coexistence between past and present ecosystems',
+     'Actions: Dr. Sarah Chen extracts viable DNA from a Triceratops fossil. -> She creates the first living dinosaur in 65 million years, nicknamed Trinity. -> The creature is publicly revealed, sparking global debate on de‑extinction ethics. -> Trinity exhibits unexpected higher intelligence. -> Biotech magnate Marcus Voss attempts to weaponize the research for military use. -> A confrontation occurs at the research facility. -> Trinity escapes into the nearby wilderness and encounters modern wildlife. -> Dr. Chen decides to destroy her research data to prevent further exploitation. -> Trinity disappears into a remote forest preserve. -> Final scene shows Trinity peacefully coexisting with a herd of elk.',
+     '85_theme_vs_action',
+ ]
+ query_embeddings = model.encode_query(queries)
+ document_embeddings = model.encode_document(documents)
+ print(query_embeddings.shape, document_embeddings.shape)
+ # [1, 768] [3, 768]
+
+ # Get the similarity scores for the embeddings
+ similarities = model.similarity(query_embeddings, document_embeddings)
+ print(similarities)
+ # tensor([[ 0.7758, 0.1831, -0.0576]])
+ ```
+
+ <!--
+ ### Direct Usage (Transformers)
+
+ <details><summary>Click to see the direct usage in Transformers</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Downstream Usage (Sentence Transformers)
+
+ You can finetune this model on your own dataset.
+
+ <details><summary>Click to expand</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Out-of-Scope Use
+
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
+ -->
+
+ <!--
+ ## Bias, Risks and Limitations
+
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+ -->
+
+ <!--
+ ### Recommendations
+
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+ -->
+
+ ## Training Details
+
+ ### Training Dataset
+
+ #### Unnamed Dataset
+
+ * Size: 1,000 training samples
+ * Columns: <code>anchor</code>, <code>positive</code>, <code>negative</code>, <code>triplet_id</code>, and <code>source</code>
+ * Approximate statistics based on the first 1000 samples:
+   |         | anchor | positive | negative | triplet_id | source |
+   |:--------|:-------|:---------|:---------|:-----------|:-------|
+   | type    | string | string | string | string | string |
+   | details | <ul><li>min: 18 tokens</li><li>mean: 80.7 tokens</li><li>max: 204 tokens</li></ul> | <ul><li>min: 19 tokens</li><li>mean: 81.97 tokens</li><li>max: 201 tokens</li></ul> | <ul><li>min: 17 tokens</li><li>mean: 83.77 tokens</li><li>max: 230 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 9.25 tokens</li><li>max: 11 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 3.0 tokens</li><li>max: 3 tokens</li></ul> |
+ * Samples:
+   | anchor | positive | negative | triplet_id | source |
+   |:-------|:---------|:---------|:-----------|:-------|
+   | <code>Theme: inheritance, haunted house, supernatural, grief and loss, revenge, family dynamics, possession, exorcism, unresolved trauma, moral choice</code> | <code>Theme: Inheritance of legacy and the weight of family history, Supernatural haunting as a manifestation of unresolved trauma, The conflict between self-preservation and compassion, The cyclical nature of guilt and the desire for redemption, The tension between rational action and inexplicable forces</code> | <code>Theme: grief and avoidance, emotional healing, isolation and its psychological effects, responsibility toward family, the interplay between scientific curiosity and personal emotion, the reflective power of nature, guilt and unresolved conflict</code> | <code>0_theme_cross</code> | <code>unknown</code> |
+   | <code>Actions: Family moves into inherited Victorian mansion -> Strange occurrences begin immediately -> Teenage daughter becomes primary target of supernatural activity -> Family researches property’s past and learns about reclusive widow and lost daughter -> Paranormal events intensify, threatening family safety -> Father attempts exorcism using items from hidden basement -> Exorcism angers the entity further -> Mother faces a critical choice: flee or help the spirit find peace by reuniting her with her daughter's remains -> Mother chooses to help the spirit</code> | <code>Actions: A newlywed couple inherits a sprawling ranch house in the desert from an estranged uncle. -> From the first night, bizarre phenomena (whispers, self-opening doors, sudden cold rooms) plague the household. -> The wife becomes the focal point of the disturbances, experiencing terrifying visions and speaking in unfamiliar voices. -> The couple investigates the property's history and learns that the former owner, an elderly hermit, died under suspicious circumstances after his young son accidentally died on the grounds. -> They discover that the hermit's ghost is desperately seeking someone to take his boy's place. -> The husband attempts to banish the spirit using ritual objects found in a concealed cellar. -> The ritual backfires, provoking the entity to greater violence and intensifying the supernatural assault. -> During the final confrontation, the wife faces an impossible decision: escape with her husband to safety or help the anguished ghost locate his son's hidden grave to...</code> | <code>Actions: Marine biologist accepts a research position at an isolated underwater station studying deep‑sea thermal vents. -> She leaves behind her estranged teenage son, who blames her for his father's recent death. -> During her six‑month assignment she discovers unusual bioluminescent organisms that respond to human emotions and memories. -> She spends more time observing the creatures, which triggers vivid recollections of her late husband and the unresolved guilt surrounding their final argument before his fatal accident. -> The organisms feed on her emotional energy, growing brighter and more active as her psychological state deteriorates. -> Her research partner becomes concerned about her erratic behavior and threatens to abort the mission. -> She realizes that her obsession with the creatures is a way of avoiding her grief and responsibility to her son. -> In the final act, she chooses to surface early and return home, accepting that healing requires facing her loss rather than ...</code> | <code>0_action_cross</code> | <code>unknown</code> |
+   | <code>Outcomes: The mother’s decision to reunite the widow’s daughter’s remains brings peace to the spirit, ending the haunting. The family remains safe and can continue living in the house.</code> | <code>Outcomes: The story concludes with the wife's decision, leaving the haunting either unresolved if they escape or potentially resolved if they help the ghost find the grave. The final state is ambiguous, reflecting the unresolved tension between survival and compassion.</code> | <code>Outcomes: She returns home, confronts her grief and responsibility toward her son, and begins the process of healing.</code> | <code>0_outcome_cross</code> | <code>unknown</code> |
+ * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
+   ```json
+   {
+       "scale": 20.0,
+       "similarity_fct": "cos_sim",
+       "gather_across_devices": false
+   }
+   ```
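For intuition, `MultipleNegativesRankingLoss` with `cos_sim` and `scale: 20.0` treats every other in-batch positive as a negative and applies cross-entropy over the scaled similarity matrix. A simplified sketch of that objective (toy shapes, not the sentence-transformers implementation):

```python
import torch
import torch.nn.functional as F

def mnr_loss(anchors: torch.Tensor, positives: torch.Tensor, scale: float = 20.0) -> torch.Tensor:
    """In-batch-negatives ranking loss over scaled cosine similarities."""
    a = F.normalize(anchors, dim=-1)
    p = F.normalize(positives, dim=-1)
    scores = a @ p.T * scale               # (batch, batch) similarity matrix
    labels = torch.arange(scores.size(0))  # matching positive sits on the diagonal
    return F.cross_entropy(scores, labels)

torch.manual_seed(0)
emb = torch.randn(8, 32)           # toy embeddings; the real model uses 768 dims
loss = mnr_loss(emb, emb.clone())  # identical anchor/positive pairs -> near-zero loss
print(round(loss.item(), 4))
```

The large scale factor sharpens the softmax so that the correct pairing dominates once its cosine similarity clearly exceeds the in-batch negatives'.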
+
+ ### Training Hyperparameters
+ #### Non-Default Hyperparameters
+
+ - `learning_rate`: 2e-05
+ - `num_train_epochs`: 10
+ - `warmup_ratio`: 0.1
+ - `fp16`: True
+ - `prompts`: task: sentence similarity | query:
+
+ #### All Hyperparameters
+ <details><summary>Click to expand</summary>
+
+ - `overwrite_output_dir`: False
+ - `do_predict`: False
+ - `eval_strategy`: no
+ - `prediction_loss_only`: True
+ - `per_device_train_batch_size`: 8
+ - `per_device_eval_batch_size`: 8
+ - `per_gpu_train_batch_size`: None
+ - `per_gpu_eval_batch_size`: None
+ - `gradient_accumulation_steps`: 1
+ - `eval_accumulation_steps`: None
+ - `torch_empty_cache_steps`: None
+ - `learning_rate`: 2e-05
+ - `weight_decay`: 0.0
+ - `adam_beta1`: 0.9
+ - `adam_beta2`: 0.999
+ - `adam_epsilon`: 1e-08
+ - `max_grad_norm`: 1.0
+ - `num_train_epochs`: 10
+ - `max_steps`: -1
+ - `lr_scheduler_type`: linear
+ - `lr_scheduler_kwargs`: {}
+ - `warmup_ratio`: 0.1
+ - `warmup_steps`: 0
+ - `log_level`: passive
+ - `log_level_replica`: warning
+ - `log_on_each_node`: True
+ - `logging_nan_inf_filter`: True
+ - `save_safetensors`: True
+ - `save_on_each_node`: False
+ - `save_only_model`: False
+ - `restore_callback_states_from_checkpoint`: False
+ - `no_cuda`: False
+ - `use_cpu`: False
+ - `use_mps_device`: False
+ - `seed`: 42
+ - `data_seed`: None
+ - `jit_mode_eval`: False
+ - `bf16`: False
+ - `fp16`: True
+ - `fp16_opt_level`: O1
+ - `half_precision_backend`: auto
+ - `bf16_full_eval`: False
+ - `fp16_full_eval`: False
+ - `tf32`: None
+ - `local_rank`: 0
+ - `ddp_backend`: None
+ - `tpu_num_cores`: None
+ - `tpu_metrics_debug`: False
+ - `debug`: []
+ - `dataloader_drop_last`: False
+ - `dataloader_num_workers`: 0
+ - `dataloader_prefetch_factor`: None
+ - `past_index`: -1
+ - `disable_tqdm`: False
+ - `remove_unused_columns`: True
+ - `label_names`: None
+ - `load_best_model_at_end`: False
+ - `ignore_data_skip`: False
+ - `fsdp`: []
+ - `fsdp_min_num_params`: 0
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+ - `fsdp_transformer_layer_cls_to_wrap`: None
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+ - `parallelism_config`: None
+ - `deepspeed`: None
+ - `label_smoothing_factor`: 0.0
+ - `optim`: adamw_torch_fused
+ - `optim_args`: None
+ - `adafactor`: False
+ - `group_by_length`: False
+ - `length_column_name`: length
+ - `project`: huggingface
+ - `trackio_space_id`: trackio
+ - `ddp_find_unused_parameters`: None
+ - `ddp_bucket_cap_mb`: None
+ - `ddp_broadcast_buffers`: False
+ - `dataloader_pin_memory`: True
+ - `dataloader_persistent_workers`: False
+ - `skip_memory_metrics`: True
+ - `use_legacy_prediction_loop`: False
+ - `push_to_hub`: False
+ - `resume_from_checkpoint`: None
+ - `hub_model_id`: None
+ - `hub_strategy`: every_save
+ - `hub_private_repo`: None
+ - `hub_always_push`: False
+ - `hub_revision`: None
+ - `gradient_checkpointing`: False
+ - `gradient_checkpointing_kwargs`: None
+ - `include_inputs_for_metrics`: False
+ - `include_for_metrics`: []
+ - `eval_do_concat_batches`: True
+ - `fp16_backend`: auto
+ - `push_to_hub_model_id`: None
+ - `push_to_hub_organization`: None
+ - `mp_parameters`:
+ - `auto_find_batch_size`: False
+ - `full_determinism`: False
+ - `torchdynamo`: None
+ - `ray_scope`: last
+ - `ddp_timeout`: 1800
+ - `torch_compile`: False
+ - `torch_compile_backend`: None
+ - `torch_compile_mode`: None
+ - `include_tokens_per_second`: False
+ - `include_num_input_tokens_seen`: no
+ - `neftune_noise_alpha`: None
+ - `optim_target_modules`: None
+ - `batch_eval_metrics`: False
+ - `eval_on_start`: False
+ - `use_liger_kernel`: False
+ - `liger_kernel_config`: None
+ - `eval_use_gather_object`: False
+ - `average_tokens_across_devices`: True
+ - `prompts`: task: sentence similarity | query:
+ - `batch_sampler`: batch_sampler
+ - `multi_dataset_batch_sampler`: proportional
+ - `router_mapping`: {}
+ - `learning_rate_mapping`: {}
+
+ </details>
+
+ ### Training Logs
+ | Epoch | Step | Training Loss |
+ |:-----:|:----:|:-------------:|
+ | 0.8   | 100  | 0.0664        |
+ | 1.6   | 200  | 0.017         |
+ | 2.4   | 300  | 0.018         |
+ | 3.2   | 400  | 0.005         |
+ | 4.0   | 500  | 0.026         |
+ | 4.8   | 600  | 0.0119        |
+ | 5.6   | 700  | 0.0083        |
+ | 6.4   | 800  | 0.0198        |
+ | 7.2   | 900  | 0.0217        |
+ | 8.0   | 1000 | 0.0123        |
+ | 8.8   | 1100 | 0.0174        |
+ | 9.6   | 1200 | 0.0112        |
+
+ ### Framework Versions
+ - Python: 3.11.14
+ - Sentence Transformers: 5.1.2
+ - Transformers: 4.57.1
+ - PyTorch: 2.9.1+cu128
+ - Accelerate: 1.12.0
+ - Datasets: 4.4.1
+ - Tokenizers: 0.22.1
+
+ ## Citation
+
+ ### BibTeX
+
+ #### Sentence Transformers
+ ```bibtex
+ @inproceedings{reimers-2019-sentence-bert,
+     title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+     author = "Reimers, Nils and Gurevych, Iryna",
+     booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+     month = "11",
+     year = "2019",
+     publisher = "Association for Computational Linguistics",
+     url = "https://arxiv.org/abs/1908.10084",
+ }
+ ```
+
+ #### MultipleNegativesRankingLoss
+ ```bibtex
+ @misc{henderson2017efficient,
+     title={Efficient Natural Language Response Suggestion for Smart Reply},
+     author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
+     year={2017},
+     eprint={1705.00652},
+     archivePrefix={arXiv},
+     primaryClass={cs.CL}
+ }
+ ```
+
+ <!--
+ ## Glossary
+
+ *Clearly define terms in order to be accessible across audiences.*
+ -->
+
+ <!--
+ ## Model Card Authors
+
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+ -->
+
+ <!--
+ ## Model Card Contact
+
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+ -->
config.json ADDED
@@ -0,0 +1,60 @@
+ {
+     "_sliding_window_pattern": 6,
+     "architectures": [
+         "Gemma3TextModel"
+     ],
+     "attention_bias": false,
+     "attention_dropout": 0.0,
+     "attn_logit_softcapping": null,
+     "bos_token_id": 2,
+     "dtype": "float32",
+     "eos_token_id": 1,
+     "final_logit_softcapping": null,
+     "head_dim": 256,
+     "hidden_activation": "gelu_pytorch_tanh",
+     "hidden_size": 768,
+     "initializer_range": 0.02,
+     "intermediate_size": 1152,
+     "layer_types": [
+         "sliding_attention",
+         "sliding_attention",
+         "sliding_attention",
+         "sliding_attention",
+         "sliding_attention",
+         "full_attention",
+         "sliding_attention",
+         "sliding_attention",
+         "sliding_attention",
+         "sliding_attention",
+         "sliding_attention",
+         "full_attention",
+         "sliding_attention",
+         "sliding_attention",
+         "sliding_attention",
+         "sliding_attention",
+         "sliding_attention",
+         "full_attention",
+         "sliding_attention",
+         "sliding_attention",
+         "sliding_attention",
+         "sliding_attention",
+         "sliding_attention",
+         "full_attention"
+     ],
+     "max_position_embeddings": 2048,
+     "model_type": "gemma3_text",
+     "num_attention_heads": 3,
+     "num_hidden_layers": 24,
+     "num_key_value_heads": 1,
+     "pad_token_id": 0,
+     "query_pre_attn_scalar": 256,
+     "rms_norm_eps": 1e-06,
+     "rope_local_base_freq": 10000.0,
+     "rope_scaling": null,
+     "rope_theta": 1000000.0,
+     "sliding_window": 257,
+     "transformers_version": "4.57.1",
+     "use_bidirectional_attention": true,
+     "use_cache": true,
+     "vocab_size": 262144
+ }
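A quick sanity check on the config above: with `vocab_size` 262144 and `hidden_size` 768, the token-embedding table alone accounts for roughly two-thirds of the ~300M parameters implied by the 1,211,486,072-byte checkpoint (safetensors bytes divided by 4 for float32, ignoring the small header):

```python
# Back-of-the-envelope parameter accounting for this Gemma3TextModel config.
vocab_size, hidden_size = 262_144, 768
embed_params = vocab_size * hidden_size   # token embedding table
approx_total = 1_211_486_072 // 4         # model.safetensors bytes / 4 (float32)

print(embed_params)                       # 201326592
print(approx_total)                       # 302871518
print(round(embed_params / approx_total, 2))  # 0.66
```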
config_sentence_transformers.json ADDED
@@ -0,0 +1,26 @@
+ {
+     "model_type": "SentenceTransformer",
+     "__version__": {
+         "sentence_transformers": "5.1.2",
+         "transformers": "4.57.1",
+         "pytorch": "2.9.1+cu128"
+     },
+     "prompts": {
+         "query": "task: search result | query: ",
+         "document": "title: none | text: ",
+         "BitextMining": "task: search result | query: ",
+         "Clustering": "task: clustering | query: ",
+         "Classification": "task: classification | query: ",
+         "InstructionRetrieval": "task: code retrieval | query: ",
+         "MultilabelClassification": "task: classification | query: ",
+         "PairClassification": "task: sentence similarity | query: ",
+         "Reranking": "task: search result | query: ",
+         "Retrieval": "task: search result | query: ",
+         "Retrieval-query": "task: search result | query: ",
+         "Retrieval-document": "title: none | text: ",
+         "STS": "task: sentence similarity | query: ",
+         "Summarization": "task: summarization | query: "
+     },
+     "default_prompt_name": null,
+     "similarity_fn_name": "cosine"
+ }
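The `prompts` map above is what the model prepends to input text before tokenization: query text gets the `"query"` prefix and document text gets the `"document"` prefix. A string-level sketch of that behavior, outside the library (the real model applies these automatically via `encode_query` / `encode_document`):

```python
# Prompt prefixes copied from config_sentence_transformers.json.
PROMPTS = {
    "query": "task: search result | query: ",
    "document": "title: none | text: ",
}

def apply_prompt(text: str, prompt_name: str) -> str:
    """Prepend the named prompt prefix, as the model does before tokenizing."""
    return PROMPTS[prompt_name] + text

print(apply_prompt("dinosaur ethics", "query"))
# task: search result | query: dinosaur ethics
```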
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:308fc578fd3f7c7dbd37f11a00ab9266fb2fd611c2de03d900941985d2129c1d
+ size 1211486072
modules.json ADDED
@@ -0,0 +1,32 @@
+ [
+     {
+         "idx": 0,
+         "name": "0",
+         "path": "",
+         "type": "sentence_transformers.models.Transformer"
+     },
+     {
+         "idx": 1,
+         "name": "1",
+         "path": "1_Pooling",
+         "type": "sentence_transformers.models.Pooling"
+     },
+     {
+         "idx": 2,
+         "name": "2",
+         "path": "2_Dense",
+         "type": "sentence_transformers.models.Dense"
+     },
+     {
+         "idx": 3,
+         "name": "3",
+         "path": "3_Dense",
+         "type": "sentence_transformers.models.Dense"
+     },
+     {
+         "idx": 4,
+         "name": "4",
+         "path": "4_Normalize",
+         "type": "sentence_transformers.models.Normalize"
+     }
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+     "max_seq_length": 2048,
+     "do_lower_case": false
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,33 @@
+ {
+     "boi_token": "<start_of_image>",
+     "bos_token": {
+         "content": "<bos>",
+         "lstrip": false,
+         "normalized": false,
+         "rstrip": false,
+         "single_word": false
+     },
+     "eoi_token": "<end_of_image>",
+     "eos_token": {
+         "content": "<eos>",
+         "lstrip": false,
+         "normalized": false,
+         "rstrip": false,
+         "single_word": false
+     },
+     "image_token": "<image_soft_token>",
+     "pad_token": {
+         "content": "<pad>",
+         "lstrip": false,
+         "normalized": false,
+         "rstrip": false,
+         "single_word": false
+     },
+     "unk_token": {
+         "content": "<unk>",
+         "lstrip": false,
+         "normalized": false,
+         "rstrip": false,
+         "single_word": false
+     }
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:216e2a79606fe879c9f17c529c71cd241338407fd5646b595ffd3c4b9ea1d503
+ size 33385262
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff