Training in progress, step 396

Browse files

Files changed (5) hide show

README.md +108 -121
eval/Information-Retrieval_evaluation_chess-ir-tokens_results.csv +1 -3
eval/Information-Retrieval_evaluation_chess-ir_results.csv +1 -3
model.safetensors +1 -1
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -7,50 +7,64 @@ tags:
 - sentence-similarity
 - feature-extraction
 - generated_from_trainer
-- dataset_size:5832592
 - loss:MatryoshkaLoss
 - loss:MultipleNegativesRankingLoss
 widget:
-- source_sentence: crushing middlegame sacrifice short
   sentences:
-  - themes advantage middlegame short moves f4f7 c4d5 f7d5 b3d5 f4f7+c4d5 c4d5+f7d5
-    f7d5+b3d5
-  - themes advantage fork middlegame short opening Four Knights Game Four Knights
-    Game Italian Variation moves c8f5 d5e7 g8h8 e7f5 c8f5+d5e7 d5e7+g8h8 g8h8+e7f5
-  - themes crushing middlegame sacrifice short moves g6g4 e1e6 f7e6 d2h6 g6g4+e1e6
-    e1e6+f7e6 f7e6+d2h6
-- source_sentence: crushing endgame long
   sentences:
-  - themes crushing endgame long moves e2c2 f5g5 c2g2 g5h6 g2h2 h6g7 e2c2+f5g5 f5g5+c2g2
-    c2g2+g5h6 g5h6+g2h2 g2h2+h6g7
-  - themes crushing endgame fork hangingPiece long moves c7c3 b2c3 d5f7 g5g7 f7g7
-    f8g7 c7c3+b2c3 b2c3+d5f7 d5f7+g5g7 g5g7+f7g7 f7g7+f8g7
-  - themes crushing intermezzo middlegame short moves c5b4 d1d3 f6e7 a3b4 c5b4+d1d3
-    d1d3+f6e7 f6e7+a3b4
-- source_sentence: crushing endgame fork short
   sentences:
-  - themes crushing endgame rookEndgame short skewer moves b4b3 h7h8 f8g7 h8b8 b4b3+h7h8
-    h7h8+f8g7 f8g7+h8b8
-  - themes crushing endgame fork short moves f2f1 f3d2 f1e2 d2c4 f2f1+f3d2 f3d2+f1e2
-    f1e2+d2c4
-  - themes mate mateIn1 middlegame oneMove moves d7d6 g3g7 d7d6+g3g7
-- source_sentence: crushing fork middlegame veryLong
   sentences:
-  - themes crushing endgame fork master short moves f7f5 a6g6 g5g6 h4g6 f7f5+a6g6
-    a6g6+g5g6 g5g6+h4g6
-  - themes attraction discoveredCheck doubleCheck long mate mateIn3 opening operaMate
-    sacrifice opening Bishops Opening Bishops Opening Ponziani Gambit moves h8g8 f6d8
-    e8d8 d2g5 d8e8 d1d8 h8g8+f6d8 f6d8+e8d8 e8d8+d2g5 d2g5+d8e8 d8e8+d1d8
-  - themes crushing fork middlegame veryLong moves h6h7 e8h5 f3g3 c5e3 h7h8q e3f4
-    g3g2 h5g4 g2h1 f4d2 a1g1 g4f3 h6h7+e8h5 e8h5+f3g3 f3g3+c5e3 c5e3+h7h8q h7h8q+e3f4
-    e3f4+g3g2 g3g2+h5g4 h5g4+g2h1 g2h1+f4d2 f4d2+a1g1 a1g1+g4f3
-- source_sentence: endgame mate mateIn2 pillsburysMate short
   sentences:
-  - themes bishopEndgame crushing defensiveMove endgame master short moves g3g4 h5h4
-    f4g5 h6g5 g3g4+h5h4 h5h4+f4g5 f4g5+h6g5
-  - themes endgame mate mateIn2 pillsburysMate short moves c4e3 b5b8 f5c8 b8c8 c4e3+b5b8
-    b5b8+f5c8 f5c8+b8c8
-  - themes endgame mate mateIn1 oneMove moves e5f4 g3g1 e5f4+g3g1
 pipeline_tag: sentence-similarity
 library_name: sentence-transformers
 metrics:
@@ -74,31 +88,31 @@ model-index:
       type: chess-ir
     metrics:
     - type: cosine_accuracy@1
-      value: 0.01
       name: Cosine Accuracy@1
     - type: cosine_accuracy@10
-      value: 0.055
       name: Cosine Accuracy@10
     - type: cosine_precision@1
-      value: 0.01
       name: Cosine Precision@1
     - type: cosine_precision@10
-      value: 0.006
       name: Cosine Precision@10
     - type: cosine_recall@1
-      value: 0.003333333333333333
       name: Cosine Recall@1
     - type: cosine_recall@10
-      value: 0.019999999999999997
       name: Cosine Recall@10
     - type: cosine_ndcg@10
-      value: 0.014141653573050736
       name: Cosine Ndcg@10
     - type: cosine_mrr@10
-      value: 0.02086111111111111
       name: Cosine Mrr@10
     - type: cosine_map@100
-      value: 0.012561680163147302
       name: Cosine Map@100
   - task:
       type: information-retrieval
@@ -108,31 +122,31 @@ model-index:
       type: chess-ir-tokens
     metrics:
     - type: cosine_accuracy@1
-      value: 0.037037037037037035
       name: Cosine Accuracy@1
     - type: cosine_accuracy@10
-      value: 0.21164021164021163
       name: Cosine Accuracy@10
     - type: cosine_precision@1
-      value: 0.037037037037037035
       name: Cosine Precision@1
     - type: cosine_precision@10
-      value: 0.047619047619047616
       name: Cosine Precision@10
     - type: cosine_recall@1
-      value: 0.0025144161912381744
       name: Cosine Recall@1
     - type: cosine_recall@10
-      value: 0.02212990521949281
       name: Cosine Recall@10
     - type: cosine_ndcg@10
-      value: 0.0517090496324674
       name: Cosine Ndcg@10
     - type: cosine_mrr@10
-      value: 0.08710842361636012
       name: Cosine Mrr@10
     - type: cosine_map@100
-      value: 0.028156284478181654
       name: Cosine Map@100
 ---
@@ -184,12 +198,12 @@ from sentence_transformers import SentenceTransformer
 model = SentenceTransformer("oneryalcin/static-embedding-chess")
 # Run inference
 queries = [
-    'endgame mate mateIn2 pillsburysMate short',
 ]
 documents = [
-    'themes endgame mate mateIn2 pillsburysMate short moves c4e3 b5b8 f5c8 b8c8 c4e3+b5b8 b5b8+f5c8 f5c8+b8c8',
-    'themes bishopEndgame crushing defensiveMove endgame master short moves g3g4 h5h4 f4g5 h6g5 g3g4+h5h4 h5h4+f4g5 f4g5+h6g5',
-    'themes endgame mate mateIn1 oneMove moves e5f4 g3g1 e5f4+g3g1',
 ]
 query_embeddings = model.encode_query(queries)
 document_embeddings = model.encode_document(documents)
@@ -199,7 +213,7 @@ print(query_embeddings.shape, document_embeddings.shape)
 # Get the similarity scores for the embeddings
 similarities = model.similarity(query_embeddings, document_embeddings)
 print(similarities)
-# tensor([[ 0.9826, -0.1530,  0.0366]])
 ```
 <!--
 ### Direct Usage (Transformers)
@@ -234,17 +248,17 @@ You can finetune this model on your own dataset.
 * Datasets: `chess-ir` and `chess-ir-tokens`
 * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.sentence_transformer.evaluation.InformationRetrievalEvaluator)
-| Metric              | chess-ir   | chess-ir-tokens |
-|:--------------------|:-----------|:----------------|
-| cosine_accuracy@1   | 0.01       | 0.037           |
-| cosine_accuracy@10  | 0.055      | 0.2116          |
-| cosine_precision@1  | 0.01       | 0.037           |
-| cosine_precision@10 | 0.006      | 0.0476          |
-| cosine_recall@1     | 0.0033     | 0.0025          |
-| cosine_recall@10    | 0.02       | 0.0221          |
-| **cosine_ndcg@10**  | **0.0141** | **0.0517**      |
-| cosine_mrr@10       | 0.0209     | 0.0871          |
-| cosine_map@100      | 0.0126     | 0.0282          |
 <!--
 ## Bias, Risks and Limitations
@@ -264,20 +278,20 @@ You can finetune this model on your own dataset.
 #### Unnamed Dataset
-* Size: 5,832,592 training samples
 * Columns: <code>anchor</code> and <code>positive</code>
 * Approximate statistics based on the first 100 samples:
   |          | anchor                                                                                          | positive                                                                                         |
   |:---------|:------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------|
   | type     | string                                                                                          | string                                                                                           |
   | modality | text                                                                                            | text                                                                                             |
-  | details  | <ul><li>min: 14 characters</li><li>mean: 45.72 characters</li><li>max: 107 characters</li></ul> | <ul><li>min: 61 characters</li><li>mean: 121.98 characters</li><li>max: 233 characters</li></ul> |
 * Samples:
-  | anchor                                                                 | positive                                                                                                                              |
-  |:-----------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------|
-  | <code>crushing endgame fork short</code>                               | <code>themes crushing endgame fork short moves f7f6 g5e6 g7h6 e6c5 f7f6+g5e6 g5e6+g7h6 g7h6+e6c5</code>                               |
-  | <code>crushing discoveredAttack kingsideAttack middlegame short</code> | <code>themes crushing discoveredAttack kingsideAttack middlegame short moves e4g3 f3g3 f2g3 h5e2 e4g3+f3g3 f3g3+f2g3 f2g3+h5e2</code> |
-  | <code>crushing middlegame short</code>                                 | <code>themes crushing middlegame short moves d7c8 e2g4 c8c7 c3b5 d7c8+e2g4 e2g4+c8c7 c8c7+c3b5</code>                                 |
 * Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
   ```json
   {
@@ -303,12 +317,12 @@ You can finetune this model on your own dataset.
 ### Training Hyperparameters
 #### Non-Default Hyperparameters
-- `per_device_train_batch_size`: 2048
-- `num_train_epochs`: 1
-- `learning_rate`: 0.05
 - `warmup_steps`: 0.1
 - `weight_decay`: 0.01
-- `per_device_eval_batch_size`: 2048
 - `push_to_hub`: True
 - `hub_model_id`: oneryalcin/static-embedding-chess
 - `load_best_model_at_end`: True
@@ -317,10 +331,10 @@ You can finetune this model on your own dataset.
 #### All Hyperparameters
 <details><summary>Click to expand</summary>
-- `per_device_train_batch_size`: 2048
-- `num_train_epochs`: 1
 - `max_steps`: -1
-- `learning_rate`: 0.05
 - `lr_scheduler_type`: linear
 - `lr_scheduler_kwargs`: None
 - `warmup_steps`: 0.1
@@ -361,7 +375,7 @@ You can finetune this model on your own dataset.
 - `trackio_space_id`: None
 - `trackio_bucket_id`: None
 - `trackio_static_space_id`: None
-- `per_device_eval_batch_size`: 2048
 - `prediction_loss_only`: True
 - `eval_on_start`: False
 - `eval_do_concat_batches`: True
@@ -422,46 +436,19 @@ You can finetune this model on your own dataset.
 ### Training Logs
 | Epoch  | Step | Training Loss | chess-ir_cosine_ndcg@10 | chess-ir-tokens_cosine_ndcg@10 |
 |:------:|:----:|:-------------:|:-----------------------:|:------------------------------:|
-| -1     | -1   | -             | 0.0087                  | 0.0476                         |
-| 0.0004 | 1    | 25.5090       | -                       | -                              |
-| 0.0102 | 29   | 24.7398       | -                       | -                              |
-| 0.0204 | 58   | 20.8309       | -                       | -                              |
-| 0.0305 | 87   | 16.5176       | -                       | -                              |
-| 0.0407 | 116  | 12.8534       | -                       | -                              |
-| 0.0509 | 145  | 10.2759       | -                       | -                              |
-| 0.0611 | 174  | 8.7313        | -                       | -                              |
-| 0.0713 | 203  | 7.8373        | -                       | -                              |
-| 0.0815 | 232  | 7.3665        | -                       | -                              |
-| 0.0916 | 261  | 7.0534        | -                       | -                              |
-| 0.1001 | 285  | -             | 0.0403                  | 0.0964                         |
-| 0.1018 | 290  | 6.8225        | -                       | -                              |
-| 0.1120 | 319  | 6.6948        | -                       | -                              |
-| 0.1222 | 348  | 6.6811        | -                       | -                              |
-| 0.1324 | 377  | 6.5559        | -                       | -                              |
-| 0.1426 | 406  | 6.6007        | -                       | -                              |
-| 0.1527 | 435  | 6.5704        | -                       | -                              |
-| 0.1629 | 464  | 6.4524        | -                       | -                              |
-| 0.1731 | 493  | 6.4562        | -                       | -                              |
-| 0.1833 | 522  | 6.5016        | -                       | -                              |
-| 0.1935 | 551  | 6.4405        | -                       | -                              |
-| 0.2001 | 570  | -             | 0.0165                  | 0.0624                         |
-| 0.2037 | 580  | 6.5354        | -                       | -                              |
-| 0.2138 | 609  | 6.4492        | -                       | -                              |
-| 0.2240 | 638  | 6.4807        | -                       | -                              |
-| 0.2342 | 667  | 6.4568        | -                       | -                              |
-| 0.2444 | 696  | 6.4335        | -                       | -                              |
-| 0.2546 | 725  | 6.4693        | -                       | -                              |
-| 0.2647 | 754  | 6.4870        | -                       | -                              |
-| 0.2749 | 783  | 6.4468        | -                       | -                              |
-| 0.2851 | 812  | 6.4680        | -                       | -                              |
-| 0.2953 | 841  | 6.3538        | -                       | -                              |
-| 0.3002 | 855  | -             | 0.0141                  | 0.0517                         |
 ### Training Time
-- **Training**: 49.8 seconds
 - **Evaluation**: 0.1 seconds
-- **Total**: 49.9 seconds
 ### Framework Versions
 - Python: 3.12.10

 - sentence-similarity
 - feature-extraction
 - generated_from_trainer
+- dataset_size:1619946
 - loss:MatryoshkaLoss
 - loss:MultipleNegativesRankingLoss
 widget:
+- source_sentence: kingsideAttack master [UNK] mateIn1 oneMove [UNK] [UNK] Defense
+    Sicilian Defense [UNK] Attack
   sentences:
+  - themes kingsideAttack master mate mateIn1 oneMove opening opening Sicilian Defense
+    Sicilian Defense Nyezhmetdinov-Rossolimo Attack moves f3e5 c6g2 f3e5+c6g2
+  - themes crushing middlegame queensideAttack sacrifice veryLong moves d7c7 b3e6
+    f7e6 e1e6 c8b8 f6d7 c7d7 e6d7 d7c7+b3e6 b3e6+f7e6 f7e6+e1e6 e1e6+c8b8 c8b8+f6d7
+    f6d7+c7d7 c7d7+e6d7
+  - themes advancedPawn crushing endgame veryLong zugzwang moves d4e6 c4e6 f7e6 h7g6
+    f8g8 f6f7 g8f8 g6f6 e6e5 f6e5 d4e6+c4e6 c4e6+f7e6 f7e6+h7g6 h7g6+f8g8 f8g8+f6f7
+    f6f7+g8f8 g8f8+g6f6 g6f6+e6e5 e6e5+f6e5
+- source_sentence: crushing intermezzo master middlegame sacrifice veryLong
   sentences:
+  - themes crushing endgame master masterVsMaster veryLong moves f5f6 c5e6 h5g6 h7g6
+    c3f3 d5b4 f3c6 b4c6 f5f6+c5e6 c5e6+h5g6 h5g6+h7g6 h7g6+c3f3 c3f3+d5b4 d5b4+f3c6
+    f3c6+b4c6
+  - themes advancedPawn advantage endgame long master promotion rookEndgame moves
+    h3h2 g1g2 g3g2 a6a7 h2h1q a7b8q h3h2+g1g2 g1g2+g3g2 g3g2+a6a7 a6a7+h2h1q h2h1q+a7b8q
+  - themes crushing intermezzo master middlegame sacrifice veryLong moves a6c4 d6f6
+    f1f6 h6h1 g1f2 h8f6 f2e2 f6e7 a6c4+d6f6 d6f6+f1f6 f1f6+h6h1 h6h1+g1f2 g1f2+h8f6
+    h8f6+f2e2 f2e2+f6e7
+- source_sentence: advantage hangingPiece middlegame short Nimzo-Larsen Attack Nimzo-Larsen
+    Attack Modern [UNK]
   sentences:
+  - themes hangingPiece mate mateIn1 middlegame oneMove opening Trompowsky Attack
+    Trompowsky Attack Classical Defense moves f4g4 d8d1 f4g4+d8d1
+  - themes advancedPawn crushing defensiveMove endgame master quietMove veryLong moves
+    f1e1 h3h2 f8h8 f5h4 h8e5 g3g2 e5e4 h4f3 f1e1+h3h2 h3h2+f8h8 f8h8+f5h4 f5h4+h8e5
+    h8e5+g3g2 g3g2+e5e4 e5e4+h4f3
+  - themes advantage hangingPiece middlegame short opening Nimzo-Larsen Attack Nimzo-Larsen
+    Attack Modern Variation moves f5d7 b5g5 e3e2 d1d2 f5d7+b5g5 b5g5+e3e2 e3e2+d1d2
+- source_sentence: '[UNK] defensiveMove [UNK] [UNK] veryLong'
   sentences:
+  - themes advantage discoveredAttack exposedKing middlegame trappedPiece veryLong
+    opening French Defense French Defense Orthoschnapp Gambit moves e2d1 c4e3 d2e3
+    b5f1 d1d2 f1g2 g1e2 g2h1 e2d1+c4e3 c4e3+d2e3 d2e3+b5f1 b5f1+d1d2 d1d2+f1g2 f1g2+g1e2
+    g1e2+g2h1
+  - themes crushing defensiveMove enPassant middlegame veryLong moves g2e2 a3f3 f7f5
+    e5f6 c4f4 g3f4 e2g2 f3g3 g2e2+a3f3 a3f3+f7f5 f7f5+e5f6 e5f6+c4f4 c4f4+g3f4 g3f4+e2g2
+    e2g2+f3g3
+  - themes advancedPawn bishopEndgame crushing defensiveMove endgame veryLong moves
+    f3e4 a3a2 g6g7 e6f7 e5e6 f7g8 e6e7 c5e7 f3e4+a3a2 a3a2+g6g7 g6g7+e6f7 e6f7+e5e6
+    e5e6+f7g8 f7g8+e6e7 e6e7+c5e7
+- source_sentence: '[UNK] deflection discoveredAttack [UNK] queensideAttack short
+    Philidor Defense [UNK] Defense Other variations'
   sentences:
+  - themes crushing middlegame pin queensideAttack short opening Sicilian Defense
+    Sicilian Defense Najdorf Variation moves c3d5 c5b3 c1b1 b3d2 c3d5+c5b3 c5b3+c1b1
+    c1b1+b3d2
+  - themes crushing deflection discoveredAttack middlegame queensideAttack short opening
+    Philidor Defense Philidor Defense Other variations moves d3c3 d4b3 c1b1 d7d1 d3c3+d4b3
+    d4b3+c1b1 c1b1+d7d1
+  - themes advantage discoveredAttack middlegame short opening Philidor Defense Philidor
+    Defense Other variations moves e4d4 d3f5 c8b8 d1d4 e4d4+d3f5 d3f5+c8b8 c8b8+d1d4
 pipeline_tag: sentence-similarity
 library_name: sentence-transformers
 metrics:
       type: chess-ir
     metrics:
     - type: cosine_accuracy@1
+      value: 0.06
       name: Cosine Accuracy@1
     - type: cosine_accuracy@10
+      value: 0.255
       name: Cosine Accuracy@10
     - type: cosine_precision@1
+      value: 0.06
       name: Cosine Precision@1
     - type: cosine_precision@10
+      value: 0.032
       name: Cosine Precision@10
     - type: cosine_recall@1
+      value: 0.02
       name: Cosine Recall@1
     - type: cosine_recall@10
+      value: 0.10666666666666665
       name: Cosine Recall@10
     - type: cosine_ndcg@10
+      value: 0.07998649265394674
       name: Cosine Ndcg@10
     - type: cosine_mrr@10
+      value: 0.11224206349206348
       name: Cosine Mrr@10
     - type: cosine_map@100
+      value: 0.06593273410392075
       name: Cosine Map@100
   - task:
       type: information-retrieval
       type: chess-ir-tokens
     metrics:
     - type: cosine_accuracy@1
+      value: 0.12698412698412698
       name: Cosine Accuracy@1
     - type: cosine_accuracy@10
+      value: 0.3544973544973545
       name: Cosine Accuracy@10
     - type: cosine_precision@1
+      value: 0.12698412698412698
       name: Cosine Precision@1
     - type: cosine_precision@10
+      value: 0.10476190476190476
       name: Cosine Precision@10
     - type: cosine_recall@1
+      value: 0.0066613186633905
       name: Cosine Recall@1
     - type: cosine_recall@10
+      value: 0.0462228099305809
       name: Cosine Recall@10
     - type: cosine_ndcg@10
+      value: 0.11807198905104373
       name: Cosine Ndcg@10
     - type: cosine_mrr@10
+      value: 0.18598303518938442
       name: Cosine Mrr@10
     - type: cosine_map@100
+      value: 0.06497812950052975
       name: Cosine Map@100
 ---
 model = SentenceTransformer("oneryalcin/static-embedding-chess")
 # Run inference
 queries = [
+    '[UNK] deflection discoveredAttack [UNK] queensideAttack short Philidor Defense [UNK] Defense Other variations',
 ]
 documents = [
+    'themes crushing deflection discoveredAttack middlegame queensideAttack short opening Philidor Defense Philidor Defense Other variations moves d3c3 d4b3 c1b1 d7d1 d3c3+d4b3 d4b3+c1b1 c1b1+d7d1',
+    'themes advantage discoveredAttack middlegame short opening Philidor Defense Philidor Defense Other variations moves e4d4 d3f5 c8b8 d1d4 e4d4+d3f5 d3f5+c8b8 c8b8+d1d4',
+    'themes crushing middlegame pin queensideAttack short opening Sicilian Defense Sicilian Defense Najdorf Variation moves c3d5 c5b3 c1b1 b3d2 c3d5+c5b3 c5b3+c1b1 c1b1+b3d2',
 ]
 query_embeddings = model.encode_query(queries)
 document_embeddings = model.encode_document(documents)
 # Get the similarity scores for the embeddings
 similarities = model.similarity(query_embeddings, document_embeddings)
 print(similarities)
+# tensor([[0.6231, 0.4530, 0.1689]])
 ```
 <!--
 ### Direct Usage (Transformers)
 * Datasets: `chess-ir` and `chess-ir-tokens`
 * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.sentence_transformer.evaluation.InformationRetrievalEvaluator)
+| Metric              | chess-ir | chess-ir-tokens |
+|:--------------------|:---------|:----------------|
+| cosine_accuracy@1   | 0.06     | 0.127           |
+| cosine_accuracy@10  | 0.255    | 0.3545          |
+| cosine_precision@1  | 0.06     | 0.127           |
+| cosine_precision@10 | 0.032    | 0.1048          |
+| cosine_recall@1     | 0.02     | 0.0067          |
+| cosine_recall@10    | 0.1067   | 0.0462          |
+| **cosine_ndcg@10**  | **0.08** | **0.1181**      |
+| cosine_mrr@10       | 0.1122   | 0.186           |
+| cosine_map@100      | 0.0659   | 0.065           |
 <!--
 ## Bias, Risks and Limitations
 #### Unnamed Dataset
+* Size: 1,619,946 training samples
 * Columns: <code>anchor</code> and <code>positive</code>
 * Approximate statistics based on the first 100 samples:
   |          | anchor                                                                                          | positive                                                                                         |
   |:---------|:------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------|
   | type     | string                                                                                          | string                                                                                           |
   | modality | text                                                                                            | text                                                                                             |
+  | details  | <ul><li>min: 21 characters</li><li>mean: 75.57 characters</li><li>max: 122 characters</li></ul> | <ul><li>min: 86 characters</li><li>mean: 158.13 characters</li><li>max: 256 characters</li></ul> |
 * Samples:
+  | anchor                                                                                                               | positive                                                                                                                                                                               |
+  |:---------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+  | <code>kingsideAttack mate mateIn1 middlegame oneMove Horwitz Defense Horwitz Defense [UNK] variations</code>         | <code>themes kingsideAttack mate mateIn1 middlegame oneMove opening Horwitz Defense Horwitz Defense Other variations moves f7h8 g6g2 f7h8+g6g2</code>                                  |
+  | <code>backRankMate endgame mate mateIn2 short Kings Knight Opening Kings Knight Opening [UNK] [UNK]</code>           | <code>themes backRankMate endgame mate mateIn2 short opening Kings Knight Opening Kings Knight Opening Other variations moves c5d4 c3c8 g5d8 c8d8 c5d4+c3c8 c3c8+g5d8 g5d8+c8d8</code> |
+  | <code>kingsideAttack mate mateIn1 middlegame oneMove Sicilian Defense Sicilian Defense Paulsen-Basman Defense</code> | <code>themes kingsideAttack mate mateIn1 middlegame oneMove opening Sicilian Defense Sicilian Defense Paulsen-Basman Defense moves g3f3 c7h2 g3f3+c7h2</code>                          |
 * Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
   ```json
   {
 ### Training Hyperparameters
 #### Non-Default Hyperparameters
+- `per_device_train_batch_size`: 4096
+- `num_train_epochs`: 20
+- `learning_rate`: 0.01
 - `warmup_steps`: 0.1
 - `weight_decay`: 0.01
+- `per_device_eval_batch_size`: 4096
 - `push_to_hub`: True
 - `hub_model_id`: oneryalcin/static-embedding-chess
 - `load_best_model_at_end`: True
 #### All Hyperparameters
 <details><summary>Click to expand</summary>
+- `per_device_train_batch_size`: 4096
+- `num_train_epochs`: 20
 - `max_steps`: -1
+- `learning_rate`: 0.01
 - `lr_scheduler_type`: linear
 - `lr_scheduler_kwargs`: None
 - `warmup_steps`: 0.1
 - `trackio_space_id`: None
 - `trackio_bucket_id`: None
 - `trackio_static_space_id`: None
+- `per_device_eval_batch_size`: 4096
 - `prediction_loss_only`: True
 - `eval_on_start`: False
 - `eval_do_concat_batches`: True
 ### Training Logs
 | Epoch  | Step | Training Loss | chess-ir_cosine_ndcg@10 | chess-ir-tokens_cosine_ndcg@10 |
 |:------:|:----:|:-------------:|:-----------------------:|:------------------------------:|
+| -1     | -1   | -             | 0.0123                  | 0.0561                         |
+| 0.0025 | 1    | 27.3123       | -                       | -                              |
+| 0.2020 | 80   | 26.3304       | -                       | -                              |
+| 0.4040 | 160  | 22.2114       | -                       | -                              |
+| 0.6061 | 240  | 17.4522       | -                       | -                              |
+| 0.8081 | 320  | 12.8864       | -                       | -                              |
+| 1.0    | 396  | -             | 0.0800                  | 0.1181                         |
 ### Training Time
+- **Training**: 57.6 seconds
 - **Evaluation**: 0.1 seconds
+- **Total**: 57.7 seconds
 ### Framework Versions
 - Python: 3.12.10

eval/Information-Retrieval_evaluation_chess-ir-tokens_results.csv CHANGED Viewed

@@ -1,4 +1,2 @@
 epoch,steps,cosine-Accuracy@1,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
-0.10007022471910113,285,0.1111111111111111,0.30158730158730157,0.1111111111111111,0.008191309640952804,0.0835978835978836,0.03797928598263959,0.16048962794994542,0.0963937043281825,0.05480807151213741
-0.20014044943820225,570,0.05291005291005291,0.21164021164021163,0.05291005291005291,0.0032049522325313766,0.056613756613756616,0.023108435943979263,0.09312379272696733,0.062386658509055025,0.0369514194632888
-0.30021067415730335,855,0.037037037037037035,0.21164021164021163,0.037037037037037035,0.0025144161912381744,0.047619047619047616,0.02212990521949281,0.08710842361636012,0.0517090496324674,0.028156284478181654


1	epoch,steps,cosine-Accuracy@1,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
2	+ 1.0,396,0.12698412698412698,0.3544973544973545,0.12698412698412698,0.0066613186633905,0.10476190476190476,0.0462228099305809,0.18598303518938442,0.11807198905104373,0.06497812950052975

eval/Information-Retrieval_evaluation_chess-ir_results.csv CHANGED Viewed

@@ -1,4 +1,2 @@
 epoch,steps,cosine-Accuracy@1,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
-0.10007022471910113,285,0.02,0.135,0.02,0.006666666666666666,0.0175,0.05833333333333333,0.05090277777777777,0.040260232965004236,0.03468285594907049
-0.20014044943820225,570,0.01,0.06,0.01,0.003333333333333333,0.006999999999999999,0.02333333333333333,0.021797619047619052,0.0165414546823231,0.01826039464782554
-0.30021067415730335,855,0.01,0.055,0.01,0.003333333333333333,0.006,0.019999999999999997,0.02086111111111111,0.014141653573050736,0.012561680163147302


1	epoch,steps,cosine-Accuracy@1,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
2	+ 1.0,396,0.06,0.255,0.06,0.02,0.032,0.10666666666666665,0.11224206349206348,0.07998649265394674,0.06593273410392075

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a585943d76bec5e52fc185f5414dba80093b3693261e4921542d31ea01c10fb8
 size 8880224

 version https://git-lfs.github.com/spec/v1
+oid sha256:0946dae682df6739a9cd9ab6a2c4699a9557dcff45cc062b465309d6d403b2e3
 size 8880224

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1f79a123f09dc75fd3488fe5caef388a8c542815dabe7ec16811867955b17a2
 size 5713

 version https://git-lfs.github.com/spec/v1
+oid sha256:426fc88cc7388ad3485f0a0e98b7edcbc0f7e7ad469707d5448cc9275c652053
 size 5713