TakoData
/

chart-reranker

@@ -4,7 +4,7 @@ tags:
 - cross-encoder
 - reranker
 - generated_from_trainer
-- dataset_size:28274
 - loss:BinaryCrossEntropyLoss
 base_model: Alibaba-NLP/gte-multilingual-reranker-base
 pipeline_tag: text-ranking
@@ -23,10 +23,10 @@ model-index:
       type: validation
     metrics:
     - type: pearson
-      value: 0.871938379575355
       name: Pearson
     - type: spearman
-      value: 0.8696556409896702
       name: Spearman
 ---
@@ -70,11 +70,11 @@ from sentence_transformers import CrossEncoder
 model = CrossEncoder("cross_encoder_model_id")
 # Get scores for pairs of texts
 pairs = [
-    ['Which securities listed on ENXTAM were the top performers (highest returns) in the period shown?', 'Title: "Pedro\'s List Financials"\nCollections: Companies\nChart Type: company:finance\nCanonical forms: "Financials"="Financials Overview", "Pedro\'s List"="Pedro\'s List, Inc."\nSources: S&P Global'],
-    ['Best crypto platforms compliant with MiCA 2025, reliable, secure, and transparent.', 'Title: "Top Performing Crypto"\nCollections: Crypto Currencies\nDatasets: CryptoAssetMetrics\nChart Type: categorical_bar'],
-    ['implications fiscales et sociales formes juridiques entreprises France 2025 apports industrie', 'Title: "Eagle Industries Overview"\nCollections: Companies\nChart Type: company:finance\nCanonical forms: "Eagle Industries"="Eagle Industry Co.,Ltd.", "Overview"="Stock Overview"\nSources: S&P Global'],
-    ['Which US-listed stocks on the NYSE and NASDAQ are the top performers (highest returns)?', 'Title: "Nasdaq Inc. Financials"\nCollections: Companies\nChart Type: company:finance\nCanonical forms: "Nasdaq Inc."="Nasdaq, Inc.", "Financials"="Financials Overview"\nSources: S&P Global'],
-    ['How many children are recorded with wheezing, and has that number gone up or down over time?', 'Title: "Mani Number (Annual), Universal Music Number (Quarterly)"\nCollections: Companies\nDatasets: StandardIncomeStatement\nChart Type: timeseries:eav_v3\nCanonical forms: "Number"="inventory"'],
 ]
 scores = model.predict(pairs)
 print(scores.shape)
@@ -82,13 +82,13 @@ print(scores.shape)
 # Or rank different texts based on similarity to a single text
 ranks = model.rank(
-    'Which securities listed on ENXTAM were the top performers (highest returns) in the period shown?',
     [
-        'Title: "Pedro\'s List Financials"\nCollections: Companies\nChart Type: company:finance\nCanonical forms: "Financials"="Financials Overview", "Pedro\'s List"="Pedro\'s List, Inc."\nSources: S&P Global',
-        'Title: "Top Performing Crypto"\nCollections: Crypto Currencies\nDatasets: CryptoAssetMetrics\nChart Type: categorical_bar',
-        'Title: "Eagle Industries Overview"\nCollections: Companies\nChart Type: company:finance\nCanonical forms: "Eagle Industries"="Eagle Industry Co.,Ltd.", "Overview"="Stock Overview"\nSources: S&P Global',
-        'Title: "Nasdaq Inc. Financials"\nCollections: Companies\nChart Type: company:finance\nCanonical forms: "Nasdaq Inc."="Nasdaq, Inc.", "Financials"="Financials Overview"\nSources: S&P Global',
-        'Title: "Mani Number (Annual), Universal Music Number (Quarterly)"\nCollections: Companies\nDatasets: StandardIncomeStatement\nChart Type: timeseries:eav_v3\nCanonical forms: "Number"="inventory"',
     ]
 )
 # [{'corpus_id': ..., 'score': ...}, {'corpus_id': ..., 'score': ...}, ...]
@@ -129,8 +129,8 @@ You can finetune this model on your own dataset.
 | Metric       | Value      |
 |:-------------|:-----------|
-| pearson      | 0.8719     |
-| **spearman** | **0.8697** |
 <!--
 ## Bias, Risks and Limitations
@@ -150,19 +150,19 @@ You can finetune this model on your own dataset.
 #### Unnamed Dataset
-* Size: 28,274 training samples
 * Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
 * Approximate statistics based on the first 1000 samples:
   |         | sentence_0                                                                                     | sentence_1                                                                                       | label                                                          |
   |:--------|:-----------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------|:---------------------------------------------------------------|
   | type    | string                                                                                         | string                                                                                           | float                                                          |
-  | details | <ul><li>min: 3 characters</li><li>mean: 82.52 characters</li><li>max: 939 characters</li></ul> | <ul><li>min: 75 characters</li><li>mean: 171.07 characters</li><li>max: 436 characters</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.43</li><li>max: 1.0</li></ul> |
 * Samples:
-  | sentence_0                                                                                                    | sentence_1                                                                                                                                                                                                                    | label             |
-  |:--------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------|
-  | <code>Which securities listed on ENXTAM were the top performers (highest returns) in the period shown?</code> | <code>Title: "Pedro's List Financials"<br>Collections: Companies<br>Chart Type: company:finance<br>Canonical forms: "Financials"="Financials Overview", "Pedro's List"="Pedro's List, Inc."<br>Sources: S&P Global</code>     | <code>0.0</code>  |
-  | <code>Best crypto platforms compliant with MiCA 2025, reliable, secure, and transparent.</code>               | <code>Title: "Top Performing Crypto"<br>Collections: Crypto Currencies<br>Datasets: CryptoAssetMetrics<br>Chart Type: categorical_bar</code>                                                                                  | <code>0.25</code> |
-  | <code>implications fiscales et sociales formes juridiques entreprises France 2025 apports industrie</code>    | <code>Title: "Eagle Industries Overview"<br>Collections: Companies<br>Chart Type: company:finance<br>Canonical forms: "Eagle Industries"="Eagle Industry Co.,Ltd.", "Overview"="Stock Overview"<br>Sources: S&P Global</code> | <code>0.0</code>  |
 * Loss: [<code>BinaryCrossEntropyLoss</code>](https://sbert.net/docs/package_reference/cross_encoder/losses.html#binarycrossentropyloss) with these parameters:
   ```json
   {
@@ -308,47 +308,38 @@ You can finetune this model on your own dataset.
 ### Training Logs
 | Epoch  | Step | Training Loss | validation_spearman |
 |:------:|:----:|:-------------:|:-------------------:|
-| 0.1131 | 100  | -             | 0.7659              |
-| 0.2262 | 200  | -             | 0.7941              |
-| 0.3394 | 300  | -             | 0.8119              |
-| 0.4525 | 400  | -             | 0.8237              |
-| 0.5656 | 500  | 0.4773        | 0.8284              |
-| 0.6787 | 600  | -             | 0.8304              |
-| 0.7919 | 700  | -             | 0.8361              |
-| 0.9050 | 800  | -             | 0.8454              |
-| 1.0    | 884  | -             | 0.8382              |
-| 1.0181 | 900  | -             | 0.8438              |
-| 1.1312 | 1000 | 0.4184        | 0.8469              |
-| 1.2443 | 1100 | -             | 0.8458              |
-| 1.3575 | 1200 | -             | 0.8492              |
-| 1.4706 | 1300 | -             | 0.8514              |
-| 1.5837 | 1400 | -             | 0.8567              |
-| 1.6968 | 1500 | 0.3897        | 0.8582              |
-| 1.8100 | 1600 | -             | 0.8586              |
-| 1.9231 | 1700 | -             | 0.8582              |
-| 2.0    | 1768 | -             | 0.8587              |
-| 2.0362 | 1800 | -             | 0.8583              |
-| 2.1493 | 1900 | -             | 0.8597              |
-| 2.2624 | 2000 | 0.3709        | 0.8596              |
-| 2.3756 | 2100 | -             | 0.8608              |
-| 2.4887 | 2200 | -             | 0.8598              |
-| 2.6018 | 2300 | -             | 0.8623              |
-| 2.7149 | 2400 | -             | 0.8643              |
-| 2.8281 | 2500 | 0.3556        | 0.8661              |
-| 2.9412 | 2600 | -             | 0.8672              |
-| 3.0    | 2652 | -             | 0.8656              |
-| 3.0543 | 2700 | -             | 0.8668              |
-| 3.1674 | 2800 | -             | 0.8657              |
-| 3.2805 | 2900 | -             | 0.8654              |
-| 3.3937 | 3000 | 0.3435        | 0.8656              |
-| 3.5068 | 3100 | -             | 0.8665              |
-| 3.6199 | 3200 | -             | 0.8661              |
-| 3.7330 | 3300 | -             | 0.8660              |
-| 3.8462 | 3400 | -             | 0.8666              |
-| 3.9593 | 3500 | 0.3364        | 0.8679              |
-| 4.0    | 3536 | -             | 0.8674              |
-| 4.0724 | 3600 | -             | 0.8670              |
-| 4.1855 | 3700 | -             | 0.8697              |
 ### Framework Versions

 - cross-encoder
 - reranker
 - generated_from_trainer
+- dataset_size:24504
 - loss:BinaryCrossEntropyLoss
 base_model: Alibaba-NLP/gte-multilingual-reranker-base
 pipeline_tag: text-ranking
       type: validation
     metrics:
     - type: pearson
+      value: 0.8721120209782917
       name: Pearson
     - type: spearman
+      value: 0.8685098375943734
       name: Spearman
 ---
 model = CrossEncoder("cross_encoder_model_id")
 # Get scores for pairs of texts
 pairs = [
+    ['include the popular publications as well', 'Title: "Americans\' Library use - past 3 months (United States)"\nCollections: YouGov Trackers\nDatasets: YouGovTrackerValueV2\nChart Type: survey:timeseries\nSources: YouGov'],
+    ['Give it a good research topic', 'Title: "The most important issues facing the country (United Kingdom)"\nCollections: YouGov Trackers\nDatasets: YouGovTrackerValueV2\nChart Type: survey:timeseries\nSources: YouGov'],
+    ['When and where are the Denver Broncos playing the Kansas City Chiefs?', 'Title: "Denver Broncos at Kansas City Chiefs"\nCollections: Football\nChart Type: game_score:football'],
+    ['49ers vs Seahawks', 'Title: "Seahawk Deep Ocean Technology, Inc. Overview"\nCollections: Companies\nChart Type: company:finance\nCanonical forms: "Overview"="Stock Overview"\nSources: S&P Global'],
+    ['Comparative review of JBL vs Marshall 2025 Bluetooth speakers', 'Title: "B&C Speakers Overview"\nCollections: Companies\nChart Type: company:finance\nCanonical forms: "B&C Speakers"="B&C Speakers S.p.A.", "Overview"="Stock Overview"\nSources: S&P Global'],
 ]
 scores = model.predict(pairs)
 print(scores.shape)
 # Or rank different texts based on similarity to a single text
 ranks = model.rank(
+    'include the popular publications as well',
     [
+        'Title: "Americans\' Library use - past 3 months (United States)"\nCollections: YouGov Trackers\nDatasets: YouGovTrackerValueV2\nChart Type: survey:timeseries\nSources: YouGov',
+        'Title: "The most important issues facing the country (United Kingdom)"\nCollections: YouGov Trackers\nDatasets: YouGovTrackerValueV2\nChart Type: survey:timeseries\nSources: YouGov',
+        'Title: "Denver Broncos at Kansas City Chiefs"\nCollections: Football\nChart Type: game_score:football',
+        'Title: "Seahawk Deep Ocean Technology, Inc. Overview"\nCollections: Companies\nChart Type: company:finance\nCanonical forms: "Overview"="Stock Overview"\nSources: S&P Global',
+        'Title: "B&C Speakers Overview"\nCollections: Companies\nChart Type: company:finance\nCanonical forms: "B&C Speakers"="B&C Speakers S.p.A.", "Overview"="Stock Overview"\nSources: S&P Global',
     ]
 )
 # [{'corpus_id': ..., 'score': ...}, {'corpus_id': ..., 'score': ...}, ...]
 | Metric       | Value      |
 |:-------------|:-----------|
+| pearson      | 0.8721     |
+| **spearman** | **0.8685** |
 <!--
 ## Bias, Risks and Limitations
 #### Unnamed Dataset
+* Size: 24,504 training samples
 * Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
 * Approximate statistics based on the first 1000 samples:
   |         | sentence_0                                                                                     | sentence_1                                                                                       | label                                                          |
   |:--------|:-----------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------|:---------------------------------------------------------------|
   | type    | string                                                                                         | string                                                                                           | float                                                          |
+  | details | <ul><li>min: 2 characters</li><li>mean: 86.83 characters</li><li>max: 993 characters</li></ul> | <ul><li>min: 77 characters</li><li>mean: 169.16 characters</li><li>max: 360 characters</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.42</li><li>max: 1.0</li></ul> |
 * Samples:
+  | sentence_0                                                                         | sentence_1                                                                                                                                                                                                | label            |
+  |:-----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------|
+  | <code>include the popular publications as well</code>                              | <code>Title: "Americans' Library use - past 3 months (United States)"<br>Collections: YouGov Trackers<br>Datasets: YouGovTrackerValueV2<br>Chart Type: survey:timeseries<br>Sources: YouGov</code>        | <code>0.5</code> |
+  | <code>Give it a good research topic</code>                                         | <code>Title: "The most important issues facing the country (United Kingdom)"<br>Collections: YouGov Trackers<br>Datasets: YouGovTrackerValueV2<br>Chart Type: survey:timeseries<br>Sources: YouGov</code> | <code>1.0</code> |
+  | <code>When and where are the Denver Broncos playing the Kansas City Chiefs?</code> | <code>Title: "Denver Broncos at Kansas City Chiefs"<br>Collections: Football<br>Chart Type: game_score:football</code>                                                                                    | <code>1.0</code> |
 * Loss: [<code>BinaryCrossEntropyLoss</code>](https://sbert.net/docs/package_reference/cross_encoder/losses.html#binarycrossentropyloss) with these parameters:
   ```json
   {
 ### Training Logs
 | Epoch  | Step | Training Loss | validation_spearman |
 |:------:|:----:|:-------------:|:-------------------:|
+| 0.1305 | 100  | -             | 0.7594              |
+| 0.2611 | 200  | -             | 0.7951              |
+| 0.3916 | 300  | -             | 0.8050              |
+| 0.5222 | 400  | -             | 0.8200              |
+| 0.6527 | 500  | 0.468         | 0.8290              |
+| 0.7833 | 600  | -             | 0.8331              |
+| 0.9138 | 700  | -             | 0.8347              |
+| 1.0    | 766  | -             | 0.8434              |
+| 1.0444 | 800  | -             | 0.8432              |
+| 1.1749 | 900  | -             | 0.8467              |
+| 1.3055 | 1000 | 0.4135        | 0.8473              |
+| 1.4360 | 1100 | -             | 0.8475              |
+| 1.5666 | 1200 | -             | 0.8535              |
+| 1.6971 | 1300 | -             | 0.8518              |
+| 1.8277 | 1400 | -             | 0.8571              |
+| 1.9582 | 1500 | 0.3747        | 0.8577              |
+| 2.0    | 1532 | -             | 0.8556              |
+| 2.0888 | 1600 | -             | 0.8587              |
+| 2.2193 | 1700 | -             | 0.8609              |
+| 2.3499 | 1800 | -             | 0.8612              |
+| 2.4804 | 1900 | -             | 0.8619              |
+| 2.6110 | 2000 | 0.3515        | 0.8626              |
+| 2.7415 | 2100 | -             | 0.8622              |
+| 2.8721 | 2200 | -             | 0.8653              |
+| 3.0    | 2298 | -             | 0.8656              |
+| 3.0026 | 2300 | -             | 0.8656              |
+| 3.1332 | 2400 | -             | 0.8643              |
+| 3.2637 | 2500 | 0.3421        | 0.8646              |
+| 3.3943 | 2600 | -             | 0.8654              |
+| 3.5248 | 2700 | -             | 0.8666              |
+| 3.6554 | 2800 | -             | 0.8640              |
+| 3.7859 | 2900 | -             | 0.8685              |
 ### Framework Versions

eval/CrossEncoderCorrelationEvaluator_validation_results.csv CHANGED Viewed

@@ -1,6 +1,6 @@
 epoch,steps,Pearson_Correlation,Spearman_Correlation
-1.0,884,0.8401566336608757,0.8382300998214652
-2.0,1768,0.8615358688421247,0.8587285555844401
-3.0,2652,0.8681246217823901,0.8655791469764533
-4.0,3536,0.8692562599529016,0.8674157368971115
-5.0,4420,0.8707395217729429,0.8684691714077699

 epoch,steps,Pearson_Correlation,Spearman_Correlation
+1.0,766,0.8453028536443531,0.8434351098924865
+2.0,1532,0.8574271674817566,0.8556349102862147
+3.0,2298,0.8687755325286843,0.865637110569002
+4.0,3064,0.8698030506575616,0.8669249926545327
+5.0,3830,0.8701775404822807,0.8675087793394471

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:922f11cc3ffaaa5abc058dadce6044e126ff6b5b5c6595408217073cd2d86548
 size 1223854204

 version https://git-lfs.github.com/spec/v1
+oid sha256:06035d5c262912d8a1e0fd97e71fc51f0e84c66ed6a5f7e14862da0e88600252
 size 1223854204

training_info.txt CHANGED Viewed

@@ -1,5 +1,5 @@
 Base Model: Alibaba-NLP/gte-multilingual-reranker-base
-Training Samples: 28274
 Epochs: 5
 Batch Size: 32
 Learning Rate: 2e-05

 Base Model: Alibaba-NLP/gte-multilingual-reranker-base
+Training Samples: 24504
 Epochs: 5
 Batch Size: 32
 Learning Rate: 2e-05