TakoData
/

chart-reranker

@@ -4,7 +4,7 @@ tags:
 - cross-encoder
 - reranker
 - generated_from_trainer
-- dataset_size:12349
 - loss:BinaryCrossEntropyLoss
 base_model: Alibaba-NLP/gte-multilingual-reranker-base
 pipeline_tag: text-ranking
@@ -23,10 +23,10 @@ model-index:
       type: validation
     metrics:
     - type: pearson
-      value: 0.8643473065020739
       name: Pearson
     - type: spearman
-      value: 0.8620968090164374
       name: Spearman
 ---
@@ -70,11 +70,11 @@ from sentence_transformers import CrossEncoder
 model = CrossEncoder("cross_encoder_model_id")
 # Get scores for pairs of texts
 pairs = [
-    ['DJ mixers compatible with Apple Music 2025', 'Title: "Music devices - radio (United States)"\nCollections: YouGov Trackers\nDatasets: YouGovTrackerValueV2\nChart Type: survey:timeseries\nSources: YouGov'],
-    ['current USD to PLN exchange rate', 'Title: "Conversion rate from PLN to USD"\nCollections: Foreign Exchange Rates\nDatasets: Forex\nChart Type: exchange:currency\nSources: Xignite'],
-    ['Aktuelle Investmenttrends 2025', 'Title: "Financial activity - next 12 months (United States)"\nCollections: YouGov Trackers\nDatasets: YouGovTrackerValueV2\nChart Type: survey:timeseries\nSources: YouGov'],
-    ["What are Amazon's accrued liabilities?", 'Title: "Amazon Expenses Accrued (Quarterly)"\nCollections: Companies\nDatasets: StandardIncomeStatement\nChart Type: timeseries:eav_v2\nCanonical forms: "Expenses Accrued"="accrued_expenses_total"\nSources: S&P Global'],
-    ["Costco's long-term lease obligations", 'Title: "Air Lease Overview"\nCollections: Companies\nChart Type: company:finance\nCanonical forms: "Air Lease"="Air Lease Corporation", "Overview"="Stock Overview"\nSources: S&P Global'],
 ]
 scores = model.predict(pairs)
 print(scores.shape)
@@ -82,13 +82,13 @@ print(scores.shape)
 # Or rank different texts based on similarity to a single text
 ranks = model.rank(
-    'DJ mixers compatible with Apple Music 2025',
     [
-        'Title: "Music devices - radio (United States)"\nCollections: YouGov Trackers\nDatasets: YouGovTrackerValueV2\nChart Type: survey:timeseries\nSources: YouGov',
-        'Title: "Conversion rate from PLN to USD"\nCollections: Foreign Exchange Rates\nDatasets: Forex\nChart Type: exchange:currency\nSources: Xignite',
-        'Title: "Financial activity - next 12 months (United States)"\nCollections: YouGov Trackers\nDatasets: YouGovTrackerValueV2\nChart Type: survey:timeseries\nSources: YouGov',
-        'Title: "Amazon Expenses Accrued (Quarterly)"\nCollections: Companies\nDatasets: StandardIncomeStatement\nChart Type: timeseries:eav_v2\nCanonical forms: "Expenses Accrued"="accrued_expenses_total"\nSources: S&P Global',
-        'Title: "Air Lease Overview"\nCollections: Companies\nChart Type: company:finance\nCanonical forms: "Air Lease"="Air Lease Corporation", "Overview"="Stock Overview"\nSources: S&P Global',
     ]
 )
 # [{'corpus_id': ..., 'score': ...}, {'corpus_id': ..., 'score': ...}, ...]
@@ -129,8 +129,8 @@ You can finetune this model on your own dataset.
 | Metric       | Value      |
 |:-------------|:-----------|
-| pearson      | 0.8643     |
-| **spearman** | **0.8621** |
 <!--
 ## Bias, Risks and Limitations
@@ -150,19 +150,19 @@ You can finetune this model on your own dataset.
 #### Unnamed Dataset
-* Size: 12,349 training samples
 * Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
 * Approximate statistics based on the first 1000 samples:
-  |         | sentence_0                                                                                     | sentence_1                                                                                      | label                                                          |
-  |:--------|:-----------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------|:---------------------------------------------------------------|
-  | type    | string                                                                                         | string                                                                                          | float                                                          |
-  | details | <ul><li>min: 5 characters</li><li>mean: 46.81 characters</li><li>max: 123 characters</li></ul> | <ul><li>min: 77 characters</li><li>mean: 182.4 characters</li><li>max: 495 characters</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.48</li><li>max: 1.0</li></ul> |
 * Samples:
-  | sentence_0                                              | sentence_1                                                                                                                                                                                      | label             |
-  |:--------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------|
-  | <code>DJ mixers compatible with Apple Music 2025</code> | <code>Title: "Music devices - radio (United States)"<br>Collections: YouGov Trackers<br>Datasets: YouGovTrackerValueV2<br>Chart Type: survey:timeseries<br>Sources: YouGov</code>               | <code>0.25</code> |
-  | <code>current USD to PLN exchange rate</code>           | <code>Title: "Conversion rate from PLN to USD"<br>Collections: Foreign Exchange Rates<br>Datasets: Forex<br>Chart Type: exchange:currency<br>Sources: Xignite</code>                            | <code>0.75</code> |
-  | <code>Aktuelle Investmenttrends 2025</code>             | <code>Title: "Financial activity - next 12 months (United States)"<br>Collections: YouGov Trackers<br>Datasets: YouGovTrackerValueV2<br>Chart Type: survey:timeseries<br>Sources: YouGov</code> | <code>0.75</code> |
 * Loss: [<code>BinaryCrossEntropyLoss</code>](https://sbert.net/docs/package_reference/cross_encoder/losses.html#binarycrossentropyloss) with these parameters:
   ```json
   {
@@ -308,29 +308,40 @@ You can finetune this model on your own dataset.
 ### Training Logs
 | Epoch  | Step | Training Loss | validation_spearman |
 |:------:|:----:|:-------------:|:-------------------:|
-| 0.2591 | 100  | -             | 0.7835              |
-| 0.5181 | 200  | -             | 0.8161              |
-| 0.7772 | 300  | -             | 0.8369              |
-| 1.0    | 386  | -             | 0.8392              |
-| 1.0363 | 400  | -             | 0.8442              |
-| 1.2953 | 500  | 0.47          | 0.8475              |
-| 1.5544 | 600  | -             | 0.8533              |
-| 1.8135 | 700  | -             | 0.8544              |
-| 2.0    | 772  | -             | 0.8579              |
-| 2.0725 | 800  | -             | 0.8585              |
-| 2.3316 | 900  | -             | 0.8548              |
-| 2.5907 | 1000 | 0.3926        | 0.8577              |
-| 2.8497 | 1100 | -             | 0.8569              |
-| 3.0    | 1158 | -             | 0.8607              |
-| 3.1088 | 1200 | -             | 0.8573              |
-| 3.3679 | 1300 | -             | 0.8614              |
-| 3.6269 | 1400 | -             | 0.8594              |
-| 3.8860 | 1500 | 0.3602        | 0.8591              |
-| 4.0    | 1544 | -             | 0.8596              |
-| 4.1451 | 1600 | -             | 0.8611              |
-| 4.4041 | 1700 | -             | 0.8619              |
-| 4.6632 | 1800 | -             | 0.8618              |
-| 4.9223 | 1900 | -             | 0.8621              |
 ### Framework Versions

 - cross-encoder
 - reranker
 - generated_from_trainer
+- dataset_size:20347
 - loss:BinaryCrossEntropyLoss
 base_model: Alibaba-NLP/gte-multilingual-reranker-base
 pipeline_tag: text-ranking
       type: validation
     metrics:
     - type: pearson
+      value: 0.8381245620713855
       name: Pearson
     - type: spearman
+      value: 0.8388188648567115
       name: Spearman
 ---
 model = CrossEncoder("cross_encoder_model_id")
 # Get scores for pairs of texts
 pairs = [
+    ['Thanks, now you have everything pick the most important item  or 2 or three if you find it really appropriate from each group. Just simplify  this list a bit, to make sure I have my micro nutrients, vitamins, whatever checked off.', 'Title: "Natural Grocers by Vitamin Cottage Overview"\nCollections: Companies\nDatasets: InstrumentClosePrice1Day\nChart Type: timeseries:eav_v3\nCanonical forms: "Natural Grocers by Vitamin Cottage"="closing_price"'],
+    ['How do people feel about Nicola Sturgeon?', 'Title: "Nicola Sturgeon fame & popularity tracker (United Kingdom)"\nCollections: YouGov Trackers\nDatasets: YouGovTrackerValueV2\nChart Type: survey:timeseries\nSources: YouGov'],
+    ['Create a skit about hino. It should be a horror theme and humor in the end. Without the need of driving a truck. it can be about hino genuine spareparts or technician services', 'Title: "Hino Motors Overview"\nCollections: Companies\nChart Type: company:finance\nCanonical forms: "Hino Motors"="Hino Motors, Ltd.", "Overview"="Stock Overview"\nSources: S&P Global'],
+    ['no i mean talk about the trends in school', 'Title: "Should private schools be banned? (United Kingdom)"\nCollections: YouGov Trackers\nDatasets: YouGovTrackerValueV2\nChart Type: survey:timeseries\nSources: YouGov'],
+    ['Exchange rate Moroccan dirham to euro 29 October 2025', 'Title: "Conversion rate from EUR to MAD"\nCollections: Foreign Exchange Rates\nDatasets: Forex\nChart Type: exchange:currency\nSources: Xignite'],
 ]
 scores = model.predict(pairs)
 print(scores.shape)
 # Or rank different texts based on similarity to a single text
 ranks = model.rank(
+    'Thanks, now you have everything pick the most important item  or 2 or three if you find it really appropriate from each group. Just simplify  this list a bit, to make sure I have my micro nutrients, vitamins, whatever checked off.',
     [
+        'Title: "Natural Grocers by Vitamin Cottage Overview"\nCollections: Companies\nDatasets: InstrumentClosePrice1Day\nChart Type: timeseries:eav_v3\nCanonical forms: "Natural Grocers by Vitamin Cottage"="closing_price"',
+        'Title: "Nicola Sturgeon fame & popularity tracker (United Kingdom)"\nCollections: YouGov Trackers\nDatasets: YouGovTrackerValueV2\nChart Type: survey:timeseries\nSources: YouGov',
+        'Title: "Hino Motors Overview"\nCollections: Companies\nChart Type: company:finance\nCanonical forms: "Hino Motors"="Hino Motors, Ltd.", "Overview"="Stock Overview"\nSources: S&P Global',
+        'Title: "Should private schools be banned? (United Kingdom)"\nCollections: YouGov Trackers\nDatasets: YouGovTrackerValueV2\nChart Type: survey:timeseries\nSources: YouGov',
+        'Title: "Conversion rate from EUR to MAD"\nCollections: Foreign Exchange Rates\nDatasets: Forex\nChart Type: exchange:currency\nSources: Xignite',
     ]
 )
 # [{'corpus_id': ..., 'score': ...}, {'corpus_id': ..., 'score': ...}, ...]
 | Metric       | Value      |
 |:-------------|:-----------|
+| pearson      | 0.8381     |
+| **spearman** | **0.8388** |
 <!--
 ## Bias, Risks and Limitations
 #### Unnamed Dataset
+* Size: 20,347 training samples
 * Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
 * Approximate statistics based on the first 1000 samples:
+  |         | sentence_0                                                                                     | sentence_1                                                                                       | label                                                          |
+  |:--------|:-----------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------|:---------------------------------------------------------------|
+  | type    | string                                                                                         | string                                                                                           | float                                                          |
+  | details | <ul><li>min: 1 characters</li><li>mean: 84.39 characters</li><li>max: 943 characters</li></ul> | <ul><li>min: 74 characters</li><li>mean: 180.44 characters</li><li>max: 396 characters</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.45</li><li>max: 1.0</li></ul> |
 * Samples:
+  | sentence_0                                                                                                                                                                                                                                          | sentence_1                                                                                                                                                                                                                                  | label            |
+  |:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------|
+  | <code>Thanks, now you have everything pick the most important item  or 2 or three if you find it really appropriate from each group. Just simplify  this list a bit, to make sure I have my micro nutrients, vitamins, whatever checked off.</code> | <code>Title: "Natural Grocers by Vitamin Cottage Overview"<br>Collections: Companies<br>Datasets: InstrumentClosePrice1Day<br>Chart Type: timeseries:eav_v3<br>Canonical forms: "Natural Grocers by Vitamin Cottage"="closing_price"</code> | <code>0.0</code> |
+  | <code>How do people feel about Nicola Sturgeon?</code>                                                                                                                                                                                              | <code>Title: "Nicola Sturgeon fame & popularity tracker (United Kingdom)"<br>Collections: YouGov Trackers<br>Datasets: YouGovTrackerValueV2<br>Chart Type: survey:timeseries<br>Sources: YouGov</code>                                      | <code>1.0</code> |
+  | <code>Create a skit about hino. It should be a horror theme and humor in the end. Without the need of driving a truck. it can be about hino genuine spareparts or technician services</code>                                                        | <code>Title: "Hino Motors Overview"<br>Collections: Companies<br>Chart Type: company:finance<br>Canonical forms: "Hino Motors"="Hino Motors, Ltd.", "Overview"="Stock Overview"<br>Sources: S&P Global</code>                               | <code>0.5</code> |
 * Loss: [<code>BinaryCrossEntropyLoss</code>](https://sbert.net/docs/package_reference/cross_encoder/losses.html#binarycrossentropyloss) with these parameters:
   ```json
   {
 ### Training Logs
 | Epoch  | Step | Training Loss | validation_spearman |
 |:------:|:----:|:-------------:|:-------------------:|
+| 0.1572 | 100  | -             | 0.7137              |
+| 0.3145 | 200  | -             | 0.7573              |
+| 0.4717 | 300  | -             | 0.7748              |
+| 0.6289 | 400  | -             | 0.7888              |
+| 0.7862 | 500  | 0.5153        | 0.8000              |
+| 0.9434 | 600  | -             | 0.8039              |
+| 1.0    | 636  | -             | 0.8044              |
+| 1.1006 | 700  | -             | 0.8065              |
+| 1.2579 | 800  | -             | 0.8167              |
+| 1.4151 | 900  | -             | 0.8164              |
+| 1.5723 | 1000 | 0.445         | 0.8192              |
+| 1.7296 | 1100 | -             | 0.8225              |
+| 1.8868 | 1200 | -             | 0.8287              |
+| 2.0    | 1272 | -             | 0.8284              |
+| 2.0440 | 1300 | -             | 0.8281              |
+| 2.2013 | 1400 | -             | 0.8255              |
+| 2.3585 | 1500 | 0.4102        | 0.8276              |
+| 2.5157 | 1600 | -             | 0.8305              |
+| 2.6730 | 1700 | -             | 0.8343              |
+| 2.8302 | 1800 | -             | 0.8301              |
+| 2.9874 | 1900 | -             | 0.8351              |
+| 3.0    | 1908 | -             | 0.8355              |
+| 3.1447 | 2000 | 0.3904        | 0.8336              |
+| 3.3019 | 2100 | -             | 0.8319              |
+| 3.4591 | 2200 | -             | 0.8319              |
+| 3.6164 | 2300 | -             | 0.8308              |
+| 3.7736 | 2400 | -             | 0.8331              |
+| 3.9308 | 2500 | 0.3741        | 0.8370              |
+| 4.0    | 2544 | -             | 0.8383              |
+| 4.0881 | 2600 | -             | 0.8369              |
+| 4.2453 | 2700 | -             | 0.8385              |
+| 4.4025 | 2800 | -             | 0.8368              |
+| 4.5597 | 2900 | -             | 0.8370              |
+| 4.7170 | 3000 | 0.3643        | 0.8388              |
 ### Framework Versions

eval/CrossEncoderCorrelationEvaluator_validation_results.csv CHANGED Viewed

@@ -1,6 +1,6 @@
 epoch,steps,Pearson_Correlation,Spearman_Correlation
-1.0,386,0.837652001216976,0.8392216545657446
-2.0,772,0.8587273733508869,0.8579410042452386
-3.0,1158,0.8640762414247022,0.8607296855959887
-4.0,1544,0.8618951242992149,0.8596087038742567
-5.0,1930,0.8643015819365643,0.8620805071202834

 epoch,steps,Pearson_Correlation,Spearman_Correlation
+1.0,636,0.8050961988795169,0.8044347672638916
+2.0,1272,0.8267567950795853,0.8284146931811501
+3.0,1908,0.8351882809975475,0.8355004054548
+4.0,2544,0.8381740944766652,0.8382614031363851
+5.0,3180,0.8368434817201468,0.8374989674723212

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6485ea4ee6cc3859ccff14222884fed3c2f23e283906df217bd6bd60d8377bff
 size 1223854204

 version https://git-lfs.github.com/spec/v1
+oid sha256:f6dde1675c82135fb9296d9c990693ce3373c5982f7f01cd53a72fb674e86d82
 size 1223854204

training_info.txt CHANGED Viewed

@@ -1,5 +1,5 @@
 Base Model: Alibaba-NLP/gte-multilingual-reranker-base
-Training Samples: 12349
 Epochs: 5
 Batch Size: 32
 Learning Rate: 2e-05

 Base Model: Alibaba-NLP/gte-multilingual-reranker-base
+Training Samples: 20347
 Epochs: 5
 Batch Size: 32
 Learning Rate: 2e-05