TakoData
/

chart-reranker

@@ -4,7 +4,7 @@ tags:
 - cross-encoder
 - reranker
 - generated_from_trainer
-- dataset_size:3999
 - loss:BinaryCrossEntropyLoss
 base_model: Alibaba-NLP/gte-reranker-modernbert-base
 pipeline_tag: text-ranking
@@ -23,10 +23,10 @@ model-index:
       type: validation
     metrics:
     - type: pearson
-      value: 0.8452666435840461
       name: Pearson
     - type: spearman
-      value: 0.8488108402924169
       name: Spearman
 ---
@@ -70,11 +70,11 @@ from sentence_transformers import CrossEncoder
 model = CrossEncoder("cross_encoder_model_id")
 # Get scores for pairs of texts
 pairs = [
-    ['Texas Abbott sues remove ringleader Democratic walkout redistricting article', 'Title: "The Republican Party favorability  (United States)"\nCollections: YouGov Trackers\nDatasets: YouGovTrackerValueV2\nChart Type: survey:timeseries\nSources: YouGov'],
-    ["chatbot administration publique cas d'utilisation", 'Title: "EDH NETWORK, LLC Overview"\nCollections: Companies\nChart Type: company:private\nSources: S&P Global'],
-    ['What is the price of Vanguard S&P 500 ETF stock? Answer in as few words as possible.', 'Title: "LSEG Stock Price, Vanguard S&P 500 ETF Stock Price"\nCollections: Companies\nDatasets: InstrumentClosePrice1Day\nChart Type: timeseries:eav_v2\nCanonical forms: "Stock Price"="closing_price", "London Stock Exchange Group plc"="closing_price"'],
-    ['JP Morgan Goldman Sachs return on equity comparison 2015-2025', 'Title: "Goldman Sachs Equity (Quarterly)"\nCollections: Companies\nDatasets: StandardIncomeStatement\nChart Type: timeseries:eav_v2\nCanonical forms: "Equity"="total_equity"\nSources: S&P Global'],
-    ['Nvidia financial history', 'Title: "Nvidia Financials"\nCollections: Companies\nChart Type: company:finance\nCanonical forms: "Nvidia"="NVIDIA Corporation", "Financials"="Financials Overview"\nSources: S&P Global'],
 ]
 scores = model.predict(pairs)
 print(scores.shape)
@@ -82,13 +82,13 @@ print(scores.shape)
 # Or rank different texts based on similarity to a single text
 ranks = model.rank(
-    'Texas Abbott sues remove ringleader Democratic walkout redistricting article',
     [
-        'Title: "The Republican Party favorability  (United States)"\nCollections: YouGov Trackers\nDatasets: YouGovTrackerValueV2\nChart Type: survey:timeseries\nSources: YouGov',
-        'Title: "EDH NETWORK, LLC Overview"\nCollections: Companies\nChart Type: company:private\nSources: S&P Global',
-        'Title: "LSEG Stock Price, Vanguard S&P 500 ETF Stock Price"\nCollections: Companies\nDatasets: InstrumentClosePrice1Day\nChart Type: timeseries:eav_v2\nCanonical forms: "Stock Price"="closing_price", "London Stock Exchange Group plc"="closing_price"',
-        'Title: "Goldman Sachs Equity (Quarterly)"\nCollections: Companies\nDatasets: StandardIncomeStatement\nChart Type: timeseries:eav_v2\nCanonical forms: "Equity"="total_equity"\nSources: S&P Global',
-        'Title: "Nvidia Financials"\nCollections: Companies\nChart Type: company:finance\nCanonical forms: "Nvidia"="NVIDIA Corporation", "Financials"="Financials Overview"\nSources: S&P Global',
     ]
 )
 # [{'corpus_id': ..., 'score': ...}, {'corpus_id': ..., 'score': ...}, ...]
@@ -129,8 +129,8 @@ You can finetune this model on your own dataset.
 | Metric       | Value      |
 |:-------------|:-----------|
-| pearson      | 0.8453     |
-| **spearman** | **0.8488** |
 <!--
 ## Bias, Risks and Limitations
@@ -150,19 +150,19 @@ You can finetune this model on your own dataset.
 #### Unnamed Dataset
-* Size: 3,999 training samples
 * Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
 * Approximate statistics based on the first 1000 samples:
-  |         | sentence_0                                                                                    | sentence_1                                                                                       | label                                                          |
-  |:--------|:----------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------|:---------------------------------------------------------------|
-  | type    | string                                                                                        | string                                                                                           | float                                                          |
-  | details | <ul><li>min: 3 characters</li><li>mean: 43.02 characters</li><li>max: 99 characters</li></ul> | <ul><li>min: 75 characters</li><li>mean: 181.52 characters</li><li>max: 411 characters</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.48</li><li>max: 1.0</li></ul> |
 * Samples:
-  | sentence_0                                                                                        | sentence_1                                                                                                                                                                                                                                                                     | label             |
-  |:--------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------|
-  | <code>Texas Abbott sues remove ringleader Democratic walkout redistricting article</code>         | <code>Title: "The Republican Party favorability  (United States)"<br>Collections: YouGov Trackers<br>Datasets: YouGovTrackerValueV2<br>Chart Type: survey:timeseries<br>Sources: YouGov</code>                                                                                 | <code>0.5</code>  |
-  | <code>chatbot administration publique cas d'utilisation</code>                                    | <code>Title: "EDH NETWORK, LLC Overview"<br>Collections: Companies<br>Chart Type: company:private<br>Sources: S&P Global</code>                                                                                                                                                | <code>0.0</code>  |
-  | <code>What is the price of Vanguard S&P 500 ETF stock? Answer in as few words as possible.</code> | <code>Title: "LSEG Stock Price, Vanguard S&P 500 ETF Stock Price"<br>Collections: Companies<br>Datasets: InstrumentClosePrice1Day<br>Chart Type: timeseries:eav_v2<br>Canonical forms: "Stock Price"="closing_price", "London Stock Exchange Group plc"="closing_price"</code> | <code>0.75</code> |
 * Loss: [<code>BinaryCrossEntropyLoss</code>](https://sbert.net/docs/package_reference/cross_encoder/losses.html#binarycrossentropyloss) with these parameters:
   ```json
   {
@@ -305,17 +305,24 @@ You can finetune this model on your own dataset.
 </details>
 ### Training Logs
-| Epoch | Step | Training Loss | validation_spearman |
-|:-----:|:----:|:-------------:|:-------------------:|
-| 0.8   | 100  | -             | 0.8206              |
-| 1.0   | 125  | -             | 0.8230              |
-| 1.6   | 200  | -             | 0.8370              |
-| 2.0   | 250  | -             | 0.8417              |
-| 2.4   | 300  | -             | 0.8363              |
-| 3.0   | 375  | -             | 0.8458              |
-| 3.2   | 400  | -             | 0.8477              |
-| 4.0   | 500  | 0.4023        | 0.8475              |
-| 4.8   | 600  | -             | 0.8488              |
 ### Framework Versions

 - cross-encoder
 - reranker
 - generated_from_trainer
+- dataset_size:7779
 - loss:BinaryCrossEntropyLoss
 base_model: Alibaba-NLP/gte-reranker-modernbert-base
 pipeline_tag: text-ranking
       type: validation
     metrics:
     - type: pearson
+      value: 0.8888985992978667
       name: Pearson
     - type: spearman
+      value: 0.8845425048973017
       name: Spearman
 ---
 model = CrossEncoder("cross_encoder_model_id")
 # Get scores for pairs of texts
 pairs = [
+    ['Cohere funding history: amounts raised by round', 'Title: "Cohere Overview"\nCollections: Companies\nChart Type: company:private\nSources: S&P Global'],
+    ['villes sympa à voir entre turin et come', 'Title: "Turin F.C. Schedule"\nCollections: Soccer\nChart Type: schedule:soccer_team_v2'],
+    ['Current housing inventory in Chattanooga, TN', 'Title: "Tusculum, TN Inventory - House"\nCollections: Residential Real Estate\nDatasets: RegionalRealEstateIndicators\nChart Type: timeseries:eav_v2\nCanonical forms: "Inventory"="inventory_seasonally_unadjusted"\nSources: Redfin'],
+    ["What's Tesla's raw material inventory?", 'Title: "Tesla Overview"\nCollections: Companies\nChart Type: company:finance\nCanonical forms: "Tesla"="Tesla, Inc.", "Overview"="Stock Overview"\nSources: S&P Global'],
+    ['current weather in hong kong', 'Title: "Hong Kong Weather"\nCollections: Weather Forecasts\nChart Type: weather:international_forecast\nSources: OpenWeather'],
 ]
 scores = model.predict(pairs)
 print(scores.shape)
 # Or rank different texts based on similarity to a single text
 ranks = model.rank(
+    'Cohere funding history: amounts raised by round',
     [
+        'Title: "Cohere Overview"\nCollections: Companies\nChart Type: company:private\nSources: S&P Global',
+        'Title: "Turin F.C. Schedule"\nCollections: Soccer\nChart Type: schedule:soccer_team_v2',
+        'Title: "Tusculum, TN Inventory - House"\nCollections: Residential Real Estate\nDatasets: RegionalRealEstateIndicators\nChart Type: timeseries:eav_v2\nCanonical forms: "Inventory"="inventory_seasonally_unadjusted"\nSources: Redfin',
+        'Title: "Tesla Overview"\nCollections: Companies\nChart Type: company:finance\nCanonical forms: "Tesla"="Tesla, Inc.", "Overview"="Stock Overview"\nSources: S&P Global',
+        'Title: "Hong Kong Weather"\nCollections: Weather Forecasts\nChart Type: weather:international_forecast\nSources: OpenWeather',
     ]
 )
 # [{'corpus_id': ..., 'score': ...}, {'corpus_id': ..., 'score': ...}, ...]
 | Metric       | Value      |
 |:-------------|:-----------|
+| pearson      | 0.8889     |
+| **spearman** | **0.8845** |
 <!--
 ## Bias, Risks and Limitations
 #### Unnamed Dataset
+* Size: 7,779 training samples
 * Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
 * Approximate statistics based on the first 1000 samples:
+  |         | sentence_0                                                                                     | sentence_1                                                                                       | label                                                          |
+  |:--------|:-----------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------|:---------------------------------------------------------------|
+  | type    | string                                                                                         | string                                                                                           | float                                                          |
+  | details | <ul><li>min: 4 characters</li><li>mean: 44.22 characters</li><li>max: 116 characters</li></ul> | <ul><li>min: 75 characters</li><li>mean: 184.59 characters</li><li>max: 383 characters</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.45</li><li>max: 1.0</li></ul> |
 * Samples:
+  | sentence_0                                                   | sentence_1                                                                                                                                                                                                                                                   | label             |
+  |:-------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------|
+  | <code>Cohere funding history: amounts raised by round</code> | <code>Title: "Cohere Overview"<br>Collections: Companies<br>Chart Type: company:private<br>Sources: S&P Global</code>                                                                                                                                        | <code>0.75</code> |
+  | <code>villes sympa à voir entre turin et come</code>         | <code>Title: "Turin F.C. Schedule"<br>Collections: Soccer<br>Chart Type: schedule:soccer_team_v2</code>                                                                                                                                                      | <code>0.0</code>  |
+  | <code>Current housing inventory in Chattanooga, TN</code>    | <code>Title: "Tusculum, TN Inventory - House"<br>Collections: Residential Real Estate<br>Datasets: RegionalRealEstateIndicators<br>Chart Type: timeseries:eav_v2<br>Canonical forms: "Inventory"="inventory_seasonally_unadjusted"<br>Sources: Redfin</code> | <code>0.25</code> |
 * Loss: [<code>BinaryCrossEntropyLoss</code>](https://sbert.net/docs/package_reference/cross_encoder/losses.html#binarycrossentropyloss) with these parameters:
   ```json
   {
 </details>
 ### Training Logs
+| Epoch  | Step | Training Loss | validation_spearman |
+|:------:|:----:|:-------------:|:-------------------:|
+| 0.4098 | 100  | -             | 0.8203              |
+| 0.8197 | 200  | -             | 0.8565              |
+| 1.0    | 244  | -             | 0.8587              |
+| 1.2295 | 300  | -             | 0.8632              |
+| 1.6393 | 400  | -             | 0.8772              |
+| 2.0    | 488  | -             | 0.8714              |
+| 2.0492 | 500  | 0.4207        | 0.8776              |
+| 2.4590 | 600  | -             | 0.8786              |
+| 2.8689 | 700  | -             | 0.8761              |
+| 3.0    | 732  | -             | 0.8824              |
+| 3.2787 | 800  | -             | 0.8817              |
+| 3.6885 | 900  | -             | 0.8838              |
+| 4.0    | 976  | -             | 0.8835              |
+| 4.0984 | 1000 | 0.3261        | 0.8836              |
+| 4.5082 | 1100 | -             | 0.8843              |
+| 4.9180 | 1200 | -             | 0.8845              |
 ### Framework Versions

eval/CrossEncoderCorrelationEvaluator_validation_results.csv CHANGED Viewed

@@ -1,6 +1,6 @@
 epoch,steps,Pearson_Correlation,Spearman_Correlation
-1.0,125,0.8167008903095528,0.8229508199621789
-2.0,250,0.8399878282678421,0.8416534760112289
-3.0,375,0.8467421503612298,0.8458351888933171
-4.0,500,0.8467246954540373,0.8475455993951261
-5.0,625,0.8454725522086717,0.8483499396539944

 epoch,steps,Pearson_Correlation,Spearman_Correlation
+1.0,244,0.8620642924096914,0.8587166361363444
+2.0,488,0.8764832585164201,0.8713859435370955
+3.0,732,0.8867003524365638,0.8823857804088827
+4.0,976,0.8881431986959347,0.8835376105032559
+5.0,1220,0.8889602207955667,0.8845866499868097

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0fc45b6c805952aa21239ccb30d4f8904a272dfc01e478a5373be9b9ec2b04ca
 size 598436708

 version https://git-lfs.github.com/spec/v1
+oid sha256:71ca08ed8176f01a71eaa842d8135564d04d405af3ad33d2ba4c1f91e581b05d
 size 598436708

training_info.txt CHANGED Viewed

@@ -1,5 +1,5 @@
 Base Model: Alibaba-NLP/gte-reranker-modernbert-base
-Training Samples: 3999
 Epochs: 5
 Batch Size: 32
 Learning Rate: 2e-05

 Base Model: Alibaba-NLP/gte-reranker-modernbert-base
+Training Samples: 7779
 Epochs: 5
 Batch Size: 32
 Learning Rate: 2e-05