thebajajra committed on
Commit
aa0faed
·
verified ·
1 Parent(s): 7c35ae1

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. README.md +31 -268
  2. config.json +47 -23
  3. model.safetensors +2 -2
  4. reranker_config.json +1 -0
  5. training_metadata.json +8 -0
README.md CHANGED
@@ -1,290 +1,53 @@
1
- ---
2
- license: apache-2.0
3
- language:
4
- - en
5
- tags:
6
- - ecommerce
7
- - e-commerce
8
- - retail
9
- - marketplace
10
- - shopping
11
- - amazon
12
- - ebay
13
- - alibaba
14
- - google
15
- - rakuten
16
- - bestbuy
17
- - walmart
18
- - flipkart
19
- - wayfair
20
- - shein
21
- - target
22
- - etsy
23
- - shopify
24
- - taobao
25
- - asos
26
- - carrefour
27
- - costco
28
- - overstock
29
- - pretraining
30
- - encoder
31
- - language-modeling
32
- - foundation-model
33
- base_model:
34
- - thebajajra/RexBERT-micro
35
- pipeline_tag: text-ranking
36
- library_name: sentence-transformers
37
- ---
38
- <br><br>
39
 
40
- <p align="center">
41
- <img src="https://cdn-uploads.huggingface.co/production/uploads/6893dd21467f7d2f5f358a95/apOIbl5PdJuRk-tQMdDc8.png" alt="RexReranker">
42
- </p>
43
- <p align="center">
44
- </p>
45
 
46
- # RexReranker Micro
47
 
48
- A distributional **e-commerce** neural reranker based on RexBERT-micro that predicts relevance scores as a probability distribution, providing both accurate relevance predictions and uncertainty estimates.
 
 
49
 
50
- ## Features
51
-
52
- - **Distributional Output**: Predicts a probability distribution over relevance bins (0.0 to 1.0)
53
- - **Uncertainty Estimates**: Provides variance and entropy for confidence assessment
54
- - **CrossEncoder Compatible**: Works directly with Sentence Transformers CrossEncoder
55
- - **Mean Pooling**: Uses mean pooling over all tokens for robust representations
56
-
57
- ## Installation
58
-
59
- ```bash
60
- pip install transformers sentence-transformers torch
61
- ```
62
-
63
- ## Quick Start
64
-
65
- ### 1. Using HuggingFace Transformers
66
 
67
  ```python
68
- from transformers import AutoModel, AutoTokenizer
69
  import torch
 
 
70
 
71
  # Load model and tokenizer
72
- model = AutoModel.from_pretrained(
73
- "thebajajra/RexReranker-micro",
74
- trust_remote_code=True
75
- )
76
- tokenizer = AutoTokenizer.from_pretrained("thebajajra/RexReranker-micro")
77
-
78
- # Move to GPU if available
79
- device = "cuda" if torch.cuda.is_available() else "cpu"
80
- model = model.to(device)
81
  model.eval()
82
 
83
- # Prepare input (query-document pair)
84
- query = "best laptop for programming"
85
- title = "MacBook Pro M3"
86
- description = "Powerful laptop with M3 chip, 16GB RAM, perfect for developers and creative professionals"
87
 
88
- inputs = tokenizer(
89
- f"Query: {query}",
90
- f"Title: {title}\nDescription: {description}",
91
- return_tensors="pt",
92
- truncation=True,
93
- max_length=2048,
94
- ).to(device)
95
 
96
- # Get relevance score
97
  with torch.no_grad():
98
- score = model.predict_relevance(**inputs)
99
- print(f"Relevance Score: {score.item():.4f}")
100
- ```
101
-
102
- ### 2. Using Sentence Transformers CrossEncoder
103
-
104
- ```python
105
- from sentence_transformers import CrossEncoder
106
-
107
- # Load as CrossEncoder
108
- model = CrossEncoder(
109
- "thebajajra/RexReranker-micro",
110
- trust_remote_code=True
111
- )
112
-
113
- # Single prediction
114
- query = "best laptop for programming"
115
- document = "MacBook Pro M3 - Powerful laptop with M3 chip for developers"
116
-
117
- score = model.predict([(query, document)])[0]
118
- print(f"Score: {score:.4f}")
119
- ```
120
-
121
- ### 3. Batch Reranking with CrossEncoder
122
-
123
- ```python
124
- from sentence_transformers import CrossEncoder
125
-
126
- model = CrossEncoder("thebajajra/RexReranker-micro", trust_remote_code=True)
127
-
128
- query = "best laptop for programming"
129
- documents = [
130
- "MacBook Pro M3 - Powerful laptop with M3 chip for developers",
131
- "Gaming Mouse RGB - High precision gaming mouse with 16000 DPI",
132
- "ThinkPad X1 Carbon - Business ultrabook with long battery life",
133
- "Mechanical Keyboard - Cherry MX switches for typing comfort",
134
- "Dell XPS 15 - Premium laptop with 4K OLED display",
135
- ]
136
-
137
- # Get scores for all documents
138
- pairs = [(query, doc) for doc in documents]
139
- scores = model.predict(pairs)
140
-
141
- # Print ranked results
142
- print(f"Query: {query}\n")
143
- for doc, score in sorted(zip(documents, scores), key=lambda x: x[1], reverse=True):
144
- print(f" {score:.4f} | {doc[:60]}")
145
- ```
146
-
147
- ### 4. Using CrossEncoder's rank() Method
148
-
149
- ```python
150
- from sentence_transformers import CrossEncoder
151
-
152
- model = CrossEncoder("thebajajra/RexReranker-micro", trust_remote_code=True)
153
-
154
- query = "wireless headphones with noise cancellation"
155
- documents = [
156
- "Sony WH-1000XM5 - Industry-leading noise cancellation headphones",
157
- "Apple AirPods Max - Premium over-ear headphones with spatial audio",
158
- "Bose QuietComfort 45 - Comfortable wireless noise cancelling headphones",
159
- "JBL Tune 750BTNC - Affordable wireless headphones with ANC",
160
- "Logitech Gaming Headset - Wired gaming headphones with microphone",
161
- ]
162
-
163
- # Rank documents
164
- results = model.rank(query, documents, top_k=3)
165
-
166
- print(f"Query: {query}\n")
167
- print("Top 3 Results:")
168
- for result in results:
169
- idx = result['corpus_id']
170
- score = result['score']
171
- print(f" {score:.4f} | {documents[idx][:60]}")
172
- ```
173
-
174
- ### 5. With Uncertainty Estimates
175
-
176
- ```python
177
- from transformers import AutoModel, AutoTokenizer
178
- import torch
179
-
180
- model = AutoModel.from_pretrained("thebajajra/RexReranker-micro", trust_remote_code=True)
181
- tokenizer = AutoTokenizer.from_pretrained("thebajajra/RexReranker-micro")
182
-
183
- device = "cuda" if torch.cuda.is_available() else "cpu"
184
- model = model.to(device).eval()
185
-
186
- # Prepare inputs
187
- inputs = tokenizer(
188
- "Query: best laptop for programming",
189
- "Title: MacBook Pro\nDescription: Great laptop for developers",
190
- return_tensors="pt",
191
- truncation=True,
192
- ).to(device)
193
-
194
- # Get prediction with uncertainty
195
- with torch.no_grad():
196
- result = model.predict_with_uncertainty(**inputs)
197
-
198
- print(f"Relevance: {result['relevance'].item():.4f}")
199
- print(f"Variance: {result['variance'].item():.6f}") # Higher = more uncertain
200
- print(f"Entropy: {result['entropy'].item():.4f}") # Higher = more uncertain
201
-
202
- # Access full probability distribution
203
- print(f"\nDistribution over bins:")
204
- probs = result['probs'][0].cpu().numpy()
205
- for i, p in enumerate(probs):
206
- bin_center = i / (len(probs) - 1)
207
- bar = "█" * int(p * 50)
208
- print(f" {bin_center:.1f}: {bar} ({p:.3f})")
209
- ```
210
-
211
- ### 6. Batch Processing for Production
212
-
213
- ```python
214
- from transformers import AutoModel, AutoTokenizer
215
- import torch
216
- from torch.utils.data import DataLoader
217
-
218
- model = AutoModel.from_pretrained("thebajajra/RexReranker-micro", trust_remote_code=True)
219
- tokenizer = AutoTokenizer.from_pretrained("thebajajra/RexReranker-micro")
220
-
221
- device = "cuda" if torch.cuda.is_available() else "cpu"
222
- model = model.to(device).eval()
223
-
224
- def rerank_batch(query: str, documents: list, batch_size: int = 32) -> list:
225
- """Rerank documents for a query with batched inference."""
226
-
227
- # Prepare all inputs
228
- all_inputs = []
229
- for doc in documents:
230
- title = doc.get("title", "")
231
- description = doc.get("description", "")
232
- inputs = tokenizer(
233
- f"Query: {query}",
234
- f"Title: {title}\nDescription: {description}",
235
- truncation=True,
236
- max_length=2048,
237
- padding=False,
238
- )
239
- all_inputs.append(inputs)
240
-
241
- # Batch inference
242
- all_scores = []
243
- for i in range(0, len(all_inputs), batch_size):
244
- batch = all_inputs[i:i + batch_size]
245
- padded = tokenizer.pad(batch, return_tensors="pt").to(device)
246
-
247
- with torch.no_grad():
248
- scores = model.predict_relevance(**padded)
249
- all_scores.extend(scores.cpu().tolist())
250
 
251
- # Add scores to documents and sort
252
- for doc, score in zip(documents, all_scores):
253
- doc["score"] = score
254
-
255
- return sorted(documents, key=lambda x: x["score"], reverse=True)
256
-
257
- # Example usage
258
- query = "best laptop for programming"
259
- documents = [
260
- {"title": "MacBook Pro M3", "description": "Powerful laptop for developers"},
261
- {"title": "Gaming Mouse", "description": "High DPI gaming mouse"},
262
- {"title": "ThinkPad X1", "description": "Business laptop with long battery"},
263
- ]
264
-
265
- ranked = rerank_batch(query, documents)
266
- for doc in ranked:
267
- print(f"{doc['score']:.4f} | {doc['title']}")
268
  ```
269
 
270
  ## Input Format
271
 
272
  The model expects query-document pairs formatted as:
 
 
 
 
 
 
273
 
274
- | Field | Format |
275
- |-------|--------|
276
- | Text A (Query) | `Query: {your search query}` |
277
- | Text B (Document) | `Title: {document title}\nDescription: {document description}` |
278
-
279
- ## Output Details
280
-
281
- ### Standard Output (CrossEncoder compatible)
282
- - `outputs.logits`: Shape `[B, 1]` - Single relevance score per example
283
- - `outputs.relevance`: Shape `[B]` - Same as logits squeezed
284
 
285
- ### With Uncertainty (`output_distribution=True` or `predict_with_uncertainty()`)
286
- - `relevance`: Expected relevance score [0, 1]
287
- - `variance`: Prediction variance (higher = less confident)
288
- - `entropy`: Distribution entropy (higher = less confident)
289
- - `probs`: Full probability distribution over bins
290
- - `distribution_logits`: Raw logits before softmax
 
1
+ # Reranker Model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
+ This model was exported from checkpoint: `rexbert-reranker-micro/checkpoint-67628/`
 
 
 
 
4
 
5
+ ## Model Details
6
 
7
+ - **Base Model**: thebajajra/RexBERT-micro
8
+ - **Task**: Document Reranking
9
+ - **Output**: Relevance score between 0 and 1
10
 
11
+ ## Usage
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  ```python
 
14
  import torch
15
+ from transformers import AutoTokenizer
16
+ from train_modernbert_reranker import ModernBERTReranker
17
 
18
  # Load model and tokenizer
19
+ model_path = "rexreranker-micro"
20
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
21
+ model = ModernBERTReranker.from_pretrained(model_path)
 
 
 
 
 
 
22
  model.eval()
23
 
24
+ # Example inference
25
+ query = "wireless bluetooth headphones"
26
+ document = "Title: Sony WH-1000XM5\nDescription: Premium wireless headphones with noise cancellation"
 
27
 
28
+ inputs = tokenizer(query, document, return_tensors="pt", truncation=True, max_length=2048)
 
 
 
 
 
 
29
 
 
30
  with torch.no_grad():
31
+ outputs = model(**inputs)
32
+ score = outputs.logits.squeeze().item()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
+ print(f"Relevance score: {score:.4f}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  ```
36
 
37
  ## Input Format
38
 
39
  The model expects query-document pairs formatted as:
40
+ ```
41
+ Query: <query text>
42
+ [SEP]
43
+ Title: <title>
44
+ Description: <description>
45
+ ```
46
 
47
+ ## Training
 
 
 
 
 
 
 
 
 
48
 
49
+ This model was trained on the Amazebay reranker dataset with:
50
+ - Max sequence length: 2048
51
+ - BF16 precision
52
+ - Flash Attention 2
53
+ - Multi-GPU training (4 GPUs)
 
config.json CHANGED
@@ -1,28 +1,52 @@
1
  {
2
  "architectures": [
3
- "RexRerankerModel"
4
  ],
5
- "backbone_name": "thebajajra/RexBERT-micro",
6
- "dropout": 0.0,
 
 
 
 
 
 
 
 
7
  "dtype": "bfloat16",
 
 
 
 
 
 
8
  "hidden_size": 256,
9
- "model_type": "rex_reranker",
10
- "num_bins": 11,
11
- "pooling_strategy": "mean",
12
- "sigma_delta": 0.08,
13
- "sigma_max": 0.12,
14
- "sigma_min": 0.04,
15
- "transformers_version": "4.57.3",
16
- "transitions": [
17
- 0.2,
18
- 0.5,
19
- 0.8
20
- ],
21
- "num_labels": 1,
22
- "torch_dtype": "bfloat16",
23
- "auto_map": {
24
- "AutoConfig": "modeling_rex_reranker.RexRerankerConfig",
25
- "AutoModel": "modeling_rex_reranker.RexRerankerModel",
26
- "AutoModelForSequenceClassification": "modeling_rex_reranker.RexRerankerModel"
27
- }
28
- }
 
 
 
 
 
 
 
 
 
 
 
1
  {
2
  "architectures": [
3
+ "ModernBertForSequenceClassification"
4
  ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 50281,
8
+ "classifier_activation": "gelu",
9
+ "classifier_bias": false,
10
+ "classifier_dropout": 0.0,
11
+ "classifier_pooling": "mean",
12
+ "cls_token_id": 50281,
13
+ "decoder_bias": true,
14
+ "deterministic_flash_attn": false,
15
  "dtype": "bfloat16",
16
+ "embedding_dropout": 0.0,
17
+ "eos_token_id": 50282,
18
+ "global_attn_every_n_layers": 3,
19
+ "global_rope_theta": 160000.0,
20
+ "gradient_checkpointing": false,
21
+ "hidden_activation": "gelu",
22
  "hidden_size": 256,
23
+ "id2label": {
24
+ "0": "LABEL_0"
25
+ },
26
+ "initializer_cutoff_factor": 2.0,
27
+ "initializer_range": 0.02,
28
+ "intermediate_size": 384,
29
+ "label2id": {
30
+ "LABEL_0": 0
31
+ },
32
+ "layer_norm_eps": 1e-05,
33
+ "local_attention": 128,
34
+ "local_rope_theta": 160000.0,
35
+ "max_position_embeddings": 7999,
36
+ "mlp_bias": false,
37
+ "mlp_dropout": 0.0,
38
+ "model_type": "modernbert",
39
+ "norm_bias": false,
40
+ "norm_eps": 1e-05,
41
+ "num_attention_heads": 4,
42
+ "num_hidden_layers": 7,
43
+ "pad_token_id": 50283,
44
+ "position_embedding_type": "sans_pos",
45
+ "problem_type": "regression",
46
+ "repad_logits_with_grad": false,
47
+ "sep_token_id": 50282,
48
+ "sparse_pred_ignore_index": -100,
49
+ "sparse_prediction": false,
50
+ "transformers_version": "4.57.0",
51
+ "vocab_size": 50368
52
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53f09aabe729ed1164fcdad6c7e026ac7ee027339618bfedc8231d67896e6929
3
- size 33605318
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc08698cb0999cc34df87650e13ea88ff02e615e155830038a5eed98e8136eff
3
+ size 33731778
reranker_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"use_regression": true, "model_type": "reranker"}
training_metadata.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "checkpoint_path": "rexbert-reranker-micro/checkpoint-67628/",
3
+ "base_model": "thebajajra/RexBERT-micro",
4
+ "global_step": 67628,
5
+ "epoch": 5.500447336315576,
6
+ "best_metric": 0.6525706870245737,
7
+ "best_model_checkpoint": "./rexbert-reranker-micro/checkpoint-46110"
8
+ }