Sync from GitHub Actions
Browse files
- README.md +11 -4
- config.json +1 -2
- demo.py +27 -0
- src/__pycache__/__init__.cpython-313.pyc +0 -0
- src/__pycache__/inference.cpython-313.pyc +0 -0
- src/__pycache__/model.cpython-313.pyc +0 -0
- src/__pycache__/tokenizer.cpython-313.pyc +0 -0
README.md
CHANGED
|
@@ -18,11 +18,18 @@ This is a specialized version of **MiniEmbed**, fine-tuned exclusively for **hig
|
|
| 18 |
Unlike general-purpose embedding models, this model is designed to determine if two product listings—often with different titles, specifications, or formatting—refer to the **exact same physical item**.
|
| 19 |
|
| 20 |
## Use Case
|
|
|
|
| 21 |
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
* **
|
| 25 |
-
* **
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
## Interactive Demo
|
| 28 |
|
|
|
|
| 18 |
Unlike general-purpose embedding models, this model is designed to determine if two product listings—often with different titles, specifications, or formatting—refer to the **exact same physical item**.
|
| 19 |
|
| 20 |
## Use Case
|
| 21 |
+
**E-commerce Product Matching & Entity Resolution**
|
| 22 |
|
| 23 |
+
This model is fine-tuned to solve the "Same Product, Different Description" problem in e-commerce:
|
| 24 |
+
|
| 25 |
+
* **Marketplace Aggregation**: Unifying listings from Amazon, Walmart, and eBay into a single catalog.
|
| 26 |
+
* **Competitor Analysis**: Matching your inventory against competitors to track pricing.
|
| 27 |
+
* **Data Cleaning**: Removing duplicates in databases where titles vary slightly (e.g., "Nike Air Max" vs "Nike Men's Air Max Shoe").
|
| 28 |
+
|
| 29 |
+
**Example Challenges Handled:**
|
| 30 |
+
* **Variations**: "iPhone 14 128GB" vs "Apple iPhone 14 Midnight 128GB"
|
| 31 |
+
* **Missing Attributes**: "Sony Headphones" vs "Sony WH-1000XM5 Noise Canceling Headphones"
|
| 32 |
+
* **Formatting Differences**: "5-Pack T-Shirts" vs "T-Shirt (Pack of 5)"
|
| 33 |
|
| 34 |
## Interactive Demo
|
| 35 |
|
config.json
CHANGED
|
@@ -4,6 +4,5 @@
|
|
| 4 |
"num_heads": 4,
|
| 5 |
"num_layers": 4,
|
| 6 |
"d_ff": 1024,
|
| 7 |
-
"max_seq_len": 128,
|
| 8 |
-
"pad_token_id": 0
|
| 9 |
}
|
|
|
|
| 4 |
"num_heads": 4,
|
| 5 |
"num_layers": 4,
|
| 6 |
"d_ff": 1024,
|
| 7 |
+
"max_seq_len": 128
|
|
|
|
| 8 |
}
|
demo.py
CHANGED
|
@@ -108,6 +108,33 @@ if st.button("Load Large Benchmark Dataset (100+ items)"):
|
|
| 108 |
("Lenovo ThinkPad X1 Carbon Gen 11", "Lenovo ThinkPad X1 Carbon Gen 11 14 inch", "Lenovo ThinkPad T14s"),
|
| 109 |
("HP Spectre x360 14", "HP Spectre x360 2-in-1 Laptop 13.5t", "HP Envy x360 15"),
|
| 110 |
("Microsoft Surface Pro 9", "Microsoft Surface Pro 9 (2022), 13 2-in-1", "Microsoft Surface Laptop 5"),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
]
|
| 112 |
|
| 113 |
a_list = []
|
|
|
|
| 108 |
("Lenovo ThinkPad X1 Carbon Gen 11", "Lenovo ThinkPad X1 Carbon Gen 11 14 inch", "Lenovo ThinkPad T14s"),
|
| 109 |
("HP Spectre x360 14", "HP Spectre x360 2-in-1 Laptop 13.5t", "HP Envy x360 15"),
|
| 110 |
("Microsoft Surface Pro 9", "Microsoft Surface Pro 9 (2022), 13 2-in-1", "Microsoft Surface Laptop 5"),
|
| 111 |
+
# New E-commerce items
|
| 112 |
+
("Levis 501 Original Fit Jeans", "Levi's Men's 501 Original Fit Jeans - Dark Stonewash", "Levi's 511 Slim Fit Jeans"),
|
| 113 |
+
("Adidas Ultraboost Light", "adidas Men's Ultraboost Light Running Shoe", "Adidas Ultraboost 1.0"),
|
| 114 |
+
("North Face McMurdo Parka", "The North Face Men's McMurdo Parka Coat", "North Face Gotham Jacket"),
|
| 115 |
+
("Patagonia Better Sweater", "Patagonia Men's Better Sweater 1/4-Zip Fleece", "Patagonia Synchilla Snap-T"),
|
| 116 |
+
("Ray-Ban Aviator Classic", "Ray-Ban RB3025 Classic Aviator Sunglasses", "Ray-Ban Wayfarer"),
|
| 117 |
+
("Herschel Little America", "Herschel Supply Co. Little America Backpack", "Herschel Heritage Backpack"),
|
| 118 |
+
("Hydro Flask 32 oz Wide Mouth", "Hydro Flask Azure 32 oz Wide Mouth Bottle with Flex Cap", "Hydro Flask 21 oz Standard Mouth"),
|
| 119 |
+
("Lululemon Align Leggings 25", "lululemon Align High-Rise Pant 25 inch", "Lululemon Wunder Train"),
|
| 120 |
+
("Birkenstock Arizona Sandals", "Birkenstock Unisex Arizona Essentials EVA Sandals", "Birkenstock Boston Clogs"),
|
| 121 |
+
("Crocs Classic Clog", "Crocs Unisex-Adult Classic Clogs without Jibbitz", "Crocs Baya Clog"),
|
| 122 |
+
("Oral-B iO Series 9", "Oral-B iO Series 9 Electric Toothbrush with 4 Brush Heads", "Oral-B Pro 1000"),
|
| 123 |
+
("Philips Sonicare DiamondClean", "Philips Sonicare DiamondClean Smart 9500 Rechargeable", "Philips Sonicare 4100"),
|
| 124 |
+
("Keurig K-Elite Coffee Maker", "Keurig K-Elite Single Serve K-Cup Pod Coffee Brewer", "Keurig K-Express"),
|
| 125 |
+
("Nespresso Vertuo Next", "Nespresso Vertuo Next Coffee and Espresso Machine by Breville", "Nespresso Essenza Mini"),
|
| 126 |
+
("KitchenAid Artisan Stand Mixer", "KitchenAid KSM150PSER Artisan Series 5-Qt. Stand Mixer", "KitchenAid Classic Series"),
|
| 127 |
+
("Cuisinart Food Processor 14 Cup", "Cuisinart DFP-14BCNY 14-Cup Food Processor", "Cuisinart Mini Prep Plus"),
|
| 128 |
+
("Weber Spirit II E-310", "Weber Spirit II E-310 3-Burner Liquid Propane Grill", "Weber Traveler Portable Grill"),
|
| 129 |
+
("Traeger Pro 575 Pellet Grill", "Traeger Grills Pro Series 575 Wood Pellet Grill and Smoker", "Traeger Ironwood 650"),
|
| 130 |
+
("Dewalt 20V Max Cordless Drill", "DEWALT 20V MAX Cordless Drill / Driver Kit, Compact", "Dewalt 20V Max Impact Driver"),
|
| 131 |
+
("Milwaukee M18 Fuel Impact Driver", "Milwaukee 2853-20 M18 FUEL 1/4 Hex Impact Driver", "Milwaukee M12 Impact Driver"),
|
| 132 |
+
("Samsonite Omni PC Hardside", "Samsonite Omni PC Hardside Expandable Luggage 20 inch", "Samsonite Winfield 2"),
|
| 133 |
+
("Away The Carry-On", "Away Travel The Carry-On Suitcase - Black", "Away The Bigger Carry-On"),
|
| 134 |
+
("Coleman Sundome Camping Tent", "Coleman Camping Tent with WeatherTec Setup", "Coleman Skydome Tent"),
|
| 135 |
+
("Yeti Tundra 45 Cooler", "YETI Tundra 45 Cooler Hard Cooler", "YETI Roadie 24 Cooler"),
|
| 136 |
+
("Bose SoundLink Flex", "Bose SoundLink Flex Bluetooth Portable Speaker", "Bose SoundLink Micro"),
|
| 137 |
+
("JBL Flip 6 Speaker", "JBL Flip 6 - Portable Bluetooth Speaker", "JBL Charge 5"),
|
| 138 |
]
|
| 139 |
|
| 140 |
a_list = []
|
src/__pycache__/__init__.cpython-313.pyc
CHANGED
|
Binary files a/src/__pycache__/__init__.cpython-313.pyc and b/src/__pycache__/__init__.cpython-313.pyc differ
|
|
|
src/__pycache__/inference.cpython-313.pyc
CHANGED
|
Binary files a/src/__pycache__/inference.cpython-313.pyc and b/src/__pycache__/inference.cpython-313.pyc differ
|
|
|
src/__pycache__/model.cpython-313.pyc
CHANGED
|
Binary files a/src/__pycache__/model.cpython-313.pyc and b/src/__pycache__/model.cpython-313.pyc differ
|
|
|
src/__pycache__/tokenizer.cpython-313.pyc
CHANGED
|
Binary files a/src/__pycache__/tokenizer.cpython-313.pyc and b/src/__pycache__/tokenizer.cpython-313.pyc differ
|
|
|