# all-MiniLM-L6-v2-ProductMatching / benchmark_comparison.py
# Source: surazbhandari's Hugging Face repository — initial upload of the
# fine-tuned Product Matching model (commit 0ada64f, verified).
from sentence_transformers import SentenceTransformer
import torch
import pandas as pd
def calculate_sim(model, p1, p2):
    """Return the cosine similarity between the embeddings of two product titles.

    Args:
        model: A loaded SentenceTransformer (anything exposing ``encode``).
        p1: First product title.
        p2: Second product title.

    Returns:
        float: Cosine similarity of the two embeddings.
    """
    vec_a, vec_b = model.encode([p1, p2])
    # 1-D tensors compared along dim=0 — equivalent to unsqueezing to
    # (1, d) rows and using the default dim=1.
    first = torch.tensor(vec_a)
    second = torch.tensor(vec_b)
    return torch.nn.functional.cosine_similarity(first, second, dim=0).item()
def run_benchmark(models=None, test_cases=None):
    """Benchmark a fine-tuned product-matching model against a baseline.

    Encodes each product-title pair with both models, prints the per-pair
    cosine similarities with a verdict, then prints summary statistics:
    average similarity for matches vs. negatives and the separation margin
    (match mean minus negative mean) for each model.

    Args:
        models: Optional mapping of display name -> model path or hub ID.
            Exactly two entries are expected: the FIRST is treated as the
            candidate (fine-tuned) model, the SECOND as the baseline.
            Defaults to the original fine-tuned checkpoint and
            ``sentence-transformers/all-MiniLM-L6-v2``.
        test_cases: Optional list of ``(product_1, product_2, relationship)``
            tuples, where relationship is ``"Match"`` or a negative label.
            Defaults to the built-in set of matches and hard/random negatives.
    """
    if models is None:
        models = {
            "Fine-Tuned (ProductMatching)": "/Users/surajbhandari/Documents/antigravity/all-MiniLM-L6-v2-ProductMatching",
            "Base Model (all-MiniLM-L6-v2)": "sentence-transformers/all-MiniLM-L6-v2"
        }
    if test_cases is None:
        # Test cases: (Product 1, Product 2, Expected Relationship)
        test_cases = [
            ("Apple iPhone 15 Pro Max 256GB Titanium", "iPhone 15 Pro Max - Blue Titanium - 256 GB", "Match"),
            ("Logitech MX Master 3S Wireless Mouse - Graphite", "Logitech MX Master 3S (Graphite) Mouse", "Match"),
            ("Samsung Galaxy S23 Ultra 512GB", "Galaxy S23 Ultra - 512 GB - Phantom Black", "Match"),
            ("Sony WH-1000XM5 Noise Cancelling Headphones", "Sony WH-1000XM4 Wireless Headphones", "Hard Negative (Diff Version)"),
            ("Apple MacBook Pro 14 M3", "Apple MacBook Pro 16 M3", "Hard Negative (Diff Size)"),
            ("Nike Air Max Running Shoes", "Adidas Ultraboost Running Shoes", "Similar Category (Diff Brand)"),
            ("KitchenAid Stand Mixer", "HP LaserJet Printer", "Random Negative")
        ]

    # Derive the result-column names once from the dict keys instead of
    # repeating the long string literals throughout (first = candidate,
    # second = baseline).
    ft_name, base_name = list(models)

    results = []
    print("🚀 Starting Benchmark Comparison...")

    # Load models up front so each is instantiated exactly once.
    loaded_models = {}
    for name, path in models.items():
        print(f"📦 Loading {name}...")
        loaded_models[name] = SentenceTransformer(path)

    # Score every pair with every model.
    for p1, p2, rel in test_cases:
        row = {"Product 1": p1, "Product 2": p2, "Type": rel}
        for name, model in loaded_models.items():
            row[name] = calculate_sim(model, p1, p2)
        results.append(row)

    df = pd.DataFrame(results)

    print("\n" + "="*80)
    print(f"{'PRODUCT MATCHING BENCHMARK':^80}")
    print("="*80)
    for _, res in df.iterrows():
        print(f"\nPairs: {res['Product 1']} \n VS: {res['Product 2']}")
        print(f"Type: {res['Type']}")
        ft_val = res[ft_name]
        base_val = res[base_name]
        diff = ft_val - base_val
        # For true matches a HIGHER fine-tuned score is better; for every
        # negative category a LOWER fine-tuned score is better.
        status = "✅ FT Higher" if diff > 0 else "❌ Base Higher"
        if res['Type'] != "Match":
            status = "✅ FT Lower (Better)" if diff < 0 else "❌ Base Lower"
        print(f" - Fine-Tuned: {ft_val:.4f}")
        print(f" - Base Model: {base_val:.4f}")
        print(f" - Decision: {status}")

    # Summary Statistics: matches vs. all negative categories combined.
    matches = df[df['Type'] == "Match"]
    negatives = df[df['Type'] != "Match"]

    print("\n" + "="*80)
    print(f"{'SUMMARY STATISTICS':^80}")
    print("="*80)
    print(f"Avg Match Similarity: FT: {matches[ft_name].mean():.4f} | Base: {matches[base_name].mean():.4f}")
    print(f"Avg Negative Similarity: FT: {negatives[ft_name].mean():.4f} | Base: {negatives[base_name].mean():.4f}")

    # Margin (gap between matches and negatives): a larger gap means the
    # model separates true matches from look-alikes more cleanly.
    ft_margin = matches[ft_name].mean() - negatives[ft_name].mean()
    base_margin = matches[base_name].mean() - negatives[base_name].mean()

    print(f"\nSeparation Margin (Match - Negative):")
    print(f" - Fine-Tuned: {ft_margin:.4f}")
    print(f" - Base Model: {base_margin:.4f}")

    better = "FINE-TUNED" if ft_margin > base_margin else "BASE MODEL"
    print(f"\n🏆 Overall Winner for Product Matching: {better}")
# Run the benchmark only when executed as a script, not on import.
if __name__ == "__main__":
    run_benchmark()