import os
import time

import psutil
import torch
from sentence_transformers import SentenceTransformer


def benchmark_bge():
    print("šŸš€ Starting BGE-M3 Efficiency Benchmark...")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"šŸ’» Device: {device}")

    # Load the model and time how long that takes.
    print("šŸ“„ Loading BAAI/bge-m3...")
    start_load = time.time()
    model = SentenceTransformer("BAAI/bge-m3", device=device)
    print(f"ā±ļø Load Time: {time.time() - start_load:.2f}s")

    # Report host RAM (RSS) of the current process. Note that on CUDA the
    # model weights live in VRAM, which RSS does not capture.
    process = psutil.Process(os.getpid())
    mem_info = process.memory_info()
    print(f"šŸ“Š Memory Usage (RAM): {mem_info.rss / 1024 / 1024:.2f} MB")

    sentences = [
        "The quick brown fox jumps over the lazy dog.",
        "Artificial intelligence is transforming the recruitment industry.",
        "Candidate has 5 years of experience in Python and FastAPI.",
        "Looking for a Senior Software Engineer with cloud expertise.",
    ] * 25  # 100 sentences

    batch_sizes = [1, 4, 8, 16, 32]

    print("\n--- Latency vs Batch Size ---")
    print(f"{'Batch Size':<12} | {'Time (s)':<10} | {'Sec/Sent':<10} | {'Throughput (sent/s)':<20}")
    print("-" * 65)

    for bs in batch_sizes:
        # Warmup: encode one small batch so first-call overhead (CUDA kernel
        # compilation, tokenizer caches) does not skew the measurement.
        model.encode(sentences[:bs], batch_size=bs, show_progress_bar=False)

        # Actual benchmark. encode() returns NumPy arrays by default, which
        # forces a device sync, so wall-clock time is a fair measurement.
        start_time = time.time()
        model.encode(sentences, batch_size=bs, show_progress_bar=False)
        total_time = time.time() - start_time

        sec_per_sent = total_time / len(sentences)
        throughput = len(sentences) / total_time
        print(f"{bs:<12} | {total_time:<10.3f} | {sec_per_sent:<10.4f} | {throughput:<20.2f}")

    print("\nāœ… Benchmark Complete.")


if __name__ == "__main__":
    benchmark_bge()
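
# Optional follow-up (a minimal sketch, not part of the benchmark above):
# since psutil's RSS only measures host RAM, peak VRAM usage on a CUDA device
# goes unreported. torch.cuda tracks peak allocations since the last reset,
# so a single encode run could be bracketed like this:
#
#     torch.cuda.reset_peak_memory_stats()
#     model.encode(sentences, batch_size=32, show_progress_bar=False)
#     peak_mb = torch.cuda.max_memory_allocated() / 1024 / 1024
#     print(f"šŸ“Š Peak VRAM: {peak_mb:.2f} MB")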