import os
import time

import psutil
import torch
from sentence_transformers import SentenceTransformer


def benchmark_bge():
    print("šŸš€ Starting BGE-M3 Efficiency Benchmark...")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"šŸ’» Device: {device}")

    # Load the model and time how long that takes.
    print("šŸ“„ Loading BAAI/bge-m3...")
    start_load = time.time()
    model = SentenceTransformer("BAAI/bge-m3", device=device)
    print(f"ā±ļø Load Time: {time.time() - start_load:.2f}s")

    # Report host RAM (RSS) of the current process. Note that on CUDA the
    # model weights live in VRAM, which RSS does not capture.
    process = psutil.Process(os.getpid())
    mem_info = process.memory_info()
    print(f"šŸ“Š Memory Usage (RAM): {mem_info.rss / 1024 / 1024:.2f} MB")

    sentences = [
        "The quick brown fox jumps over the lazy dog.",
        "Artificial intelligence is transforming the recruitment industry.",
        "Candidate has 5 years of experience in Python and FastAPI.",
        "Looking for a Senior Software Engineer with cloud expertise.",
    ] * 25  # 100 sentences

    batch_sizes = [1, 4, 8, 16, 32]

    print("\n--- Latency vs Batch Size ---")
    print(f"{'Batch Size':<12} | {'Time (s)':<10} | {'Sec/Sent':<10} | {'Throughput (sent/s)':<20}")
    print("-" * 65)

    for bs in batch_sizes:
        # Warmup: encode one small batch so first-call overhead (CUDA kernel
        # compilation, tokenizer caches) does not skew the measurement.
        model.encode(sentences[:bs], batch_size=bs, show_progress_bar=False)

        # Actual benchmark. encode() returns NumPy arrays by default, which
        # forces a device sync, so wall-clock time is a fair measurement.
        start_time = time.time()
        model.encode(sentences, batch_size=bs, show_progress_bar=False)
        total_time = time.time() - start_time

        sec_per_sent = total_time / len(sentences)
        throughput = len(sentences) / total_time
        print(f"{bs:<12} | {total_time:<10.3f} | {sec_per_sent:<10.4f} | {throughput:<20.2f}")

    print("\nāœ… Benchmark Complete.")


if __name__ == "__main__":
    benchmark_bge()
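
# Optional follow-up (a minimal sketch, not part of the benchmark above):
# since psutil's RSS only measures host RAM, peak VRAM usage on a CUDA device
# goes unreported. torch.cuda tracks peak allocations since the last reset,
# so a single encode run could be bracketed like this:
#
#     torch.cuda.reset_peak_memory_stats()
#     model.encode(sentences, batch_size=32, show_progress_bar=False)
#     peak_mb = torch.cuda.max_memory_allocated() / 1024 / 1024
#     print(f"šŸ“Š Peak VRAM: {peak_mb:.2f} MB")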