"""Latency benchmark for the document summarizer's ``/summarize`` endpoint.

Run as a script to time a fixed set of request payloads against the service
at ``BASE_URL`` and write the collected statistics to
``benchmark_results.json``.
"""

import json
import time
from statistics import mean, stdev
from typing import Dict, List, Optional

import requests

BASE_URL = "http://localhost:8000"


class PerformanceBenchmark:
    """Time repeated POSTs to ``/summarize`` and keep per-benchmark statistics.

    Attributes:
        base_url: Root URL of the service; ``/summarize`` is appended to it.
        timeout: Per-request timeout in seconds passed to ``requests.post``.
        results: Maps a benchmark name to the statistics dict produced by
            ``run_benchmark``. Only benchmarks with at least one successful
            iteration are stored.
    """

    def __init__(self, base_url: str = BASE_URL, timeout: float = 30) -> None:
        self.base_url = base_url
        self.timeout = timeout
        self.results: Dict[str, Dict] = {}

    def run_benchmark(
        self, name: str, payload: Dict, iterations: int = 5
    ) -> Optional[Dict]:
        """POST ``payload`` ``iterations`` times and report latency statistics.

        Only iterations answered with HTTP 200 contribute a timing; any other
        status code or a ``requests`` error is printed and counted as a
        failure.

        Args:
            name: Label used in the printed report and as the key in
                ``self.results``.
            payload: JSON-serialisable request body.
            iterations: Number of requests to send.

        Returns:
            A dict with the keys ``name``, ``iterations`` (number of
            successful requests), ``min_ms``, ``max_ms``, ``avg_ms``,
            ``stdev_ms`` and ``success_rate`` (percentage of requests that
            returned 200), or ``None`` when no request succeeded.
        """
        url = f"{self.base_url}/summarize"
        separator = "=" * 60
        times = []

        print(f"\n{separator}")
        print(f"🔍 Benchmarking: {name}")
        print(separator)
        print(f"Iterations: {iterations}")

        for i in range(iterations):
            # perf_counter() is monotonic and high-resolution, so a system
            # clock adjustment during the run cannot skew a measurement.
            start = time.perf_counter()
            try:
                response = requests.post(url, json=payload, timeout=self.timeout)
            except requests.RequestException as e:
                print(f" Iteration {i+1}: ERROR ({e}) ❌")
                continue
            elapsed = (time.perf_counter() - start) * 1000

            if response.status_code == 200:
                times.append(elapsed)
                print(f" Iteration {i+1}: {elapsed:.2f}ms ✅")
            else:
                print(f" Iteration {i+1}: ERROR (Status {response.status_code}) ❌")

        if not times:
            print("❌ All iterations failed")
            return None

        result = {
            'name': name,
            'iterations': len(times),
            'min_ms': min(times),
            'max_ms': max(times),
            'avg_ms': mean(times),
            # stdev() needs at least two samples.
            'stdev_ms': stdev(times) if len(times) > 1 else 0,
            'success_rate': (len(times) / iterations) * 100,
        }

        print("\n📊 Results:")
        print(f" Min: {result['min_ms']:.2f}ms")
        print(f" Max: {result['max_ms']:.2f}ms")
        print(f" Avg: {result['avg_ms']:.2f}ms")
        print(f" Std Dev: {result['stdev_ms']:.2f}ms")
        print(f" Success Rate: {result['success_rate']:.1f}%")

        self.results[name] = result
        return result

    def print_summary(self) -> None:
        """Print every stored benchmark, fastest average first."""
        separator = "=" * 60
        print(f"\n{separator}")
        print("📈 BENCHMARK SUMMARY")
        print(separator)

        if not self.results:
            print("No successful benchmarks")
            return

        # Sort by average time
        sorted_results = sorted(self.results.items(), key=lambda x: x[1]['avg_ms'])

        for name, result in sorted_results:
            print(f"\n{name}:")
            print(f" Average: {result['avg_ms']:.2f}ms")
            print(f" Range: {result['min_ms']:.2f}ms - {result['max_ms']:.2f}ms")
            print(f" Success: {result['success_rate']:.1f}%")


def main() -> None:
    """Run the fixed benchmark suite, print a summary and save the results."""
    separator = "=" * 60
    print("\n" + separator)
    print("🚀 DOCUMENT SUMMARIZER PERFORMANCE BENCHMARK")
    print(separator)

    benchmark = PerformanceBenchmark()

    # Each case: (progress heading, benchmark name, request payload, iterations)
    cases = [
        (
            "Speed Mode - Short Document",
            "Speed Mode (Short Doc)",
            {
                "document": "Machine learning is AI. It learns from data.",
                "quality_preference": "speed",
            },
            5,
        ),
        (
            "Balanced Mode - Medium Document",
            "Balanced Mode (Medium Doc)",
            {
                "document": (
                    "Deep learning is a subset of machine learning using neural networks. "
                    "Each layer learns different features. "
                    "It's used for image recognition, NLP, and speech. "
                    "Training requires GPU acceleration. "
                    "Popular frameworks include PyTorch and TensorFlow."
                ),
                "quality_preference": "balanced",
            },
            5,
        ),
        (
            "Quality Mode - Medium Document",
            "Quality Mode (Medium Doc)",
            {
                "document": (
                    "Transformers have revolutionized NLP. "
                    "The attention mechanism allows focusing on relevant sequence parts. "
                    "BERT and GPT are transformer-based. "
                    "Self-attention enables learning long-range dependencies. "
                    "Position encoding preserves sequence information. "
                    "Transformers scale to billions of parameters like GPT-3."
                ),
                "quality_preference": "quality",
            },
            3,
        ),
        (
            "Technical Overview Intent",
            "Technical Overview Intent",
            {
                "document": (
                    "Convolutional neural networks use filters for feature extraction. "
                    "ReLU activations add non-linearity. "
                    "Pooling reduces dimensionality. "
                    "CNNs excel at image tasks."
                ),
                "intent": "technical_overview",
            },
            5,
        ),
        (
            "Methodology Intent",
            "Methodology Intent",
            {
                "document": (
                    "We collected 10000 samples. "
                    "Training used SGD with learning rate 0.01. "
                    "Batch size was 32. "
                    "We trained for 100 epochs. "
                    "Cross-entropy was the loss function. "
                    "We achieved 95% accuracy on test set."
                ),
                "intent": "methodology",
            },
            5,
        ),
        (
            "Multilingual - Spanish",
            "Spanish Language (Speed)",
            {
                "document": (
                    "El aprendizaje automático es una rama de la "
                    "inteligencia artificial importante."
                ),
                "language": "spanish",
                "quality_preference": "speed",
            },
            5,
        ),
    ]

    total = len(cases)
    for index, (heading, name, payload, iterations) in enumerate(cases, start=1):
        print(f"\n[{index}/{total}] {heading}")
        benchmark.run_benchmark(name, payload, iterations=iterations)

    # Print summary
    benchmark.print_summary()

    # Save results to file; explicit encoding keeps the output independent of
    # the platform's default locale.
    with open('benchmark_results.json', 'w', encoding='utf-8') as f:
        json.dump(benchmark.results, f, indent=2)
    print("\n✅ Results saved to benchmark_results.json")

    print("\n" + separator)
    print("✅ BENCHMARK COMPLETE")
    print(separator + "\n")


if __name__ == "__main__":
    main()