Spaces:
Running
Running
import json
import time
from statistics import mean, stdev
from typing import Dict, List, Optional

import requests
# Root URL of the service under test; run_benchmark() POSTs to f"{BASE_URL}/summarize".
BASE_URL = "http://localhost:8000"
class PerformanceBenchmark:
    """Time repeated POST requests to the ``/summarize`` endpoint.

    Each call to :meth:`run_benchmark` sends the same payload several times,
    records the latency of every HTTP 200 response and stores the aggregated
    statistics in ``self.results`` under the benchmark's name.
    """

    def __init__(self):
        # Maps benchmark name -> statistics dict built by run_benchmark().
        # Only benchmarks with at least one successful iteration are stored.
        self.results = {}

    @staticmethod
    def _compute_stats(name: str, times: List[float], iterations: int) -> Dict:
        """Aggregate successful latencies (in ms) into the result dict."""
        return {
            'name': name,
            # Number of *successful* iterations, not the number requested.
            'iterations': len(times),
            'min_ms': min(times),
            'max_ms': max(times),
            'avg_ms': mean(times),
            # statistics.stdev() needs at least two samples.
            'stdev_ms': stdev(times) if len(times) > 1 else 0,
            'success_rate': (len(times) / iterations) * 100
        }

    def run_benchmark(self, name: str, payload: Dict, iterations: int = 5) -> Optional[Dict]:
        """POST ``payload`` to ``{BASE_URL}/summarize`` and time each request.

        Args:
            name: Label used in the printed report and as the key in
                ``self.results``.
            payload: JSON-serialisable request body.
            iterations: Number of requests to send.

        Returns:
            A dict with the keys ``name``, ``iterations`` (count of successful
            requests), ``min_ms``, ``max_ms``, ``avg_ms``, ``stdev_ms`` and
            ``success_rate`` (percentage), or ``None`` when no request
            returned HTTP 200.
        """
        times: List[float] = []
        print(f"\n{'='*60}")
        print(f"🔍 Benchmarking: {name}")
        print(f"{'='*60}")
        print(f"Iterations: {iterations}")

        for i in range(iterations):
            # perf_counter() is monotonic and high-resolution, so the measured
            # interval is not distorted by system clock adjustments.
            start = time.perf_counter()
            try:
                response = requests.post(f"{BASE_URL}/summarize", json=payload, timeout=30)
            except requests.RequestException as e:
                # Connection errors and timeouts count as a failed iteration;
                # anything else is a programming error and should surface.
                print(f" Iteration {i+1}: ERROR ({str(e)}) ❌")
                continue
            elapsed = (time.perf_counter() - start) * 1000

            if response.status_code == 200:
                times.append(elapsed)
                print(f" Iteration {i+1}: {elapsed:.2f}ms ✅")
            else:
                # Non-200 responses are reported but excluded from the timings.
                print(f" Iteration {i+1}: ERROR (Status {response.status_code}) ❌")

        if not times:
            print(f"❌ All iterations failed")
            return None

        result = self._compute_stats(name, times, iterations)
        print(f"\n📊 Results:")
        print(f" Min: {result['min_ms']:.2f}ms")
        print(f" Max: {result['max_ms']:.2f}ms")
        print(f" Avg: {result['avg_ms']:.2f}ms")
        print(f" Std Dev: {result['stdev_ms']:.2f}ms")
        print(f" Success Rate: {result['success_rate']:.1f}%")
        self.results[name] = result
        return result

    def print_summary(self):
        """Print every stored benchmark, fastest average first."""
        print(f"\n{'='*60}")
        print("📈 BENCHMARK SUMMARY")
        print(f"{'='*60}")

        if not self.results:
            print("No successful benchmarks")
            return

        # Sort by average time
        sorted_results = sorted(self.results.items(), key=lambda x: x[1]['avg_ms'])
        for name, result in sorted_results:
            print(f"\n{name}:")
            print(f" Average: {result['avg_ms']:.2f}ms")
            print(f" Range: {result['min_ms']:.2f}ms - {result['max_ms']:.2f}ms")
            print(f" Success: {result['success_rate']:.1f}%")
def main():
    """Run the six benchmark scenarios, print the summary and save the results.

    Every scenario is announced as ``[k/6] <heading>`` and executed through
    ``PerformanceBenchmark.run_benchmark``. Afterwards the collected results
    are printed and written to ``benchmark_results.json``.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("🚀 DOCUMENT SUMMARIZER PERFORMANCE BENCHMARK")
    print(rule)

    runner = PerformanceBenchmark()

    # (heading, benchmark name, request payload, iterations), in execution order.
    scenarios = [
        (
            "Speed Mode - Short Document",
            "Speed Mode (Short Doc)",
            {
                "document": "Machine learning is AI. It learns from data.",
                "quality_preference": "speed",
            },
            5,
        ),
        (
            "Balanced Mode - Medium Document",
            "Balanced Mode (Medium Doc)",
            {
                "document": (
                    "Deep learning is a subset of machine learning using neural networks.\n"
                    "Each layer learns different features. It's used for image recognition, NLP, and speech.\n"
                    "Training requires GPU acceleration. Popular frameworks include PyTorch and TensorFlow."
                ),
                "quality_preference": "balanced",
            },
            5,
        ),
        (
            "Quality Mode - Medium Document",
            "Quality Mode (Medium Doc)",
            {
                "document": (
                    "Transformers have revolutionized NLP. The attention mechanism allows focusing\n"
                    "on relevant sequence parts. BERT and GPT are transformer-based. Self-attention enables learning\n"
                    "long-range dependencies. Position encoding preserves sequence information. Transformers scale\n"
                    "to billions of parameters like GPT-3."
                ),
                "quality_preference": "quality",
            },
            3,
        ),
        (
            "Technical Overview Intent",
            "Technical Overview Intent",
            {
                "document": "Convolutional neural networks use filters for feature extraction. ReLU activations add non-linearity. Pooling reduces dimensionality. CNNs excel at image tasks.",
                "intent": "technical_overview",
            },
            5,
        ),
        (
            "Methodology Intent",
            "Methodology Intent",
            {
                "document": (
                    "We collected 10000 samples. Training used SGD with learning rate 0.01.\n"
                    "Batch size was 32. We trained for 100 epochs. Cross-entropy was the loss function.\n"
                    "We achieved 95% accuracy on test set."
                ),
                "intent": "methodology",
            },
            5,
        ),
        (
            "Multilingual - Spanish",
            "Spanish Language (Speed)",
            {
                "document": "El aprendizaje automático es una rama de la inteligencia artificial importante.",
                "language": "spanish",
                "quality_preference": "speed",
            },
            5,
        ),
    ]

    total = len(scenarios)
    for position, (heading, label, body, repeats) in enumerate(scenarios, start=1):
        print(f"\n[{position}/{total}] {heading}")
        runner.run_benchmark(label, body, iterations=repeats)

    # Report the collected statistics.
    runner.print_summary()

    # Persist whatever succeeded as JSON next to the script's working directory.
    with open('benchmark_results.json', 'w') as out_file:
        json.dump(runner.results, out_file, indent=2)
    print("\n✅ Results saved to benchmark_results.json")

    print("\n" + rule)
    print("✅ BENCHMARK COMPLETE")
    print(rule + "\n")
# Run the benchmark suite only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()