MogensR committed on
Commit
e74e423
·
1 Parent(s): dd4248f

Create scripts/benchmark.py

Browse files
Files changed (1) hide show
  1. scripts/benchmark.py +432 -0
scripts/benchmark.py ADDED
@@ -0,0 +1,432 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Benchmark script for BackgroundFX Pro.
4
+ Tests performance across different configurations and hardware.
5
+ """
6
+
7
+ import time
8
+ import psutil
9
+ import torch
10
+ import cv2
11
+ import numpy as np
12
+ from pathlib import Path
13
+ import json
14
+ import argparse
15
+ from typing import Dict, List, Any
16
+ import statistics
17
+ from datetime import datetime
18
+
19
+ # Add parent directory to path
20
+ import sys
21
+ sys.path.append(str(Path(__file__).parent.parent))
22
+
23
+ from api import ProcessingPipeline, PipelineConfig
24
+ from models import ModelRegistry, ModelLoader
25
+
26
+
27
class Benchmarker:
    """Performance benchmarking tool for BackgroundFX Pro.

    Every ``benchmark_*`` method runs one family of measurements, appends its
    result dictionary to ``self.results['benchmarks']`` and also returns it.
    ``save_results`` writes the accumulated report as JSON and
    ``print_summary`` prints a short console overview.
    """

    def __init__(self, output_file: str = None):
        """Initialize the benchmarker.

        Args:
            output_file: Destination path for the JSON report. When omitted
                (or empty), a timestamped ``benchmark_YYYYmmdd_HHMMSS.json``
                relative to the current directory is used.
        """
        started = datetime.now()
        self.results = {
            'timestamp': started.isoformat(),
            'system_info': self._get_system_info(),
            'benchmarks': [],
        }
        self.output_file = output_file or f"benchmark_{started.strftime('%Y%m%d_%H%M%S')}.json"

    def _get_system_info(self) -> Dict[str, Any]:
        """Collect a snapshot of CPU, memory, GPU and runtime versions."""
        # Query each psutil source once so the reported numbers are consistent.
        cpu_freq = psutil.cpu_freq()  # falsy result is reported as 0
        memory = psutil.virtual_memory()
        return {
            'cpu': {
                'count': psutil.cpu_count(),
                'frequency': cpu_freq.current if cpu_freq else 0,
                'model': self._get_cpu_model(),
            },
            'memory': {
                'total_gb': memory.total / (1024**3),
                'available_gb': memory.available / (1024**3),
            },
            'gpu': self._get_gpu_info(),
            'python_version': sys.version,
            'torch_version': torch.__version__,
            'cuda_available': torch.cuda.is_available(),
        }

    def _get_cpu_model(self) -> str:
        """Return the CPU model name, or ``"Unknown"`` when it cannot be determined."""
        try:
            import platform
            model = platform.processor()
        except Exception:
            return "Unknown"
        # platform.processor() yields an empty string when the name is not
        # available; report that the same way as a lookup failure.
        return model or "Unknown"

    def _get_gpu_info(self) -> Dict[str, Any]:
        """Describe CUDA device 0, or return ``{'available': False}`` without CUDA."""
        if not torch.cuda.is_available():
            return {'available': False}
        return {
            # 'available' is present in both branches so consumers can test one key.
            'available': True,
            'name': torch.cuda.get_device_name(0),
            'memory_gb': torch.cuda.get_device_properties(0).total_memory / (1024**3),
            'compute_capability': torch.cuda.get_device_capability(0),
        }

    @staticmethod
    def _sync_gpu() -> None:
        """Wait for queued CUDA work so wall-clock timings include it."""
        if torch.cuda.is_available():
            torch.cuda.synchronize()

    @staticmethod
    def _json_default(value: Any) -> Any:
        """Convert values ``json`` cannot encode natively.

        NumPy scalars become Python scalars, NumPy arrays become lists, and
        anything else falls back to ``str(value)`` so that a single unexpected
        object cannot discard an entire benchmark report.
        """
        if isinstance(value, np.generic):
            return value.item()
        if isinstance(value, np.ndarray):
            return value.tolist()
        return str(value)

    def benchmark_image_processing(self,
                                   sizes: List[tuple] = None,
                                   qualities: List[str] = None,
                                   num_iterations: int = 5) -> Dict[str, Any]:
        """Benchmark single-image processing performance.

        Args:
            sizes: ``(width, height)`` pairs to test. Defaults to
                512x512, 1024x1024 and 1920x1080.
            qualities: Values passed as ``quality_preset`` to
                ``PipelineConfig``. Defaults to low/medium/high.
            num_iterations: Number of timed runs per configuration (one
                untimed warm-up run precedes them).

        Returns:
            The result dictionary that was appended to
            ``self.results['benchmarks']``. A configuration that raises is
            recorded with an ``'error'`` entry instead of timings.

        Raises:
            ValueError: If ``num_iterations`` is smaller than 1.
        """
        if num_iterations < 1:
            raise ValueError(f"num_iterations must be >= 1, got {num_iterations}")

        print("\n=== Image Processing Benchmark ===")

        sizes = sizes or [(512, 512), (1024, 1024), (1920, 1080)]
        qualities = qualities or ['low', 'medium', 'high']

        results = {
            'test': 'image_processing',
            'iterations': num_iterations,
            'results': [],
        }

        process = psutil.Process()
        use_gpu = torch.cuda.is_available()

        for size in sizes:
            width, height = size[0], size[1]
            label = f"{width}x{height}"

            for quality in qualities:
                print(f"Testing {label} @ {quality} quality...")

                # Array shape is (rows, cols, channels) == (height, width, 3);
                # the upper bound of randint is exclusive, hence 256.
                image = np.random.randint(0, 256, (height, width, 3), dtype=np.uint8)

                config = PipelineConfig(
                    quality_preset=quality,
                    use_gpu=use_gpu,
                    enable_cache=False,
                )

                try:
                    pipeline = ProcessingPipeline(config)

                    # Warm-up run, excluded from the statistics.
                    pipeline.process_image(image, None)
                    self._sync_gpu()

                    times = []
                    memory_usage = []

                    for _ in range(num_iterations):
                        start_mem = process.memory_info().rss / (1024**2)
                        start_time = time.perf_counter()

                        pipeline.process_image(image, None)
                        self._sync_gpu()

                        elapsed = time.perf_counter() - start_time
                        end_mem = process.memory_info().rss / (1024**2)

                        times.append(elapsed)
                        memory_usage.append(end_mem - start_mem)

                    avg_time = statistics.mean(times)
                    result_data = {
                        'size': label,
                        'quality': quality,
                        'avg_time': avg_time,
                        'std_time': statistics.stdev(times) if len(times) > 1 else 0,
                        'min_time': min(times),
                        'max_time': max(times),
                        'fps': 1.0 / avg_time,
                        'avg_memory_mb': statistics.mean(memory_usage),
                    }

                    results['results'].append(result_data)
                    print(f" Average: {result_data['avg_time']:.3f}s ({result_data['fps']:.1f} FPS)")

                except Exception as e:
                    # Best effort: record the failure and continue with the
                    # remaining configurations.
                    print(f" Failed: {str(e)}")
                    results['results'].append({
                        'size': label,
                        'quality': quality,
                        'error': str(e),
                    })

        self.results['benchmarks'].append(results)
        return results

    def benchmark_model_loading(self) -> Dict[str, Any]:
        """Benchmark how long each known model takes to load.

        Returns:
            The result dictionary that was appended to
            ``self.results['benchmarks']``; one entry per model containing
            either load time / memory delta / device or an ``'error'``.
        """
        print("\n=== Model Loading Benchmark ===")

        results = {
            'test': 'model_loading',
            'results': [],
        }

        registry = ModelRegistry()
        loader = ModelLoader(registry, device='cuda' if torch.cuda.is_available() else 'cpu')
        process = psutil.Process()

        models_to_test = ['rmbg-1.4', 'u2netp', 'modnet']

        for model_id in models_to_test:
            print(f"Loading {model_id}...")

            # Drop previously loaded models before measuring the next one.
            loader.unload_all()

            # Sample memory first so the sampling cost is not part of the timing.
            start_mem = process.memory_info().rss / (1024**2)
            start_time = time.perf_counter()

            try:
                loaded = loader.load_model(model_id)

                elapsed = time.perf_counter() - start_time
                end_mem = process.memory_info().rss / (1024**2)

                if loaded:
                    result_data = {
                        'model': model_id,
                        'load_time': elapsed,
                        'memory_usage_mb': end_mem - start_mem,
                        # Stored as text: the attribute's type is defined by the
                        # loader and may not be JSON-serializable.
                        'device': str(loaded.device),
                    }
                    print(f" Loaded in {elapsed:.2f}s, Memory: {end_mem - start_mem:.1f}MB")
                else:
                    result_data = {
                        'model': model_id,
                        'error': 'Failed to load',
                    }
                    print(f" Failed to load")

            except Exception as e:
                result_data = {
                    'model': model_id,
                    'error': str(e),
                }
                print(f" Error: {str(e)}")

            results['results'].append(result_data)

        self.results['benchmarks'].append(results)
        return results

    def _write_test_video(self, video_path: Path, total_frames: int, fps: int, size: tuple) -> bool:
        """Write a synthetic noise clip with a moving green rectangle.

        Returns ``False`` when the video writer could not be opened; the
        writer is released in every case.
        """
        width, height = size[0], size[1]
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        writer = cv2.VideoWriter(str(video_path), fourcc, fps, size)
        try:
            if not writer.isOpened():
                return False
            for i in range(total_frames):
                # Frames are (height, width, channels).
                frame = np.random.randint(0, 256, (height, width, 3), dtype=np.uint8)
                # Add moving rectangle for motion
                x = int((i / total_frames) * width)
                cv2.rectangle(frame, (x, 100), (x + 100, 200), (0, 255, 0), -1)
                writer.write(frame)
        finally:
            writer.release()
        return True

    def benchmark_video_processing(self,
                                   duration: int = 5,
                                   fps: int = 30,
                                   size: tuple = (1280, 720)) -> Dict[str, Any]:
        """Benchmark video processing on a generated test clip.

        Args:
            duration: Length of the generated clip in seconds.
            fps: Frame rate of the generated clip.
            size: ``(width, height)`` of the generated clip.

        Returns:
            The result dictionary that was appended to
            ``self.results['benchmarks']``. If the test clip cannot be
            written, the dictionary carries a top-level ``'error'`` and an
            empty ``'results'`` list.
        """
        import shutil
        import tempfile
        from api import VideoProcessorAPI

        print("\n=== Video Processing Benchmark ===")

        total_frames = duration * fps
        results = {
            'test': 'video_processing',
            'video_specs': {
                'duration': duration,
                'fps': fps,
                'size': f"{size[0]}x{size[1]}",
                'total_frames': total_frames,
            },
            'results': [],
        }

        process = psutil.Process()
        work_dir = Path(tempfile.mkdtemp())
        try:
            video_path = work_dir / "test_video.mp4"

            print(f"Creating test video: {duration}s @ {fps}fps, {size[0]}x{size[1]}")
            if not self._write_test_video(video_path, total_frames, fps, size):
                message = "Could not open video writer for the test clip"
                print(f" Failed: {message}")
                results['error'] = message
            else:
                # NOTE(review): `quality` only labels the run; it is never
                # passed to VideoProcessorAPI or process_video, so the three
                # runs presumably use identical settings. Confirm how the
                # API selects a quality preset and wire it in.
                for quality in ['low', 'medium', 'high']:
                    print(f"Processing at {quality} quality...")

                    processor = VideoProcessorAPI()

                    start_mem = process.memory_info().rss / (1024**2)
                    start_time = time.perf_counter()

                    try:
                        output_path = work_dir / f"output_{quality}.mp4"
                        stats = processor.process_video(
                            str(video_path),
                            str(output_path),
                            background=None,
                        )

                        elapsed = time.perf_counter() - start_time
                        end_mem = process.memory_info().rss / (1024**2)

                        result_data = {
                            'quality': quality,
                            'total_time': elapsed,
                            'frames_processed': stats.frames_processed,
                            'processing_fps': stats.processing_fps,
                            'time_per_frame': elapsed / stats.frames_processed if stats.frames_processed > 0 else 0,
                            'memory_usage_mb': end_mem - start_mem,
                        }

                        print(f" Processed in {elapsed:.2f}s @ {stats.processing_fps:.1f} FPS")

                    except Exception as e:
                        result_data = {
                            'quality': quality,
                            'error': str(e),
                        }
                        print(f" Failed: {str(e)}")

                    results['results'].append(result_data)
        finally:
            # Remove the input clip, every output file and the directory
            # itself; cleanup is best effort and must not mask a result.
            shutil.rmtree(work_dir, ignore_errors=True)

        self.results['benchmarks'].append(results)
        return results

    def benchmark_batch_processing(self,
                                   batch_sizes: List[int] = None,
                                   num_workers_list: List[int] = None) -> Dict[str, Any]:
        """Benchmark batch processing across batch sizes and worker counts.

        Args:
            batch_sizes: Batch sizes to test. Defaults to 1, 5, 10 and 20.
            num_workers_list: Worker counts to test. Defaults to 1, 2, 4, 8.

        Returns:
            The result dictionary that was appended to
            ``self.results['benchmarks']``; one entry per combination with
            either throughput figures or an ``'error'``.
        """
        print("\n=== Batch Processing Benchmark ===")

        batch_sizes = batch_sizes or [1, 5, 10, 20]
        num_workers_list = num_workers_list or [1, 2, 4, 8]

        results = {
            'test': 'batch_processing',
            'results': [],
        }

        # One pool of 512x512 images, sliced per batch size below.
        test_images = [
            np.random.randint(0, 256, (512, 512, 3), dtype=np.uint8)
            for _ in range(max(batch_sizes))
        ]
        use_gpu = torch.cuda.is_available()

        for batch_size in batch_sizes:
            for num_workers in num_workers_list:
                print(f"Testing batch_size={batch_size}, workers={num_workers}...")

                config = PipelineConfig(
                    batch_size=batch_size,
                    num_workers=num_workers,
                    use_gpu=use_gpu,
                    enable_cache=False,
                )

                try:
                    pipeline = ProcessingPipeline(config)

                    start_time = time.perf_counter()
                    results_batch = pipeline.process_batch(test_images[:batch_size])
                    self._sync_gpu()
                    elapsed = time.perf_counter() - start_time

                    successful = sum(1 for r in results_batch if r.success)

                    result_data = {
                        'batch_size': batch_size,
                        'num_workers': num_workers,
                        'total_time': elapsed,
                        'time_per_image': elapsed / batch_size,
                        'throughput': batch_size / elapsed,
                        'successful': successful,
                    }

                    print(f" {elapsed:.2f}s total, {result_data['throughput']:.1f} images/sec")

                except Exception as e:
                    result_data = {
                        'batch_size': batch_size,
                        'num_workers': num_workers,
                        'error': str(e),
                    }
                    print(f" Failed: {str(e)}")

                results['results'].append(result_data)

        self.results['benchmarks'].append(results)
        return results

    def save_results(self):
        """Write ``self.results`` as indented JSON to ``self.output_file``.

        Missing parent directories are created, and values the ``json``
        module cannot encode are converted by ``_json_default`` so a long
        benchmark run is not lost to one unexpected object.
        """
        target = Path(self.output_file)
        target.parent.mkdir(parents=True, exist_ok=True)
        with open(target, 'w', encoding='utf-8') as f:
            json.dump(self.results, f, indent=2, default=self._json_default)
        print(f"\nResults saved to: {self.output_file}")

    def print_summary(self):
        """Print one headline figure per successful result.

        Entries that contain an ``'error'`` key, and benchmarks with an
        unrecognized ``'test'`` name, produce no result lines.
        """
        formatters = {
            'image_processing':
                lambda r: f" {r['size']} @ {r['quality']}: {r['fps']:.1f} FPS",
            'model_loading':
                lambda r: f" {r['model']}: {r['load_time']:.2f}s",
            'video_processing':
                lambda r: f" {r['quality']}: {r['processing_fps']:.1f} FPS",
            'batch_processing':
                lambda r: f" Batch {r['batch_size']} x {r['num_workers']} workers: {r['throughput']:.1f} img/s",
        }

        print("\n" + "="*50)
        print("BENCHMARK SUMMARY")
        print("="*50)

        for benchmark in self.results['benchmarks']:
            print(f"\n{benchmark['test'].upper()}:")

            formatter = formatters.get(benchmark['test'])
            if formatter is None:
                continue

            for result in benchmark.get('results', []):
                if 'error' not in result:
                    print(formatter(result))
382
+
383
+
384
def main():
    """Parse command-line options, run the selected benchmarks and report.

    Options:
        --tests: One or more of image/model/video/batch, or ``all`` (default).
        --output / -o: Path of the JSON report; passed to ``Benchmarker``.
        --iterations / -i: Timed iterations, forwarded to the image
            benchmark. Must be at least 1.
    """
    parser = argparse.ArgumentParser(description='BackgroundFX Pro Performance Benchmark')
    parser.add_argument('--tests', nargs='+',
                        choices=['image', 'model', 'video', 'batch', 'all'],
                        default=['all'],
                        help='Tests to run')
    parser.add_argument('--output', '-o', help='Output file for results')
    parser.add_argument('--iterations', '-i', type=int, default=5,
                        help='Number of iterations for each test')

    args = parser.parse_args()

    # Reject non-positive counts up front instead of running every warm-up
    # only to record a statistics error for each configuration.
    if args.iterations < 1:
        parser.error('--iterations must be at least 1')

    benchmarker = Benchmarker(args.output)

    tests_to_run = args.tests
    if 'all' in tests_to_run:
        tests_to_run = ['image', 'model', 'video', 'batch']

    system_info = benchmarker.results['system_info']

    print("BackgroundFX Pro Performance Benchmark")
    print("="*50)
    print("System Information:")
    print(f" CPU: {system_info['cpu']['model']}")
    print(f" Memory: {system_info['memory']['total_gb']:.1f}GB")
    if system_info['cuda_available']:
        print(f" GPU: {system_info['gpu']['name']}")
    else:
        print(" GPU: Not available")

    # Run selected benchmarks
    if 'image' in tests_to_run:
        benchmarker.benchmark_image_processing(num_iterations=args.iterations)

    if 'model' in tests_to_run:
        benchmarker.benchmark_model_loading()

    if 'video' in tests_to_run:
        benchmarker.benchmark_video_processing()

    if 'batch' in tests_to_run:
        benchmarker.benchmark_batch_processing()

    # Save and display results
    benchmarker.save_results()
    benchmarker.print_summary()


if __name__ == "__main__":
    main()