meccatronis committed on
Commit
6d6f4dd
·
verified ·
1 Parent(s): 0b35532

Upload benchmark.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. benchmark.py +151 -0
benchmark.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import torch
3
+ import matplotlib.pyplot as plt
4
+ import matplotlib.animation as animation
5
+ from datetime import datetime
6
+ import subprocess
7
+ import time
8
+ import psutil
9
+ import re
10
+ from collections import deque
11
+ import threading
12
+
13
class GPUBenchmark:
    """GPU matrix-multiplication stress benchmark with a live matplotlib plot.

    A background thread repeatedly multiplies two square matrices on the GPU,
    records the achieved TFLOPS and the GPU temperature, and stops when the
    temperature limit is reached, the host stops responding quickly, or an
    error occurs. The main thread redraws the recorded samples periodically.
    """

    def __init__(self):
        """Initialise limits, rolling sample buffers and the two-panel figure."""
        self.max_temp = 85  # the stress loop stops at or above this reading (°C)
        # Rolling windows: only the most recent 100 samples are kept/plotted.
        self.temperatures = deque(maxlen=100)
        self.tflops_history = deque(maxlen=100)
        self.times = deque(maxlen=100)
        self.peak_tflops = 0
        self.running = True  # cleared by the worker or by run() to stop the loop
        self.stress_size = 8192  # current square-matrix dimension
        self.fig, (self.ax1, self.ax2) = plt.subplots(2, 1, figsize=(12, 8))
        self.fig.suptitle('Benchmark TFLOPS - Radeon Pro VII', fontsize=16)

    @staticmethod
    def _parse_edge_temp(sensors_output):
        """Return the first 'edge:' temperature in ``sensors`` output, or None."""
        for line in sensors_output.splitlines():
            if 'edge:' in line.lower():
                match = re.search(r'([+-]?\d+\.?\d*)\s*°C', line)
                if match:
                    return float(match.group(1))
        return None

    def get_gpu_temp(self):
        """Read the GPU edge temperature in °C from the ``sensors`` command.

        Returns:
            The temperature as a float, or 0 when ``sensors`` cannot be run,
            times out, or reports no parsable 'edge:' line. Note that a 0
            reading never trips the ``max_temp`` check in ``stress_gpu``.
        """
        try:
            result = subprocess.run(['sensors'], capture_output=True, text=True, timeout=1)
        except (OSError, subprocess.SubprocessError, ValueError):
            # Missing binary, timeout or undecodable output: best-effort fallback.
            return 0
        temp = self._parse_edge_temp(result.stdout)
        return 0 if temp is None else temp

    def check_system_responsiveness(self):
        """Return True if a 0.1 s CPU-usage sample completes in under 0.5 s.

        Any failure while sampling is treated as "not responsive".
        """
        try:
            start = time.time()
            _ = psutil.cpu_percent(interval=0.1)
            return (time.time() - start) < 0.5
        except Exception:
            # Unlike a bare except, this lets KeyboardInterrupt/SystemExit through.
            return False

    def calculate_tflops(self, matrix_size, elapsed_time):
        """Convert one square matmul timing into TFLOPS.

        Args:
            matrix_size: Dimension n of the n x n operands.
            elapsed_time: Seconds taken by the multiplication.

        Returns:
            ``2 * n**3 / elapsed_time / 1e12``, or 0.0 when ``elapsed_time`` is
            not positive (avoids a division by zero on a degenerate timing).
        """
        if elapsed_time <= 0:
            return 0.0
        operations = 2 * (matrix_size ** 3)
        return (operations / elapsed_time) / 1e12

    def stress_gpu(self):
        """Worker loop: run timed matmuls until a stop condition is hit.

        Each iteration checks temperature and host responsiveness, times one
        ``torch.mm`` of two ``stress_size`` x ``stress_size`` matrices, records
        the sample, and adjusts ``stress_size`` based on the temperature and
        the achieved TFLOPS. Sets ``self.running`` to False before returning.
        """
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        if device.type == 'cpu':
            print("ERRO: GPU não detectada!")
            self.running = False
            return

        print(f"GPU detectada: {torch.cuda.get_device_name(0)}")
        print("Iniciando benchmark...\n")

        a = b = None  # operands are reused until stress_size changes
        while self.running:
            temp = self.get_gpu_temp()
            if temp >= self.max_temp:
                print(f"\n⚠ TEMPERATURA LIMITE: {temp}°C")
                self.running = False
                break

            if not self.check_system_responsiveness():
                print("\n⚠ SISTEMA TRAVANDO")
                self.running = False
                break

            try:
                size = self.stress_size
                if a is None or a.shape[0] != size:
                    # Drop the old operands first so both sizes are never
                    # resident at once, then allocate OUTSIDE the timed region:
                    # the 2*n^3 operation count covers only the multiplication.
                    a = b = None
                    a = torch.randn(size, size, device=device)
                    b = torch.randn(size, size, device=device)

                torch.cuda.synchronize()
                start = time.perf_counter()
                torch.mm(a, b)
                torch.cuda.synchronize()  # wait for the kernel before stopping the clock
                elapsed = time.perf_counter() - start

                tflops = self.calculate_tflops(size, elapsed)
                self.temperatures.append(temp)
                self.tflops_history.append(tflops)
                self.times.append(datetime.now())

                self.peak_tflops = max(self.peak_tflops, tflops)

                print(f"TFLOPS: {tflops:.2f} | Temp: {temp}°C | Peak: {self.peak_tflops:.2f}",
                      end='\r', flush=True)

                # Grow the workload while cool and below 90% of peak; shrink when hot.
                if temp < 75 and tflops < self.peak_tflops * 0.9:
                    self.stress_size = min(self.stress_size + 256, 16384)
                elif temp > 80:
                    self.stress_size = max(self.stress_size - 256, 4096)

                time.sleep(0.1)
            except Exception as e:
                print(f"\n⚠ ERRO: {e}")
                self.running = False
                break

    def update_plot(self, frame):
        """Animation callback: redraw the TFLOPS and temperature panels.

        Args:
            frame: Frame value supplied by ``FuncAnimation``; unused.
        """
        if not self.running and not self.tflops_history:
            return

        self.ax1.clear()
        self.ax2.clear()

        if self.tflops_history:
            self.ax1.plot(list(self.tflops_history), 'b-', linewidth=2, label='TFLOPS atual')
            self.ax1.axhline(y=self.peak_tflops, color='g', linestyle='--',
                             label=f'Peak: {self.peak_tflops:.2f} TFLOPS')
            self.ax1.set_ylabel('TFLOPS', fontsize=12)
            self.ax1.set_title('Desempenho em Tempo Real')
            self.ax1.legend()
            self.ax1.grid(True, alpha=0.3)

        if self.temperatures:
            self.ax2.plot(list(self.temperatures), 'r-', linewidth=2, label='Temperatura')
            self.ax2.axhline(y=self.max_temp, color='orange', linestyle='--',
                             label=f'Limite: {self.max_temp}°C')
            self.ax2.set_ylabel('Temperatura (°C)', fontsize=12)
            self.ax2.set_xlabel('Amostras', fontsize=12)
            self.ax2.legend()
            self.ax2.grid(True, alpha=0.3)

        if not self.running:
            # Benchmark finished: overlay the final peak on the TFLOPS panel.
            self.ax1.text(0.5, 0.5, f'PEAK TFLOPS: {self.peak_tflops:.2f}',
                          transform=self.ax1.transAxes, fontsize=20,
                          ha='center', color='green', weight='bold')

    def run(self):
        """Start the stress thread, show the live plot, then print a summary.

        Blocks in ``plt.show()`` until the plot window is closed.
        """
        stress_thread = threading.Thread(target=self.stress_gpu)
        stress_thread.daemon = True
        stress_thread.start()

        # Keep a reference so the animation is not garbage-collected.
        ani = animation.FuncAnimation(self.fig, self.update_plot,
                                      interval=500, cache_frame_data=False)
        plt.tight_layout()
        plt.show()

        # The window is closed: ask the worker to stop so the summary below
        # is not racing against new samples.
        self.running = False
        stress_thread.join(timeout=2)

        print(f"\n\n{'='*50}")
        print("RESULTADO FINAL")
        print(f"{'='*50}")
        print(f"🏆 PEAK TFLOPS: {self.peak_tflops:.2f}")
        print(f"🌡️ Temp máxima: {max(self.temperatures) if self.temperatures else 0:.1f}°C")
        print(f"{'='*50}\n")
148
+
149
# Script entry point: build the benchmark and run it until the plot is closed.
if __name__ == "__main__":
    GPUBenchmark().run()