yangtb24 committed on
Commit
dcf4e1e
·
verified ·
1 Parent(s): e13ea2e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +204 -166
app.py CHANGED
@@ -1,8 +1,9 @@
1
  from flask import Flask, render_template_string
2
  import requests
3
- import threading
 
4
  import time
5
- from concurrent.futures import ThreadPoolExecutor
6
 
7
  app = Flask(__name__)
8
 
@@ -225,209 +226,246 @@ htmlTemplate = f"""
225
  <script src="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/js/all.min.js" integrity="sha512-yFjZbTYRCJodnuyGlsKamNE/LlEaEA/3uWCGാരി7eIq7jWqVl3J8jL/kof/tfu9Xqzh/y/VM5sJd/tq5iEew==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
226
 
227
  <script>
228
- const username = '{{ username }}';
229
- const serversData = {{ servers_data|tojson }};
230
 
231
  function updateServerCard(data, spaceId) {{
232
- const serverId = data.replica;
233
- const serverElement = document.getElementById(`server-${serverId}`);
234
- const owner = '{{ username }}';
235
-
236
- if (!serverElement) {{
237
- const card = document.createElement('div');
238
- card.id = `server-${serverId}`;
239
- card.className = 'server-card';
240
- card.innerHTML = `
241
- <div class="server-header">
242
- <div class="server-name">
243
- <div class="status-dot status-online"></div>
244
- <svg class="server-flag" width="20" height="20" viewBox="0 0 24 24" fill="currentColor">
245
- <path d="M21 3H3C1.9 3 1 3.9 1 5v3c0 1.1.9 2 2 2h18c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zm-1 5H4V6h16v2zm1 4H3c-1.1 0-2 .9-2 2v3c0 1.1.9 2 2 2h18c1.1 0 2-.9 2-2v-3c0-1.1-.9-2-2-2zm-1 5H4v-2h16v2zm1 4H3c-1.1 0-2 .9-2 2v3c0 1.1.9 2 2 2h18c1.1 0 2-.9 2-2v-3c0-1.1-.9-2-2-2zm-1 5H4v-2h16v2z"/>
246
- </svg>
247
- <div>${serverId} (${owner}/${spaceId})</div>
 
 
 
 
 
 
 
248
  </div>
249
- </div>
250
- <div class="metric-grid">
251
- <div class="metric-item">
252
- <div class="metric-label">CPU</div>
253
- <div class="progress-bar-container">
254
- <div class="cpu-progress-bar"></div>
255
- </div>
256
- <div class="metric-value cpu-usage">0%</div>
257
  </div>
258
- <div class="metric-item">
259
- <div class="metric-label">内存</div>
260
- <div class="progress-bar-container">
261
- <div class="memory-progress-bar"></div>
262
- </div>
263
- <div class="metric-value memory-usage">0%</div>
264
- </div>
265
- <div class="metric-item">
266
- <div class="metric-label">上传</div>
267
- <div class="metric-value upload">0 KB/s</div>
268
- </div>
269
- <div class="metric-item">
270
- <div class="metric-label">下载</div>
271
- <div class="metric-value download">0 KB/s</div>
272
- </div>
273
- </div>
274
- `;
275
- document.getElementById('servers').appendChild(card);
276
- }}
277
-
278
- const card = document.getElementById(`server-${serverId}`);
279
- const cpuUsage = data.cpu_usage_pct;
280
- const memoryUsage = (data.memory_used_bytes / data.memory_total_bytes) * 100;
281
- const uploadBps = data.tx_bps;
282
- const downloadBps = data.rx_bps;
283
 
284
- card.querySelector('.cpu-usage').textContent = `${cpuUsage.toFixed(2)}%`;
285
- card.querySelector('.cpu-progress-bar').style.width = `${cpuUsage}%`;
 
 
 
286
 
287
- card.querySelector('.memory-usage').textContent = `${memoryUsage.toFixed(2)}%`;
288
- card.querySelector('.memory-progress-bar').style.width = `${memoryUsage}%`;
289
 
290
- card.querySelector('.upload').textContent = `${formatBytes(uploadBps)}/s`;
291
- card.querySelector('.download').textContent = `${formatBytes(downloadBps)}/s`;
292
 
293
- serversData[serverId] = {{ last_seen: Date.now() }}; // Update last seen
294
- updateSummary();
295
  }}
296
-
297
- function updateSummary() {{
298
- const now = Date.now();
299
  let online = 0;
300
  let offline = 0;
301
  let totalUpload = 0;
302
  let totalDownload = 0;
 
303
 
304
- for (const serverId in serversData) {{
305
- const lastSeen = serversData[serverId].last_seen;
306
- const isOnline = (now - lastSeen) < 10000;
307
- const serverCard = document.getElementById(`server-${serverId}`);
 
 
308
 
309
- if (serverCard) {{
310
- const statusDot = serverCard.querySelector('.status-dot');
311
- statusDot.className = `status-dot status-${{isOnline ? 'online' : 'offline'}}`;
312
 
313
- if (isOnline) {{
314
- const uploadText = serverCard.querySelector('.upload').textContent;
315
- const downloadText = serverCard.querySelector('.download').textContent;
316
- totalUpload += parseFloat(uploadText) || 0;
317
- totalDownload += parseFloat(downloadText) || 0;
318
- }}
319
- }}
320
- isOnline ? online++ : offline++;
321
- }}
322
 
323
- document.getElementById('totalServers').textContent = Object.keys(serversData).length;
324
- document.getElementById('onlineServers').textContent = online;
325
- document.getElementById('offlineServers').textContent = offline;
326
- document.getElementById('totalUpload').textContent = `${{formatBytes(totalUpload)}}/s`;
327
- document.getElementById('totalDownload').textContent = `${{formatBytes(totalDownload)}}/s`;
328
- }}
329
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
 
331
- function formatBytes(bytes) {{
332
  if (bytes === 0) return '0 B';
333
  const k = 1024;
334
  const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
335
  const i = Math.floor(Math.log(bytes) / Math.log(k));
336
  return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
337
- }}
338
-
339
-
340
- // Initial rendering of server cards (from server-side data)
341
  for (const spaceId in serversData) {{
342
- for (const replicaId in serversData[spaceId].replicas) {{
343
- updateServerCard(serversData[spaceId].replicas[replicaId], spaceId);
344
- }}
345
- }}
346
- updateSummary(); // Initial summary update
347
-
348
-
349
- // Long-polling (simulated SSE with fetch)
350
- async function fetchMetrics() {{
351
- try {{
352
- const response = await fetch('/metrics');
353
- const updatedServersData = await response.json();
354
-
355
- for (const spaceId in updatedServersData) {{
356
- for(const replicaId in updatedServersData[spaceId].replicas) {
357
- updateServerCard(updatedServersData[spaceId].replicas[replicaId], spaceId);
358
- }
359
- }}
360
- updateSummary(); // Update summary after each fetch
361
- }} catch (error) {{
362
- console.error("Error fetching metrics:", error);
363
  }}
364
- setTimeout(fetchMetrics, 2000); // Poll every 2 seconds
365
  }}
366
-
367
- fetchMetrics(); // Start fetching metrics
368
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
  </script>
370
  </body>
371
  </html>
372
  """
373
 
374
 
 
 
 
 
 
 
 
 
def fetch_instances(username):
    """Return the Hugging Face Spaces listed for ``username``.

    Queries ``https://huggingface.co/api/spaces`` with ``author=username``
    and reduces every entry to ``{"id": <space name>, "owner": username}``.
    The space name is the part of the entry's ``"id"`` after the first
    slash.

    Args:
        username: Account name used as the ``author`` filter and copied into
            the ``"owner"`` field of every returned entry.

    Returns:
        A list of ``{"id", "owner"}`` dicts. An empty list is returned when
        the request fails, the status is an error, or the payload does not
        have the expected shape.
    """
    try:
        response = requests.get(
            "https://huggingface.co/api/spaces",
            params={"author": username},  # let requests URL-encode the value
            timeout=10,  # without a timeout a stalled connection blocks forever
        )
        response.raise_for_status()
        user_instances = response.json()
        return [
            {"id": instance["id"].split("/", 1)[-1], "owner": username}
            for instance in user_instances
        ]
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError) as e:
        # ValueError covers a non-JSON body; KeyError/TypeError cover a
        # payload that is not a list of objects carrying an "id".
        print(f"Error fetching instances: {e}")
        return []
385
 
def fetch_metrics_for_instance(username, instance_id):
    """Fetch the live-metrics JSON payload of a single Space.

    Args:
        username: Owner segment of the metrics URL.
        instance_id: Space segment of the metrics URL.

    Returns:
        The decoded JSON body, or ``None`` when the request fails, the
        status is an error, or the body is not valid JSON.
    """
    try:
        response = requests.get(
            f"https://api.hf.space/v1/{username}/{instance_id}/live-metrics",
            timeout=30,  # bound the wait so one stalled Space cannot hang a worker
        )
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError: body was not JSON (older requests versions raise it
        # outside the RequestException hierarchy).
        print(f"Error fetching metrics for {username}/{instance_id}: {e}")
        return None
395
-
def get_all_metrics(username, instances):
    """Fetch metrics for all instances concurrently.

    Args:
        username: Owner passed through to ``fetch_metrics_for_instance``.
        instances: Iterable of dicts that each carry an ``"id"`` key.

    Returns:
        A dict keyed by instance id. Each value holds ``"last_seen"`` (the
        current time in milliseconds) and ``"replicas"`` (the fetched
        entries keyed by their ``"replica"`` field). Instances whose fetch
        returned nothing, or whose payload could not be processed, are
        omitted.
    """
    all_servers_data = {}
    instances = list(instances)
    if not instances:
        # Nothing to fetch: skip spinning up a thread pool.
        return all_servers_data

    with ThreadPoolExecutor(max_workers=10) as executor:  # Adjust max_workers as needed
        futures = {
            executor.submit(fetch_metrics_for_instance, username, instance["id"]): instance
            for instance in instances
        }
        for future, instance in futures.items():
            try:
                metrics = future.result()
                if metrics:
                    all_servers_data[instance["id"]] = {
                        "last_seen": int(time.time() * 1000),
                        "replicas": {replica["replica"]: replica for replica in metrics},
                    }
            except Exception as e:
                # Worker boundary: one bad payload must not discard the
                # results already collected for the other instances.
                print(f"Error processing metrics for {instance['id']}: {e}")
    return all_servers_data
414
-
@app.route("/")
def home():
    """Render the dashboard page with a freshly fetched metrics snapshot.

    Returns:
        The rendered ``htmlTemplate``, given the username and the metrics
        collected for that user's instances.
    """
    username = "yangtb24"  # Replace with your desired username
    instances = fetch_instances(username)
    initial_servers_data = get_all_metrics(username, instances)

    return render_template_string(
        htmlTemplate,
        username=username,
        servers_data=initial_servers_data,
    )
422
-
@app.route("/metrics")
def metrics():
    """Return the current metrics of every instance as a JSON response.

    Returns:
        A ``(body, status, headers)`` tuple: the metrics dict, ``200``, and
        an explicit ``application/json`` content type.
    """
    username = "yangtb24"
    instances = fetch_instances(username)
    updated_servers_data = get_all_metrics(username, instances)
    return updated_servers_data, 200, {'Content-Type': 'application/json'}
429
-
430
-
if __name__ == "__main__":
    import os

    # The interactive debugger must not be reachable on a server bound to
    # every interface, so debug mode is opt-in via FLASK_DEBUG.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in {"1", "true", "yes"}
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)  # Use port 7860 for HF Spaces
433
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from flask import Flask, render_template_string
2
  import requests
3
+ import json
4
+ from threading import Thread, Lock
5
  import time
6
+ from datetime import datetime, timedelta
7
 
8
  app = Flask(__name__)
9
 
 
226
  <script src="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/js/all.min.js" integrity="sha512-yFjZbTYRCJodnuyGlsKamNE/LlEaEA/3uWCGാരി7eIq7jWqVl3J8jL/kof/tfu9Xqzh/y/VM5sJd/tq5iEew==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
227
 
228
  <script>
229
+ const username = 'yangtb24';
230
+ let serversData = {{ servers_data|tojson }}; // 从 Flask 传入初始数据
231
 
232
  function updateServerCard(data, spaceId) {{
233
+ const serverId = data.replica;
234
+ const serverElement = document.getElementById(`server-${serverId}`);
235
+ const owner = username; // Simplified, since we know the owner
236
+
237
+ if (!serverElement) {{
238
+ const card = document.createElement('div');
239
+ card.id = `server-${serverId}`;
240
+ card.className = 'server-card';
241
+ card.innerHTML = `
242
+ <div class="server-header">
243
+ <div class="server-name">
244
+ <div class="status-dot status-online"></div>
245
+ <svg class="server-flag" width="20" height="20" viewBox="0 0 24 24" fill="currentColor">
246
+ <path d="M21 3H3C1.9 3 1 3.9 1 5v3c0 1.1.9 2 2 2h18c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zm-1 5H4V6h16v2zm1 4H3c-1.1 0-2 .9-2 2v3c0 1.1.9 2 2 2h18c1.1 0 2-.9 2-2v-3c0-1.1-.9-2-2-2zm-1 5H4v-2h16v2zm1 4H3c-1.1 0-2 .9-2 2v3c0 1.1.9 2 2 2h18c1.1 0 2-.9 2-2v-3c0-1.1-.9-2-2-2zm-1 5H4v-2h16v2z"/>
247
+ </svg>
248
+ <div>${{serverId}} (${{owner}}/${{spaceId}})</div>
249
+ </div>
250
+ </div>
251
+ <div class="metric-grid">
252
+ <div class="metric-item">
253
+ <div class="metric-label">CPU</div>
254
+ <div class="progress-bar-container">
255
+ <div class="cpu-progress-bar"></div>
256
  </div>
257
+ <div class="metric-value cpu-usage">0%</div>
258
+ </div>
259
+ <div class="metric-item">
260
+ <div class="metric-label">内存</div>
261
+ <div class="progress-bar-container">
262
+ <div class="memory-progress-bar"></div>
 
 
263
  </div>
264
+ <div class="metric-value memory-usage">0%</div>
265
+ </div>
266
+ <div class="metric-item">
267
+ <div class="metric-label">上传</div>
268
+ <div class="metric-value upload">0 KB/s</div>
269
+ </div>
270
+ <div class="metric-item">
271
+ <div class="metric-label">下载</div>
272
+ <div class="metric-value download">0 KB/s</div>
273
+ </div>
274
+ </div>
275
+ `;
276
+ document.getElementById('servers').appendChild(card);
277
+ }}
 
 
 
 
 
 
 
 
 
 
 
278
 
279
+ const card = document.getElementById(`server-${serverId}`);
280
+ const cpuUsage = data.cpu_usage_pct;
281
+ const memoryUsage = (data.memory_used_bytes / data.memory_total_bytes) * 100;
282
+ const uploadBps = data.tx_bps;
283
+ const downloadBps = data.rx_bps;
284
 
285
+ card.querySelector('.cpu-usage').textContent = `${cpuUsage.toFixed(2)}%`;
286
+ card.querySelector('.cpu-progress-bar').style.width = `${cpuUsage}%`;
287
 
288
+ card.querySelector('.memory-usage').textContent = `${memoryUsage.toFixed(2)}%`;
289
+ card.querySelector('.memory-progress-bar').style.width = `${memoryUsage}%`;
290
 
291
+ card.querySelector('.upload').textContent = `${formatBytes(uploadBps)}/s`;
292
+ card.querySelector('.download').textContent = `${formatBytes(downloadBps)}/s`;
293
  }}
294
+
295
+ function updateSummary() {
 
296
  let online = 0;
297
  let offline = 0;
298
  let totalUpload = 0;
299
  let totalDownload = 0;
300
+ let totalServers = 0;
301
 
302
+ for (const spaceId in serversData) {
303
+ for (const replicaId in serversData[spaceId]) {
304
+ totalServers++;
305
+ const server = serversData[spaceId][replicaId];
306
+ const isOnline = server.isOnline;
307
+ const serverCard = document.getElementById(`server-${replicaId}`);
308
 
 
 
 
309
 
310
+ if (serverCard) {
311
+ const statusDot = serverCard.querySelector('.status-dot');
312
+ statusDot.className = `status-dot status-${isOnline ? 'online' : 'offline'}`;
313
+ }
 
 
 
 
 
314
 
 
 
 
 
 
 
315
 
316
+ if (isOnline) {
317
+ online++;
318
+ totalUpload += server.tx_bps;
319
+ totalDownload += server.rx_bps;
320
+
321
+ } else {
322
+ offline++;
323
+ }
324
+ }
325
+ }
326
+
327
+ document.getElementById('totalServers').textContent = totalServers;
328
+ document.getElementById('onlineServers').textContent = online;
329
+ document.getElementById('offlineServers').textContent = offline;
330
+ document.getElementById('totalUpload').textContent = `${formatBytes(totalUpload)}/s`;
331
+ document.getElementById('totalDownload').textContent = `${formatBytes(totalDownload)}/s`;
332
+ }
333
 
334
+ function formatBytes(bytes) {
335
  if (bytes === 0) return '0 B';
336
  const k = 1024;
337
  const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
338
  const i = Math.floor(Math.log(bytes) / Math.log(k));
339
  return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
340
+ }
341
+
342
+ //初始更新
 
343
  for (const spaceId in serversData) {{
344
+ for (const replicaId in serversData[spaceId]) {{
345
+ const server = serversData[spaceId][replicaId];
346
+ updateServerCard(server, spaceId);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
  }}
 
348
  }}
349
+ updateSummary();
350
+
351
+ // 定时更新
352
+ setInterval(() => {{
353
+ fetch('/update_data')
354
+ .then(response => response.json())
355
+ .then(data => {{
356
+ serversData = data.servers_data;
357
+ for (const spaceId in serversData) {{
358
+ for (const replicaId in serversData[spaceId]) {{
359
+ const server = serversData[spaceId][replicaId];
360
+ updateServerCard(server, spaceId);
361
+ }}
362
+ }}
363
+ updateSummary();
364
+ }});
365
+ }}, 2000);
366
  </script>
367
  </body>
368
  </html>
369
  """
370
 
371
 
# Hugging Face account whose Spaces are monitored.
USERNAME = 'yangtb24'
# Latest metrics per replica: {space_id: {replica_id: {metrics}, ...}, ...}
servers_data = {}
# Held by the background threads and the Flask views while they touch
# servers_data / last_fetch_time.
data_lock = Lock()
# Time of the most recent fetch recorded for each space.
last_fetch_time = {}
FETCH_INTERVAL = 5  # fetch interval, in seconds
MAX_OFFLINE_TIME = 10  # seconds without data before a space is judged offline
378
+
379
+
def fetch_instances(username):
    """Return the Hugging Face Spaces listed for ``username``.

    Queries ``https://huggingface.co/api/spaces`` with ``author=username``
    and reduces every entry to ``{"id": <space name>, "owner": username}``.
    The space name is the part of the entry's ``"id"`` after the first
    slash.

    Args:
        username: Account name used as the ``author`` filter and copied into
            the ``"owner"`` field of every returned entry.

    Returns:
        A list of ``{"id", "owner"}`` dicts. An empty list is returned when
        the request fails, the status is an error, or the payload does not
        have the expected shape.
    """
    try:
        response = requests.get(
            "https://huggingface.co/api/spaces",
            params={"author": username},  # let requests URL-encode the value
            timeout=10,  # without a timeout a stalled connection blocks forever
        )
        response.raise_for_status()  # fail on HTTP error statuses
        user_instances = response.json()
        return [
            {"id": instance["id"].split('/', 1)[-1], "owner": username}
            for instance in user_instances
        ]
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        # ValueError covers a non-JSON body; KeyError/TypeError cover a
        # payload that is not a list of objects carrying an "id".
        print(f"获取实例列表失败:{e}")
        return []
389
 
def fetch_metrics(username, space_id):
    """Stream live metrics for one Space into the shared ``servers_data``.

    Opens the Space's ``live-metrics/sse`` endpoint and, for every ``metric``
    event, stores the decoded payload under
    ``servers_data[space_id][payload['replica']]`` with ``isOnline`` set to
    ``True``, and records the time in ``last_fetch_time[space_id]``. Both
    updates happen while holding ``data_lock``.

    The function returns when the stream ends or the connection fails. On a
    connection failure, every replica already recorded for the space is
    marked ``isOnline = False``.

    Args:
        username: Owner segment of the metrics URL.
        space_id: Space segment of the metrics URL and key in ``servers_data``.
    """
    url = f"https://api.hf.space/v1/{username}/{space_id}/live-metrics/sse"
    try:
        # (connect, read) timeouts: a silent connection raises instead of
        # blocking this thread forever. The context manager closes the
        # streamed connection on every exit path.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()

            # In the SSE wire format the "event:" and "data:" fields sit on
            # separate lines and a blank line ends the event, so the event
            # name is remembered until its data line arrives.
            in_metric_event = False
            for raw_line in response.iter_lines():
                if not raw_line:
                    in_metric_event = False
                    continue

                decoded_line = raw_line.decode('utf-8')
                if decoded_line.startswith("event:"):
                    in_metric_event = decoded_line[len("event:"):].strip() == "metric"
                    continue
                if not (in_metric_event and decoded_line.startswith("data:")):
                    continue

                try:
                    data = json.loads(decoded_line[len("data:"):].strip())
                    replica_id = data['replica']
                except (json.JSONDecodeError, KeyError, TypeError) as e:
                    # Skip the malformed event but keep the stream alive.
                    print(f"解析数据失败 ({space_id}): {e}")
                    continue

                data['isOnline'] = True  # a replica that just reported is online
                with data_lock:
                    servers_data.setdefault(space_id, {})[replica_id] = data
                    last_fetch_time[space_id] = datetime.now()

    except requests.RequestException as e:
        print(f"连接失败 ({username}/{space_id}): {e}")
        with data_lock:
            # The request failed: mark every known replica of this space offline.
            for replica in servers_data.get(space_id, {}).values():
                replica['isOnline'] = False
419
+
def update_metrics():
    """Keep one ``fetch_metrics`` thread running per Space; never returns.

    Every five minutes the instance list is fetched again. A worker thread
    is started for each instance that has no live worker, so new Spaces are
    picked up and ended streams are reconnected.

    Workers are not joined: a metrics stream can stay open indefinitely, and
    waiting for it would keep the instance list from ever being refreshed.
    The thread count stays bounded because at most one live worker exists
    per space id.
    """
    active_streams = {}  # space_id -> worker Thread currently serving it

    while True:
        for instance in fetch_instances(USERNAME):
            space_id = instance['id']
            worker = active_streams.get(space_id)
            if worker is not None and worker.is_alive():
                continue  # this space is already being streamed

            worker = Thread(
                target=fetch_metrics,
                args=(instance['owner'], space_id),
                daemon=True,  # must not keep the process alive on shutdown
            )
            active_streams[space_id] = worker
            worker.start()

        # Forget finished workers so the mapping does not grow without bound.
        active_streams = {
            space_id: worker
            for space_id, worker in active_streams.items()
            if worker.is_alive()
        }

        time.sleep(300)  # refresh the instance list every 5 minutes
434
+
435
+
# Start the metrics-updating thread at import time. It is a daemon thread so
# it exits automatically when the main program exits.
update_thread = Thread(target=update_metrics, daemon=True)
update_thread.start()
440
+
def check_online_status():
    """Periodically mark stale Spaces as offline; never returns.

    Every two seconds, each space whose ``last_fetch_time`` entry is older
    than ``MAX_OFFLINE_TIME`` seconds has all of its replicas in
    ``servers_data`` set to ``isOnline = False``. The scan runs while holding
    ``data_lock``.
    """
    offline_after = timedelta(seconds=MAX_OFFLINE_TIME)

    while True:
        now = datetime.now()
        with data_lock:
            for space_id, fetched_at in last_fetch_time.items():
                if now - fetched_at <= offline_after:
                    continue  # data is recent enough
                for replica in servers_data.get(space_id, {}).values():
                    replica['isOnline'] = False
        time.sleep(2)  # check every 2 seconds
452
+
453
+
# Start the online-status checking thread at import time, as a daemon thread.
check_status_thread = Thread(target=check_online_status, daemon=True)
check_status_thread.start()
458
+
459
+
@app.route('/')
def index():
    """Render the dashboard page from the current ``servers_data``.

    The template is rendered while holding ``data_lock``, so the background
    threads cannot modify the data during rendering.
    """
    with data_lock:
        return render_template_string(htmlTemplate, servers_data=servers_data)
464
+
@app.route('/update_data')
def update_data():
    """Return the current metrics as ``{'servers_data': {...}}``.

    The response body is serialised after this function returns, once the
    lock has been released. A copy is therefore taken under ``data_lock`` so
    the background threads cannot change the dictionaries while they are
    being encoded.
    """
    with data_lock:
        snapshot = {
            space_id: {
                replica_id: dict(metrics)
                for replica_id, metrics in replicas.items()
            }
            for space_id, replicas in servers_data.items()
        }
    return {'servers_data': snapshot}
469
+
if __name__ == '__main__':
    import os

    # The interactive debugger must not be reachable on a server bound to
    # every interface, so debug mode is opt-in via FLASK_DEBUG.
    # NOTE(review): debug mode also enables the reloader, which presumably
    # re-imports this module and starts the background threads a second
    # time. Confirm before turning it on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in {"1", "true", "yes"}
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)  # 7860 is the default HF Spaces port