yangtb24 committed on
Commit
9a21337
·
verified ·
1 Parent(s): dcf4e1e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +184 -163
app.py CHANGED
@@ -1,9 +1,8 @@
1
  from flask import Flask, render_template_string
2
  import requests
3
- import json
4
- from threading import Thread, Lock
5
  import time
6
- from datetime import datetime, timedelta
7
 
8
  app = Flask(__name__)
9
 
@@ -227,25 +226,20 @@ htmlTemplate = f"""
227
 
228
  <script>
229
  const username = 'yangtb24';
230
- let serversData = {{ servers_data|tojson }}; // 从 Flask 传入初始数据
231
 
232
- function updateServerCard(data, spaceId) {{
233
- const serverId = data.replica;
234
- const serverElement = document.getElementById(`server-${serverId}`);
235
- const owner = username; // Simplified, since we know the owner
236
-
237
- if (!serverElement) {{
238
  const card = document.createElement('div');
239
  card.id = `server-${serverId}`;
240
  card.className = 'server-card';
241
  card.innerHTML = `
242
  <div class="server-header">
243
  <div class="server-name">
244
- <div class="status-dot status-online"></div>
245
  <svg class="server-flag" width="20" height="20" viewBox="0 0 24 24" fill="currentColor">
246
  <path d="M21 3H3C1.9 3 1 3.9 1 5v3c0 1.1.9 2 2 2h18c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zm-1 5H4V6h16v2zm1 4H3c-1.1 0-2 .9-2 2v3c0 1.1.9 2 2 2h18c1.1 0 2-.9 2-2v-3c0-1.1-.9-2-2-2zm-1 5H4v-2h16v2zm1 4H3c-1.1 0-2 .9-2 2v3c0 1.1.9 2 2 2h18c1.1 0 2-.9 2-2v-3c0-1.1-.9-2-2-2zm-1 5H4v-2h16v2z"/>
247
  </svg>
248
- <div>${{serverId}} (${{owner}}/${{spaceId}})</div>
249
  </div>
250
  </div>
251
  <div class="metric-grid">
@@ -274,6 +268,15 @@ htmlTemplate = f"""
274
  </div>
275
  `;
276
  document.getElementById('servers').appendChild(card);
 
 
 
 
 
 
 
 
 
277
  }}
278
 
279
  const card = document.getElementById(`server-${serverId}`);
@@ -290,182 +293,200 @@ htmlTemplate = f"""
290
 
291
  card.querySelector('.upload').textContent = `${formatBytes(uploadBps)}/s`;
292
  card.querySelector('.download').textContent = `${formatBytes(downloadBps)}/s`;
 
293
  }}
294
-
295
- function updateSummary() {
296
- let online = 0;
297
- let offline = 0;
298
- let totalUpload = 0;
299
- let totalDownload = 0;
300
- let totalServers = 0;
301
-
302
- for (const spaceId in serversData) {
303
- for (const replicaId in serversData[spaceId]) {
304
- totalServers++;
305
- const server = serversData[spaceId][replicaId];
306
- const isOnline = server.isOnline;
307
- const serverCard = document.getElementById(`server-${replicaId}`);
308
-
309
-
310
- if (serverCard) {
311
- const statusDot = serverCard.querySelector('.status-dot');
312
- statusDot.className = `status-dot status-${isOnline ? 'online' : 'offline'}`;
313
- }
314
-
315
-
316
- if (isOnline) {
317
- online++;
318
- totalUpload += server.tx_bps;
319
- totalDownload += server.rx_bps;
320
-
321
- } else {
322
- offline++;
323
- }
324
- }
325
- }
326
 
327
- document.getElementById('totalServers').textContent = totalServers;
328
- document.getElementById('onlineServers').textContent = online;
329
- document.getElementById('offlineServers').textContent = offline;
330
- document.getElementById('totalUpload').textContent = `${formatBytes(totalUpload)}/s`;
331
- document.getElementById('totalDownload').textContent = `${formatBytes(totalDownload)}/s`;
332
- }
333
 
334
- function formatBytes(bytes) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
  if (bytes === 0) return '0 B';
336
  const k = 1024;
337
  const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
338
  const i = Math.floor(Math.log(bytes) / Math.log(k));
339
  return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
340
- }
341
-
342
- //初始更新
343
- for (const spaceId in serversData) {{
344
- for (const replicaId in serversData[spaceId]) {{
345
- const server = serversData[spaceId][replicaId];
346
- updateServerCard(server, spaceId);
347
- }}
 
 
 
348
  }}
349
  updateSummary();
350
 
351
- // 定时更新
352
- setInterval(() => {{
353
- fetch('/update_data')
354
- .then(response => response.json())
355
- .then(data => {{
356
- serversData = data.servers_data;
357
- for (const spaceId in serversData) {{
358
- for (const replicaId in serversData[spaceId]) {{
359
- const server = serversData[spaceId][replicaId];
360
- updateServerCard(server, spaceId);
361
- }}
362
- }}
363
- updateSummary();
364
- }});
365
- }}, 2000);
366
  </script>
367
  </body>
368
  </html>
369
  """
370
 
371
 
372
- USERNAME = 'yangtb24'
373
- servers_data = {} # {space_id: {replica_id: {metrics}, ...}, ...}
374
- data_lock = Lock()
375
- last_fetch_time = {} # 记录每个space的上次获取时间
376
- FETCH_INTERVAL = 5 # 5秒获取间隔
377
- MAX_OFFLINE_TIME = 10 # 10秒判定为离线
378
-
379
-
380
- def fetch_instances(username):
381
- try:
382
- response = requests.get(f"https://huggingface.co/api/spaces?author={username}")
383
- response.raise_for_status() # 检查请求是否成功
384
- user_instances = response.json()
385
- return [{"id": instance["id"].split('/')[1], "owner": username} for instance in user_instances]
386
- except requests.RequestException as e:
387
- print(f"获取实例列表失败:{e}")
388
- return []
389
-
390
- def fetch_metrics(username, space_id):
391
- try:
392
- response = requests.get(f"https://api.hf.space/v1/{username}/{space_id}/live-metrics/sse", stream=True)
393
- response.raise_for_status()
394
-
395
- for line in response.iter_lines():
396
- if line:
397
- decoded_line = line.decode('utf-8')
398
- if decoded_line.startswith("event: metric"):
399
- try:
400
- data_str = decoded_line.split("data: ", 1)[1]
401
- data = json.loads(data_str)
402
- with data_lock:
403
- if space_id not in servers_data:
404
- servers_data[space_id] = {}
405
- data['isOnline'] = True #初始都设置为在线
406
- servers_data[space_id][data['replica']] = data
407
- last_fetch_time[space_id] = datetime.now()
408
-
409
- except json.JSONDecodeError as e:
410
- print(f"解析数据失败 ({space_id}): {e}")
411
-
412
- except requests.RequestException as e:
413
- print(f"连接失败 ({username}/{space_id}): {e}")
414
- with data_lock:
415
- # 如果请求失败,设置isOnline为False
416
- if space_id in servers_data:
417
- for replica_id in servers_data[space_id]:
418
- servers_data[space_id][replica_id]['isOnline'] = False
419
-
420
- def update_metrics():
421
- """定期更新所有实例的指标"""
422
- while True:
423
- instances = fetch_instances(USERNAME)
424
- threads = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
  for instance in instances:
426
- thread = Thread(target=fetch_metrics, args=(instance['owner'], instance['id']))
427
- threads.append(thread)
428
- thread.start()
429
-
430
- for thread in threads:
431
- thread.join() # 等待所有线程完成,防止线程过多
432
 
433
- time.sleep(300) # 每 5 分钟刷新一次实例列表
 
 
 
 
434
 
 
 
 
435
 
436
- # 启动指标更新线程
437
- update_thread = Thread(target=update_metrics)
438
- update_thread.daemon = True # 设置为守护线程,主程序退出时自动退出
439
- update_thread.start()
440
 
441
- def check_online_status():
442
- """定期检查服务器在线状态"""
443
- while True:
444
- now = datetime.now()
445
- with data_lock:
446
- for space_id in list(last_fetch_time.keys()): # 使用list防止迭代时字典大小改变
447
- if now - last_fetch_time[space_id] > timedelta(seconds=MAX_OFFLINE_TIME):
448
- if space_id in servers_data:
449
- for replica_id in servers_data[space_id]:
450
- servers_data[space_id][replica_id]['isOnline'] = False
451
- time.sleep(2) #每2秒检查一次
452
 
 
 
 
 
453
 
454
- # 启动在线状态检查线程
455
- check_status_thread = Thread(target=check_online_status)
456
- check_status_thread.daemon = True
457
- check_status_thread.start()
458
 
 
 
 
 
 
459
 
460
- @app.route('/')
461
- def index():
462
- with data_lock:
463
- return render_template_string(htmlTemplate, servers_data=servers_data)
464
 
465
- @app.route('/update_data')
466
- def update_data():
467
- with data_lock:
468
- return {'servers_data': servers_data}
469
 
470
- if __name__ == '__main__':
471
- app.run(debug=True, host="0.0.0.0", port=7860) # HF Spaces 默认端口是 7860
 
1
  from flask import Flask, render_template_string
2
  import requests
3
+ import threading
 
4
  import time
5
+ from concurrent.futures import ThreadPoolExecutor
6
 
7
  app = Flask(__name__)
8
 
 
226
 
227
  <script>
228
  const username = 'yangtb24';
229
+ const serversData = {{ servers_data|tojson }};
230
 
231
+ function createServerCard(serverId, spaceId, owner) {{
 
 
 
 
 
232
  const card = document.createElement('div');
233
  card.id = `server-${serverId}`;
234
  card.className = 'server-card';
235
  card.innerHTML = `
236
  <div class="server-header">
237
  <div class="server-name">
238
+ <div class="status-dot status-offline"></div>
239
  <svg class="server-flag" width="20" height="20" viewBox="0 0 24 24" fill="currentColor">
240
  <path d="M21 3H3C1.9 3 1 3.9 1 5v3c0 1.1.9 2 2 2h18c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zm-1 5H4V6h16v2zm1 4H3c-1.1 0-2 .9-2 2v3c0 1.1.9 2 2 2h18c1.1 0 2-.9 2-2v-3c0-1.1-.9-2-2-2zm-1 5H4v-2h16v2zm1 4H3c-1.1 0-2 .9-2 2v3c0 1.1.9 2 2 2h18c1.1 0 2-.9 2-2v-3c0-1.1-.9-2-2-2zm-1 5H4v-2h16v2z"/>
241
  </svg>
242
+ <div>${serverId} (${owner}/${spaceId})</div>
243
  </div>
244
  </div>
245
  <div class="metric-grid">
 
268
  </div>
269
  `;
270
  document.getElementById('servers').appendChild(card);
271
+ }}
272
+
273
+ function updateServerCard(data, spaceId) {{
274
+ const serverId = data.replica;
275
+ const serverElement = document.getElementById(`server-${serverId}`);
276
+ const owner = data.owner
277
+
278
+ if (!serverElement) {{
279
+ createServerCard(serverId, spaceId, owner);
280
  }}
281
 
282
  const card = document.getElementById(`server-${serverId}`);
 
293
 
294
  card.querySelector('.upload').textContent = `${formatBytes(uploadBps)}/s`;
295
  card.querySelector('.download').textContent = `${formatBytes(downloadBps)}/s`;
296
+ updateSummary(serverId, data);
297
  }}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
 
 
 
 
 
 
 
299
 
300
+ function updateSummary(serverId, data) {{
301
+
302
+ const now = Date.now();
303
+ let online = 0;
304
+ let offline = 0;
305
+ let totalUpload = 0;
306
+ let totalDownload = 0;
307
+
308
+ for (const serverId in serversData) {{
309
+ const server = serversData[serverId];
310
+ const isOnline = server.isOnline;
311
+ const serverCard = document.getElementById(`server-${serverId}`);
312
+
313
+ if (serverCard) {{
314
+ const statusDot = serverCard.querySelector('.status-dot');
315
+ statusDot.className = `status-dot status-${isOnline ? 'online' : 'offline'}`;
316
+
317
+ if (isOnline && server.data) {{
318
+ const uploadText = formatBytes(server.data.tx_bps);
319
+ const downloadText = formatBytes(server.data.rx_bps);
320
+ totalUpload += server.data.tx_bps;
321
+ totalDownload += server.data.rx_bps;
322
+ }}
323
+ }}
324
+ isOnline ? online++ : offline++;
325
+ }}
326
+ document.getElementById('totalServers').textContent = Object.keys(serversData).length;
327
+ document.getElementById('onlineServers').textContent = online;
328
+ document.getElementById('offlineServers').textContent = offline;
329
+ document.getElementById('totalUpload').textContent = `${formatBytes(totalUpload)}/s`;
330
+ document.getElementById('totalDownload').textContent = `${formatBytes(totalDownload)}/s`;
331
+ }}
332
+
333
+
334
+
335
+ function formatBytes(bytes) {{
336
  if (bytes === 0) return '0 B';
337
  const k = 1024;
338
  const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
339
  const i = Math.floor(Math.log(bytes) / Math.log(k));
340
  return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
341
+ }}
342
+
343
+
344
+ // Initial render based on initial data
345
+ for (const serverId in serversData) {{
346
+ const server = serversData[serverId];
347
+ if (server.data) {
348
+ updateServerCard(server.data, server.spaceId);
349
+ } else {
350
+ createServerCard(serverId, server.spaceId, server.owner)
351
+ }
352
  }}
353
  updateSummary();
354
 
355
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
  </script>
357
  </body>
358
  </html>
359
  """
360
 
361
 
362
class MetricsManager:
    """Collect live metrics for every Space owned by one Hugging Face user.

    One worker per Space reads that Space's ``live-metrics/sse`` stream and
    stores the latest payload per replica in ``servers_data``. A separate
    daemon thread marks replicas offline when no payload has arrived for
    more than 10 seconds.

    ``servers_data`` maps replica id -> dict with the keys ``spaceId``,
    ``owner``, ``data`` (latest decoded payload), ``last_seen``
    (``time.time()`` of the latest payload) and ``isOnline``. All access to
    it goes through ``self.lock``.
    """

    def __init__(self, username):
        """Create an idle manager for *username*; nothing is fetched yet."""
        self.username = username
        self.executor = ThreadPoolExecutor(max_workers=10)  # Adjust as needed
        self.servers_data = {}
        self.lock = threading.Lock()  # Protect shared data
        self.stop_event = threading.Event()
        # Set by start_monitoring(); None until then so stop_monitoring()
        # can be called safely on a manager that was never started.
        self.timeout_thread = None

    def fetch_instances(self):
        """Return ``[{"id": <space name>, "owner": <username>}, ...]``.

        Returns an empty list (after printing the error) when the request
        fails, times out, or the response body is not valid JSON.
        """
        try:
            response = requests.get(
                f"https://huggingface.co/api/spaces?author={self.username}",
                timeout=10,  # never hang start-up on an unresponsive API
            )
            response.raise_for_status()  # Raise an exception for bad status codes
            user_instances = response.json()
            return [
                {"id": instance["id"].split("/")[1], "owner": self.username}
                for instance in user_instances
            ]
        except (requests.RequestException, ValueError) as e:
            print(f"Error fetching instances: {e}")
            return []

    def _record_metric(self, instance_id, username, data):
        """Store one decoded metric payload under its replica id.

        Raises ``KeyError``/``TypeError`` when *data* has no ``'replica'``
        entry or is not a mapping; the caller reports those as parse errors.
        """
        server_id = data['replica']
        with self.lock:
            entry = self.servers_data.setdefault(server_id, {
                'spaceId': instance_id,
                'owner': username,
                'data': None,
                'last_seen': 0,
                'isOnline': False,
            })
            entry['data'] = data
            entry['last_seen'] = time.time()
            entry['isOnline'] = True

    def connect(self, instance_id, username):
        """Stream metrics for one Space until the stream ends or stop is set.

        Lines are ignored until the first ``event: metric`` line; after that
        every ``data:`` line is decoded as JSON and recorded. On a connection
        error every known replica of this Space is marked offline.

        NOTE(review): this method returns when the stream ends and nothing
        visible here reconnects, so a Space that drops its stream stays
        offline until the process restarts - confirm that is intended.
        """
        # Local import: the file no longer imports json at module level.
        import json

        if self.stop_event.is_set():
            return
        url = f"https://api.hf.space/v1/{username}/{instance_id}/live-metrics/sse"
        try:
            # stream=True keeps the SSE connection open; timeout is
            # (connect timeout, read timeout) in seconds.
            with requests.get(url, stream=True, timeout=(5, 60)) as response:
                response.raise_for_status()

                if response.encoding is None:
                    response.encoding = 'utf-8'

                lines_iter = response.iter_lines(decode_unicode=True)

                # Skip initial lines until we find the first "event: metric"
                for line in lines_iter:
                    if line.strip() == "event: metric":
                        break

                # Now process the actual metric data
                for line in lines_iter:
                    if self.stop_event.is_set():
                        break
                    stripped = line.strip()
                    if not stripped.startswith("data:"):
                        continue
                    try:
                        # The payload comes from the network: parse it as
                        # data with json.loads, never execute it with eval().
                        data = json.loads(stripped[5:])  # Remove "data:" prefix
                        self._record_metric(instance_id, username, data)
                    except (ValueError, KeyError, TypeError) as e:
                        print(f"Error parsing data for {instance_id}: {e}")
                        print(f" Problematic line: {stripped}")  # Debugging

        except requests.exceptions.RequestException as e:
            print(f"Connection failed ({username}/{instance_id}): {e}")
            # Mark all replicas of this space as offline
            with self.lock:
                for server_info in self.servers_data.values():
                    if server_info['spaceId'] == instance_id:
                        server_info['isOnline'] = False

    def check_timeouts(self):
        """Every 2 seconds, mark replicas silent for more than 10 s offline.

        Runs until ``stop_event`` is set. Waiting on the event instead of
        sleeping lets the loop exit as soon as a stop is requested.
        """
        while not self.stop_event.is_set():
            with self.lock:
                now = time.time()
                for server_info in self.servers_data.values():
                    if now - server_info['last_seen'] > 10:
                        server_info['isOnline'] = False
            self.stop_event.wait(2)

    def start_monitoring(self):
        """Submit one ``connect`` job per Space and start the timeout thread."""
        instances = self.fetch_instances()
        for instance in instances:
            self.executor.submit(self.connect, instance['id'], instance['owner'])
        self.timeout_thread = threading.Thread(target=self.check_timeouts, daemon=True)
        self.timeout_thread.start()

    def stop_monitoring(self):
        """Signal all workers to stop and wait for the timeout thread.

        Workers only check the stop flag between received lines, so the
        executor is shut down without waiting for them.
        """
        self.stop_event.set()
        self.executor.shutdown(wait=False)
        timeout_thread = getattr(self, 'timeout_thread', None)
        if timeout_thread is not None:
            timeout_thread.join()

    def get_data(self):
        """Return an independent snapshot of ``servers_data``.

        A deep copy is taken under the lock so the caller can read or render
        it while worker threads keep updating the live per-replica dicts.
        """
        import copy

        with self.lock:
            return copy.deepcopy(self.servers_data)
471
 
 
 
 
 
472
 
473
import atexit

# Single shared collector for the dashboard owner's Spaces.
metrics_manager = MetricsManager(username='yangtb24')


@app.route("/")
def home():
    """Render the dashboard from the manager's current metrics snapshot."""
    servers_data = metrics_manager.get_data()
    return render_template_string(htmlTemplate, servers_data=servers_data)


# Initialize and start monitoring *before* running the Flask app
metrics_manager.start_monitoring()


def shutdown_session(exception=None):
    """Stop the metrics workers when the process exits.

    This used to be registered with ``@app.teardown_appcontext``. Flask runs
    those callbacks every time an application context is popped, which
    happens at the end of each request, so monitoring was shut down for good
    after the first page view. Registering with ``atexit`` runs it once, at
    interpreter shutdown. The ``exception`` parameter is kept only so the
    signature stays compatible with the previous teardown callback.
    """
    metrics_manager.stop_monitoring()


atexit.register(shutdown_session)


if __name__ == "__main__":
    app.run(debug=False, host="0.0.0.0", port=7860)  # Use port 7860 for HF Spaces
 
 
492