yangtb24 committed on
Commit
b2b93c9
·
verified ·
1 Parent(s): 9a61cfe

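Update app.py: replace the server-side Python MetricsManager polling thread with client-side EventSource (SSE) metric fetching in the page script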

Browse files
Files changed (1) hide show
  1. app.py +158 -231
app.py CHANGED
@@ -1,9 +1,5 @@
1
  from flask import Flask, render_template_string
2
- import requests
3
- import json
4
- from threading import Thread, Lock
5
- import time
6
- from datetime import datetime, timedelta
7
 
8
  app = Flask(__name__)
9
 
@@ -205,6 +201,7 @@ htmlTemplate = f"""
205
  <meta charset="UTF-8">
206
  <title>HF Space Monitor</title>
207
  <link rel="icon" href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22>🚀</text></svg>">
 
208
  <style>{lightModeStyle}</style>
209
  </head>
210
  <body>
@@ -223,111 +220,178 @@ htmlTemplate = f"""
223
  <!-- 服务器卡片将在这里动态生成 -->
224
  </div>
225
  </div>
226
- <script src="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/js/all.min.js" integrity="sha512-yFjZbTYRCJodnuyGlsKamNE/LlEaEA/3uWCGാരി7eIq7jWqVl3J8jL/kof/tfu9Xqzh/y/VM5sJd/tq5iEew==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
227
-
228
  <script>
229
- const serversData = {{ servers_data|tojson }};
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
 
231
  function updateServerCard(data, spaceId) {{
232
- const serverId = data.replica;
233
- const serverElement = document.getElementById(`server-${serverId}`);
234
- const owner = data.owner;
235
-
236
- if (!serverElement) {{
237
- const card = document.createElement('div');
238
- card.id = `server-${serverId}`;
239
- card.className = 'server-card';
240
- card.innerHTML = `
241
- <div class="server-header">
242
- <div class="server-name">
243
- <div class="status-dot status-online"></div>
244
- <svg class="server-flag" width="20" height="20" viewBox="0 0 24 24" fill="currentColor">
245
- <path d="M21 3H3C1.9 3 1 3.9 1 5v3c0 1.1.9 2 2 2h18c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zm-1 5H4V6h16v2zm1 4H3c-1.1 0-2 .9-2 2v3c0 1.1.9 2 2 2h18c1.1 0 2-.9 2-2v-3c0-1.1-.9-2-2-2zm-1 5H4v-2h16v2zm1 4H3c-1.1 0-2 .9-2 2v3c0 1.1.9 2 2 2h18c1.1 0 2-.9 2-2v-3c0-1.1-.9-2-2-2zm-1 5H4v-2h16v2z"/>
246
- </svg>
247
- <div>${{serverId}} (${{owner}}/${{spaceId}})</div>
248
- </div>
249
- </div>
250
- <div class="metric-grid">
251
- <div class="metric-item">
252
- <div class="metric-label">CPU</div>
253
- <div class="progress-bar-container">
254
- <div class="cpu-progress-bar"></div>
255
- </div>
256
- <div class="metric-value cpu-usage">0%</div>
257
  </div>
258
- <div class="metric-item">
259
- <div class="metric-label">内存</div>
260
- <div class="progress-bar-container">
261
- <div class="memory-progress-bar"></div>
262
- </div>
263
- <div class="metric-value memory-usage">0%</div>
264
  </div>
265
- <div class="metric-item">
266
- <div class="metric-label">上传</div>
267
- <div class="metric-value upload">0 KB/s</div>
268
- </div>
269
- <div class="metric-item">
270
- <div class="metric-label">下载</div>
271
- <div class="metric-value download">0 KB/s</div>
272
- </div>
273
- </div>
274
- `;
275
- document.getElementById('servers').appendChild(card);
276
- }}
 
 
277
 
278
- const card = document.getElementById(`server-${serverId}`);
279
- const cpuUsage = data.cpu_usage_pct;
280
- const memoryUsage = (data.memory_used_bytes / data.memory_total_bytes) * 100;
281
- const uploadBps = data.tx_bps;
282
- const downloadBps = data.rx_bps;
283
 
284
- card.querySelector('.cpu-usage').textContent = `${cpuUsage.toFixed(2)}%`;
285
- card.querySelector('.cpu-progress-bar').style.width = `${cpuUsage}%`;
286
 
287
- card.querySelector('.memory-usage').textContent = `${memoryUsage.toFixed(2)}%`;
288
- card.querySelector('.memory-progress-bar').style.width = `${memoryUsage}%`;
289
 
290
- card.querySelector('.upload').textContent = `${formatBytes(uploadBps)}/s`;
291
- card.querySelector('.download').textContent = `${formatBytes(downloadBps)}/s`;
292
 
293
- updateSummary();
 
 
 
294
  }}
295
 
296
  function updateSummary() {{
297
- let online = 0;
298
- let offline = 0;
299
- let totalUpload = 0;
300
- let totalDownload = 0;
301
-
302
- for (const serverId in serversData) {{
303
- const serverData = serversData[serverId];
304
-
305
- if (!serverData) continue; // Skip if serverData is null
306
-
307
- const isOnline = serverData.status === 'online';
308
  const serverCard = document.getElementById(`server-${serverId}`);
309
-
310
  if (serverCard) {{
311
  const statusDot = serverCard.querySelector('.status-dot');
312
- statusDot.className = `status-dot status-${{isOnline ? 'online' : 'offline'}}`;
313
 
314
  if (isOnline) {{
315
- totalUpload += serverData.tx_bps;
316
- totalDownload += serverData.rx_bps;
 
 
317
  }}
318
  }}
319
  isOnline ? online++ : offline++;
320
- }}
321
-
322
 
323
- document.getElementById('totalServers').textContent = Object.keys(serversData).length;
324
  document.getElementById('onlineServers').textContent = online;
325
  document.getElementById('offlineServers').textContent = offline;
326
  document.getElementById('totalUpload').textContent = `${formatBytes(totalUpload)}/s`;
327
  document.getElementById('totalDownload').textContent = `${formatBytes(totalDownload)}/s`;
328
  }}
329
 
330
-
331
  function formatBytes(bytes) {{
332
  if (bytes === 0) return '0 B';
333
  const k = 1024;
@@ -336,159 +400,22 @@ htmlTemplate = f"""
336
  return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
337
  }}
338
 
339
- // Initial update with existing data
340
- for (const spaceId in serversData) {{
341
- for(const replicaId in serversData[spaceId]){{
342
- updateServerCard(serversData[spaceId][replicaId], spaceId);
343
- }}
344
- }}
345
 
346
- // No need for setInterval here; data is updated via Flask
347
  </script>
348
  </body>
349
  </html>
350
  """
351
 
352
- class MetricsManager:
353
- def __init__(self, username):
354
- self.username = username
355
- self.servers_data = {} # Store all server data
356
- self.data_lock = Lock() # Lock for thread-safe updates
357
- self.last_fetch_time = {} # Track last fetch time for each instance
358
- self.instance_ids = set() # store instance ids
359
-
360
-
361
- def fetch_instances(self):
362
- """Fetches instances for the given username."""
363
- try:
364
- response = requests.get(f"https://huggingface.co/api/spaces?author={self.username}")
365
- response.raise_for_status() # Raise an exception for bad status codes
366
- user_instances = response.json()
367
- new_instance_ids = {instance['id'].split('/')[1] for instance in user_instances}
368
-
369
- # Check for removed instances
370
- for instance_id in list(self.instance_ids): # Iterate over a copy
371
- if instance_id not in new_instance_ids:
372
- self.remove_instance(instance_id)
373
- print(f"Instance removed: {instance_id}")
374
-
375
- self.instance_ids.update(new_instance_ids) # Update the set of instance IDs
376
- return [{'id': instance_id, 'owner': self.username} for instance_id in new_instance_ids]
377
-
378
- except requests.RequestException as e:
379
- print(f"Error fetching instances: {e}")
380
- return []
381
- except (KeyError, IndexError, json.JSONDecodeError) as e:
382
- print(f"Error parsing instance data: {e}")
383
- return []
384
-
385
- def remove_instance(self, instance_id):
386
- """Removes an instance and its associated data."""
387
- with self.data_lock:
388
- if instance_id in self.servers_data:
389
- del self.servers_data[instance_id]
390
- if instance_id in self.last_fetch_time:
391
- del self.last_fetch_time[instance_id]
392
- if instance_id in self.instance_ids:
393
- self.instance_ids.remove(instance_id)
394
-
395
- def fetch_metrics(self, instance_id, owner):
396
- """Fetches metrics for a single instance."""
397
- url = f"https://api.hf.space/v1/{owner}/{instance_id}/live-metrics/sse"
398
- try:
399
- response = requests.get(url, stream=True, timeout=10) # Timeout for connection
400
- response.raise_for_status()
401
-
402
- for line in response.iter_lines():
403
- if line:
404
- try:
405
- decoded_line = line.decode('utf-8')
406
- if decoded_line.startswith("event: metric"):
407
- data_part = decoded_line.split("data: ", 1)[1]
408
- data = json.loads(data_part)
409
- self.update_server_data(data, instance_id, owner)
410
- except (IndexError, json.JSONDecodeError) as e:
411
- print(f"Error parsing metric data for {instance_id}: {e}, Line: {line.decode('utf-8')}")
412
- continue # Continue to the next line
413
-
414
- except requests.exceptions.RequestException as e:
415
- print(f"Error fetching metrics for {instance_id}: {e}")
416
- self.mark_offline(instance_id) # Mark as offline
417
- except Exception as e:
418
- print(f"An unexpected error occurred for {instance_id}: {e}")
419
- self.mark_offline(instance_id)
420
-
421
-
422
-
423
- def update_server_data(self, data, space_id, owner):
424
- """Updates server data in a thread-safe manner."""
425
- with self.data_lock:
426
- replica_id = data['replica']
427
-
428
- # Check if the space_id exists in servers_data, if not, create it
429
- if space_id not in self.servers_data:
430
- self.servers_data[space_id] = {}
431
-
432
- # Now, update the data for the specific replica within that space_id
433
- if replica_id not in self.servers_data[space_id]:
434
- self.servers_data[space_id][replica_id] = {}
435
-
436
- self.servers_data[space_id][replica_id] = {
437
- 'replica': replica_id,
438
- 'owner': owner,
439
- 'cpu_usage_pct': data['cpu_usage_pct'],
440
- 'memory_used_bytes': data['memory_used_bytes'],
441
- 'memory_total_bytes': data['memory_total_bytes'],
442
- 'tx_bps': data['tx_bps'],
443
- 'rx_bps': data['rx_bps'],
444
- 'status': 'online' # Mark as online when data is received
445
- }
446
-
447
- self.last_fetch_time[space_id] = datetime.utcnow()
448
-
449
- def mark_offline(self, instance_id):
450
- """Marks an instance as offline."""
451
- with self.data_lock:
452
- if instance_id in self.servers_data:
453
- for replica_id in self.servers_data[instance_id]:
454
- if self.servers_data[instance_id][replica_id]: # Check if not None
455
- self.servers_data[instance_id][replica_id]['status'] = 'offline'
456
-
457
- def check_timeouts(self):
458
- """Checks for instances that haven't been updated recently."""
459
- now = datetime.utcnow()
460
- with self.data_lock:
461
- for instance_id in list(self.last_fetch_time.keys()): # Iterate on a copy
462
- if now - self.last_fetch_time.get(instance_id, datetime.min) > timedelta(seconds=10):
463
- self.mark_offline(instance_id)
464
-
465
- def run_fetch(self):
466
- """Fetches metrics for all instances in a loop."""
467
- while True:
468
- instances = self.fetch_instances()
469
- for instance in instances:
470
- self.fetch_metrics(instance['id'], instance['owner'])
471
- self.check_timeouts()
472
- time.sleep(2) # Fetch every 2 seconds
473
-
474
- def start(self):
475
- """Starts the data fetching in a separate thread."""
476
- thread = Thread(target=self.run_fetch)
477
- thread.daemon = True # Allow the program to exit even if the thread is running
478
- thread.start()
479
-
480
- # --- Flask App Setup ---
481
- username = 'yangtb24' # Replace with your Hugging Face username
482
- metrics_manager = MetricsManager(username)
483
- metrics_manager.start() # Start the data collection
484
-
485
- @app.route('/')
486
- def home():
487
- with metrics_manager.data_lock:
488
- # Make a copy to avoid modification during iteration
489
- servers_data_copy = metrics_manager.servers_data.copy()
490
- return render_template_string(htmlTemplate, servers_data=servers_data_copy)
491
-
492
- if __name__ == '__main__':
493
- app.run(debug=True, host='0.0.0.0', port=7860)
494
 
 
 
 
 
 
 
 
1
  from flask import Flask, render_template_string
2
+ import os
 
 
 
 
3
 
4
  app = Flask(__name__)
5
 
 
201
  <meta charset="UTF-8">
202
  <title>HF Space Monitor</title>
203
  <link rel="icon" href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22>🚀</text></svg>">
204
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" integrity="sha512-9usAa10IRO0HhonpyAIVpjrylPvoDwiPUiKdWk5t3PyolY1cOd4DSE0Ga+ri4AuTroPR5aQvXU9xC6qOPnzFeg==" crossorigin="anonymous" referrerpolicy="no-referrer" />
205
  <style>{lightModeStyle}</style>
206
  </head>
207
  <body>
 
220
  <!-- 服务器卡片将在这里动态生成 -->
221
  </div>
222
  </div>
 
 
223
  <script>
224
+ const username = 'yangtb24';
225
+
226
+ async function fetchInstances() {
227
+ try {
228
+ const response = await fetch(`https://huggingface.co/api/spaces?author=${username}`);
229
+ const userInstances = await response.json();
230
+ return userInstances.map(instance => ({{
231
+ id: instance.id.split('/')[1],
232
+ owner: username
233
+ }}));
234
+ } catch (error) {
235
+ console.error("获取实例列表失败:", error);
236
+ return [];
237
+ }
238
+ }}
239
+
240
+ class MetricsManager {{
241
+ constructor() {{
242
+ this.eventSources = new Map();
243
+ this.servers = new Map();
244
+ this.instanceOwners = new Map();
245
+ this.spaceIds = new Map();
246
+ }}
247
+
248
+ async connect(instanceId, username) {{
249
+ if (this.eventSources.has(instanceId)) return;
250
+
251
+ try {{
252
+ const eventSource = new EventSource(
253
+ `https://api.hf.space/v1/${username}/${instanceId}/live-metrics/sse`
254
+ );
255
+
256
+ this.spaceIds.set(instanceId, instanceId);
257
+ this.instanceOwners.set(instanceId, username);
258
+
259
+ eventSource.addEventListener("metric", (event) => {{
260
+ try {{
261
+ const data = JSON.parse(event.data);
262
+ updateServerCard(data, instanceId);
263
+ }} catch (error) {{
264
+ console.error(`解析数据失败 (${instanceId}):`, error);
265
+ }}
266
+ }});
267
+
268
+ eventSource.onerror = (error) => {{
269
+ console.error(`EventSource 错误 (${instanceId}):`, error);
270
+ eventSource.close();
271
+ }};
272
+
273
+ this.eventSources.set(instanceId, eventSource);
274
+ }} catch (error) {{
275
+ console.error(`连接失败 (${username}/${instanceId}):`, error);
276
+ }}
277
+ }}
278
+
279
+ disconnectAll() {{
280
+ this.eventSources.forEach(es => es.close());
281
+ this.eventSources.clear();
282
+ }}
283
+ }}
284
+
285
+ const metricsManager = new MetricsManager();
286
+ const servers = new Map();
287
+
288
+ async function initialize() {{
289
+ const instances = await fetchInstances();
290
+ instances.forEach(instance => {{
291
+ metricsManager.connect(instance.id, instance.owner);
292
+ }});
293
+ }}
294
+
295
+ initialize();
296
 
297
  function updateServerCard(data, spaceId) {{
298
+ const serverId = data.replica;
299
+ const serverElement = document.getElementById(`server-${serverId}`);
300
+ const owner = metricsManager.instanceOwners.get(spaceId);
301
+
302
+ if (!serverElement) {{
303
+ const card = document.createElement('div');
304
+ card.id = `server-${serverId}`;
305
+ card.className = 'server-card';
306
+ card.innerHTML = `
307
+ <div class="server-header">
308
+ <div class="server-name">
309
+ <div class="status-dot status-online"></div>
310
+ <svg class="server-flag" width="20" height="20" viewBox="0 0 24 24" fill="currentColor">
311
+ <path d="M21 3H3C1.9 3 1 3.9 1 5v3c0 1.1.9 2 2 2h18c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zm-1 5H4V6h16v2zm1 4H3c-1.1 0-2 .9-2 2v3c0 1.1.9 2 2 2h18c1.1 0 2-.9 2-2v-3c0-1.1-.9-2-2-2zm-1 5H4v-2h16v2zm1 4H3c-1.1 0-2 .9-2 2v3c0 1.1.9 2 2 2h18c1.1 0 2-.9 2-2v-3c0-1.1-.9-2-2-2zm-1 5H4v-2h16v2z"/>
312
+ </svg>
313
+ <div>${serverId} (${owner}/${spaceId})</div>
314
+ </div>
315
+ </div>
316
+ <div class="metric-grid">
317
+ <div class="metric-item">
318
+ <div class="metric-label">CPU</div>
319
+ <div class="progress-bar-container">
320
+ <div class="cpu-progress-bar"></div>
 
 
321
  </div>
322
+ <div class="metric-value cpu-usage">0%</div>
323
+ </div>
324
+ <div class="metric-item">
325
+ <div class="metric-label">内存</div>
326
+ <div class="progress-bar-container">
327
+ <div class="memory-progress-bar"></div>
328
  </div>
329
+ <div class="metric-value memory-usage">0%</div>
330
+ </div>
331
+ <div class="metric-item">
332
+ <div class="metric-label">上传</div>
333
+ <div class="metric-value upload">0 KB/s</div>
334
+ </div>
335
+ <div class="metric-item">
336
+ <div class="metric-label">下载</div>
337
+ <div class="metric-value download">0 KB/s</div>
338
+ </div>
339
+ </div>
340
+ `;
341
+ document.getElementById('servers').appendChild(card);
342
+ }}
343
 
344
+ const card = document.getElementById(`server-${serverId}`);
345
+ const cpuUsage = data.cpu_usage_pct;
346
+ const memoryUsage = (data.memory_used_bytes / data.memory_total_bytes) * 100;
347
+ const uploadBps = data.tx_bps;
348
+ const downloadBps = data.rx_bps;
349
 
350
+ card.querySelector('.cpu-usage').textContent = `${cpuUsage.toFixed(2)}%`;
351
+ card.querySelector('.cpu-progress-bar').style.width = `${cpuUsage}%`;
352
 
353
+ card.querySelector('.memory-usage').textContent = `${memoryUsage.toFixed(2)}%`;
354
+ card.querySelector('.memory-progress-bar').style.width = `${memoryUsage}%`;
355
 
356
+ card.querySelector('.upload').textContent = `${formatBytes(uploadBps)}/s`;
 
357
 
358
+ card.querySelector('.download').textContent = `${formatBytes(downloadBps)}/s`;
359
+
360
+ servers.set(serverId, Date.now());
361
+ updateSummary();
362
  }}
363
 
364
  function updateSummary() {{
365
+ const now = Date.now();
366
+ let online = 0;
367
+ let offline = 0;
368
+ let totalUpload = 0;
369
+ let totalDownload = 0;
370
+
371
+ servers.forEach((lastSeen, serverId) => {{
372
+ const isOnline = (now - lastSeen) < 10000;
 
 
 
373
  const serverCard = document.getElementById(`server-${serverId}`);
 
374
  if (serverCard) {{
375
  const statusDot = serverCard.querySelector('.status-dot');
376
+ statusDot.className = `status-dot status-${isOnline ? 'online' : 'offline'}`;
377
 
378
  if (isOnline) {{
379
+ const uploadText = serverCard.querySelector('.upload').textContent;
380
+ const downloadText = serverCard.querySelector('.download').textContent;
381
+ totalUpload += parseFloat(uploadText) || 0;
382
+ totalDownload += parseFloat(downloadText) || 0;
383
  }}
384
  }}
385
  isOnline ? online++ : offline++;
386
+ }});
 
387
 
388
+ document.getElementById('totalServers').textContent = servers.size;
389
  document.getElementById('onlineServers').textContent = online;
390
  document.getElementById('offlineServers').textContent = offline;
391
  document.getElementById('totalUpload').textContent = `${formatBytes(totalUpload)}/s`;
392
  document.getElementById('totalDownload').textContent = `${formatBytes(totalDownload)}/s`;
393
  }}
394
 
 
395
  function formatBytes(bytes) {{
396
  if (bytes === 0) return '0 B';
397
  const k = 1024;
 
400
  return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
401
  }}
402
 
403
+ setInterval(updateSummary, 2000);
404
+
405
+ setInterval(async () => {{
406
+ metricsManager.disconnectAll();
407
+ await initialize();
408
+ }}, 300000);
409
 
 
410
  </script>
411
  </body>
412
  </html>
413
  """
414
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
415
 
416
+ @app.route("/")
417
+ def index():
418
+ return render_template_string(htmlTemplate)
419
+
420
+ if __name__ == "__main__":
421
+ app.run(debug=True, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))