#!/usr/bin/env python3
"""
QUASAR Hub HTTP Diagnostic — Check metrics without WebSocket
This is the FASTEST way to diagnose what's happening.
No wscat, no WebSocket needed — just HTTP polling.
Usage:
python3 http_diagnostic.py [--watch]
"""
import json
import sys
import time
import requests
from datetime import datetime
def get_hub_state(hub_url: str = "http://127.0.0.1:7860"):
    """Return the hub's ``/api/state`` payload as parsed JSON.

    Args:
        hub_url: Base URL of the hub, without a trailing slash.

    Returns:
        The decoded JSON body, or ``None`` when the request fails. A refused
        or unreachable connection is handled silently; any other failure
        (HTTP error status, timeout, invalid JSON, ...) is printed first.
    """
    endpoint = f"{hub_url}/api/state"
    try:
        response = requests.get(endpoint, timeout=2)
        response.raise_for_status()
        payload = response.json()
    except requests.exceptions.ConnectionError:
        # Hub is unreachable: stay quiet, callers print their own message.
        return None
    except Exception as e:
        print(f"[!] Error: {e}")
        return None
    return payload
def get_hub_health(hub_url: str = "http://127.0.0.1:7860"):
    """Return the hub's ``/api/health`` payload as parsed JSON.

    Args:
        hub_url: Base URL of the hub, without a trailing slash.

    Returns:
        The decoded JSON body, or ``None`` when the request fails. A refused
        or unreachable connection is handled silently. Any other failure
        (HTTP error status, timeout, invalid JSON, ...) is printed before
        returning ``None`` so that a misbehaving hub is not mistaken for one
        that is simply offline.
    """
    try:
        resp = requests.get(f"{hub_url}/api/health", timeout=2)
        resp.raise_for_status()
        return resp.json()
    except requests.exceptions.ConnectionError:
        # Hub is not listening: the caller reports "not responding".
        return None
    except Exception as e:
        # Best-effort helper: never raise, but do surface what went wrong.
        print(f"[!] Error: {e}")
        return None
def format_value(key: str, value):
    """Format a metric value for display, adding a traffic-light marker.

    Args:
        key: Metric name. Matching is case-insensitive and substring based;
            names containing "loss" are checked before names containing
            "accuracy".
        value: The metric value to render.

    Returns:
        For a numeric loss metric, ``"<marker> <value with 4 decimals>"``
        where the marker is green below 0.3, red above 0.7 and yellow
        otherwise. For a numeric accuracy metric the scale is inverted
        (green above 0.7, red below 0.3). Any other float is rendered with
        6 decimals. Everything else falls back to ``str(value)``.
    """
    green, yellow, red = "🟢", "🟡", "🔴"
    metric = key.lower()
    is_number = isinstance(value, (int, float))

    if "loss" in metric:
        # Lower is better: green below 0.3, red above 0.7, yellow in between.
        if is_number:
            if value < 0.3:
                marker = green
            elif value > 0.7:
                marker = red
            else:
                marker = yellow
            return f"{marker} {value:.4f}"
    elif "accuracy" in metric:
        # Higher is better: green above 0.7, red below 0.3, yellow in between.
        if is_number:
            if value > 0.7:
                marker = green
            elif value < 0.3:
                marker = red
            else:
                marker = yellow
            return f"{marker} {value:.4f}"
    elif isinstance(value, float):
        return f"{value:.6f}"

    # Non-numeric loss/accuracy values and every remaining type.
    return str(value)
def print_diagnostic(hub_url: str = "http://127.0.0.1:7860"):
    """Print a complete diagnostic report for the hub.

    The report has four parts: hub health, per-asset rankings (first 8
    assets), metric history (first 3 assets) and a final diagnosis.

    Args:
        hub_url: Base URL of the hub, without a trailing slash.

    Returns:
        ``True`` when the hub answers, at least one asset is connected and at
        least one asset reports a non-zero ``actor_loss`` or
        ``avn_accuracy``; ``False`` otherwise.
    """
    rule = "=" * 70
    thin_rule = "-" * 70

    print("\n" + rule)
    print(f"QUASAR Hub Diagnostic — {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(rule)

    # [1] Health endpoint
    print("\n[1] HUB HEALTH")
    print(thin_rule)
    health = get_hub_health(hub_url)
    if not health:
        print(f"❌ Hub not responding at {hub_url}")
        # Point the hint at the URL actually being probed.
        print(f" Try: curl {hub_url}/api/health")
        return False
    print("✓ Hub is online")
    print(f" Status: {health.get('status', '?')}")
    print(f" Service: {health.get('service', '?')}")
    print(f" Spaces connected: {health.get('spaces_connected', 0)}")
    print(f" Messages RX: {health.get('messages_rx', 0)}")
    # `or 0` keeps the float format spec from failing on a null value.
    print(f" Uptime: {health.get('uptime_seconds') or 0:.0f}s")

    # [2] Rankings
    print("\n[2] ASSET RANKINGS")
    print(thin_rule)
    state = get_hub_state(hub_url)
    if not state:
        print("❌ Could not fetch state")
        return False
    rankings = state.get("rankings") or []
    if not rankings:
        print("❌ NO ASSETS CONNECTED YET")
        print(" → Asset spaces need to connect and send metrics")
        print(" → Check if asset spaces are running")
        print(" → Asset spaces need to connect to ws://hub:7860/ws/publish/{name}")
        print(" → Check firewall/network rules allow port 7860")
        return False
    print(f"✓ {len(rankings)} assets connected\n")
    for position, asset in enumerate(rankings[:8], 1):
        _print_asset_summary(position, asset)

    # [3] Metric history
    print("[3] METRIC HISTORY")
    print(thin_rule)
    history = state.get("metric_history", {})
    if history:
        print(f"✓ Collecting metric history for {len(history)} assets")
        for name, points in list(history.items())[:3]:
            if points:
                latest = points[-1]
                print(f" {name}: {len(points)} data points (latest: {latest})")
    else:
        print("⚠ No metric history yet")
        print(" → Metrics will be recorded once spaces send non-zero values")

    # Final verdict
    print("\n" + rule)
    print("DIAGNOSIS")
    print(rule)

    def reports_training(asset) -> bool:
        # Missing, null and zero values all count as "nothing reported".
        training = asset.get("training") or {}
        return bool(training.get("actor_loss") or training.get("avn_accuracy"))

    if not any(reports_training(asset) for asset in rankings):
        print("\n".join([
            "",
            "⚠ WARNING: Assets connected but no training metrics being sent",
            " → Asset spaces are sending voting data but NOT training metrics",
            " → They need to include \"training\" field with loss/accuracy",
            " → Expected format:",
            "   {",
            "     \"training\": {",
            "       \"actor_loss\": 0.234,",
            "       \"critic_loss\": 0.567,",
            "       \"avn_loss\": 0.123,",
            "       \"avn_accuracy\": 0.87",
            "     }",
            "   }",
            "",
        ]))
        return False

    print("\n".join([
        "",
        "✅ EVERYTHING LOOKS GOOD!",
        "✓ Assets are connected",
        "✓ Training metrics are flowing",
        "✓ Dashboard should display metrics in real-time",
        "",
    ]))
    return True


def _coerce_int(value, default: int = 0) -> int:
    """Return *value* as an int, or *default* if it cannot be converted."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return default


def _print_asset_summary(position: int, asset: dict) -> None:
    """Print the training, voting and freshness lines for one ranked asset."""
    print(f"{position}. {asset.get('space_name', '?')}")
    # `or {}` also covers sections that are present but null.
    training = asset.get("training") or {}
    voting = asset.get("voting") or {}

    # Training metrics. The `d` format spec rejects floats and None, so the
    # step count is coerced to an int first.
    training_steps = _coerce_int(training.get("training_steps", 0))
    actor_loss = training.get("actor_loss", 0)
    avn_acc = training.get("avn_accuracy", 0)
    print(f" Training: {training_steps:5d} steps")
    print(f" Actor Loss: {format_value('actor_loss', actor_loss)} (expect < 0.3)")
    print(f" AVN Acc: {format_value('avn_accuracy', avn_acc)} (expect > 0.7)")

    # Voting metrics
    buy = voting.get("buy_count", 0)
    sell = voting.get("sell_count", 0)
    signal = voting.get("dominant_signal", "?")
    print(f" Signal: {signal} ({buy}B / {sell}S)")

    # Freshness: last_updated is compared against time.time(), so only a
    # numeric, non-zero value is meaningful here.
    last_updated = asset.get("last_updated", 0)
    if isinstance(last_updated, (int, float)) and last_updated:
        age = time.time() - last_updated
        if age < 10:
            print(f" ✓ Updated {age:.0f}s ago")
        else:
            print(f" ⚠ Stale ({age:.0f}s old) — space may have disconnected")
    print()
def watch_mode(hub_url: str = "http://127.0.0.1:7860", interval: int = 5):
    """Continuously poll the hub and print a compact per-asset view.

    Every ``interval`` seconds the hub state is fetched and the first five
    ranked assets are shown with a 10-cell bar for ``actor_loss`` and
    ``avn_accuracy``. The loop ends when the hub stops answering or when the
    user presses Ctrl+C.

    Args:
        hub_url: Base URL of the hub, without a trailing slash.
        interval: Seconds to sleep between polls.
    """
    print(f"[*] Watching hub at {hub_url} (update every {interval}s)")
    print(" Press Ctrl+C to stop\n")
    iteration = 0
    try:
        while True:
            iteration += 1
            print(f"\n{'='*70}")
            print(f"Update #{iteration} — {datetime.now().strftime('%H:%M:%S')}")
            print(f"{'='*70}\n")
            state = get_hub_state(hub_url)
            if not state:
                print("❌ Hub not responding")
                break
            rankings = state.get("rankings") or []
            print(f"Connected: {len(rankings)} assets\n")
            for asset in rankings[:5]:
                # str() keeps the width spec working when the name is null.
                name = str(asset.get("space_name", "?"))
                training = asset.get("training") or {}
                actor_loss = _metric_as_float(training.get("actor_loss", 0))
                avn_acc = _metric_as_float(training.get("avn_accuracy", 0))
                # Simple bar graph
                loss_bar = _render_bar(actor_loss)
                acc_bar = _render_bar(avn_acc)
                print(f"{name:15} | Loss: [{loss_bar}] {actor_loss:.3f}")
                print(f"{'':15} | Acc: [{acc_bar}] {avn_acc:.3f}\n")
            time.sleep(interval)
    except KeyboardInterrupt:
        print("\n[*] Stopped.")


def _metric_as_float(value, default: float = 0.0) -> float:
    """Return *value* as a float, or *default* if it cannot be converted."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def _render_bar(value: float, width: int = 10) -> str:
    """Render *value* (nominally 0..1) as a fixed-width filled/empty bar."""
    try:
        filled = int(value * width)
    except (ValueError, OverflowError):
        # NaN or infinity cannot be turned into a cell count.
        filled = 0
    # Clamp so out-of-range metrics (e.g. a loss above 1.0) keep the bar at
    # exactly `width` cells.
    filled = max(0, min(width, filled))
    return "█" * filled + "░" * (width - filled)
if __name__ == "__main__":
    # `--watch` anywhere on the command line selects continuous monitoring;
    # otherwise run one report and mirror its result in the exit status.
    if "--watch" in sys.argv:
        watch_mode()
    else:
        sys.exit(0 if print_diagnostic() else 1)
|