Quasar-Executo / http_diagnostic.py
KarlQuant's picture
Upload 2 files
6578cff verified
#!/usr/bin/env python3
"""
QUASAR Hub HTTP Diagnostic — Check metrics without WebSocket
This is the FASTEST way to diagnose what's happening.
No wscat, no WebSocket needed — just HTTP polling.
Usage:
python3 http_diagnostic.py [--watch]
"""
import json
import sys
import time
import requests
from datetime import datetime
def get_hub_state(hub_url: str = "http://127.0.0.1:7860"):
    """Return the decoded JSON body of ``GET {hub_url}/api/state``.

    The request uses a 2-second timeout. Any failure yields ``None``:
    a refused/unreachable connection is swallowed silently, every other
    error (HTTP error status, timeout, invalid JSON, ...) is printed first.
    """
    try:
        reply = requests.get(f"{hub_url}/api/state", timeout=2)
        reply.raise_for_status()
        state = reply.json()
    except requests.exceptions.ConnectionError:
        # Hub is simply not reachable; callers print their own message.
        return None
    except Exception as e:
        print(f"[!] Error: {e}")
        return None
    else:
        return state
def get_hub_health(hub_url: str = "http://127.0.0.1:7860"):
    """Return the decoded JSON body of ``GET {hub_url}/api/health``.

    The request uses a 2-second timeout. On any failure (connection
    problem, HTTP error status, invalid JSON, ...) the function returns
    ``None`` without printing anything.
    """
    try:
        reply = requests.get(f"{hub_url}/api/health", timeout=2)
        reply.raise_for_status()
        health = reply.json()
    except Exception:
        # Intentionally quiet: a None result is the "hub is down" signal.
        return None
    else:
        return health
def format_value(key: str, value):
    """Render a metric value as text, prefixed with a status marker where known.

    * Keys containing "loss" (case-insensitive): numeric values get four
      decimals and a marker chosen by threshold -- below 0.3, above 0.7,
      or in between.
    * Keys containing "accuracy": same formatting with the thresholds
      mirrored -- above 0.7, below 0.3, or in between.
    * Any other key: floats get six decimals.
    * Everything else (including non-numeric loss/accuracy values) is
      passed through ``str``.
    """
    key_lc = key.lower()
    is_number = isinstance(value, (int, float))

    if "loss" in key_lc:
        if not is_number:
            return str(value)
        # Lower is better for losses.
        if value < 0.3:
            return f"🟒 {value:.4f}"
        if value > 0.7:
            return f"πŸ”΄ {value:.4f}"
        return f"🟑 {value:.4f}"

    if "accuracy" in key_lc:
        if not is_number:
            return str(value)
        # Higher is better for accuracies.
        if value > 0.7:
            return f"🟒 {value:.4f}"
        if value < 0.3:
            return f"πŸ”΄ {value:.4f}"
        return f"🟑 {value:.4f}"

    if isinstance(value, float):
        return f"{value:.6f}"
    return str(value)
def _asset_has_training_metrics(asset) -> bool:
    """Return True if the asset reports a non-zero actor loss or AVN accuracy."""
    training = asset.get("training") or {}
    return training.get("actor_loss", 0) != 0 or training.get("avn_accuracy", 0) != 0


def print_diagnostic(hub_url: str = "http://127.0.0.1:7860"):
    """Print a complete diagnostic report for the hub at ``hub_url``.

    The report is written to stdout in four sections: hub health, asset
    rankings (first eight entries), metric history (first three entries)
    and a final diagnosis.

    Args:
        hub_url: Base URL of the hub's HTTP API.

    Returns:
        ``True`` when the hub answered, at least one asset is listed and at
        least one asset reports a non-zero ``actor_loss`` or
        ``avn_accuracy``; ``False`` in every other case.
    """
    print("\n" + "=" * 70)
    print(f"QUASAR Hub Diagnostic β€” {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("=" * 70)

    # Check health
    print("\n[1] HUB HEALTH")
    print("-" * 70)
    health = get_hub_health(hub_url)
    if not health:
        print(f"βœ— Hub not responding at {hub_url}")
        # Suggest probing the URL that was actually used, not the default.
        print(f" Try: curl {hub_url}/api/health")
        return False
    # `or 0` keeps the float format spec from failing on a JSON null.
    uptime = health.get("uptime_seconds") or 0
    print("βœ“ Hub is online")
    print(f" Status: {health.get('status', '?')}")
    print(f" Service: {health.get('service', '?')}")
    print(f" Spaces connected: {health.get('spaces_connected', 0)}")
    print(f" Messages RX: {health.get('messages_rx', 0)}")
    print(f" Uptime: {uptime:.0f}s")

    # Get state
    print("\n[2] ASSET RANKINGS")
    print("-" * 70)
    state = get_hub_state(hub_url)
    if not state:
        print("βœ— Could not fetch state")
        return False
    rankings = state.get("rankings") or []
    if not rankings:
        print("⚠ NO ASSETS CONNECTED YET")
        print(" β†’ Asset spaces need to connect and send metrics")
        print(" β†’ Check if asset spaces are running")
        return False
    print(f"βœ“ {len(rankings)} assets connected\n")

    # Show each asset (only the first eight to keep the report short)
    for i, asset in enumerate(rankings[:8], 1):
        name = asset.get("space_name", "?")
        print(f"{i}. {name}")
        # A key that is present but null would otherwise break `.get` below.
        training = asset.get("training") or {}
        voting = asset.get("voting") or {}

        # Training metrics
        actor_loss = training.get("actor_loss", 0)
        avn_acc = training.get("avn_accuracy", 0)
        # The `d` format code rejects floats and None, so coerce to int first.
        training_steps = int(training.get("training_steps") or 0)
        print(f" Training: {training_steps:5d} steps")
        print(f" Actor Loss: {format_value('actor_loss', actor_loss)} (expect < 0.3)")
        print(f" AVN Acc: {format_value('avn_accuracy', avn_acc)} (expect > 0.7)")

        # Voting metrics
        buy = voting.get("buy_count", 0)
        sell = voting.get("sell_count", 0)
        signal = voting.get("dominant_signal", "?")
        print(f" Signal: {signal} ({buy}B / {sell}S)")

        # Check if metrics are being updated (older than 10 s counts as stale)
        last_updated = asset.get("last_updated", 0)
        if last_updated:
            age = time.time() - last_updated
            if age < 10:
                print(f" βœ“ Updated {age:.0f}s ago")
            else:
                print(f" ⚠ Stale ({age:.0f}s old) β€” space may have disconnected")
        print()

    # Metric history
    print("[3] METRIC HISTORY")
    print("-" * 70)
    history = state.get("metric_history") or {}
    if history:
        print(f"βœ“ Collecting metric history for {len(history)} assets")
        for name, points in list(history.items())[:3]:
            if points:
                latest = points[-1]
                print(f" {name}: {len(points)} data points (latest: {latest})")
    else:
        print("⚠ No metric history yet")
        print(" β†’ Metrics will be recorded once spaces send non-zero values")

    print("\n" + "=" * 70)
    print("DIAGNOSIS")
    print("=" * 70)

    # An empty rankings list has already returned above, so the only
    # remaining failure mode is "connected, but every training metric is 0".
    if not any(_asset_has_training_metrics(asset) for asset in rankings):
        print("""
⚠ WARNING: Assets connected but no training metrics being sent
β†’ Asset spaces are sending voting data but NOT training metrics
β†’ They need to include "training" field with loss/accuracy
β†’ Expected format:
{
"training": {
"actor_loss": 0.234,
"critic_loss": 0.567,
"avn_loss": 0.123,
"avn_accuracy": 0.87
}
}
""")
        return False

    print("""
βœ… EVERYTHING LOOKS GOOD!
β†’ Assets are connected
β†’ Training metrics are flowing
β†’ Dashboard should display metrics in real-time
""")
    return True
def _render_bar(value, width: int = 10) -> str:
    """Return a ``width``-cell bar whose filled part is ``value`` scaled to ``width``."""
    try:
        filled = int(value * width)
    except (ValueError, OverflowError):
        # NaN / infinity cannot be converted to int; show an empty bar.
        filled = 0
    # Clamp so values outside 0..1 (e.g. a loss above 1 or a negative loss)
    # cannot make the bar longer than `width` cells.
    filled = max(0, min(width, filled))
    return "β–ˆ" * filled + "β–‘" * (width - filled)


def watch_mode(hub_url: str = "http://127.0.0.1:7860", interval: int = 5):
    """Continuously poll the hub and print a compact per-asset view.

    Every ``interval`` seconds the hub state is fetched and, for the first
    five ranked assets, the actor loss and AVN accuracy are printed with a
    10-cell bar each. The loop ends when the hub stops responding or when
    the user presses Ctrl+C.

    Args:
        hub_url: Base URL of the hub's HTTP API.
        interval: Seconds to sleep between two polls.
    """
    print(f"[*] Watching hub at {hub_url} (update every {interval}s)")
    print(" Press Ctrl+C to stop\n")
    iteration = 0
    try:
        while True:
            iteration += 1
            print(f"\n{'='*70}")
            print(f"Update #{iteration} β€” {datetime.now().strftime('%H:%M:%S')}")
            print(f"{'='*70}\n")
            state = get_hub_state(hub_url)
            if not state:
                # A failed or empty fetch ends watch mode rather than retrying.
                print("βœ— Hub not responding")
                break
            rankings = state.get("rankings") or []
            print(f"Connected: {len(rankings)} assets\n")
            for asset in rankings[:5]:
                # `or` fallbacks turn JSON nulls into printable defaults.
                name = asset.get("space_name") or "?"
                training = asset.get("training") or {}
                actor_loss = training.get("actor_loss") or 0
                avn_acc = training.get("avn_accuracy") or 0
                # Simple bar graph
                loss_bar = _render_bar(actor_loss)
                acc_bar = _render_bar(avn_acc)
                print(f"{name:15} | Loss: [{loss_bar}] {actor_loss:.3f}")
                print(f"{'':15} | Acc: [{acc_bar}] {avn_acc:.3f}\n")
            time.sleep(interval)
    except KeyboardInterrupt:
        print("\n[*] Stopped.")
if __name__ == "__main__":
    # One-shot report by default; the exit status mirrors its result
    # (0 = healthy, 1 = problem found). With --watch, monitor continuously.
    if "--watch" not in sys.argv:
        sys.exit(0 if print_diagnostic() else 1)
    watch_mode()