Spaces:

KarlQuant
/

Quasar-Executo

Running

App Files Files Community

Quasar-Executo / http_diagnostic.py

KarlQuant

Upload 2 files

6578cff verified about 1 month ago

raw

history blame contribute delete

8.11 kB

	#!/usr/bin/env python3
	"""
	QUASAR Hub HTTP Diagnostic — Check metrics without WebSocket

	This is the FASTEST way to diagnose what's happening.
	No wscat, no WebSocket needed — just HTTP polling.

	Usage:
	python3 http_diagnostic.py [--watch]
	"""

	import json
	import math
	import sys
	import time
	from datetime import datetime

	import requests

	def get_hub_state(hub_url: str = "http://127.0.0.1:7860"):
	    """Return the decoded JSON body of the hub's ``/api/state`` endpoint.

	    Args:
	        hub_url: Base URL of the hub; ``/api/state`` is appended to it as-is.

	    Returns:
	        The parsed JSON payload, or ``None`` if the request failed.  A
	        ``requests`` connection error is swallowed silently; every other
	        exception is printed before ``None`` is returned.
	    """
	    try:
	        response = requests.get(f"{hub_url}/api/state", timeout=2)
	        response.raise_for_status()
	        payload = response.json()
	    except requests.exceptions.ConnectionError:
	        # Unreachable hub: stay quiet and let the caller report it.
	        return None
	    except Exception as exc:
	        print(f"[!] Error: {exc}")
	        return None
	    else:
	        return payload


	def get_hub_health(hub_url: str = "http://127.0.0.1:7860"):
	    """Fetch the hub's ``/api/health`` payload.

	    Error handling mirrors ``get_hub_state``: an unreachable hub is reported
	    only through the ``None`` return value, while any other failure (HTTP
	    error status, read timeout, body that is not JSON, ...) is printed so it
	    is not mistaken for "hub is down".

	    Args:
	        hub_url: Base URL of the hub; ``/api/health`` is appended to it as-is.

	    Returns:
	        The parsed JSON payload, or ``None`` if the request failed.
	    """
	    try:
	        resp = requests.get(f"{hub_url}/api/health", timeout=2)
	        resp.raise_for_status()
	        return resp.json()
	    except requests.exceptions.ConnectionError:
	        # Nothing is listening at hub_url; the caller prints its own message.
	        return None
	    except Exception as e:
	        # Previously swallowed silently, which hid 4xx/5xx and bad payloads.
	        print(f"[!] Error: {e}")
	        return None


	def format_value(key: str, value):
	    """Format a metric value for display, with a traffic-light marker.

	    The metric kind is chosen from ``key`` (case-insensitive); "loss" is
	    checked before "accuracy".  Numeric loss/accuracy values are rendered
	    with four decimals and a marker:

	    * loss:     🟢 below 0.3, 🔴 above 0.7, 🟡 in between
	    * accuracy: 🟢 above 0.7, 🔴 below 0.3, 🟡 in between

	    A non-finite loss/accuracy (NaN, +inf, -inf) is always marked 🔴, since
	    threshold comparisons on such values are meaningless.

	    Args:
	        key: Metric name, e.g. ``"actor_loss"`` or ``"avn_accuracy"``.
	        value: The metric value; may be any type.

	    Returns:
	        The display string.  Floats under any other key get six decimals;
	        everything else falls back to ``str(value)``.
	    """
	    metric = key.lower()
	    is_number = isinstance(value, (int, float))

	    if "loss" in metric:
	        if is_number:
	            if not math.isfinite(value):
	                # NaN would otherwise land in the neutral branch and -inf
	                # would be shown as a "good" loss.
	                return f"🔴 {value:.4f}"
	            if value < 0.3:
	                return f"🟢 {value:.4f}"
	            if value > 0.7:
	                return f"🔴 {value:.4f}"
	            return f"🟡 {value:.4f}"
	    elif "accuracy" in metric:
	        if is_number:
	            if not math.isfinite(value):
	                return f"🔴 {value:.4f}"
	            if value > 0.7:
	                return f"🟢 {value:.4f}"
	            if value < 0.3:
	                return f"🔴 {value:.4f}"
	            return f"🟡 {value:.4f}"
	    elif isinstance(value, float):
	        return f"{value:.6f}"

	    # Non-numeric loss/accuracy values and all remaining types.
	    return str(value)


	def _format_steps(steps):
	    """Render a step counter right-aligned in 5 columns without raising."""
	    try:
	        return f"{int(steps):5d}"
	    except (TypeError, ValueError, OverflowError):
	        # Null, non-numeric or non-finite counters are shown verbatim.
	        return f"{steps!s:>5}"


	def _print_asset(index, asset):
	    """Print the training, voting and freshness lines for one ranked asset."""
	    name = asset.get("space_name", "?")
	    print(f"{index}. {name}")

	    # A JSON null arrives as None; fall back to an empty mapping so the
	    # .get() calls below cannot fail.
	    training = asset.get("training") or {}
	    voting = asset.get("voting") or {}

	    # Training metrics
	    actor_loss = training.get("actor_loss", 0)
	    avn_acc = training.get("avn_accuracy", 0)
	    training_steps = training.get("training_steps", 0)

	    print(f" Training: {_format_steps(training_steps)} steps")
	    print(f" Actor Loss: {format_value('actor_loss', actor_loss)} (expect < 0.3)")
	    print(f" AVN Acc: {format_value('avn_accuracy', avn_acc)} (expect > 0.7)")

	    # Voting metrics
	    buy = voting.get("buy_count", 0)
	    sell = voting.get("sell_count", 0)
	    signal = voting.get("dominant_signal", "?")

	    print(f" Signal: {signal} ({buy}B / {sell}S)")

	    # Freshness: only meaningful when the hub sent a numeric, non-zero value.
	    # NOTE(review): assumes last_updated is epoch seconds, as it is compared
	    # with time.time() — confirm against the hub.
	    last_updated = asset.get("last_updated", 0)
	    if isinstance(last_updated, (int, float)) and last_updated:
	        age = time.time() - last_updated
	        if age < 10:
	            print(f" ✓ Updated {age:.0f}s ago")
	        else:
	            print(f" ⚠ Stale ({age:.0f}s old) — space may have disconnected")

	    print()


	def print_diagnostic(hub_url: str = "http://127.0.0.1:7860"):
	    """Print a complete diagnostic report for the hub at ``hub_url``.

	    The report has three sections (hub health, asset rankings, metric
	    history) followed by a short diagnosis.  Only the first eight ranked
	    assets and the first three history series are listed.

	    Args:
	        hub_url: Base URL of the hub to query.

	    Returns:
	        ``True`` when the hub answers, at least one asset is ranked and at
	        least one asset reports a non-zero ``actor_loss`` or
	        ``avn_accuracy``; ``False`` otherwise.
	    """
	    print("\n" + "=" * 70)
	    print(f"QUASAR Hub Diagnostic — {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
	    print("=" * 70)

	    # Check health
	    print("\n[1] HUB HEALTH")
	    print("-" * 70)
	    health = get_hub_health(hub_url)
	    if not health:
	        print(f"✗ Hub not responding at {hub_url}")
	        # Point the hint at the hub actually being diagnosed.
	        print(f" Try: curl {hub_url}/api/health")
	        return False

	    uptime = health.get("uptime_seconds", 0)
	    # A null/non-numeric uptime must not abort the whole report.
	    uptime_text = f"{uptime:.0f}" if isinstance(uptime, (int, float)) else str(uptime)

	    print("✓ Hub is online")
	    print(f" Status: {health.get('status', '?')}")
	    print(f" Service: {health.get('service', '?')}")
	    print(f" Spaces connected: {health.get('spaces_connected', 0)}")
	    print(f" Messages RX: {health.get('messages_rx', 0)}")
	    print(f" Uptime: {uptime_text}s")

	    # Get state
	    print("\n[2] ASSET RANKINGS")
	    print("-" * 70)
	    state = get_hub_state(hub_url)
	    if not state:
	        print("✗ Could not fetch state")
	        return False

	    rankings = state.get("rankings", [])
	    if not rankings:
	        print("⚠ NO ASSETS CONNECTED YET")
	        print(" → Asset spaces need to connect and send metrics")
	        print(" → Check if asset spaces are running")
	        return False

	    print(f"✓ {len(rankings)} assets connected\n")

	    # Show each asset
	    for i, asset in enumerate(rankings[:8], 1):
	        _print_asset(i, asset)

	    # Metric history
	    print("[3] METRIC HISTORY")
	    print("-" * 70)
	    history = state.get("metric_history", {})
	    if history:
	        print(f"✓ Collecting metric history for {len(history)} assets")
	        for name, points in list(history.items())[:3]:
	            if points:
	                latest = points[-1]
	                print(f" {name}: {len(points)} data points (latest: {latest})")
	    else:
	        print("⚠ No metric history yet")
	        print(" → Metrics will be recorded once spaces send non-zero values")

	    print("\n" + "=" * 70)
	    print("DIAGNOSIS")
	    print("=" * 70)

	    # An empty ranking list already returned above, so only the
	    # "connected but silent" and "all good" outcomes remain here.
	    has_nonzero_metrics = any(
	        (asset.get("training") or {}).get(key) not in (0, None)
	        for asset in rankings
	        for key in ("actor_loss", "avn_accuracy")
	    )

	    if not has_nonzero_metrics:
	        print("""
	⚠ WARNING: Assets connected but no training metrics being sent
	→ Asset spaces are sending voting data but NOT training metrics
	→ They need to include "training" field with loss/accuracy
	→ Expected format:
	{
	"training": {
	"actor_loss": 0.234,
	"critic_loss": 0.567,
	"avn_loss": 0.123,
	"avn_accuracy": 0.87
	}
	}
	""")
	        return False

	    print("""
	✅ EVERYTHING LOOKS GOOD!
	→ Assets are connected
	→ Training metrics are flowing
	→ Dashboard should display metrics in real-time
	""")
	    return True


	def _render_bar(value, width: int = 10):
	    """Return a ``width``-cell bar for *value* read on a 0..1 scale."""
	    is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
	    if not is_number or value != value:  # value != value is true only for NaN
	        filled = 0
	    else:
	        # Clamp so out-of-range metrics cannot shrink or overflow the bar.
	        filled = int(max(0.0, min(1.0, value)) * width)
	    return "█" * filled + "░" * (width - filled)


	def _format_metric(value):
	    """Format a metric with three decimals, or verbatim if not numeric."""
	    if isinstance(value, (int, float)):
	        return f"{value:.3f}"
	    return str(value)


	def watch_mode(hub_url: str = "http://127.0.0.1:7860", interval: int = 5):
	    """Continuously poll the hub and print a compact loss/accuracy view.

	    Each update lists up to five ranked assets with a ten-cell bar for
	    ``actor_loss`` and ``avn_accuracy``.  The loop ends when the hub stops
	    answering or when the user presses Ctrl+C.

	    Args:
	        hub_url: Base URL of the hub to poll.
	        interval: Seconds to sleep between updates.
	    """
	    print(f"[*] Watching hub at {hub_url} (update every {interval}s)")
	    print(" Press Ctrl+C to stop\n")

	    iteration = 0
	    try:
	        while True:
	            iteration += 1
	            print(f"\n{'='*70}")
	            print(f"Update #{iteration} — {datetime.now().strftime('%H:%M:%S')}")
	            print(f"{'='*70}\n")

	            state = get_hub_state(hub_url)
	            if not state:
	                print("✗ Hub not responding")
	                break

	            # "or" also covers a JSON null for either field.
	            rankings = state.get("rankings") or []
	            print(f"Connected: {len(rankings)} assets\n")

	            for asset in rankings[:5]:
	                # str() keeps the width spec below valid for a null name.
	                name = str(asset.get("space_name", "?"))
	                training = asset.get("training") or {}

	                actor_loss = training.get("actor_loss", 0)
	                avn_acc = training.get("avn_accuracy", 0)

	                # Simple bar graph
	                loss_bar = _render_bar(actor_loss)
	                acc_bar = _render_bar(avn_acc)

	                print(f"{name:15} | Loss: [{loss_bar}] {_format_metric(actor_loss)}")
	                print(f"{'':15} | Acc: [{acc_bar}] {_format_metric(avn_acc)}\n")

	            time.sleep(interval)

	    except KeyboardInterrupt:
	        print("\n[*] Stopped.")


	if __name__ == "__main__":
	    # One-shot report by default; its result becomes the process exit code.
	    if "--watch" not in sys.argv:
	        sys.exit(0 if print_diagnostic() else 1)

	    # --watch: poll until the hub goes away or the user interrupts.
	    watch_mode()