# (Page-header residue from the file listing, kept as comments so the module parses.)
# Raiff1982's picture
# Upload 120 files
# ed1b365 verified
"""
Dashboard - ASCII-formatted system status display for the Codette training lab.
Shows:
- Latest training run stats
- Best adapter scores
- Dataset sizes and quality
- Failure rates
- Improvement trends
No web framework required; pure terminal output.
"""
from __future__ import annotations
import argparse
import json
import os
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
# Directory that contains this file, and that directory's parent.
_THIS_DIR = Path(__file__).resolve().parent
_PROJECT_ROOT = _THIS_DIR.parent
# Put the parent directory at the front of sys.path (only if it is not already
# listed), presumably so the `observatory.*` imports that follow can be
# resolved when this file is run directly.
# NOTE(review): this assumes the file lives inside the `observatory` package
# directory - confirm against the repository layout.
if str(_PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(_PROJECT_ROOT))
from observatory.metrics_logger import MetricsLogger
from observatory.performance_tracker import PerformanceTracker
from observatory.dataset_quality_monitor import DatasetQualityMonitor
class Dashboard:
    """ASCII dashboard for the Codette training lab.

    Every ``_*_section`` method returns a list of text lines framed by ``|``
    borders; :meth:`render` concatenates all sections into a single string.

    Instance attributes (set in ``__init__``):
        logger: ``MetricsLogger`` built from ``metrics_log``.
        tracker: ``PerformanceTracker`` wrapping ``logger``.
        quality_monitor: ``DatasetQualityMonitor`` built from ``quality_log``.
        eval_results_path: path of an evaluation-results JSON file, or ``None``.
    """

    # Total width of a framed line, including the two border characters.
    WIDTH = 76

    def __init__(
        self,
        metrics_log: Optional[str] = None,
        quality_log: Optional[str] = None,
        eval_results: Optional[str] = None,
    ):
        """Create the data sources the dashboard reads from.

        Args:
            metrics_log: forwarded to ``MetricsLogger(log_file=...)``.
            quality_log: forwarded to ``DatasetQualityMonitor(quality_file=...)``.
            eval_results: path read by :meth:`_failure_rates_section`.
        """
        self.logger = MetricsLogger(log_file=metrics_log)
        self.tracker = PerformanceTracker(logger=self.logger)
        self.quality_monitor = DatasetQualityMonitor(quality_file=quality_log)
        self.eval_results_path = eval_results

    # -- layout helpers ------------------------------------------------------

    def _line(self, content: str) -> str:
        """Frame ``content`` with ``|`` borders, left-justified to the inner width.

        Content longer than the inner width is not truncated, so such a line
        ends up wider than ``WIDTH``.
        """
        return "|" + content.ljust(self.WIDTH - 2) + "|"

    def _header(self) -> List[str]:
        """Return the banner: blank line, rule, title, current UTC time, rule."""
        # Local import: only this method needs ``timezone``.
        from datetime import timezone

        inner = self.WIDTH - 2
        rule = "+" + "=" * inner + "+"
        # Timezone-aware replacement for the deprecated ``datetime.utcnow()``;
        # the rendered text is the same.
        stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
        return [
            "",
            rule,
            "|" + " CODETTE TRAINING LAB OBSERVATORY ".center(inner) + "|",
            "|" + f" {stamp} ".center(inner) + "|",
            rule,
        ]

    def _section(self, title: str) -> List[str]:
        """Return a section heading: blank line, rule, title row, rule."""
        rule = "+" + "-" * (self.WIDTH - 2) + "+"
        return ["", rule, self._line(f" {title} "), rule]

    def _row(self, label: str, value: Any) -> str:
        """Single label: value row.

        ``value`` is interpolated with an f-string, so any object is accepted.
        """
        return self._line(f"  {label:<30s} {value}")

    def _bar_row(self, label: str, value: float, max_width: int = 30) -> str:
        """Row with ASCII progress bar.

        Args:
            label: text shown in the left column.
            value: number printed with three decimals; the bar treats it as a
                fraction of ``max_width``.
            max_width: number of cells between the brackets.

        The printed number is always the raw ``value``. Only the bar is
        clamped: values above 1 draw a full bar, values below 0 (and NaN)
        draw an empty one, so the bar always has exactly ``max_width`` cells.
        """
        if 0.0 <= value <= 1.0:
            ratio = value
        elif value > 1.0:
            ratio = 1.0
        else:
            # Negative values and NaN (every comparison above is False for NaN).
            ratio = 0.0
        filled = int(ratio * max_width)
        bar = "[" + "#" * filled + "." * (max_width - filled) + "]"
        return self._line(f"  {label:<22s} {value:>6.3f} {bar}")

    def _empty_row(self) -> str:
        """Return a framed row containing only spaces."""
        return "|" + " " * (self.WIDTH - 2) + "|"

    def _footer(self) -> List[str]:
        """Return the closing rule followed by an empty line."""
        return ["+" + "=" * (self.WIDTH - 2) + "+", ""]

    # -- sections ------------------------------------------------------------

    def _latest_training_section(self) -> List[str]:
        """Render the entry returned by ``self.logger.get_latest()``.

        Shows a status row when that call returns a falsy value. At most the
        first six items of ``training_params`` are listed.
        """
        lines = self._section("LATEST TRAINING RUN")
        latest = self.logger.get_latest()
        if not latest:
            lines.append(self._row("Status", "No training runs logged yet"))
            return lines

        lines.append(self._row("Adapter", latest.get("adapter", "N/A")))
        lines.append(self._row("Timestamp", latest.get("timestamp", "N/A")))
        lines.append(self._row("Dataset Version", latest.get("dataset_version", "N/A")))
        lines.append(self._row("Dataset Size", str(latest.get("dataset_size", 0))))
        lines.append(self._row("Epoch", str(latest.get("epoch", 0))))
        lines.append(self._bar_row("Reasoning Score", latest.get("reasoning_score", 0)))
        lines.append(self._row("Loss", f"{latest.get('loss', 0):.6f}"))

        params = latest.get("training_params", {})
        if params:
            lines.append(self._empty_row())
            lines.append(self._row("Training Parameters", ""))
            for k, v in list(params.items())[:6]:
                lines.append(self._row(f"  {k}", str(v)))
        return lines

    def _best_adapters_section(self) -> List[str]:
        """Render a table of ``self.tracker.best_adapters(top_n=5)``."""
        lines = self._section("TOP ADAPTERS")
        best = self.tracker.best_adapters(top_n=5)
        if not best:
            lines.append(self._row("Status", "No adapter data available"))
            return lines

        # Table header
        lines.append(self._line(
            f"  {'#':<3} {'Adapter':<26} {'Score':>7} {'Loss':>7} {'Epoch':>5}"
        ))
        lines.append(self._line(
            f"  {'--':<3} {'------':<26} {'-----':>7} {'----':>7} {'-----':>5}"
        ))
        for i, entry in enumerate(best, 1):
            # Names are cut to 25 characters to fit the 26-wide column.
            name = entry.get("adapter", "?")[:25]
            score = entry.get("reasoning_score", 0)
            loss = entry.get("loss", 0)
            epoch = entry.get("epoch", 0)
            lines.append(self._line(
                f"  {i:<3} {name:<26} {score:>7.4f} {loss:>7.4f} {epoch:>5}"
            ))
        return lines

    def _dataset_quality_section(self) -> List[str]:
        """Render ``self.quality_monitor.get_latest()`` plus any regressions.

        Regressions come from ``self.quality_monitor.check_latest_regressions()``;
        each is printed as ``[SEVERITY] metric: +/-change%``.
        """
        lines = self._section("DATASET QUALITY")
        latest = self.quality_monitor.get_latest()
        if not latest:
            lines.append(self._row("Status", "No quality data recorded"))
            return lines

        lines.append(self._row("Dataset Version", latest.get("dataset_version", "N/A")))
        lines.append(self._row("Total Examples", str(latest.get("total_examples", 0))))
        lines.append(self._row("Valid Examples", str(latest.get("valid_examples", 0))))
        lines.append(self._bar_row("Validity Rate", latest.get("validity_rate", 0)))
        lines.append(self._row("Avg Response Length", f"{latest.get('avg_response_length', 0):.1f} words"))
        lines.append(self._row("Duplicate Rate", f"{latest.get('duplicate_rate', 0):.2%}"))
        lines.append(self._row("Near-Duplicate Rate", f"{latest.get('near_duplicate_rate', 0):.2%}"))
        # The diversity value is multiplied by 10 and capped at 1.0 before it
        # is handed to the bar; the scaled value is also what gets printed.
        lines.append(self._bar_row("Topic Diversity", min(latest.get("topic_diversity", 0) * 10, 1.0)))
        lines.append(self._row("Topic Concentration", f"{latest.get('topic_concentration', 0):.2%}"))

        # Regressions
        regressions = self.quality_monitor.check_latest_regressions()
        if regressions:
            lines.append(self._empty_row())
            for r in regressions:
                sev = r["severity"].upper()
                lines.append(self._line(
                    f"  [{sev}] {r['metric']}: {r['percent_change']:+.1f}%"
                ))
        return lines

    def _improvement_trends_section(self) -> List[str]:
        """Render up to five entries of ``self.tracker.improvement_trends()``.

        The leading indicator is ``^`` for a delta above 0.01, ``v`` for a
        delta below -0.01 and ``=`` otherwise.
        """
        lines = self._section("IMPROVEMENT TRENDS")
        trends = self.tracker.improvement_trends()
        if not trends:
            lines.append(self._row("Status", "Insufficient data for trends"))
            return lines

        for t in trends[:5]:
            name = t["adapter"][:22]
            delta = t["delta"]
            pct = t["percent_change"]
            runs = t["num_runs"]
            indicator = "^" if delta > 0.01 else ("v" if delta < -0.01 else "=")
            # Each number carries its own sign (``:+``) so a percent change
            # whose sign differs from the delta's is never shown as "+-x%".
            row = (f"  {indicator} {name:<22} "
                   f"delta: {delta:+.4f}  "
                   f"({pct:+.1f}%)  "
                   f"[{runs} runs]")
            lines.append(self._line(row))
        return lines

    def _failure_rates_section(self) -> List[str]:
        """Render overall and per-category scores from the evaluation JSON file.

        A status row is shown instead when no path was configured, when the
        file does not exist, or when it cannot be read or parsed as a JSON
        object. Categories scoring below 0.4 are marked ``*`` and those below
        0.55 are marked ``~``.
        """
        lines = self._section("EVALUATION FAILURE RATES")
        if not self.eval_results_path:
            lines.append(self._row("Status", "No evaluation results file specified"))
            return lines

        try:
            with open(self.eval_results_path, "r", encoding="utf-8") as f:
                results = json.load(f)
        except FileNotFoundError:
            # A path was given, so "not specified" would be misleading here.
            lines.append(self._row("Status", "Evaluation results file not found"))
            return lines
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            lines.append(self._row("Status", "Could not load evaluation results"))
            return lines

        if not isinstance(results, dict):
            # Valid JSON, but not the object the lookups below require.
            lines.append(self._row("Status", "Could not load evaluation results"))
            return lines

        # Overall score
        overall = results.get("overall", {})
        if overall:
            lines.append(self._bar_row("Overall Score", overall.get("overall", 0)))
            lines.append(self._empty_row())

        # Per-category scores
        categories = results.get("categories", {})
        if categories:
            lines.append(self._line(f"  {'Category':<20} {'Score':>7} {'Prompts':>8}"))
            lines.append(self._line(f"  {'--------':<20} {'-----':>7} {'-------':>8}"))
            for cat, data in sorted(categories.items()):
                avg = data.get("average_scores", {}).get("overall", 0)
                n = data.get("prompts_scored", 0)
                status = "*" if avg < 0.4 else ("~" if avg < 0.55 else " ")
                lines.append(self._line(f"  {status}{cat:<19} {avg:>7.4f} {n:>8}"))
            lines.append(self._empty_row())
            lines.append(self._line("  * = failing, ~ = weak"))
        return lines

    def _sparkline_section(self) -> List[str]:
        """Render one sparkline row for each of the first six adapters.

        Adapters come from ``self.logger.get_unique_adapters()``; those for
        which ``self.tracker.score_progression`` returns nothing are skipped.
        Each row ends with the first and last score of the progression.
        """
        lines = self._section("SCORE HISTORY")
        adapters = self.logger.get_unique_adapters()
        if not adapters:
            lines.append(self._row("Status", "No history data"))
            return lines

        for adapter in adapters[:6]:
            progression = self.tracker.score_progression(adapter)
            if not progression:
                continue
            scores = [p["reasoning_score"] for p in progression]
            spark = PerformanceTracker._sparkline(scores, width=30)
            name = adapter[:20]
            lines.append(self._line(
                f"  {name:<21} {spark} [{scores[0]:.3f}->{scores[-1]:.3f}]"
            ))
        return lines

    # -- main render ---------------------------------------------------------

    def render(self) -> str:
        """Render the complete dashboard."""
        builders = (
            self._header,
            self._latest_training_section,
            self._best_adapters_section,
            self._dataset_quality_section,
            self._improvement_trends_section,
            self._failure_rates_section,
            self._sparkline_section,
            self._footer,
        )
        all_lines: List[str] = []
        for build in builders:
            all_lines.extend(build())
        return "\n".join(all_lines)
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def main() -> None:
    """Parse the command line and print the dashboard, or one section of it.

    Options:
        --metrics-log / -m: passed to ``Dashboard`` as ``metrics_log``.
        --quality-log / -q: passed to ``Dashboard`` as ``quality_log``.
        --eval-results / -e: passed to ``Dashboard`` as ``eval_results``.
        --section / -s: which section to print; ``all`` (the default) prints
            the full ``Dashboard.render()`` output, any other choice prints
            that section between the header and the footer.
    """
    parser = argparse.ArgumentParser(
        description="Codette Observatory Dashboard - ASCII system status display"
    )

    # The three file options differ only in their flags and help text.
    path_options = (
        ("--metrics-log", "-m", "Path to observatory_metrics.json"),
        ("--quality-log", "-q", "Path to dataset_quality_log.json"),
        ("--eval-results", "-e", "Path to benchmark evaluation results JSON"),
    )
    for long_flag, short_flag, help_text in path_options:
        parser.add_argument(long_flag, short_flag, default=None, help=help_text)

    parser.add_argument(
        "--section", "-s",
        choices=["training", "adapters", "quality", "trends", "failures", "history", "all"],
        default="all",
        help="Show only a specific section (default: all)",
    )
    args = parser.parse_args()

    dashboard = Dashboard(
        metrics_log=args.metrics_log,
        quality_log=args.quality_log,
        eval_results=args.eval_results,
    )

    if args.section == "all":
        print(dashboard.render())
        return

    # Map each single-section choice to the method that renders it.
    renderers = {
        "training": dashboard._latest_training_section,
        "adapters": dashboard._best_adapters_section,
        "quality": dashboard._dataset_quality_section,
        "trends": dashboard._improvement_trends_section,
        "failures": dashboard._failure_rates_section,
        "history": dashboard._sparkline_section,
    }
    renderer = renderers.get(args.section)
    if renderer is None:
        return

    # Header, the chosen section, then the footer - built in that order.
    framed = [*dashboard._header(), *renderer(), *dashboard._footer()]
    print("\n".join(framed))


if __name__ == "__main__":
    main()