<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Memory Routing Training Dashboard</title>
  <!-- Chart.js draws both line charts. The URL is pinned to major version 4 so that
       a future breaking release on the CDN cannot silently change or break the page. -->
  <script src="https://cdn.jsdelivr.net/npm/chart.js@4"></script>
  <style>
    /* Reset default spacing and size every box by its border edge. */
    * {
      margin: 0;
      padding: 0;
      box-sizing: border-box;
    }

    /* Dark page theme with a monospace font stack. */
    body {
      font-family: 'SF Mono', 'Menlo', 'Monaco', monospace;
      background: #0d1117;
      color: #c9d1d9;
      padding: 20px;
    }

    /* Page banner: title plus one-line subtitle. */
    .header {
      text-align: center;
      padding: 30px 0;
      border-bottom: 1px solid #30363d;
      margin-bottom: 30px;
    }

    .header h1 {
      color: #58a6ff;
      font-size: 28px;
      font-weight: 600;
    }

    .header p {
      color: #8b949e;
      margin-top: 10px;
    }

    /* Card layout. min(500px, 100%) keeps the 500px preferred column width on wide
       screens but lets a column shrink to the container width on narrow viewports,
       so the page never overflows horizontally. */
    .grid {
      display: grid;
      grid-template-columns: repeat(auto-fit, minmax(min(500px, 100%), 1fr));
      gap: 20px;
      max-width: 1400px;
      margin: 0 auto;
    }

    .card {
      background: #161b22;
      border: 1px solid #30363d;
      border-radius: 8px;
      padding: 20px;
    }

    .card h2 {
      color: #58a6ff;
      font-size: 16px;
      margin-bottom: 15px;
      padding-bottom: 10px;
      border-bottom: 1px solid #30363d;
    }

    /* Fixed-height, positioned wrapper; the charts are configured with
       maintainAspectRatio: false and size themselves to this box. */
    .chart-container {
      height: 300px;
      position: relative;
    }

    /* Three-column grid of metric tiles. */
    .metrics-grid {
      display: grid;
      grid-template-columns: repeat(3, 1fr);
      gap: 15px;
    }

    .metric {
      background: #0d1117;
      padding: 15px;
      border-radius: 6px;
      text-align: center;
    }

    .metric-value {
      font-size: 28px;
      font-weight: bold;
      color: #3fb950;
    }

    .metric-label {
      font-size: 12px;
      color: #8b949e;
      margin-top: 5px;
    }

    /* SFT vs RL comparison table. */
    .comparison-table {
      width: 100%;
      border-collapse: collapse;
      margin-top: 10px;
    }

    .comparison-table th, .comparison-table td {
      padding: 12px;
      text-align: left;
      border-bottom: 1px solid #30363d;
    }

    .comparison-table th {
      color: #8b949e;
      font-weight: normal;
    }

    .comparison-table td {
      color: #c9d1d9;
    }

    /* Applied by the page script to a difference cell when the RL value is higher. */
    .highlight {
      color: #3fb950;
      font-weight: bold;
    }

    /* Makes a card span every grid column. */
    .full-width {
      grid-column: 1 / -1;
    }

    .timestamp {
      text-align: center;
      color: #8b949e;
      font-size: 12px;
      margin-top: 30px;
    }
  </style>
</head>
<body>
<!-- Page banner. A <header> landmark replaces the generic <div>; the class is kept
     so the existing .header styles still apply. -->
<header class="header">
  <h1>Memory Routing Agent Training</h1>
  <p>Llama-3.1-8B + LoRA (rank 32) | SFT + RL Training Pipeline</p>
</header>
<!-- Main dashboard content. Element ids are read by the inline script at the end of
     the page and must stay unchanged. -->
<main class="grid">
  <!-- Each canvas carries role="img" and an aria-label so assistive technology gets
       a description of the chart instead of an unnamed canvas. -->
  <section class="card">
    <h2>Phase 1: Supervised Fine-Tuning Loss</h2>
    <div class="chart-container">
      <canvas id="sftChart" role="img" aria-label="Line chart of train loss and test loss per step"></canvas>
    </div>
  </section>

  <section class="card">
    <h2>Phase 2: RL Reward Progression</h2>
    <div class="chart-container">
      <canvas id="rlChart" role="img" aria-label="Line chart of mean reward and accuracy per iteration"></canvas>
    </div>
  </section>

  <!-- Metric tiles. "--" is the placeholder shown until the script fills the value. -->
  <section class="card full-width">
    <h2>Final Model Performance</h2>
    <div class="metrics-grid">
      <div class="metric">
        <div class="metric-value" id="f1-score">--</div>
        <div class="metric-label">F1 Score</div>
      </div>
      <div class="metric">
        <div class="metric-value" id="precision">--</div>
        <div class="metric-label">Precision</div>
      </div>
      <div class="metric">
        <div class="metric-value" id="recall">--</div>
        <div class="metric-label">Recall</div>
      </div>
      <div class="metric">
        <div class="metric-value" id="any-match">--</div>
        <div class="metric-label">Any Match</div>
      </div>
      <div class="metric">
        <div class="metric-value" id="exact-match">--</div>
        <div class="metric-label">Exact Match</div>
      </div>
      <div class="metric">
        <div class="metric-value" id="mean-reward">--</div>
        <div class="metric-label">Mean Reward</div>
      </div>
    </div>
  </section>

  <!-- The table takes its accessible name from the card heading. -->
  <section class="card full-width">
    <h2 id="comparison-heading">Model Comparison: SFT vs RL</h2>
    <table class="comparison-table" aria-labelledby="comparison-heading">
      <thead>
        <tr>
          <th scope="col">Metric</th>
          <th scope="col">SFT Model</th>
          <th scope="col">RL Model</th>
          <th scope="col">Improvement</th>
        </tr>
      </thead>
      <tbody id="comparison-body">
        <tr>
          <td>F1 Score</td>
          <td id="sft-f1">--</td>
          <td id="rl-f1">--</td>
          <td id="diff-f1">--</td>
        </tr>
        <tr>
          <td>Any Match Accuracy</td>
          <td id="sft-any">--</td>
          <td id="rl-any">--</td>
          <td id="diff-any">--</td>
        </tr>
        <tr>
          <td>Exact Match</td>
          <td id="sft-exact">--</td>
          <td id="rl-exact">--</td>
          <td id="diff-exact">--</td>
        </tr>
        <tr>
          <td>Temporal Alignment</td>
          <td id="sft-temp">--</td>
          <td id="rl-temp">--</td>
          <td id="diff-temp">--</td>
        </tr>
      </tbody>
    </table>
  </section>
</main>
<!-- Report generation time. <time> adds a machine-readable form of the same value;
     the visible text is unchanged. -->
<footer class="timestamp">
  Generated: <time datetime="2025-11-24T16:51:34">2025-11-24 16:51:34</time>
</footer>
<script>
// Phase 1 chart: train and test loss curves drawn on the #sftChart canvas.
const sftCtx = document.getElementById('sftChart').getContext('2d');

// Colours reused for legend text, axis titles, tick labels and grid lines.
const sftTextColor = '#8b949e';
const sftGridColor = '#30363d';

// Both axes are styled the same way and differ only in their title text.
const sftAxis = (titleText) => ({
  title: { display: true, text: titleText, color: sftTextColor },
  ticks: { color: sftTextColor },
  grid: { color: sftGridColor }
});

// Every loss curve is a filled, lightly smoothed line; only the label, the values
// and the two colours vary.
const sftSeries = (label, data, borderColor, backgroundColor) => ({
  label,
  data,
  borderColor,
  backgroundColor,
  fill: true,
  tension: 0.3
});

const sftTrainLoss = [2.5, 2.48, 2.46, 2.44, 2.42, 2.4, 2.38, 2.36, 2.34, 2.32, 2.3, 2.28, 2.26, 2.24, 2.2199999999999998, 2.2, 2.18, 2.16, 2.14, 2.12, 2.1, 2.08, 2.06, 2.04, 2.02, 2.0, 1.98, 1.96, 1.94, 1.92, 1.9, 1.88, 1.8599999999999999, 1.8399999999999999, 1.8199999999999998, 1.7999999999999998, 1.78, 1.76, 1.74, 1.72, 1.7, 1.68, 1.6600000000000001, 1.6400000000000001, 1.62, 1.6, 1.58, 1.56, 1.54, 1.52, 1.5, 1.48, 1.46, 1.44, 1.42, 1.4, 1.38, 1.3599999999999999, 1.34, 1.32, 1.3, 1.28, 1.26, 1.24, 1.22, 1.2, 1.18, 1.16, 1.14, 1.1199999999999999, 1.0999999999999999, 1.08, 1.06, 1.04, 1.02, 1.0, 0.98, 0.96, 0.94, 0.9199999999999999, 0.8999999999999999, 0.8799999999999999, 0.8599999999999999, 0.8399999999999999, 0.8200000000000001, 0.8, 0.78, 0.76, 0.74, 0.72, 0.7, 0.6799999999999999, 0.6599999999999999, 0.6399999999999999, 0.6199999999999999, 0.5999999999999999, 0.5800000000000001, 0.56, 0.54, 0.52];

// NOTE(review): there are only 10 test-loss values against 100 step labels, so these
// points are drawn at steps 0-9. If test loss was actually recorded at a wider
// interval, the generator should emit matching step positions - confirm upstream.
const sftTestLoss = [2.6, 2.42, 2.24, 2.06, 1.8800000000000001, 1.7000000000000002, 1.5200000000000002, 1.34, 1.1600000000000001, 0.9800000000000002];

new Chart(sftCtx, {
  type: 'line',
  data: {
    // Step labels 0..99, one per train-loss value.
    labels: Array.from({ length: 100 }, (_, step) => step),
    datasets: [
      sftSeries('Train Loss', sftTrainLoss, '#58a6ff', 'rgba(88, 166, 255, 0.1)'),
      sftSeries('Test Loss', sftTestLoss, '#f85149', 'rgba(248, 81, 73, 0.1)')
    ]
  },
  options: {
    responsive: true,
    // The chart fills its fixed-height container instead of keeping its own ratio.
    maintainAspectRatio: false,
    plugins: {
      legend: {
        labels: { color: sftTextColor }
      }
    },
    scales: {
      x: sftAxis('Step'),
      y: sftAxis('Loss')
    }
  }
});
// Phase 2 chart: mean reward (left axis) and accuracy in percent (right axis),
// drawn on the #rlChart canvas.
const rlCtx = document.getElementById('rlChart').getContext('2d');

// Colours reused for legend text, axis titles, tick labels and grid lines.
const rlTextColor = '#8b949e';
const rlGridColor = '#30363d';

// Title and tick styling common to all three axes.
const rlAxisText = (titleText) => ({
  title: { display: true, text: titleText, color: rlTextColor },
  ticks: { color: rlTextColor }
});

// A linear value axis on the given side of the chart; `grid` is passed through as-is.
const rlValueAxis = (position, titleText, grid) => ({
  type: 'linear',
  position,
  ...rlAxisText(titleText),
  grid
});

// A filled, lightly smoothed line bound to one of the two value axes.
const rlSeries = (label, data, borderColor, backgroundColor, yAxisID) => ({
  label,
  data,
  borderColor,
  backgroundColor,
  fill: true,
  tension: 0.3,
  yAxisID
});

const rlMeanReward = [0.3, 0.32999999999999996, 0.36, 0.39, 0.42, 0.44999999999999996, 0.48, 0.51, 0.54, 0.5700000000000001, 0.6, 0.6299999999999999, 0.6599999999999999, 0.69, 0.72];
const rlAccuracyPct = [50.0, 52.0, 54.0, 56.00000000000001, 57.99999999999999, 60.0, 62.0, 64.0, 66.0, 68.0, 70.0, 72.0, 74.0, 76.0, 78.0];

new Chart(rlCtx, {
  type: 'line',
  data: {
    // Iteration labels 0..14, one per data point.
    labels: Array.from({ length: 15 }, (_, iteration) => iteration),
    datasets: [
      rlSeries('Mean Reward', rlMeanReward, '#3fb950', 'rgba(63, 185, 80, 0.1)', 'y'),
      rlSeries('Accuracy (%)', rlAccuracyPct, '#a371f7', 'rgba(163, 113, 247, 0.1)', 'y1')
    ]
  },
  options: {
    responsive: true,
    // The chart fills its fixed-height container instead of keeping its own ratio.
    maintainAspectRatio: false,
    plugins: {
      legend: {
        labels: { color: rlTextColor }
      }
    },
    scales: {
      x: { ...rlAxisText('Iteration'), grid: { color: rlGridColor } },
      y: rlValueAxis('left', 'Reward', { color: rlGridColor }),
      // The right-hand axis draws no grid lines across the chart area, so only the
      // left axis contributes horizontal lines.
      y1: rlValueAxis('right', 'Accuracy (%)', { drawOnChartArea: false })
    }
  }
});
// Evaluation metrics embedded in the page. Every value except mean_reward is
// multiplied by 100 and shown as a percentage; mean_reward is shown as a raw number.
const evalResults = {"sft": {"f1": 0.69, "precision": 0.76, "recall": 0.63, "any_match": 0.86, "exact_match": 0.42, "temporal_match": 0.75}, "rl": {"f1": 0.78, "precision": 0.82, "recall": 0.74, "any_match": 0.91, "exact_match": 0.52, "temporal_match": 0.82, "mean_reward": 0.72}};

/**
 * Returns true when `value` is a finite number.
 * A score of exactly 0 counts as a valid metric, unlike a plain truthiness check.
 */
function isMetric(value) {
  return typeof value === 'number' && Number.isFinite(value);
}

/** Formats a fraction as a percentage with one decimal place, e.g. 0.78 -> "78.0%". */
function asPercent(fraction) {
  return (fraction * 100).toFixed(1) + '%';
}

/**
 * Writes `format(value)` into the element with the given id.
 * A missing or non-numeric value is skipped, so the "--" placeholder from the markup
 * stays visible instead of "NaN%" or an exception that aborts the rest of the script.
 * Returns true when the cell was filled.
 */
function fillCell(id, value, format) {
  if (!isMetric(value)) {
    return false;
  }
  document.getElementById(id).textContent = format(value);
  return true;
}

// Summary tiles: always show the RL model's metrics.
if (evalResults && evalResults.rl) {
  const rlMetrics = evalResults.rl;
  fillCell('f1-score', rlMetrics.f1, asPercent);
  fillCell('precision', rlMetrics.precision, asPercent);
  fillCell('recall', rlMetrics.recall, asPercent);
  fillCell('any-match', rlMetrics.any_match, asPercent);
  fillCell('exact-match', rlMetrics.exact_match, asPercent);
  fillCell('mean-reward', rlMetrics.mean_reward, (reward) => reward.toFixed(3));
}

// Comparison table: one row per metric, with the SFT value, the RL value and the
// difference between them in percentage points.
if (evalResults && evalResults.sft && evalResults.rl) {
  const sft = evalResults.sft;
  const rl = evalResults.rl;

  // [key in evalResults, suffix of the sft-/rl-/diff- element ids]
  const comparisonRows = [
    ['f1', 'f1'],
    ['any_match', 'any'],
    ['exact_match', 'exact'],
    ['temporal_match', 'temp']
  ];

  for (const [metricKey, idSuffix] of comparisonRows) {
    const sftValue = sft[metricKey];
    const rlValue = rl[metricKey];
    const hasSft = fillCell('sft-' + idSuffix, sftValue, asPercent);
    const hasRl = fillCell('rl-' + idSuffix, rlValue, asPercent);

    // A difference is only meaningful when both models report the metric.
    if (hasSft && hasRl) {
      const diffCell = document.getElementById('diff-' + idSuffix);
      diffCell.textContent = asPercent(rlValue - sftValue);
      // Highlight only a strict improvement of RL over SFT.
      diffCell.className = rlValue > sftValue ? 'highlight' : '';
    }
  }
}
</script>
</body>
</html>