<!-- OutOfMystic's picture | Rewrite play.html for per-piece training mode | cbba880 -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Tetris OpenEnv — Per-Piece Training Mode</title>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body {
background: #1a1a2e;
color: #e0e0e0;
font-family: 'Courier New', monospace;
display: flex;
justify-content: center;
align-items: center;
min-height: 100vh;
}
.game-container {
display: flex;
gap: 24px;
align-items: flex-start;
}
.board-wrap {
background: #16213e;
border: 2px solid #0f3460;
border-radius: 8px;
padding: 8px;
}
.board {
display: grid;
grid-template-columns: repeat(10, 28px);
grid-template-rows: repeat(20, 28px);
gap: 1px;
background: #0a0a1a;
}
.cell {
width: 28px;
height: 28px;
border-radius: 3px;
}
.cell-empty { background: #16213e; }
.cell-placed { background: #e94560; border: 1px solid #ff6b81; }
.cell-current { background: #53d769; border: 1px solid #7bff8e; }
.sidebar {
display: flex;
flex-direction: column;
gap: 10px;
width: 270px;
min-width: 270px;
max-width: 270px;
}
.panel {
background: #16213e;
border: 2px solid #0f3460;
border-radius: 8px;
padding: 10px;
}
.panel h3 {
color: #e94560;
margin-bottom: 5px;
font-size: 12px;
text-transform: uppercase;
}
.stat-row {
display: flex;
justify-content: space-between;
margin: 2px 0;
font-size: 12px;
}
.stat-value { color: #53d769; font-weight: bold; }
.stat-value.warn { color: #e94560; }
.next-piece {
display: grid;
grid-template-columns: repeat(4, 20px);
gap: 1px;
margin-top: 6px;
}
.next-cell {
width: 20px;
height: 20px;
border-radius: 2px;
background: #0a0a1a;
}
.next-cell.filled { background: #e94560; border: 1px solid #ff6b81; }
.controls {
display: grid;
grid-template-columns: repeat(3, 1fr);
gap: 5px;
}
.controls button {
background: #0f3460;
color: #e0e0e0;
border: 1px solid #e94560;
border-radius: 6px;
padding: 8px 4px;
font-family: 'Courier New', monospace;
font-size: 11px;
cursor: pointer;
transition: background 0.15s;
}
.controls button:hover { background: #e94560; }
.controls button:active { background: #ff6b81; }
.controls button.wide { grid-column: span 3; }
.status {
font-size: 12px;
text-align: center;
padding: 8px;
border-radius: 6px;
}
.status.connected { background: #1b4332; color: #53d769; }
.status.disconnected { background: #4a1525; color: #e94560; }
.status.gameover { background: #4a1525; color: #ff6b81; font-size: 13px; font-weight: bold; }
.status.complete { background: #1b4332; color: #53d769; font-size: 13px; font-weight: bold; }
.progress-bar {
background: #0a0a1a;
border-radius: 4px;
height: 18px;
overflow: hidden;
position: relative;
}
.progress-fill {
background: linear-gradient(90deg, #53d769, #e94560);
height: 100%;
transition: width 0.15s;
border-radius: 4px;
}
.progress-text {
position: absolute;
top: 0; left: 0; right: 0; bottom: 0;
display: flex;
align-items: center;
justify-content: center;
font-size: 10px;
font-weight: bold;
}
.training-reward {
text-align: center;
font-size: 22px;
font-weight: bold;
padding: 4px;
}
.reward-pos { color: #53d769; }
.reward-neg { color: #e94560; }
.reward-neutral { color: #888; }
.action-log {
font-size: 11px;
color: #888;
word-break: break-all;
max-height: 60px;
overflow-y: auto;
overflow-x: hidden;
margin-top: 4px;
}
.piece-badge {
display: inline-block;
background: #0f3460;
color: #53d769;
border-radius: 3px;
padding: 1px 6px;
font-size: 11px;
font-weight: bold;
}
.forced-drop {
color: #e94560;
font-size: 10px;
font-weight: bold;
text-align: center;
margin-top: 3px;
}
</style>
</head>
<body>
<div class="game-container">
<div class="board-wrap">
<div class="board" id="board"></div>
</div>
<div class="sidebar">
<!-- Connection / game status banner. setStatus() rewrites its class and text
     asynchronously; role="status" (an implicit polite live region) lets
     assistive technology announce those changes. -->
<div id="status" class="status disconnected" role="status">Connecting...</div>
<!-- Current piece situation -->
<div class="panel">
<h3>Current Piece <span class="piece-badge" id="piece-name">?</span></h3>
<div class="stat-row">
<span>Actions this piece</span>
<span class="stat-value" id="piece-actions">0 / 20</span>
</div>
<div class="progress-bar">
<div class="progress-fill" id="piece-progress" style="width:0%"></div>
<div class="progress-text" id="piece-progress-text">0 / 20</div>
</div>
<div class="action-log" id="piece-action-log"></div>
<div class="forced-drop" id="forced-drop-msg"></div>
</div>
<!-- Game progress -->
<div class="panel">
<h3>Game Progress</h3>
<div class="stat-row"><span>Total steps</span><span class="stat-value" id="total-steps">0 / 200</span></div>
<div class="stat-row"><span>Pieces placed</span><span class="stat-value" id="pieces-placed">0</span></div>
<div class="stat-row"><span>Lines cleared</span><span class="stat-value" id="lines">0</span></div>
<div class="progress-bar" style="margin-top:4px">
<div class="progress-fill" id="game-progress" style="width:0%"></div>
<div class="progress-text" id="game-progress-text">0 / 200</div>
</div>
</div>
<!-- Training reward -->
<div class="panel">
<h3>Training Reward</h3>
<div class="training-reward reward-neutral" id="training-reward">-</div>
<div style="font-size:9px; color:#666; margin-top:4px;">
engine_rewards + lines*100/line<br>
L/R: -0.1 | placed: +1 | no-place: -10
</div>
</div>
<div class="panel">
<h3>Board State</h3>
<div class="stat-row"><span>Score</span><span class="stat-value" id="score">0</span></div>
<div class="stat-row"><span>Height</span><span class="stat-value" id="height">0</span></div>
<div class="stat-row"><span>Holes</span><span class="stat-value" id="holes">0</span></div>
</div>
<div class="panel">
<h3>Next Piece</h3>
<div class="next-piece" id="next-piece"></div>
</div>
<!-- On-screen controls. Each button calls send() with one action name; the
     leading letter in each label is the matching keyboard shortcut.
     type="button" is explicit so the buttons never act as submit buttons. -->
<div class="panel">
  <h3>Controls</h3>
  <div class="controls">
    <button type="button" onclick="send('rotate_ccw')">W: CCW &#x21BA;</button>
    <button type="button" onclick="send('drop')">D: DROP</button>
    <button type="button" onclick="send('rotate_cw')">C: CW &#x21BB;</button>
    <button type="button" onclick="send('left')">L: &#x2190; Left</button>
    <button type="button" onclick="send('down')">S: &#x2193; Down</button>
    <button type="button" onclick="send('right')">R: Right &#x2192;</button>
    <button type="button" class="wide" onclick="doReset()" style="background:#4a1525">NEW GAME &nbsp;(N)</button>
  </div>
</div>
<div class="panel" style="font-size:9px; color:#888;">
<b>Per-piece mode:</b> max 20 actions per piece, 200 steps total.<br>
If piece not placed in 20 actions: forced drop + penalty.
</div>
</div>
</div>
<script>
// Episode limits. Both are displayed by updateUI() and checked in handleObs().
const MAX_ACTIONS_PER_PIECE = 20;
const MAX_STEPS_PER_GAME = 200;
// Client-side reward modifiers combined with the server-reported reward:
// LR_PENALTY is added per 'left'/'right' action in send(); the other three
// are added in handleObs() (piece placed, forced drop, per cleared line).
const LR_PENALTY = -0.1;
const PIECE_PLACED_BONUS = 1.0;
const NO_PLACE_PENALTY = -10.0;
const LINE_CLEAR_BONUS = 100.0;
// Action name -> single-letter code appended to the per-piece action log.
const ACTION_CHARS = {
'left': 'L', 'right': 'R', 'rotate_cw': 'C',
'rotate_ccw': 'W', 'drop': 'D', 'down': 'S'
};
// Lower-cased KeyboardEvent.key -> action name sent to the server.
const KEY_TO_ACTION = {
'l': 'left', 'r': 'right', 'c': 'rotate_cw',
'w': 'rotate_ccw', 'd': 'drop', 's': 'down',
'arrowleft': 'left', 'arrowright': 'right', 'arrowdown': 'down',
' ': 'drop'
};
// Board container element and its grid dimensions (must match the CSS grid).
const boardEl = document.getElementById('board');
const COLS = 10, ROWS = 20;
// Active WebSocket, (re)assigned by connect().
let ws = null;
// gameOver: set when an observation reports done.
// episodeDone: set when totalSteps reaches MAX_STEPS_PER_GAME.
// Either flag makes send() ignore further actions until doReset().
let gameOver = false;
let episodeDone = false;
// Per-piece tracking: name of the piece currently in play, and the count and
// letter log of actions issued since that piece appeared.
let currentPieceName = null;
let pieceActionCount = 0;
let pieceActionLog = [];
// Game tracking: counters for the current game, cleared by doReset().
let totalSteps = 0;
let piecesPlaced = 0;
let totalLines = 0;
// Reward value rendered in the "Training Reward" panel.
let trainingReward = 0;
let engineRewardAccum = 0; // sum of server-reported rewards
// Most recent observation received; stored by handleObs().
let lastObs = null;
// Fill the board container with ROWS * COLS empty cells. handleObs() later
// addresses them through boardEl.children by index r * COLS + c.
Array.from({ length: ROWS * COLS }).forEach(() => {
  const emptyCell = document.createElement('div');
  emptyCell.className = 'cell cell-empty';
  boardEl.appendChild(emptyCell);
});
/**
 * Open the game WebSocket at `/ws` on the current host (wss: when the page is
 * served over https:, ws: otherwise) and store it in the global `ws`.
 *
 * - On open: shows "Connected" and starts a new game via doReset().
 * - On message: parses the frame as JSON and forwards `msg.data` to
 *   handleObs(). Frames that are not valid JSON, or that carry no `data`,
 *   are ignored instead of throwing inside the event handler.
 * - On close: shows a reconnect notice and retries after 2 seconds.
 * - On error: closes the socket so the close handler drives the retry.
 */
function connect() {
  const proto = location.protocol === 'https:' ? 'wss:' : 'ws:';
  const url = proto + '//' + location.host + '/ws';
  // Handlers close over this socket rather than the mutable global `ws`.
  const socket = new WebSocket(url);
  ws = socket;
  socket.onopen = () => { setStatus('connected', 'Connected'); doReset(); };
  socket.onmessage = (e) => {
    let msg;
    try {
      msg = JSON.parse(e.data);
    } catch (err) {
      console.warn('Ignoring non-JSON WebSocket frame', err);
      return;
    }
    // JSON.parse may legitimately yield null or a primitive.
    if (msg && msg.data) handleObs(msg.data);
  };
  socket.onclose = () => {
    setStatus('disconnected', 'Disconnected — reconnecting...');
    setTimeout(connect, 2000);
  };
  socket.onerror = () => socket.close();
}
/**
 * Show `text` in the status banner and restyle it.
 * @param {string} cls  Modifier class appended after the base `status` class.
 * @param {string} text Message to display (set as plain text).
 */
function setStatus(cls, text) {
  const banner = document.getElementById('status');
  banner.textContent = text;
  banner.className = `status ${cls}`;
}
/**
 * Send one player action to the server as a `step` message and update the
 * local per-piece / per-game counters.
 *
 * The call is a no-op when the socket is not open, when the game or episode
 * has ended, when `action` is not a key of ACTION_CHARS, or when the
 * MAX_STEPS_PER_GAME budget is already used up (the cap is otherwise only
 * noticed once the next observation arrives, so a fast key press could slip
 * an extra step through).
 *
 * @param {string} action One of the action names in ACTION_CHARS.
 */
function send(action) {
  if (!ws || ws.readyState !== WebSocket.OPEN) return;
  if (gameOver || episodeDone) return;
  // Reject unknown actions so `undefined` never reaches the log or server.
  if (!Object.prototype.hasOwnProperty.call(ACTION_CHARS, action)) return;
  if (totalSteps >= MAX_STEPS_PER_GAME) return;

  pieceActionCount++;
  totalSteps++;
  pieceActionLog.push(ACTION_CHARS[action]);

  // Horizontal moves carry a small client-side penalty.
  if (action === 'left' || action === 'right') {
    trainingReward += LR_PENALTY;
  }

  ws.send(JSON.stringify({type: 'step', data: {action: action, metadata: {}}}));
}
/**
 * Start a new game: clear all client-side tracking, refresh the sidebar and
 * ask the server to reset. Does nothing unless the socket is open.
 */
function doReset() {
  const socketOpen = ws && ws.readyState === 1;
  if (!socketOpen) return;

  // End-of-game flags.
  gameOver = episodeDone = false;

  // Per-piece tracking.
  currentPieceName = lastObs = null;
  pieceActionLog = [];

  // Counters and reward accumulators.
  pieceActionCount = totalSteps = piecesPlaced = totalLines = 0;
  trainingReward = engineRewardAccum = 0;

  // Reflect the cleared state before the first observation arrives.
  updateUI();
  document.getElementById('forced-drop-msg').textContent = '';
  setStatus('connected', 'Game ready — place pieces!');

  ws.send(JSON.stringify({type: 'reset', data: {}}));
}
/**
 * Redraw the sidebar from the current tracking variables: per-piece action
 * progress and log, game progress counters, and the training reward.
 */
function updateUI() {
  const byId = (id) => document.getElementById(id);

  // --- Current piece ---
  const pieceLabel = `${pieceActionCount} / ${MAX_ACTIONS_PER_PIECE}`;
  const piecePercent = Math.min(100, (pieceActionCount / MAX_ACTIONS_PER_PIECE) * 100);
  byId('piece-progress').style.width = `${piecePercent}%`;
  byId('piece-progress-text').textContent = pieceLabel;
  const actionsEl = byId('piece-actions');
  actionsEl.textContent = pieceLabel;
  // Highlight the counter once more than 15 actions have been used.
  actionsEl.className = pieceActionCount > 15 ? 'stat-value warn' : 'stat-value';
  byId('piece-action-log').textContent = pieceActionLog.join(' ');

  // --- Game progress ---
  const stepsLabel = `${totalSteps} / ${MAX_STEPS_PER_GAME}`;
  const gamePercent = Math.min(100, (totalSteps / MAX_STEPS_PER_GAME) * 100);
  byId('game-progress').style.width = `${gamePercent}%`;
  byId('game-progress-text').textContent = stepsLabel;
  byId('total-steps').textContent = stepsLabel;
  byId('pieces-placed').textContent = piecesPlaced;
  byId('lines').textContent = totalLines;

  // --- Training reward (explicit '+' prefix for non-negative values) ---
  const rewardEl = byId('training-reward');
  const nonNegative = trainingReward >= 0;
  const formatted = trainingReward.toFixed(1);
  rewardEl.textContent = nonNegative ? '+' + formatted : formatted;
  rewardEl.className = 'training-reward ' + (nonNegative ? 'reward-pos' : 'reward-neg');
}
/**
 * Apply one server payload to the client state and redraw the page.
 *
 * `data` is either the observation itself or a wrapper with `observation`,
 * `done` and `reward` fields. Fields read from the observation:
 * `current_piece`, `total_lines`, `board`, `score`, `max_height`, `holes`,
 * `next_piece_shape` (plus `done` / `reward` as fallbacks).
 *
 * Reward bookkeeping: trainingReward is a running total. This function adds
 * the step's engine reward, PIECE_PLACED_BONUS and LINE_CLEAR_BONUS per
 * cleared line when the piece changes, and NO_PLACE_PENALTY on a forced drop.
 * send() adds LR_PENALTY. The total is therefore incremented here, never
 * reassigned from engineRewardAccum, which would discard those modifiers.
 *
 * End-of-step handling, in priority order:
 *   1. `done`                      -> game over.
 *   2. step budget exhausted       -> episode complete (no further step sent).
 *   3. per-piece budget exhausted  -> penalty + automatic 'drop' step.
 *
 * NOTE(review): a placement is detected only by `current_piece` changing, so
 * two identical pieces in a row are not recognised as a placement.
 *
 * @param {object} data Payload taken from the `data` field of a server message.
 */
function handleObs(data) {
  const obs = data.observation || data;
  const done = data.done || obs.done;
  // Coerce so a non-numeric reward can never turn the totals into strings/NaN.
  const reward = Number(data.reward || obs.reward || 0) || 0;

  engineRewardAccum += reward;
  trainingReward += reward;

  // Detect piece change (the previous piece was placed).
  const newPieceName = obs.current_piece;
  const newLines = obs.total_lines || 0;
  if (currentPieceName !== null && newPieceName !== currentPieceName && !done) {
    piecesPlaced++;
    trainingReward += PIECE_PLACED_BONUS;
    // totalLines still holds the previous observation's value here.
    const linesCleared = newLines - totalLines;
    if (linesCleared > 0) {
      trainingReward += linesCleared * LINE_CLEAR_BONUS;
    }
    // A new piece starts with a fresh action budget.
    pieceActionCount = 0;
    pieceActionLog = [];
    document.getElementById('forced-drop-msg').textContent = '';
  }
  currentPieceName = newPieceName;
  document.getElementById('piece-name').textContent = newPieceName || '?';
  totalLines = newLines;
  lastObs = obs;

  // Board: only lines starting with '|' are rows; the COLS characters after
  // that border are cells ('#' placed, '@' current piece, anything else empty).
  if (typeof obs.board === 'string' && obs.board) {
    const lines = obs.board.split('\n').filter(l => l.startsWith('|'));
    const cells = boardEl.children;
    for (let r = 0; r < ROWS; r++) {
      const chars = (lines[r] || '').slice(1, 1 + COLS);
      for (let c = 0; c < COLS; c++) {
        const ch = chars[c] || '.';
        const cell = cells[r * COLS + c];
        if (ch === '#') cell.className = 'cell cell-placed';
        else if (ch === '@') cell.className = 'cell cell-current';
        else cell.className = 'cell cell-empty';
      }
    }
  }

  // Stats
  document.getElementById('score').textContent = obs.score || 0;
  document.getElementById('height').textContent = obs.max_height || 0;
  document.getElementById('holes').textContent = obs.holes || 0;

  // Next piece
  if (obs.next_piece_shape) renderNextPiece(obs.next_piece_shape);

  // Decide on a forced drop before redrawing so the penalty is visible at
  // once. The step cap takes precedence: no extra step beyond the budget.
  const stepCapReached = totalSteps >= MAX_STEPS_PER_GAME;
  const forceDrop = !done && !gameOver && !stepCapReached &&
    pieceActionCount >= MAX_ACTIONS_PER_PIECE;
  if (forceDrop) {
    trainingReward += NO_PLACE_PENALTY;
    document.getElementById('forced-drop-msg').textContent = 'FORCED DROP (-10 penalty)';
  }

  updateUI();

  if (forceDrop) {
    // Auto-send the drop; it counts as a game step but starts a new piece budget.
    pieceActionCount = 0;
    pieceActionLog = [];
    totalSteps++;
    ws.send(JSON.stringify({type: 'step', data: {action: 'drop', metadata: {}}}));
    return;
  }

  // Check game end
  if (done) {
    gameOver = true;
    setStatus('gameover', 'GAME OVER — Steps: ' + totalSteps + ' Reward: ' + trainingReward.toFixed(1) + ' (N)');
  } else if (stepCapReached) {
    episodeDone = true;
    setStatus('complete', 'MAX STEPS — Pieces: ' + piecesPlaced + ' Reward: ' + trainingReward.toFixed(1) + ' (N)');
  }
}
/**
 * Redraw the 4x4 "next piece" preview.
 * @param {string} shape Newline-separated rows; a '#' marks a filled cell,
 *                       any other or missing character is drawn empty.
 */
function renderNextPiece(shape) {
  const SIDE = 4;
  const preview = document.getElementById('next-piece');
  preview.innerHTML = '';
  const shapeRows = shape.split('\n');
  for (let i = 0; i < SIDE * SIDE; i++) {
    const rowText = shapeRows[Math.floor(i / SIDE)] || '';
    const symbol = rowText[i % SIDE] || '.';
    const previewCell = document.createElement('div');
    previewCell.className = symbol === '#' ? 'next-cell filled' : 'next-cell';
    preview.appendChild(previewCell);
  }
}
// Keyboard shortcuts: N starts a new game; keys listed in KEY_TO_ACTION send
// the mapped action. Combinations with Ctrl / Cmd / Alt are left to the
// browser so shortcuts such as Ctrl+R (reload), Ctrl+C (copy) or Ctrl+W are
// neither blocked by preventDefault() nor misread as game actions.
document.addEventListener('keydown', (e) => {
  if (e.ctrlKey || e.metaKey || e.altKey) return;
  const key = e.key.toLowerCase();
  if (key === 'n') {
    // Ignore auto-repeat so holding N does not flood the server with resets.
    if (!e.repeat) doReset();
    return;
  }
  const action = KEY_TO_ACTION[key];
  if (action) { e.preventDefault(); send(action); }
});
// Open the WebSocket once the handlers above are in place.
connect();
</script>
</body>
</html>