thomas-schweich's picture
download
raw
47.4 kB
{
"next_trial_id": 11,
"trials": {
"0": {
"trial_id": 0,
"strategy": "bottleneck",
"params": {
"elo_min": 1800,
"elo_max": 1900,
"max_games": 4000,
"val_games": 500,
"min_ply": 10,
"total_steps": 100,
"batch_size": 64,
"lr": 0.0003,
"weight_decay": 0.0,
"warmup_frac": 0.05,
"warmup_steps": null,
"max_grad_norm": 1.0,
"patience": 9999,
"eval_interval": null,
"log_interval": 20,
"pause_after_steps": null,
"mate_boost": 0.0,
"no_outcome_token": false,
"discard_ply_limit": false,
"amp_dtype": "float16",
"no_compile": false,
"sdpa_math": true,
"num_workers": 0,
"device": "cuda",
"log_dir": "/workspace/logs/trial_0000",
"hf_repo": null,
"local_checkpoints": true,
"resume": null,
"wandb": false,
"cache_dir": "/dev/shm/pawn_cache",
"run_type": "adapter",
"strategy": "bottleneck",
"checkpoint": "thomas-schweich/pawn-base",
"pgn": "thomas-schweich/pawn-lichess-full",
"adapter_layers": null,
"bottleneck_dim": 16,
"no_adapt_attn": false,
"no_adapt_ffn": false,
"lora_rank": null,
"lora_targets": null,
"lora_ffn": false,
"density": null,
"sparse_targets": null,
"sparse_ffn": false,
"use_output_film": false,
"rosa_mode": null,
"rosa_warmup_steps": 128,
"mask_samples": 32,
"grad_alpha": 2,
"unfreeze_layers": null,
"d_model": null,
"n_layers": null,
"n_heads": null,
"epochs": 9999,
"val_every": 9999
},
"cli_command": [
"python3",
"/opt/pawn/scripts/train.py",
"--config",
"/workspace/logs/trial_0000/run_config.json"
],
"config": {
"elo_min": 1800,
"elo_max": 1900,
"max_games": 4000,
"val_games": 500,
"min_ply": 10,
"total_steps": 100,
"batch_size": 64,
"lr": 0.0003,
"weight_decay": 0.0,
"warmup_frac": 0.05,
"warmup_steps": null,
"max_grad_norm": 1.0,
"patience": 9999,
"eval_interval": null,
"log_interval": 20,
"pause_after_steps": null,
"mate_boost": 0.0,
"no_outcome_token": false,
"discard_ply_limit": false,
"amp_dtype": "float16",
"no_compile": false,
"sdpa_math": true,
"num_workers": 0,
"device": "cuda",
"log_dir": "/workspace/logs/trial_0000",
"hf_repo": null,
"local_checkpoints": true,
"resume": null,
"wandb": false,
"cache_dir": "/dev/shm/pawn_cache",
"run_type": "adapter",
"strategy": "bottleneck",
"checkpoint": "thomas-schweich/pawn-base",
"pgn": "thomas-schweich/pawn-lichess-full",
"adapter_layers": null,
"bottleneck_dim": 16,
"no_adapt_attn": false,
"no_adapt_ffn": false,
"lora_rank": null,
"lora_targets": null,
"lora_ffn": false,
"density": null,
"sparse_targets": null,
"sparse_ffn": false,
"use_output_film": false,
"rosa_mode": null,
"rosa_warmup_steps": 128,
"mask_samples": 32,
"grad_alpha": 2,
"unfreeze_layers": null,
"d_model": null,
"n_layers": null,
"n_heads": null,
"epochs": 9999,
"val_every": 9999
},
"status": "completed",
"pid": 60913,
"gpu_id": 0,
"start_time": 1775346146.9661582,
"end_time": 1775346192.2253375,
"current_step": 100,
"total_steps": 100,
"steps_per_sec": 2.499625056241564,
"last_train_loss": 2.6580834102494304,
"last_train_acc": 0.2578537154243756,
"best_val_loss": 2.6815053782271594,
"best_accuracy": 0.25373134694665184,
"actual_param_count": 262144,
"log_path": "/workspace/sweep_results/trial_0000.log",
"run_dir": "/workspace/logs/trial_0000/bottleneck_20260404_234228_keen-bison",
"optuna_number": null,
"notes": "MCP smoke test: 256k bottleneck, 100 steps, MATH+compile+fp16",
"tags": [
"mcp-test",
"bottleneck-small"
],
"eta_seconds": null,
"eta_human": "?",
"elapsed_human": "17h18m"
},
"1": {
"trial_id": 1,
"strategy": "bottleneck",
"params": {
"elo_min": 1800,
"elo_max": 1900,
"max_games": 4000,
"val_games": 500,
"min_ply": 10,
"total_steps": 30,
"batch_size": 64,
"lr": 0.0003,
"weight_decay": 0.0,
"warmup_frac": 0.05,
"warmup_steps": null,
"max_grad_norm": 1.0,
"patience": 9999,
"eval_interval": null,
"log_interval": 20,
"pause_after_steps": null,
"mate_boost": 0.0,
"no_outcome_token": false,
"discard_ply_limit": false,
"amp_dtype": "float16",
"no_compile": false,
"sdpa_math": true,
"num_workers": 0,
"device": "cuda",
"log_dir": "/workspace/logs/trial_0000",
"hf_repo": null,
"local_checkpoints": true,
"resume": "/workspace/logs/trial_0000/bottleneck_20260404_234228_keen-bison/checkpoints/best",
"wandb": false,
"cache_dir": "/dev/shm/pawn_cache",
"run_type": "adapter",
"strategy": "bottleneck",
"checkpoint": "thomas-schweich/pawn-base",
"pgn": "thomas-schweich/pawn-lichess-full",
"adapter_layers": null,
"bottleneck_dim": 16,
"no_adapt_attn": false,
"no_adapt_ffn": false,
"lora_rank": null,
"lora_targets": null,
"lora_ffn": false,
"density": null,
"sparse_targets": null,
"sparse_ffn": false,
"use_output_film": false,
"rosa_mode": null,
"rosa_warmup_steps": 128,
"mask_samples": 32,
"grad_alpha": 2,
"unfreeze_layers": null,
"d_model": null,
"n_layers": null,
"n_heads": null,
"epochs": 9999,
"val_every": 9999
},
"cli_command": [
"python3",
"/opt/pawn/scripts/train.py",
"--config",
"/workspace/logs/trial_0001/run_config.json"
],
"config": {
"elo_min": 1800,
"elo_max": 1900,
"max_games": 4000,
"val_games": 500,
"min_ply": 10,
"total_steps": 30,
"batch_size": 64,
"lr": 0.0003,
"weight_decay": 0.0,
"warmup_frac": 0.05,
"warmup_steps": null,
"max_grad_norm": 1.0,
"patience": 9999,
"eval_interval": null,
"log_interval": 20,
"pause_after_steps": null,
"mate_boost": 0.0,
"no_outcome_token": false,
"discard_ply_limit": false,
"amp_dtype": "float16",
"no_compile": false,
"sdpa_math": true,
"num_workers": 0,
"device": "cuda",
"log_dir": "/workspace/logs/trial_0000",
"hf_repo": null,
"local_checkpoints": true,
"resume": "/workspace/logs/trial_0000/bottleneck_20260404_234228_keen-bison/checkpoints/best",
"wandb": false,
"cache_dir": "/dev/shm/pawn_cache",
"run_type": "adapter",
"strategy": "bottleneck",
"checkpoint": "thomas-schweich/pawn-base",
"pgn": "thomas-schweich/pawn-lichess-full",
"adapter_layers": null,
"bottleneck_dim": 16,
"no_adapt_attn": false,
"no_adapt_ffn": false,
"lora_rank": null,
"lora_targets": null,
"lora_ffn": false,
"density": null,
"sparse_targets": null,
"sparse_ffn": false,
"use_output_film": false,
"rosa_mode": null,
"rosa_warmup_steps": 128,
"mask_samples": 32,
"grad_alpha": 2,
"unfreeze_layers": null,
"d_model": null,
"n_layers": null,
"n_heads": null,
"epochs": 9999,
"val_every": 9999
},
"status": "killed",
"pid": 61357,
"gpu_id": 0,
"start_time": 1775346206.78868,
"end_time": 1775346209.3660913,
"current_step": 0,
"total_steps": 30,
"steps_per_sec": 0.0,
"last_train_loss": null,
"last_train_acc": null,
"best_val_loss": null,
"best_accuracy": null,
"actual_param_count": null,
"log_path": "/workspace/sweep_results/trial_0001.log",
"run_dir": null,
"optuna_number": null,
"notes": "",
"tags": [
"mcp-test",
"bottleneck-small"
],
"eta_seconds": null,
"eta_human": "?",
"elapsed_human": "17h17m"
},
"2": {
"trial_id": 2,
"strategy": "bottleneck",
"params": {
"elo_min": 1800,
"elo_max": 1900,
"max_games": 3500000,
"val_games": 50000,
"min_ply": 10,
"total_steps": 200000,
"batch_size": 256,
"lr": 0.0003,
"weight_decay": 0.0,
"warmup_frac": 0.02,
"warmup_steps": null,
"max_grad_norm": 1.0,
"patience": 9999,
"eval_interval": 2500,
"log_interval": 50,
"pause_after_steps": 20000,
"mate_boost": 0.0,
"no_outcome_token": false,
"discard_ply_limit": false,
"amp_dtype": "bfloat16",
"no_compile": false,
"sdpa_math": true,
"num_workers": 4,
"device": "cuda",
"log_dir": "/workspace/logs",
"hf_repo": null,
"local_checkpoints": true,
"resume": null,
"wandb": false,
"cache_dir": "/dev/shm/pawn_cache",
"run_type": "adapter",
"strategy": "bottleneck",
"checkpoint": "thomas-schweich/pawn-base",
"pgn": "thomas-schweich/pawn-lichess-full",
"adapter_layers": "4,5,6,7",
"bottleneck_dim": 2440,
"no_adapt_attn": false,
"no_adapt_ffn": false,
"lora_rank": null,
"lora_targets": null,
"lora_ffn": false,
"density": null,
"sparse_targets": null,
"sparse_ffn": false,
"use_output_film": false,
"rosa_mode": null,
"rosa_warmup_steps": 128,
"mask_samples": 32,
"grad_alpha": 2,
"unfreeze_layers": null,
"d_model": null,
"n_layers": null,
"n_heads": null,
"epochs": 9999,
"val_every": 9999
},
"cli_command": [
"python3",
"/opt/pawn/scripts/train.py",
"--config",
"/workspace/logs/trial_0002/run_config.json"
],
"config": {
"elo_min": 1800,
"elo_max": 1900,
"max_games": 3500000,
"val_games": 50000,
"min_ply": 10,
"total_steps": 200000,
"batch_size": 256,
"lr": 0.0003,
"weight_decay": 0.0,
"warmup_frac": 0.02,
"warmup_steps": null,
"max_grad_norm": 1.0,
"patience": 9999,
"eval_interval": 2500,
"log_interval": 50,
"pause_after_steps": 20000,
"mate_boost": 0.0,
"no_outcome_token": false,
"discard_ply_limit": false,
"amp_dtype": "bfloat16",
"no_compile": false,
"sdpa_math": true,
"num_workers": 4,
"device": "cuda",
"log_dir": "/workspace/logs",
"hf_repo": null,
"local_checkpoints": true,
"resume": null,
"wandb": false,
"cache_dir": "/dev/shm/pawn_cache",
"run_type": "adapter",
"strategy": "bottleneck",
"checkpoint": "thomas-schweich/pawn-base",
"pgn": "thomas-schweich/pawn-lichess-full",
"adapter_layers": "4,5,6,7",
"bottleneck_dim": 2440,
"no_adapt_attn": false,
"no_adapt_ffn": false,
"lora_rank": null,
"lora_targets": null,
"lora_ffn": false,
"density": null,
"sparse_targets": null,
"sparse_ffn": false,
"use_output_film": false,
"rosa_mode": null,
"rosa_warmup_steps": 128,
"mask_samples": 32,
"grad_alpha": 2,
"unfreeze_layers": null,
"d_model": null,
"n_layers": null,
"n_heads": null,
"epochs": 9999,
"val_every": 9999
},
"status": "killed",
"pid": 71057,
"gpu_id": 0,
"start_time": 1775351012.9564307,
"end_time": 1775351080.0435607,
"current_step": 0,
"total_steps": 200000,
"steps_per_sec": 0.0,
"last_train_loss": null,
"last_train_acc": null,
"best_val_loss": null,
"best_accuracy": null,
"actual_param_count": null,
"log_path": "/workspace/sweep_results/trial_0002.log",
"run_dir": null,
"optuna_number": null,
"notes": "",
"tags": [
"18h-push",
"scaling",
"20M"
],
"eta_seconds": null,
"eta_human": "?",
"elapsed_human": "15h57m"
},
"3": {
"trial_id": 3,
"strategy": "bottleneck",
"params": {
"elo_min": 1800,
"elo_max": 1900,
"max_games": 30000000,
"val_games": 50000,
"min_ply": 10,
"total_steps": 200000,
"batch_size": 256,
"lr": 0.0003,
"weight_decay": 0.0,
"warmup_frac": 0.02,
"warmup_steps": null,
"max_grad_norm": 1.0,
"patience": 9999,
"eval_interval": 2500,
"log_interval": 50,
"pause_after_steps": 20000,
"mate_boost": 0.0,
"no_outcome_token": false,
"discard_ply_limit": false,
"amp_dtype": "bfloat16",
"no_compile": false,
"sdpa_math": true,
"num_workers": 4,
"device": "cuda",
"log_dir": "/workspace/logs",
"hf_repo": null,
"local_checkpoints": true,
"resume": null,
"wandb": false,
"cache_dir": "/dev/shm/pawn_cache",
"run_type": "adapter",
"strategy": "bottleneck",
"checkpoint": "thomas-schweich/pawn-base",
"pgn": "thomas-schweich/pawn-lichess-full",
"adapter_layers": "4,5,6,7",
"bottleneck_dim": 2440,
"no_adapt_attn": false,
"no_adapt_ffn": false,
"lora_rank": null,
"lora_targets": null,
"lora_ffn": false,
"density": null,
"sparse_targets": null,
"sparse_ffn": false,
"use_output_film": false,
"rosa_mode": null,
"rosa_warmup_steps": 128,
"mask_samples": 32,
"grad_alpha": 2,
"unfreeze_layers": null,
"d_model": null,
"n_layers": null,
"n_heads": null,
"epochs": 9999,
"val_every": 9999
},
"cli_command": [
"python3",
"/opt/pawn/scripts/train.py",
"--config",
"/workspace/logs/trial_0003/run_config.json"
],
"config": {
"elo_min": 1800,
"elo_max": 1900,
"max_games": 30000000,
"val_games": 50000,
"min_ply": 10,
"total_steps": 200000,
"batch_size": 256,
"lr": 0.0003,
"weight_decay": 0.0,
"warmup_frac": 0.02,
"warmup_steps": null,
"max_grad_norm": 1.0,
"patience": 9999,
"eval_interval": 2500,
"log_interval": 50,
"pause_after_steps": 20000,
"mate_boost": 0.0,
"no_outcome_token": false,
"discard_ply_limit": false,
"amp_dtype": "bfloat16",
"no_compile": false,
"sdpa_math": true,
"num_workers": 4,
"device": "cuda",
"log_dir": "/workspace/logs",
"hf_repo": null,
"local_checkpoints": true,
"resume": null,
"wandb": false,
"cache_dir": "/dev/shm/pawn_cache",
"run_type": "adapter",
"strategy": "bottleneck",
"checkpoint": "thomas-schweich/pawn-base",
"pgn": "thomas-schweich/pawn-lichess-full",
"adapter_layers": "4,5,6,7",
"bottleneck_dim": 2440,
"no_adapt_attn": false,
"no_adapt_ffn": false,
"lora_rank": null,
"lora_targets": null,
"lora_ffn": false,
"density": null,
"sparse_targets": null,
"sparse_ffn": false,
"use_output_film": false,
"rosa_mode": null,
"rosa_warmup_steps": 128,
"mask_samples": 32,
"grad_alpha": 2,
"unfreeze_layers": null,
"d_model": null,
"n_layers": null,
"n_heads": null,
"epochs": 9999,
"val_every": 9999
},
"status": "killed",
"pid": 72205,
"gpu_id": 0,
"start_time": 1775351111.2472177,
"end_time": 1775351835.388752,
"current_step": 0,
"total_steps": 200000,
"steps_per_sec": 0.0,
"last_train_loss": null,
"last_train_acc": null,
"best_val_loss": null,
"best_accuracy": null,
"actual_param_count": null,
"log_path": "/workspace/sweep_results/trial_0003.log",
"run_dir": null,
"optuna_number": null,
"notes": "",
"tags": [
"18h-push",
"scaling",
"20M",
"full-data"
],
"eta_seconds": null,
"eta_human": "?",
"elapsed_human": "15h55m"
},
"4": {
"trial_id": 4,
"strategy": "bottleneck",
"params": {
"elo_min": 1800,
"elo_max": 1900,
"max_games": 30000000,
"val_games": 50000,
"min_ply": 10,
"total_steps": 200000,
"batch_size": 256,
"lr": 0.0003,
"weight_decay": 0.0,
"warmup_frac": 0.02,
"warmup_steps": null,
"max_grad_norm": 1.0,
"patience": 9999,
"eval_interval": 2500,
"log_interval": 50,
"pause_after_steps": 15000,
"mate_boost": 0.0,
"no_outcome_token": false,
"discard_ply_limit": false,
"amp_dtype": "bfloat16",
"no_compile": true,
"sdpa_math": true,
"num_workers": 4,
"device": "cuda",
"log_dir": "/workspace/logs",
"hf_repo": null,
"local_checkpoints": true,
"resume": null,
"wandb": false,
"cache_dir": "/dev/shm/pawn_cache",
"run_type": "adapter",
"strategy": "bottleneck",
"checkpoint": "thomas-schweich/pawn-base",
"pgn": "thomas-schweich/pawn-lichess-full",
"adapter_layers": "4,5,6,7",
"bottleneck_dim": 2440,
"no_adapt_attn": false,
"no_adapt_ffn": false,
"lora_rank": null,
"lora_targets": null,
"lora_ffn": false,
"density": null,
"sparse_targets": null,
"sparse_ffn": false,
"use_output_film": false,
"rosa_mode": null,
"rosa_warmup_steps": 128,
"mask_samples": 32,
"grad_alpha": 2,
"unfreeze_layers": null,
"d_model": null,
"n_layers": null,
"n_heads": null,
"epochs": 9999,
"val_every": 9999
},
"cli_command": [
"python3",
"/opt/pawn/scripts/train.py",
"--config",
"/workspace/logs/trial_0004/run_config.json"
],
"config": {
"elo_min": 1800,
"elo_max": 1900,
"max_games": 30000000,
"val_games": 50000,
"min_ply": 10,
"total_steps": 200000,
"batch_size": 256,
"lr": 0.0003,
"weight_decay": 0.0,
"warmup_frac": 0.02,
"warmup_steps": null,
"max_grad_norm": 1.0,
"patience": 9999,
"eval_interval": 2500,
"log_interval": 50,
"pause_after_steps": 15000,
"mate_boost": 0.0,
"no_outcome_token": false,
"discard_ply_limit": false,
"amp_dtype": "bfloat16",
"no_compile": true,
"sdpa_math": true,
"num_workers": 4,
"device": "cuda",
"log_dir": "/workspace/logs",
"hf_repo": null,
"local_checkpoints": true,
"resume": null,
"wandb": false,
"cache_dir": "/dev/shm/pawn_cache",
"run_type": "adapter",
"strategy": "bottleneck",
"checkpoint": "thomas-schweich/pawn-base",
"pgn": "thomas-schweich/pawn-lichess-full",
"adapter_layers": "4,5,6,7",
"bottleneck_dim": 2440,
"no_adapt_attn": false,
"no_adapt_ffn": false,
"lora_rank": null,
"lora_targets": null,
"lora_ffn": false,
"density": null,
"sparse_targets": null,
"sparse_ffn": false,
"use_output_film": false,
"rosa_mode": null,
"rosa_warmup_steps": 128,
"mask_samples": 32,
"grad_alpha": 2,
"unfreeze_layers": null,
"d_model": null,
"n_layers": null,
"n_heads": null,
"epochs": 9999,
"val_every": 9999
},
"status": "killed",
"pid": 73543,
"gpu_id": 0,
"start_time": 1775351848.6815178,
"end_time": 1775352133.8517451,
"current_step": 0,
"total_steps": 200000,
"steps_per_sec": 0.0,
"last_train_loss": null,
"last_train_acc": null,
"best_val_loss": null,
"best_accuracy": null,
"actual_param_count": null,
"log_path": "/workspace/sweep_results/trial_0004.log",
"run_dir": null,
"optuna_number": null,
"notes": "",
"tags": [
"18h-push",
"scaling",
"20M",
"eager"
],
"eta_seconds": null,
"eta_human": "?",
"elapsed_human": "15h43m"
},
"5": {
"trial_id": 5,
"strategy": "bottleneck",
"params": {
"elo_min": 1800,
"elo_max": 1900,
"max_games": 30000000,
"val_games": 50000,
"min_ply": 10,
"total_steps": 200000,
"batch_size": 256,
"lr": 0.0003,
"weight_decay": 0.0,
"warmup_frac": 0.02,
"warmup_steps": null,
"max_grad_norm": 1.0,
"patience": 9999,
"eval_interval": 2500,
"log_interval": 50,
"pause_after_steps": 15000,
"mate_boost": 0.0,
"no_outcome_token": false,
"discard_ply_limit": false,
"amp_dtype": "bfloat16",
"no_compile": true,
"sdpa_math": true,
"num_workers": 2,
"device": "cuda",
"log_dir": "/workspace/logs",
"hf_repo": null,
"local_checkpoints": true,
"resume": null,
"wandb": false,
"cache_dir": "/dev/shm/pawn_cache",
"run_type": "adapter",
"strategy": "bottleneck",
"checkpoint": "thomas-schweich/pawn-base",
"pgn": "thomas-schweich/pawn-lichess-full",
"adapter_layers": "4,5,6,7",
"bottleneck_dim": 2440,
"no_adapt_attn": false,
"no_adapt_ffn": false,
"lora_rank": null,
"lora_targets": null,
"lora_ffn": false,
"density": null,
"sparse_targets": null,
"sparse_ffn": false,
"use_output_film": false,
"rosa_mode": null,
"rosa_warmup_steps": 128,
"mask_samples": 32,
"grad_alpha": 2,
"unfreeze_layers": null,
"d_model": null,
"n_layers": null,
"n_heads": null,
"epochs": 9999,
"val_every": 9999
},
"cli_command": [
"python3",
"/opt/pawn/scripts/train.py",
"--config",
"/workspace/logs/trial_0005/run_config.json"
],
"config": {
"elo_min": 1800,
"elo_max": 1900,
"max_games": 30000000,
"val_games": 50000,
"min_ply": 10,
"total_steps": 200000,
"batch_size": 256,
"lr": 0.0003,
"weight_decay": 0.0,
"warmup_frac": 0.02,
"warmup_steps": null,
"max_grad_norm": 1.0,
"patience": 9999,
"eval_interval": 2500,
"log_interval": 50,
"pause_after_steps": 15000,
"mate_boost": 0.0,
"no_outcome_token": false,
"discard_ply_limit": false,
"amp_dtype": "bfloat16",
"no_compile": true,
"sdpa_math": true,
"num_workers": 2,
"device": "cuda",
"log_dir": "/workspace/logs",
"hf_repo": null,
"local_checkpoints": true,
"resume": null,
"wandb": false,
"cache_dir": "/dev/shm/pawn_cache",
"run_type": "adapter",
"strategy": "bottleneck",
"checkpoint": "thomas-schweich/pawn-base",
"pgn": "thomas-schweich/pawn-lichess-full",
"adapter_layers": "4,5,6,7",
"bottleneck_dim": 2440,
"no_adapt_attn": false,
"no_adapt_ffn": false,
"lora_rank": null,
"lora_targets": null,
"lora_ffn": false,
"density": null,
"sparse_targets": null,
"sparse_ffn": false,
"use_output_film": false,
"rosa_mode": null,
"rosa_warmup_steps": 128,
"mask_samples": 32,
"grad_alpha": 2,
"unfreeze_layers": null,
"d_model": null,
"n_layers": null,
"n_heads": null,
"epochs": 9999,
"val_every": 9999
},
"status": "killed",
"pid": 74605,
"gpu_id": 0,
"start_time": 1775352154.8011737,
"end_time": 1775352288.1328025,
"current_step": 0,
"total_steps": 200000,
"steps_per_sec": 0.0,
"last_train_loss": null,
"last_train_acc": null,
"best_val_loss": null,
"best_accuracy": null,
"actual_param_count": null,
"log_path": "/workspace/sweep_results/trial_0005.log",
"run_dir": null,
"optuna_number": null,
"notes": "",
"tags": [
"18h-push",
"scaling",
"20M",
"workers2"
],
"eta_seconds": null,
"eta_human": "?",
"elapsed_human": "15h38m"
},
"6": {
"trial_id": 6,
"strategy": "bottleneck",
"params": {
"elo_min": 1800,
"elo_max": 1900,
"max_games": 30000000,
"val_games": 5000,
"min_ply": 10,
"total_steps": 100,
"batch_size": 64,
"lr": 0.0003,
"weight_decay": 0.0,
"warmup_frac": 0.02,
"warmup_steps": null,
"max_grad_norm": 1.0,
"patience": 9999,
"eval_interval": 2500,
"log_interval": 20,
"pause_after_steps": null,
"mate_boost": 0.0,
"no_outcome_token": false,
"discard_ply_limit": false,
"amp_dtype": "bfloat16",
"no_compile": true,
"sdpa_math": true,
"num_workers": 2,
"device": "cuda",
"log_dir": "/workspace/logs",
"hf_repo": null,
"local_checkpoints": true,
"resume": null,
"wandb": false,
"cache_dir": "/dev/shm/pawn_cache",
"run_type": "adapter",
"strategy": "bottleneck",
"checkpoint": "thomas-schweich/pawn-base",
"pgn": "thomas-schweich/pawn-lichess-full",
"adapter_layers": "4,5,6,7",
"bottleneck_dim": 2440,
"no_adapt_attn": false,
"no_adapt_ffn": false,
"lora_rank": null,
"lora_targets": null,
"lora_ffn": false,
"density": null,
"sparse_targets": null,
"sparse_ffn": false,
"use_output_film": false,
"rosa_mode": null,
"rosa_warmup_steps": 128,
"mask_samples": 32,
"grad_alpha": 2,
"unfreeze_layers": null,
"d_model": null,
"n_layers": null,
"n_heads": null,
"epochs": 9999,
"val_every": 9999
},
"cli_command": [
"python3",
"/opt/pawn/scripts/train.py",
"--config",
"/workspace/logs/trial_0006/run_config.json"
],
"config": {
"elo_min": 1800,
"elo_max": 1900,
"max_games": 30000000,
"val_games": 5000,
"min_ply": 10,
"total_steps": 100,
"batch_size": 64,
"lr": 0.0003,
"weight_decay": 0.0,
"warmup_frac": 0.02,
"warmup_steps": null,
"max_grad_norm": 1.0,
"patience": 9999,
"eval_interval": 2500,
"log_interval": 20,
"pause_after_steps": null,
"mate_boost": 0.0,
"no_outcome_token": false,
"discard_ply_limit": false,
"amp_dtype": "bfloat16",
"no_compile": true,
"sdpa_math": true,
"num_workers": 2,
"device": "cuda",
"log_dir": "/workspace/logs",
"hf_repo": null,
"local_checkpoints": true,
"resume": null,
"wandb": false,
"cache_dir": "/dev/shm/pawn_cache",
"run_type": "adapter",
"strategy": "bottleneck",
"checkpoint": "thomas-schweich/pawn-base",
"pgn": "thomas-schweich/pawn-lichess-full",
"adapter_layers": "4,5,6,7",
"bottleneck_dim": 2440,
"no_adapt_attn": false,
"no_adapt_ffn": false,
"lora_rank": null,
"lora_targets": null,
"lora_ffn": false,
"density": null,
"sparse_targets": null,
"sparse_ffn": false,
"use_output_film": false,
"rosa_mode": null,
"rosa_warmup_steps": 128,
"mask_samples": 32,
"grad_alpha": 2,
"unfreeze_layers": null,
"d_model": null,
"n_layers": null,
"n_heads": null,
"epochs": 9999,
"val_every": 9999
},
"status": "completed",
"pid": 75207,
"gpu_id": 0,
"start_time": 1775352312.4274437,
"end_time": 1775352342.5313256,
"current_step": 0,
"total_steps": 100,
"steps_per_sec": 0.0,
"last_train_loss": null,
"last_train_acc": null,
"best_val_loss": null,
"best_accuracy": null,
"actual_param_count": null,
"log_path": "/workspace/sweep_results/trial_0006.log",
"run_dir": null,
"optuna_number": null,
"notes": "",
"tags": [
"diagnostic",
"small-bs"
],
"eta_seconds": null,
"eta_human": "?",
"elapsed_human": "15h35m"
},
"7": {
"trial_id": 7,
"strategy": "bottleneck",
"params": {
"elo_min": 1800,
"elo_max": 1900,
"max_games": 30000000,
"val_games": 5000,
"min_ply": 10,
"total_steps": 100,
"batch_size": 128,
"lr": 0.0003,
"weight_decay": 0.0,
"warmup_frac": 0.02,
"warmup_steps": null,
"max_grad_norm": 1.0,
"patience": 9999,
"eval_interval": 2500,
"log_interval": 20,
"pause_after_steps": null,
"mate_boost": 0.0,
"no_outcome_token": false,
"discard_ply_limit": false,
"amp_dtype": "bfloat16",
"no_compile": true,
"sdpa_math": true,
"num_workers": 2,
"device": "cuda",
"log_dir": "/workspace/logs",
"hf_repo": null,
"local_checkpoints": true,
"resume": null,
"wandb": false,
"cache_dir": "/dev/shm/pawn_cache",
"run_type": "adapter",
"strategy": "bottleneck",
"checkpoint": "thomas-schweich/pawn-base",
"pgn": "thomas-schweich/pawn-lichess-full",
"adapter_layers": "4,5,6,7",
"bottleneck_dim": 2440,
"no_adapt_attn": false,
"no_adapt_ffn": false,
"lora_rank": null,
"lora_targets": null,
"lora_ffn": false,
"density": null,
"sparse_targets": null,
"sparse_ffn": false,
"use_output_film": false,
"rosa_mode": null,
"rosa_warmup_steps": 128,
"mask_samples": 32,
"grad_alpha": 2,
"unfreeze_layers": null,
"d_model": null,
"n_layers": null,
"n_heads": null,
"epochs": 9999,
"val_every": 9999
},
"cli_command": [
"python3",
"/opt/pawn/scripts/train.py",
"--config",
"/workspace/logs/trial_0007/run_config.json"
],
"config": {
"elo_min": 1800,
"elo_max": 1900,
"max_games": 30000000,
"val_games": 5000,
"min_ply": 10,
"total_steps": 100,
"batch_size": 128,
"lr": 0.0003,
"weight_decay": 0.0,
"warmup_frac": 0.02,
"warmup_steps": null,
"max_grad_norm": 1.0,
"patience": 9999,
"eval_interval": 2500,
"log_interval": 20,
"pause_after_steps": null,
"mate_boost": 0.0,
"no_outcome_token": false,
"discard_ply_limit": false,
"amp_dtype": "bfloat16",
"no_compile": true,
"sdpa_math": true,
"num_workers": 2,
"device": "cuda",
"log_dir": "/workspace/logs",
"hf_repo": null,
"local_checkpoints": true,
"resume": null,
"wandb": false,
"cache_dir": "/dev/shm/pawn_cache",
"run_type": "adapter",
"strategy": "bottleneck",
"checkpoint": "thomas-schweich/pawn-base",
"pgn": "thomas-schweich/pawn-lichess-full",
"adapter_layers": "4,5,6,7",
"bottleneck_dim": 2440,
"no_adapt_attn": false,
"no_adapt_ffn": false,
"lora_rank": null,
"lora_targets": null,
"lora_ffn": false,
"density": null,
"sparse_targets": null,
"sparse_ffn": false,
"use_output_film": false,
"rosa_mode": null,
"rosa_warmup_steps": 128,
"mask_samples": 32,
"grad_alpha": 2,
"unfreeze_layers": null,
"d_model": null,
"n_layers": null,
"n_heads": null,
"epochs": 9999,
"val_every": 9999
},
"status": "completed",
"pid": 75775,
"gpu_id": 0,
"start_time": 1775352384.985535,
"end_time": 1775352420.1244109,
"current_step": 0,
"total_steps": 100,
"steps_per_sec": 0.0,
"last_train_loss": null,
"last_train_acc": null,
"best_val_loss": null,
"best_accuracy": null,
"actual_param_count": null,
"log_path": "/workspace/sweep_results/trial_0007.log",
"run_dir": null,
"optuna_number": null,
"notes": "",
"tags": [
"diagnostic",
"bs128"
],
"eta_seconds": null,
"eta_human": "?",
"elapsed_human": "15h34m"
},
"8": {
"trial_id": 8,
"strategy": "bottleneck",
"params": {
"elo_min": 1800,
"elo_max": 1900,
"max_games": 30000000,
"val_games": 5000,
"min_ply": 10,
"total_steps": 50,
"batch_size": 256,
"lr": 0.0003,
"weight_decay": 0.0,
"warmup_frac": 0.02,
"warmup_steps": null,
"max_grad_norm": 1.0,
"patience": 9999,
"eval_interval": 2500,
"log_interval": 10,
"pause_after_steps": null,
"mate_boost": 0.0,
"no_outcome_token": false,
"discard_ply_limit": false,
"amp_dtype": "bfloat16",
"no_compile": true,
"sdpa_math": true,
"num_workers": 2,
"device": "cuda",
"log_dir": "/workspace/logs",
"hf_repo": null,
"local_checkpoints": true,
"resume": null,
"wandb": false,
"cache_dir": "/dev/shm/pawn_cache",
"run_type": "adapter",
"strategy": "bottleneck",
"checkpoint": "thomas-schweich/pawn-base",
"pgn": "thomas-schweich/pawn-lichess-full",
"adapter_layers": "4,5,6,7",
"bottleneck_dim": 2440,
"no_adapt_attn": false,
"no_adapt_ffn": false,
"lora_rank": null,
"lora_targets": null,
"lora_ffn": false,
"density": null,
"sparse_targets": null,
"sparse_ffn": false,
"use_output_film": false,
"rosa_mode": null,
"rosa_warmup_steps": 128,
"mask_samples": 32,
"grad_alpha": 2,
"unfreeze_layers": null,
"d_model": null,
"n_layers": null,
"n_heads": null,
"epochs": 9999,
"val_every": 9999
},
"cli_command": [
"python3",
"/opt/pawn/scripts/train.py",
"--config",
"/workspace/logs/trial_0008/run_config.json"
],
"config": {
"elo_min": 1800,
"elo_max": 1900,
"max_games": 30000000,
"val_games": 5000,
"min_ply": 10,
"total_steps": 50,
"batch_size": 256,
"lr": 0.0003,
"weight_decay": 0.0,
"warmup_frac": 0.02,
"warmup_steps": null,
"max_grad_norm": 1.0,
"patience": 9999,
"eval_interval": 2500,
"log_interval": 10,
"pause_after_steps": null,
"mate_boost": 0.0,
"no_outcome_token": false,
"discard_ply_limit": false,
"amp_dtype": "bfloat16",
"no_compile": true,
"sdpa_math": true,
"num_workers": 2,
"device": "cuda",
"log_dir": "/workspace/logs",
"hf_repo": null,
"local_checkpoints": true,
"resume": null,
"wandb": false,
"cache_dir": "/dev/shm/pawn_cache",
"run_type": "adapter",
"strategy": "bottleneck",
"checkpoint": "thomas-schweich/pawn-base",
"pgn": "thomas-schweich/pawn-lichess-full",
"adapter_layers": "4,5,6,7",
"bottleneck_dim": 2440,
"no_adapt_attn": false,
"no_adapt_ffn": false,
"lora_rank": null,
"lora_targets": null,
"lora_ffn": false,
"density": null,
"sparse_targets": null,
"sparse_ffn": false,
"use_output_film": false,
"rosa_mode": null,
"rosa_warmup_steps": 128,
"mask_samples": 32,
"grad_alpha": 2,
"unfreeze_layers": null,
"d_model": null,
"n_layers": null,
"n_heads": null,
"epochs": 9999,
"val_every": 9999
},
"status": "completed",
"pid": 76342,
"gpu_id": 0,
"start_time": 1775352440.4786208,
"end_time": 1775352475.6319711,
"current_step": 0,
"total_steps": 50,
"steps_per_sec": 0.0,
"last_train_loss": null,
"last_train_acc": null,
"best_val_loss": null,
"best_accuracy": null,
"actual_param_count": null,
"log_path": "/workspace/sweep_results/trial_0008.log",
"run_dir": null,
"optuna_number": null,
"notes": "",
"tags": [
"diagnostic",
"bs256"
],
"eta_seconds": null,
"eta_human": "?",
"elapsed_human": "15h33m"
},
"9": {
"trial_id": 9,
"strategy": "bottleneck",
"params": {
"elo_min": 1800,
"elo_max": 1900,
"max_games": 30000000,
"val_games": 50000,
"min_ply": 10,
"total_steps": 200000,
"batch_size": 256,
"lr": 0.0003,
"weight_decay": 0.0,
"warmup_frac": 0.02,
"warmup_steps": null,
"max_grad_norm": 1.0,
"patience": 9999,
"eval_interval": 2500,
"log_interval": 10,
"pause_after_steps": 15000,
"mate_boost": 0.0,
"no_outcome_token": false,
"discard_ply_limit": false,
"amp_dtype": "bfloat16",
"no_compile": true,
"sdpa_math": true,
"num_workers": 2,
"device": "cuda",
"log_dir": "/workspace/logs",
"hf_repo": null,
"local_checkpoints": true,
"resume": null,
"wandb": false,
"cache_dir": "/dev/shm/pawn_cache",
"run_type": "adapter",
"strategy": "bottleneck",
"checkpoint": "thomas-schweich/pawn-base",
"pgn": "thomas-schweich/pawn-lichess-full",
"adapter_layers": "4,5,6,7",
"bottleneck_dim": 2440,
"no_adapt_attn": false,
"no_adapt_ffn": false,
"lora_rank": null,
"lora_targets": null,
"lora_ffn": false,
"density": null,
"sparse_targets": null,
"sparse_ffn": false,
"use_output_film": false,
"rosa_mode": null,
"rosa_warmup_steps": 128,
"mask_samples": 32,
"grad_alpha": 2,
"unfreeze_layers": null,
"d_model": null,
"n_layers": null,
"n_heads": null,
"epochs": 9999,
"val_every": 9999
},
"cli_command": [
"python3",
"/opt/pawn/scripts/train.py",
"--config",
"/workspace/logs/trial_0009/run_config.json"
],
"config": {
"elo_min": 1800,
"elo_max": 1900,
"max_games": 30000000,
"val_games": 50000,
"min_ply": 10,
"total_steps": 200000,
"batch_size": 256,
"lr": 0.0003,
"weight_decay": 0.0,
"warmup_frac": 0.02,
"warmup_steps": null,
"max_grad_norm": 1.0,
"patience": 9999,
"eval_interval": 2500,
"log_interval": 10,
"pause_after_steps": 15000,
"mate_boost": 0.0,
"no_outcome_token": false,
"discard_ply_limit": false,
"amp_dtype": "bfloat16",
"no_compile": true,
"sdpa_math": true,
"num_workers": 2,
"device": "cuda",
"log_dir": "/workspace/logs",
"hf_repo": null,
"local_checkpoints": true,
"resume": null,
"wandb": false,
"cache_dir": "/dev/shm/pawn_cache",
"run_type": "adapter",
"strategy": "bottleneck",
"checkpoint": "thomas-schweich/pawn-base",
"pgn": "thomas-schweich/pawn-lichess-full",
"adapter_layers": "4,5,6,7",
"bottleneck_dim": 2440,
"no_adapt_attn": false,
"no_adapt_ffn": false,
"lora_rank": null,
"lora_targets": null,
"lora_ffn": false,
"density": null,
"sparse_targets": null,
"sparse_ffn": false,
"use_output_film": false,
"rosa_mode": null,
"rosa_warmup_steps": 128,
"mask_samples": 32,
"grad_alpha": 2,
"unfreeze_layers": null,
"d_model": null,
"n_layers": null,
"n_heads": null,
"epochs": 9999,
"val_every": 9999
},
"status": "killed",
"pid": 77221,
"gpu_id": 0,
"start_time": 1775352573.273321,
"end_time": 1775352817.925155,
"current_step": 0,
"total_steps": 200000,
"steps_per_sec": 0.0,
"last_train_loss": null,
"last_train_acc": null,
"best_val_loss": null,
"best_accuracy": null,
"actual_param_count": null,
"log_path": "/workspace/sweep_results/trial_0009.log",
"run_dir": null,
"optuna_number": null,
"notes": "",
"tags": [
"18h-push",
"scaling",
"20M",
"bs256"
],
"eta_seconds": null,
"eta_human": "?",
"elapsed_human": "15h31m"
},
"10": {
"trial_id": 10,
"strategy": "bottleneck",
"params": {
"elo_min": 1800,
"elo_max": 1900,
"max_games": 30000000,
"val_games": 50000,
"min_ply": 10,
"total_steps": 200000,
"batch_size": 256,
"lr": 0.0003,
"weight_decay": 0.0,
"warmup_frac": 0.02,
"warmup_steps": null,
"max_grad_norm": 1.0,
"patience": 9999,
"eval_interval": 2500,
"log_interval": 100,
"pause_after_steps": 15000,
"mate_boost": 0.0,
"no_outcome_token": false,
"discard_ply_limit": false,
"amp_dtype": "bfloat16",
"no_compile": true,
"sdpa_math": true,
"num_workers": 2,
"device": "cuda",
"log_dir": "/workspace/logs",
"hf_repo": null,
"local_checkpoints": true,
"resume": null,
"wandb": false,
"cache_dir": "/dev/shm/pawn_cache",
"run_type": "adapter",
"strategy": "bottleneck",
"checkpoint": "thomas-schweich/pawn-base",
"pgn": "thomas-schweich/pawn-lichess-full",
"adapter_layers": "4,5,6,7",
"bottleneck_dim": 2440,
"no_adapt_attn": false,
"no_adapt_ffn": false,
"lora_rank": null,
"lora_targets": null,
"lora_ffn": false,
"density": null,
"sparse_targets": null,
"sparse_ffn": false,
"use_output_film": false,
"rosa_mode": null,
"rosa_warmup_steps": 128,
"mask_samples": 32,
"grad_alpha": 2,
"unfreeze_layers": null,
"d_model": null,
"n_layers": null,
"n_heads": null,
"epochs": 9999,
"val_every": 9999
},
"cli_command": [
"python3",
"/opt/pawn/scripts/train.py",
"--config",
"/workspace/logs/trial_0010/run_config.json"
],
"config": {
"elo_min": 1800,
"elo_max": 1900,
"max_games": 30000000,
"val_games": 50000,
"min_ply": 10,
"total_steps": 200000,
"batch_size": 256,
"lr": 0.0003,
"weight_decay": 0.0,
"warmup_frac": 0.02,
"warmup_steps": null,
"max_grad_norm": 1.0,
"patience": 9999,
"eval_interval": 2500,
"log_interval": 100,
"pause_after_steps": 15000,
"mate_boost": 0.0,
"no_outcome_token": false,
"discard_ply_limit": false,
"amp_dtype": "bfloat16",
"no_compile": true,
"sdpa_math": true,
"num_workers": 2,
"device": "cuda",
"log_dir": "/workspace/logs",
"hf_repo": null,
"local_checkpoints": true,
"resume": null,
"wandb": false,
"cache_dir": "/dev/shm/pawn_cache",
"run_type": "adapter",
"strategy": "bottleneck",
"checkpoint": "thomas-schweich/pawn-base",
"pgn": "thomas-schweich/pawn-lichess-full",
"adapter_layers": "4,5,6,7",
"bottleneck_dim": 2440,
"no_adapt_attn": false,
"no_adapt_ffn": false,
"lora_rank": null,
"lora_targets": null,
"lora_ffn": false,
"density": null,
"sparse_targets": null,
"sparse_ffn": false,
"use_output_film": false,
"rosa_mode": null,
"rosa_warmup_steps": 128,
"mask_samples": 32,
"grad_alpha": 2,
"unfreeze_layers": null,
"d_model": null,
"n_layers": null,
"n_heads": null,
"epochs": 9999,
"val_every": 9999
},
"status": "completed",
"pid": 78502,
"gpu_id": 0,
"start_time": 1775352861.543256,
"end_time": 1775356243.7400186,
"current_step": 0,
"total_steps": 200000,
"steps_per_sec": 0.0,
"last_train_loss": null,
"last_train_acc": null,
"best_val_loss": null,
"best_accuracy": null,
"actual_param_count": null,
"log_path": "/workspace/sweep_results/trial_0010.log",
"run_dir": null,
"optuna_number": null,
"notes": "",
"tags": [
"18h-push",
"20M",
"bottleneck",
"scaling"
],
"eta_seconds": null,
"eta_human": "?",
"elapsed_human": "15h26m"
}
},
"event_seq": 27,
"start_time": 1775342775.318943,
"cost_per_hour": 2.49
}

Xet Storage Details

Size:
47.4 kB
·
Xet hash:
1953023a05efa261201d13ec6f0839a775e1f7d503b91f10a20f3e23754ccab3

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.